build administrativa
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include <libgen.h>
33
34 #include "cpp.h"
35 #include "parser.h"
36
37 #define PSI_CPP_SEARCH
38 #define PSI_CPP_PREDEF
39 #include "php_psi_predef.h"
40
41 #include "php_psi.h"
42
/* Macro definitions obtained by parsing psi_cpp_predef during module startup.
 * Filled once in PHP_MINIT_FUNCTION(psi_cpp), copied into every new
 * preprocessor's `defs` table by psi_cpp_init(), and consulted by
 * free_cpp_def() so that entries stored here are not freed by a single
 * preprocessor instance. Entries are released in PHP_MSHUTDOWN_FUNCTION(psi_cpp). */
43 HashTable psi_cpp_defaults;
44
45 PHP_MINIT_FUNCTION(psi_cpp);
46 PHP_MINIT_FUNCTION(psi_cpp)
47 {
48 struct psi_parser parser;
49 struct psi_parser_input *predef;
50
51 PSI_G(search_path) = pemalloc(strlen(PSI_G(directory)) + strlen(psi_cpp_search) + 1 + 1, 1);
52 sprintf(PSI_G(search_path), "%s:%s", PSI_G(directory), psi_cpp_search);
53
54 if (!psi_parser_init(&parser, psi_error_wrapper, PSI_SILENT)) {
55 return FAILURE;
56 }
57
58 if (!(predef = psi_parser_open_string(&parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
59 psi_parser_dtor(&parser);
60 return FAILURE;
61 }
62
63 if (!psi_parser_parse(&parser, predef)) {
64 psi_parser_input_free(&predef);
65 psi_parser_dtor(&parser);
66 return FAILURE;
67 }
68 psi_parser_input_free(&predef);
69
70 zend_hash_init(&psi_cpp_defaults, 0, NULL, NULL, 1);
71 zend_hash_copy(&psi_cpp_defaults, &parser.preproc->defs, NULL);
72
73 psi_parser_dtor(&parser);
74
75 return SUCCESS;
76 }
77
78 PHP_MSHUTDOWN_FUNCTION(psi_cpp);
79 PHP_MSHUTDOWN_FUNCTION(psi_cpp)
80 {
81 struct psi_cpp_macro_decl *macro;
82
83 ZEND_HASH_FOREACH_PTR(&psi_cpp_defaults, macro)
84 {
85 psi_cpp_macro_decl_free(&macro);
86 }
87 ZEND_HASH_FOREACH_END();
88
89 zend_hash_destroy(&psi_cpp_defaults);
90
91 return SUCCESS;
92 }
93
94 static void free_cpp_def(zval *p)
95 {
96 if (Z_TYPE_P(p) == IS_PTR) {
97 struct psi_cpp_macro_decl *macro = Z_PTR_P(p);
98
99 if (!zend_hash_exists(&psi_cpp_defaults, macro->token->text)) {
100 psi_cpp_macro_decl_free(&macro);
101 }
102 }
103 }
104
105 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
106 {
107 struct psi_cpp *cpp = pecalloc(1, sizeof(*cpp), 1);
108
109 cpp->parser = P;
110 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
111 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
112 zend_hash_copy(&cpp->defs, &psi_cpp_defaults, NULL);
113
114 return cpp;
115 }
116
/* Human readable directive names for debug output, indexed by the include
 * `flags` value passed to psi_cpp_include() / psi_cpp_has_include(). */
static char *include_flavor[] = {
	[0] = "include",
	[1] = "include next",
	[2] = "include once",
};
122
123 void psi_cpp_free(struct psi_cpp **cpp_ptr)
124 {
125 if (*cpp_ptr) {
126 struct psi_cpp *cpp = *cpp_ptr;
127
128 *cpp_ptr = NULL;
129 zend_hash_destroy(&cpp->defs);
130 zend_hash_destroy(&cpp->once);
131 free(cpp);
132 }
133 }
134
135 static bool psi_cpp_stage1(struct psi_cpp *cpp)
136 {
137 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
138
139 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
140
141 psi_cpp_tokiter_reset(cpp);
142 while (psi_cpp_tokiter_valid(cpp)) {
143 struct psi_token *token = psi_cpp_tokiter_current(cpp);
144
145 /* strip comments and attributes */
146 if (token->type == PSI_T_COMMENT
147 || token->type == PSI_T_CPP_ATTRIBUTE) {
148 psi_cpp_tokiter_del_cur(cpp, true);
149 continue;
150 }
151
152 /* line continuations */
153 if (token->type == PSI_T_EOL) {
154 if (esc) {
155 psi_cpp_tokiter_del_prev(cpp, true);
156 psi_cpp_tokiter_del_cur(cpp, true);
157 esc = false;
158 continue;
159 }
160 } else if (token->type == PSI_T_BSLASH) {
161 esc = !esc;
162 } else {
163 esc = false;
164 }
165
166 /* this whole turf is needed to distinct between:
167 * #define foo (1,2,3)
168 * #define foo(a,b,c)
169 */
170
171 if (token->type == PSI_T_WHITESPACE) {
172 if (name) {
173 name = false;
174 }
175 ws = true;
176 psi_cpp_tokiter_del_cur(cpp, true);
177 continue;
178 }
179
180 switch (token->type) {
181 case PSI_T_EOL:
182 eol = true;
183 break;
184 case PSI_T_HASH:
185 if (eol) {
186 hash = true;
187 eol = false;
188 }
189 break;
190 case PSI_T_DEFINE:
191 if (hash) {
192 define = true;
193 hash = false;
194 }
195 break;
196 case PSI_T_NAME:
197 if (define) {
198 name = true;
199 define = false;
200 }
201 break;
202 case PSI_T_LPAREN:
203 if (name) {
204 name = false;
205 if (!ws) {
206 /* mask special token for parser */
207 struct psi_token *no_ws = psi_token_copy(token);
208
209 no_ws->type = PSI_T_NO_WHITESPACE;
210 zend_string_release(no_ws->text);
211 no_ws->text = psi_string_init_interned("\xA0", 1, 1);
212 psi_cpp_tokiter_add(cpp, no_ws);
213 continue;
214 }
215 }
216 /* no break */
217 default:
218 name = define = hash = eol = false;
219 break;
220 }
221
222 ws = false;
223 psi_cpp_tokiter_add_cur(cpp);
224 psi_cpp_tokiter_next(cpp);
225 }
226
227 return true;
228 }
229
230 static bool psi_cpp_stage2(struct psi_cpp *cpp)
231 {
232 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
233 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
234
235 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
236
237 psi_cpp_tokiter_reset(cpp);
238 while (psi_cpp_tokiter_valid(cpp)) {
239 struct psi_token *current = psi_cpp_tokiter_current(cpp);
240
241 if (current->type == PSI_T_HASH) {
242 if (is_eol) {
243 do_cpp = true;
244 is_eol = false;
245 }
246 } else if (current->type == PSI_T_EOL) {
247 #if PSI_CPP_DEBUG
248 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
249 #endif
250 is_eol = true;
251 skip_all = false;
252 do_expansion = true;
253 if (!do_cpp) {
254 psi_cpp_tokiter_del_cur(cpp, true);
255 continue;
256 }
257 } else {
258 is_eol = false;
259
260 if (do_cpp) {
261 switch (current->type) {
262 case PSI_T_DEFINE:
263 #if PSI_CPP_DEBUG
264 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
265 #endif
266 do_expansion = false;
267 skip_all = true;
268 break;
269 case PSI_T_DEFINED:
270 skip_paren = true;
271 /* no break */
272 case PSI_T_IFDEF:
273 case PSI_T_IFNDEF:
274 case PSI_T_UNDEF:
275 #if PSI_CPP_DEBUG
276 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
277 #endif
278 do_expansion = false;
279 break;
280 case PSI_T_LPAREN:
281
282 if (!skip_all) {
283 if (skip_paren) {
284 skip_paren = false;
285 } else {
286 do_expansion = true;
287 #if PSI_CPP_DEBUG
288 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
289 #endif
290 }
291 }
292 break;
293 case PSI_T_NAME:
294 break;
295 default:
296 do_expansion = !skip_all;
297 #if PSI_CPP_DEBUG
298 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
299 #endif
300 }
301 }
302 }
303
304 if (cpp->skip) {
305 if (!do_cpp) {
306 #if PSI_CPP_DEBUG
307 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP skip ");
308 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, current);
309 #endif
310 psi_cpp_tokiter_del_cur(cpp, true);
311 continue;
312 }
313 }
314
315 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
316 bool expanded = false;
317
318 while (psi_cpp_tokiter_expand(cpp)) {
319 expanded = true;
320 }
321 if (expanded) {
322 continue;
323 }
324 }
325
326 if (do_cpp) {
327 parser_tokens = psi_plist_add(parser_tokens, &current);
328
329 if (is_eol) {
330 size_t processed = 0;
331 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
332
333 /* EOL */
334 psi_plist_pop(parser_tokens, NULL);
335 psi_plist_clean(parser_tokens);
336 do_cpp = false;
337
338 if (!parsed) {
339 psi_plist_free(parser_tokens);
340 return false;
341 }
342 } else {
343 /* leave EOLs in the input stream, else we might end up
344 * with a hash not preceded with a new line after include */
345 psi_cpp_tokiter_del_cur(cpp, false);
346 }
347
348 #if PSI_CPP_DEBUG > 1
349 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_tokiter_dump, cpp);
350 #endif
351
352 continue;
353 }
354
355 psi_cpp_tokiter_add_cur(cpp);
356 psi_cpp_tokiter_next(cpp);
357 }
358
359 psi_plist_free(parser_tokens);
360
361 return true;
362 }
363
364 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
365 {
366 bool parsed = false;
367 struct psi_cpp temp = *cpp; cpp->level = temp.level;
368
369 cpp->tokens.iter = *tokens;
370 cpp->tokens.next = NULL;
371
372 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
373 parsed = true;
374 }
375
376 if (cpp->tokens.next) {
377 free(cpp->tokens.iter);
378 cpp->tokens.iter = cpp->tokens.next;
379 cpp->tokens.next = NULL;
380 }
381
382 *tokens = cpp->tokens.iter;
383
384 if (temp.tokens.iter) {
385 cpp->tokens.iter = temp.tokens.iter;
386 cpp->tokens.next = temp.tokens.next;
387 }
388 cpp->index = temp.index;
389 cpp->skip = temp.skip;
390 cpp->level = temp.level;
391 cpp->seen = temp.seen;
392
393 return parsed;
394 }
395
396 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
397 {
398 bool defined;
399
400 if (tok->type == PSI_T_NAME) {
401 defined = zend_hash_exists(&cpp->defs, tok->text)
402 || psi_builtin_exists(tok->text);
403 } else {
404 defined = false;
405 }
406
407 #if PSI_CPP_DEBUG
408 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP defined -> %s ", defined ? "true" : "false");
409 if (defined) {
410 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
411 if (macro) {
412 PSI_DEBUG_PRINT(cpp->parser, " @ %s:%u ", macro->token->file->val, macro->token->line);
413 }
414 }
415 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, tok);
416 #endif
417
418 return defined;
419 }
420
421 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
422 {
423 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
424
425 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
426 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
427 "'%s' redefined", decl->token->text->val);
428 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
429 "'%s' previously defined", old->token->text->val);
430 }
431 #if PSI_CPP_DEBUG
432 if (decl->exp) {
433 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO num_exp -> ");
434 } else {
435 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO decl -> ");
436 }
437 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_macro_decl_dump, decl);
438 PSI_DEBUG_PRINT(cpp->parser, "\n");
439 #endif
440 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
441 }
442
443 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
444 {
445 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
446 }
447
448 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
449 {
450 struct psi_validate_scope scope = {0};
451
452 scope.cpp = cpp;
453 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
454 return false;
455 }
456 if (!psi_num_exp_get_long(exp->data.num, NULL, cpp)) {
457 return false;
458 }
459 return true;
460 }
461
462 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
463 {
464 bool parsed = false;
465 char path[PATH_MAX];
466 struct psi_plist *tokens;
467 struct psi_parser_input *include;
468
469 if (!psi_cpp_has_include(cpp, file, flags, path)) {
470 return false;
471 }
472
473 if (flags & PSI_CPP_INCLUDE_ONCE) {
474 if (zend_hash_str_exists(&cpp->once, path, strlen(path))) {
475 return true;
476 }
477 }
478
479 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s opening %s\n",
480 include_flavor[flags], path);
481
482 include = psi_parser_open_file(cpp->parser, path, false);
483 if (!include) {
484 return false;
485 }
486
487 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
488
489 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
490
491 tokens = psi_parser_scan(cpp->parser, include);
492 psi_parser_input_free(&include);
493
494 if (!tokens) {
495 return false;
496 }
497
498 parsed = psi_cpp_process(cpp, &tokens);
499 if (!parsed) {
500 psi_plist_free(tokens);
501 return false;
502 }
503
504 psi_cpp_tokiter_add_range(cpp, psi_plist_count(tokens), psi_plist_eles(tokens));
505 free(tokens);
506
507 ++cpp->expanded;
508 return true;
509 }
510
511 #ifndef HAVE_EACCESS
512 # define eaccess access
513 #endif
514 bool psi_cpp_has_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags, char *path)
515 {
516 char temp[PATH_MAX];
517
518 if (!path) {
519 path = temp;
520 }
521
522 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
523 /* first try as is, full or relative path */
524 if (file->text->val[0] == '/') {
525 path = file->text->val;
526 } else {
527 char *dir;
528 size_t len;
529
530 strncpy(path, file->file->val, PATH_MAX);
531
532 dir = dirname(path);
533 len = strlen(dir);
534
535 assert(len + file->text->len + 1 < PATH_MAX);
536
537 memmove(path, dir, len);
538 path[len] = '/';
539 memcpy(&(path)[len + 1], file->text->val, file->text->len + 1);
540 }
541
542 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
543 include_flavor[flags], path);
544 if (0 == eaccess(path, R_OK)) {
545 return true;
546 }
547 }
548
549 /* look through search paths */
550 if (file->text->val[0] != '/') {
551 const char *sep;
552 int p_len;
553
554 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
555 if ((sep = strchr(cpp->search, ':'))) {
556 cpp->search = sep + 1;
557 } else {
558 /* point to end of string */
559 cpp->search += strlen(cpp->search);
560 }
561 }
562
563 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
564 cpp->search = PSI_G(search_path);
565 }
566
567 do {
568 int d_len;
569
570 sep = strchr(cpp->search, ':');
571 d_len = sep ? sep - cpp->search : strlen(cpp->search);
572
573 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
574 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
575 include_flavor[flags], path);
576 if (0 == eaccess(path, R_OK)) {
577 return true;
578 }
579 }
580
581 if (sep) {
582 cpp->search = sep + 1;
583 }
584 } while (sep);
585 }
586
587 return false;
588 }