26e324f7c5ecd66565ad3b67421e15476aac8bef
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include "php_psi.h"
33
34 #include <libgen.h>
35
36 #include "cpp.h"
37 #include "parser.h"
38 #include "debug.h"
39
40 #define PSI_CPP_SEARCH
41 #define PSI_CPP_PREDEF
42 #include "php_psi_predef.h"
43
/*
 * Macro definitions obtained by parsing psi_cpp_predef at module startup.
 * Every preprocessor created by psi_cpp_init() starts with a copy of this
 * table; the macros stored here are released at module shutdown.
 */
HashTable psi_cpp_defaults;
45
46 PHP_MINIT_FUNCTION(psi_cpp);
47 PHP_MINIT_FUNCTION(psi_cpp)
48 {
49 struct psi_parser parser;
50 struct psi_parser_input *predef;
51
52 PSI_G(search_path) = pemalloc(strlen(PSI_G(directory)) + strlen(psi_cpp_search) + 1 + 1, 1);
53 sprintf(PSI_G(search_path), "%s:%s", PSI_G(directory), psi_cpp_search);
54
55 if (!psi_parser_init(&parser, psi_error_wrapper, PSI_SILENT)) {
56 return FAILURE;
57 }
58
59 if (!(predef = psi_parser_open_string(&parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
60 psi_parser_dtor(&parser);
61 return FAILURE;
62 }
63
64 if (!psi_parser_parse(&parser, predef)) {
65 psi_parser_input_free(&predef);
66 psi_parser_dtor(&parser);
67 return FAILURE;
68 }
69 psi_parser_input_free(&predef);
70
71 zend_hash_init(&psi_cpp_defaults, 0, NULL, NULL, 1);
72 zend_hash_copy(&psi_cpp_defaults, &parser.preproc->defs, NULL);
73
74 psi_parser_dtor(&parser);
75
76 return SUCCESS;
77 }
78
79 PHP_MSHUTDOWN_FUNCTION(psi_cpp);
80 PHP_MSHUTDOWN_FUNCTION(psi_cpp)
81 {
82 struct psi_cpp_macro_decl *macro;
83
84 ZEND_HASH_FOREACH_PTR(&psi_cpp_defaults, macro)
85 {
86 psi_cpp_macro_decl_free(&macro);
87 }
88 ZEND_HASH_FOREACH_END();
89
90 zend_hash_destroy(&psi_cpp_defaults);
91
92 return SUCCESS;
93 }
94
95 static void free_cpp_def(zval *p)
96 {
97 if (Z_TYPE_P(p) == IS_PTR) {
98 struct psi_cpp_macro_decl *macro = Z_PTR_P(p);
99
100 if (!zend_hash_exists(&psi_cpp_defaults, macro->token->text)) {
101 psi_cpp_macro_decl_free(&macro);
102 }
103 }
104 }
105
106 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
107 {
108 struct psi_cpp *cpp = pecalloc(1, sizeof(*cpp), 1);
109
110 cpp->parser = P;
111 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
112 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
113 zend_hash_copy(&cpp->defs, &psi_cpp_defaults, NULL);
114 zend_hash_init(&cpp->expanding, 0, NULL, NULL, 1);
115
116 return cpp;
117 }
118
/*
 * Human readable names of the include directive variants, used in debug
 * output and indexed by the `flags` value passed to the include functions.
 * The table is read-only, hence fully const-qualified.
 */
static const char *const include_flavor[] = {
	"include",
	"include next",
	"include once"
};
124
125 void psi_cpp_free(struct psi_cpp **cpp_ptr)
126 {
127 if (*cpp_ptr) {
128 struct psi_cpp *cpp = *cpp_ptr;
129
130 *cpp_ptr = NULL;
131 zend_hash_destroy(&cpp->defs);
132 zend_hash_destroy(&cpp->once);
133 zend_hash_destroy(&cpp->expanding);
134 free(cpp);
135 }
136 }
137
138 static bool psi_cpp_stage1(struct psi_cpp *cpp)
139 {
140 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
141
142 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
143
144 psi_cpp_tokiter_reset(cpp);
145 while (psi_cpp_tokiter_valid(cpp)) {
146 struct psi_token *token = psi_cpp_tokiter_current(cpp);
147
148 /* strip comments and attributes */
149 if (token->type == PSI_T_COMMENT
150 || token->type == PSI_T_CPP_ATTRIBUTE) {
151 psi_cpp_tokiter_del_cur(cpp, true);
152 continue;
153 }
154
155 /* line continuations */
156 if (token->type == PSI_T_EOL) {
157 if (esc) {
158 psi_cpp_tokiter_del_prev(cpp, true);
159 psi_cpp_tokiter_del_cur(cpp, true);
160 esc = false;
161 continue;
162 }
163 } else if (token->type == PSI_T_BSLASH) {
164 esc = !esc;
165 } else {
166 esc = false;
167 }
168
169 /* this whole turf is needed to distinct between:
170 * #define foo (1,2,3)
171 * #define foo(a,b,c)
172 */
173
174 if (token->type == PSI_T_WHITESPACE) {
175 if (name) {
176 name = false;
177 }
178 ws = true;
179 psi_cpp_tokiter_del_cur(cpp, true);
180 continue;
181 }
182
183 switch (token->type) {
184 case PSI_T_EOL:
185 eol = true;
186 break;
187 case PSI_T_HASH:
188 if (eol) {
189 hash = true;
190 eol = false;
191 }
192 break;
193 case PSI_T_DEFINE:
194 if (hash) {
195 define = true;
196 hash = false;
197 }
198 break;
199 case PSI_T_NAME:
200 if (define) {
201 name = true;
202 define = false;
203 }
204 break;
205 case PSI_T_LPAREN:
206 if (name) {
207 name = false;
208 if (!ws) {
209 /* mask special token for parser */
210 struct psi_token *no_ws = psi_token_copy(token);
211
212 no_ws->type = PSI_T_NO_WHITESPACE;
213 zend_string_release(no_ws->text);
214 no_ws->text = psi_string_init_interned("\xA0", 1, 1);
215 psi_cpp_tokiter_add(cpp, no_ws);
216 continue;
217 }
218 }
219 /* no break */
220 default:
221 name = define = hash = eol = false;
222 break;
223 }
224
225 ws = false;
226 psi_cpp_tokiter_add_cur(cpp);
227 psi_cpp_tokiter_next(cpp);
228 }
229
230 return true;
231 }
232
233 static bool psi_cpp_stage2(struct psi_cpp *cpp)
234 {
235 bool is_eol = true, do_expansion = true, skip_paren = false, skip_all = false;
236
237 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
238
239 psi_cpp_tokiter_reset(cpp);
240 while (psi_cpp_tokiter_valid(cpp)) {
241 struct psi_token *current = psi_cpp_tokiter_current(cpp);
242
243 if (current->type == PSI_T_HASH) {
244 if (is_eol) {
245 cpp->do_cpp = true;
246 is_eol = false;
247 }
248 } else if (current->type == PSI_T_EOL) {
249 #if PSI_CPP_DEBUG
250 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
251 #endif
252 is_eol = true;
253 skip_all = false;
254 do_expansion = true;
255 if (!cpp->do_cpp) {
256 psi_cpp_tokiter_del_cur(cpp, true);
257 continue;
258 }
259 } else {
260 is_eol = false;
261
262 if (cpp->do_cpp) {
263 switch (current->type) {
264 case PSI_T_DEFINE:
265 #if PSI_CPP_DEBUG
266 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
267 #endif
268 do_expansion = false;
269 skip_all = true;
270 break;
271 case PSI_T_DEFINED:
272 skip_paren = true;
273 /* no break */
274 case PSI_T_IFDEF:
275 case PSI_T_IFNDEF:
276 case PSI_T_UNDEF:
277 #if PSI_CPP_DEBUG
278 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
279 #endif
280 do_expansion = false;
281 break;
282 case PSI_T_LPAREN:
283
284 if (!skip_all) {
285 if (skip_paren) {
286 skip_paren = false;
287 } else {
288 do_expansion = true;
289 #if PSI_CPP_DEBUG
290 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
291 #endif
292 }
293 }
294 break;
295 case PSI_T_NAME:
296 break;
297 default:
298 do_expansion = !skip_all;
299 #if PSI_CPP_DEBUG
300 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
301 #endif
302 }
303 }
304 }
305
306 if (cpp->skip) {
307 if (!cpp->do_cpp) {
308 #if PSI_CPP_DEBUG
309 psi_debug_lock(PSI_DATA(cpp->parser));
310 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP skip ");
311 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, current);
312 psi_debug_unlock(PSI_DATA(cpp->parser));
313 #endif
314 psi_cpp_tokiter_del_cur(cpp, true);
315 continue;
316 }
317 }
318
319 if (do_expansion && psi_cpp_defined(cpp, current)) {
320 bool expanded = false;
321
322 if (psi_cpp_tokiter_expand(cpp)) {
323 expanded = true;
324 }
325 if (expanded) {
326 continue;
327 }
328 }
329
330 psi_cpp_tokiter_add_cur(cpp);
331
332 if (cpp->do_cpp && is_eol) {
333 size_t processed = 0;
334 bool parsed;
335
336 cpp->do_cpp = false;
337
338 parsed = psi_parser_process(cpp->parser, cpp->tokens.exec, &processed);
339
340 /* leave EOLs in the input stream, else we might end up
341 * with a hash not preceded with a new line after include */
342 psi_plist_pop(cpp->tokens.exec, NULL);
343 psi_plist_clean(cpp->tokens.exec);
344
345 if (!parsed) {
346 psi_plist_free(cpp->tokens.exec);
347 return false;
348 }
349
350 #if PSI_CPP_DEBUG > 1
351 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_tokiter_dump, cpp);
352 #endif
353 }
354
355 psi_cpp_tokiter_next(cpp);
356 }
357
358 psi_plist_free(cpp->tokens.exec);
359
360 return true;
361 }
362
363 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens,
364 struct psi_token *expanding)
365 {
366 bool parsed = false;
367 struct psi_cpp temp = *cpp;
368
369 cpp->tokens.iter = *tokens;
370 cpp->tokens.next = NULL;
371 cpp->tokens.exec = NULL;
372
373 if (expanding) {
374 zend_hash_add_empty_element(&cpp->expanding, expanding->text);
375 }
376 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
377 parsed = true;
378 }
379 if (expanding) {
380 zend_hash_del(&cpp->expanding, expanding->text);
381 }
382
383 if (cpp->tokens.next) {
384 free(cpp->tokens.iter);
385 cpp->tokens.iter = cpp->tokens.next;
386 cpp->tokens.next = NULL;
387 }
388
389 *tokens = cpp->tokens.iter;
390
391 if (temp.tokens.iter) {
392 cpp->tokens.iter = temp.tokens.iter;
393 cpp->tokens.next = temp.tokens.next;
394 cpp->tokens.exec = temp.tokens.exec;
395 }
396 cpp->index = temp.index;
397 cpp->skip = temp.skip;
398 cpp->level = temp.level;
399 cpp->seen = temp.seen;
400 cpp->do_cpp = temp.do_cpp;
401
402 return parsed;
403 }
404
405 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
406 {
407 bool defined = false;
408
409 if (tok->type == PSI_T_NAME) {
410 if (psi_builtin_exists(tok->text)) {
411 defined = true;
412 } else if (!zend_hash_exists(&cpp->expanding, tok->text)) {
413 struct psi_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
414
415 if (macro) {
416 defined = true;
417 }
418 }
419 #if PSI_CPP_DEBUG
420 psi_debug_lock(PSI_DATA(cpp->parser));
421 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP defined -> %s ", defined ? "true" : "false");
422 if (defined) {
423 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
424 if (macro) {
425 PSI_DEBUG_PRINT(cpp->parser, " @ %s:%u ", macro->token->file->val, macro->token->line);
426 }
427 } else {
428 zend_string *key;
429
430 PSI_DEBUG_PRINT(cpp->parser, " expanding=");
431 ZEND_HASH_FOREACH_STR_KEY(&cpp->expanding, key)
432 {
433 PSI_DEBUG_PRINT(cpp->parser, "%s,", key->val);
434 }
435 ZEND_HASH_FOREACH_END();
436 PSI_DEBUG_PRINT(cpp->parser, "\t");
437 }
438 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, tok);
439 psi_debug_unlock(PSI_DATA(cpp->parser));
440 #endif
441 }
442
443 return defined;
444 }
445
446 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
447 {
448 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
449
450 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
451 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
452 "'%s' redefined", decl->token->text->val);
453 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
454 "'%s' previously defined", old->token->text->val);
455 }
456 #if PSI_CPP_DEBUG
457 psi_debug_lock(PSI_DATA(cpp->parser));
458 if (decl->exp) {
459 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO num_exp -> ");
460 } else {
461 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO decl -> ");
462 }
463 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_macro_decl_dump, decl);
464 PSI_DEBUG_PRINT(cpp->parser, "\n");
465 psi_debug_unlock(PSI_DATA(cpp->parser));
466 #endif
467 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
468 }
469
470 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
471 {
472 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
473 }
474
475 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
476 {
477 struct psi_validate_scope scope = {0};
478
479 scope.cpp = cpp;
480 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
481 return false;
482 }
483 if (!psi_num_exp_get_long(exp->data.num, NULL, cpp)) {
484 return false;
485 }
486 return true;
487 }
488
489 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
490 {
491 bool parsed = false;
492 char path[PATH_MAX];
493 struct psi_plist *tokens;
494 struct psi_parser_input *include;
495
496 if (!psi_cpp_has_include(cpp, file, flags, path)) {
497 return false;
498 }
499
500 if (flags & PSI_CPP_INCLUDE_ONCE) {
501 if (zend_hash_str_exists(&cpp->once, path, strlen(path))) {
502 return true;
503 }
504 }
505
506 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s opening %s\n",
507 include_flavor[flags], path);
508
509 include = psi_parser_open_file(cpp->parser, path, false);
510 if (!include) {
511 return false;
512 }
513
514 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
515
516 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
517
518 tokens = psi_parser_scan(cpp->parser, include);
519 psi_parser_input_free(&include);
520
521 if (!tokens) {
522 return false;
523 }
524
525 parsed = psi_cpp_process(cpp, &tokens, NULL);
526 if (!parsed) {
527 psi_plist_free(tokens);
528 return false;
529 }
530
531 psi_cpp_tokiter_add_range(cpp, psi_plist_count(tokens), psi_plist_eles(tokens));
532 free(tokens);
533
534 ++cpp->expanded;
535 return true;
536 }
537
538 #ifndef HAVE_EACCESS
539 # define eaccess access
540 #endif
541 bool psi_cpp_has_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags, char *path)
542 {
543 char temp[PATH_MAX];
544
545 if (!path) {
546 path = temp;
547 }
548
549 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
550 /* first try as is, full or relative path */
551 if (file->text->val[0] == '/') {
552 path = file->text->val;
553 } else {
554 char *dir;
555 size_t len;
556
557 strncpy(path, file->file->val, PATH_MAX);
558
559 dir = dirname(path);
560 len = strlen(dir);
561
562 assert(len + file->text->len + 1 < PATH_MAX);
563
564 memmove(path, dir, len);
565 path[len] = '/';
566 memcpy(&(path)[len + 1], file->text->val, file->text->len + 1);
567 }
568
569 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
570 include_flavor[flags], path);
571 if (0 == eaccess(path, R_OK)) {
572 return true;
573 }
574 }
575
576 /* look through search paths */
577 if (file->text->val[0] != '/') {
578 const char *sep;
579 int p_len;
580
581 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
582 if ((sep = strchr(cpp->search, ':'))) {
583 cpp->search = sep + 1;
584 } else {
585 /* point to end of string */
586 cpp->search += strlen(cpp->search);
587 }
588 }
589
590 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
591 cpp->search = PSI_G(search_path);
592 }
593
594 do {
595 int d_len;
596
597 sep = strchr(cpp->search, ':');
598 d_len = sep ? sep - cpp->search : strlen(cpp->search);
599
600 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
601 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
602 include_flavor[flags], path);
603 if (0 == eaccess(path, R_OK)) {
604 return true;
605 }
606 }
607
608 if (sep) {
609 cpp->search = sep + 1;
610 }
611 } while (sep);
612 }
613
614 return false;
615 }