ae899490e00f504ede567147db6b4326435e9ebc
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include "php_psi.h"
33
34 #include <libgen.h>
35
36 #include "cpp.h"
37 #include "parser.h"
38 #include "debug.h"
39
40 #define PSI_CPP_SEARCH
41 #define PSI_CPP_PREDEF
42 #include "php_psi_predef.h"
43
44 static HashTable psi_cpp_defaults;
45 static HashTable psi_cpp_pragmas;
46
47 typedef bool (*psi_cpp_pragma_func)(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl);
48
49 static bool psi_cpp_pragma_once(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
50 {
51 return NULL != zend_hash_add_empty_element(&cpp->once, decl->token->file);
52 }
53
54 static bool psi_cpp_pragma_lib(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
55 {
56 struct psi_token *lib = NULL;
57 char *libname;
58
59 if (!psi_plist_get(decl->tokens, 0, &lib)
60 || !lib || lib->type != PSI_T_QUOTED_STRING) {
61 return false;
62 }
63
64 cpp->parser->file.libnames = psi_plist_add(cpp->parser->file.libnames,
65 &libname);
66 return true;
67 }
68
69 PHP_MINIT_FUNCTION(psi_cpp);
70 PHP_MINIT_FUNCTION(psi_cpp)
71 {
72 struct psi_parser parser;
73 struct psi_parser_input *predef;
74
75 PSI_G(search_path) = pemalloc(strlen(PSI_G(directory)) + strlen(psi_cpp_search) + 1 + 1, 1);
76 sprintf(PSI_G(search_path), "%s:%s", PSI_G(directory), psi_cpp_search);
77
78 if (!psi_parser_init(&parser, psi_error_wrapper, PSI_SILENT)) {
79 return FAILURE;
80 }
81
82 if (!(predef = psi_parser_open_string(&parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
83 psi_parser_dtor(&parser);
84 return FAILURE;
85 }
86
87 if (!psi_parser_parse(&parser, predef)) {
88 psi_parser_input_free(&predef);
89 psi_parser_dtor(&parser);
90 return FAILURE;
91 }
92 psi_parser_input_free(&predef);
93
94 zend_hash_init(&psi_cpp_defaults, 0, NULL, NULL, 1);
95 zend_hash_copy(&psi_cpp_defaults, &parser.preproc->defs, NULL);
96
97 psi_parser_dtor(&parser);
98
99 #define PSI_CPP_PRAGMA(name) \
100 zend_hash_str_add_ptr(&psi_cpp_pragmas, #name, strlen(#name), psi_cpp_pragma_ ## name)
101 zend_hash_init(&psi_cpp_pragmas, 0, NULL, NULL, 1);
102 PSI_CPP_PRAGMA(once);
103 PSI_CPP_PRAGMA(lib);
104
105 return SUCCESS;
106 }
107
108 PHP_MSHUTDOWN_FUNCTION(psi_cpp);
109 PHP_MSHUTDOWN_FUNCTION(psi_cpp)
110 {
111 struct psi_cpp_macro_decl *macro;
112
113 ZEND_HASH_FOREACH_PTR(&psi_cpp_defaults, macro)
114 {
115 psi_cpp_macro_decl_free(&macro);
116 }
117 ZEND_HASH_FOREACH_END();
118
119 zend_hash_destroy(&psi_cpp_defaults);
120
121 return SUCCESS;
122 }
123
124 static void free_cpp_def(zval *p)
125 {
126 if (Z_TYPE_P(p) == IS_PTR) {
127 struct psi_cpp_macro_decl *macro = Z_PTR_P(p);
128
129 if (!zend_hash_exists(&psi_cpp_defaults, macro->token->text)) {
130 psi_cpp_macro_decl_free(&macro);
131 }
132 }
133 }
134
135 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
136 {
137 struct psi_cpp *cpp = pecalloc(1, sizeof(*cpp), 1);
138
139 cpp->parser = P;
140 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
141 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
142 zend_hash_copy(&cpp->defs, &psi_cpp_defaults, NULL);
143 zend_hash_init(&cpp->expanding, 0, NULL, NULL, 1);
144
145 return cpp;
146 }
147
/*
 * Directive names used in debug messages; indexed with the `flags` value
 * passed to psi_cpp_include() / psi_cpp_has_include().
 */
static char *include_flavor[] = { "include", "include next", "include once" };
153
154 void psi_cpp_free(struct psi_cpp **cpp_ptr)
155 {
156 if (*cpp_ptr) {
157 struct psi_cpp *cpp = *cpp_ptr;
158
159 *cpp_ptr = NULL;
160 zend_hash_destroy(&cpp->defs);
161 zend_hash_destroy(&cpp->once);
162 zend_hash_destroy(&cpp->expanding);
163 free(cpp);
164 }
165 }
166
167 static bool psi_cpp_stage1(struct psi_cpp *cpp)
168 {
169 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
170
171 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
172
173 psi_cpp_tokiter_reset(cpp);
174 while (psi_cpp_tokiter_valid(cpp)) {
175 struct psi_token *token = psi_cpp_tokiter_current(cpp);
176
177 /* strip comments and attributes */
178 if (token->type == PSI_T_COMMENT
179 || token->type == PSI_T_CPP_ATTRIBUTE) {
180 psi_cpp_tokiter_del_cur(cpp, true);
181 continue;
182 }
183
184 /* line continuations */
185 if (token->type == PSI_T_EOL) {
186 if (esc) {
187 psi_cpp_tokiter_del_prev(cpp, true);
188 psi_cpp_tokiter_del_cur(cpp, true);
189 esc = false;
190 continue;
191 }
192 } else if (token->type == PSI_T_BSLASH) {
193 esc = !esc;
194 } else {
195 esc = false;
196 }
197
198 /* this whole turf is needed to distinct between:
199 * #define foo (1,2,3)
200 * #define foo(a,b,c)
201 */
202
203 if (token->type == PSI_T_WHITESPACE) {
204 if (name) {
205 name = false;
206 }
207 ws = true;
208 psi_cpp_tokiter_del_cur(cpp, true);
209 continue;
210 }
211
212 switch (token->type) {
213 case PSI_T_EOL:
214 eol = true;
215 break;
216 case PSI_T_HASH:
217 if (eol) {
218 hash = true;
219 eol = false;
220 }
221 break;
222 case PSI_T_DEFINE:
223 if (hash) {
224 define = true;
225 hash = false;
226 }
227 break;
228 case PSI_T_NAME:
229 if (define) {
230 name = true;
231 define = false;
232 }
233 break;
234 case PSI_T_LPAREN:
235 if (name) {
236 name = false;
237 if (!ws) {
238 /* mask special token for parser */
239 struct psi_token *no_ws = psi_token_copy(token);
240
241 no_ws->type = PSI_T_NO_WHITESPACE;
242 zend_string_release(no_ws->text);
243 no_ws->text = psi_string_init_interned("\xA0", 1, 1);
244 psi_cpp_tokiter_add(cpp, no_ws);
245 continue;
246 }
247 }
248 /* no break */
249 default:
250 name = define = hash = eol = false;
251 break;
252 }
253
254 ws = false;
255 psi_cpp_tokiter_add_cur(cpp);
256 psi_cpp_tokiter_next(cpp);
257 }
258
259 return true;
260 }
261
262 static bool psi_cpp_stage2(struct psi_cpp *cpp)
263 {
264 bool is_eol = true, do_expansion = true, skip_paren = false, skip_all = false;
265
266 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
267
268 psi_cpp_tokiter_reset(cpp);
269 while (psi_cpp_tokiter_valid(cpp)) {
270 struct psi_token *current = psi_cpp_tokiter_current(cpp);
271
272 if (current->type == PSI_T_HASH) {
273 if (is_eol) {
274 cpp->do_cpp = true;
275 is_eol = false;
276 }
277 } else if (current->type == PSI_T_EOL) {
278 #if PSI_CPP_DEBUG
279 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
280 #endif
281 is_eol = true;
282 skip_all = false;
283 do_expansion = true;
284 if (!cpp->do_cpp) {
285 psi_cpp_tokiter_del_cur(cpp, true);
286 continue;
287 }
288 } else {
289 is_eol = false;
290
291 if (cpp->do_cpp) {
292 switch (current->type) {
293 case PSI_T_DEFINE:
294 #if PSI_CPP_DEBUG
295 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
296 #endif
297 do_expansion = false;
298 skip_all = true;
299 break;
300 case PSI_T_DEFINED:
301 skip_paren = true;
302 /* no break */
303 case PSI_T_IFDEF:
304 case PSI_T_IFNDEF:
305 case PSI_T_UNDEF:
306 #if PSI_CPP_DEBUG
307 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
308 #endif
309 do_expansion = false;
310 break;
311 case PSI_T_LPAREN:
312
313 if (!skip_all) {
314 if (skip_paren) {
315 skip_paren = false;
316 } else {
317 do_expansion = true;
318 #if PSI_CPP_DEBUG
319 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
320 #endif
321 }
322 }
323 break;
324 case PSI_T_NAME:
325 break;
326 default:
327 do_expansion = !skip_all;
328 #if PSI_CPP_DEBUG
329 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
330 #endif
331 }
332 }
333 }
334
335 if (cpp->skip) {
336 if (!cpp->do_cpp) {
337 #if PSI_CPP_DEBUG
338 PSI_DEBUG_LOCK(cpp->parser,
339 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP skip ");
340 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, current);
341 );
342 #endif
343 psi_cpp_tokiter_del_cur(cpp, true);
344 continue;
345 }
346 }
347
348 if (do_expansion && psi_cpp_defined(cpp, current)) {
349 if (psi_cpp_tokiter_expand(cpp)) {
350 continue;
351 }
352 }
353
354 psi_cpp_tokiter_add_cur(cpp);
355
356 if (cpp->do_cpp && is_eol) {
357 size_t processed = 0;
358 bool parsed;
359
360 cpp->do_cpp = false;
361 parsed = psi_parser_process(cpp->parser, cpp->tokens.exec, &processed);
362 psi_plist_clean(cpp->tokens.exec);
363
364 if (!parsed) {
365 psi_plist_free(cpp->tokens.exec);
366 return false;
367 }
368
369 #if PSI_CPP_DEBUG > 1
370 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_tokiter_dump, cpp);
371 #endif
372 }
373
374 psi_cpp_tokiter_next(cpp);
375 }
376
377 psi_plist_free(cpp->tokens.exec);
378 cpp->tokens.exec = NULL;
379
380 return true;
381 }
382
383 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens,
384 struct psi_token *expanding)
385 {
386 bool parsed = false;
387 struct psi_cpp temp = *cpp;
388
389 cpp->tokens.iter = *tokens;
390 cpp->tokens.next = NULL;
391 cpp->tokens.exec = NULL;
392
393 if (expanding) {
394 zend_hash_add_empty_element(&cpp->expanding, expanding->text);
395 }
396 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
397 parsed = true;
398 }
399 if (expanding) {
400 zend_hash_del(&cpp->expanding, expanding->text);
401 }
402
403 *tokens = cpp->tokens.next;
404 psi_plist_free(cpp->tokens.iter);
405 if (cpp->tokens.exec) {
406 assert(!psi_plist_count(cpp->tokens.exec));
407 psi_plist_free(cpp->tokens.exec);
408 }
409
410 cpp->tokens = temp.tokens;
411 cpp->index = temp.index;
412 cpp->skip = temp.skip;
413 cpp->level = temp.level;
414 cpp->seen = temp.seen;
415 cpp->do_cpp = temp.do_cpp;
416
417 return parsed;
418 }
419
420 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
421 {
422 bool defined = false;
423
424 if (tok->type == PSI_T_NAME) {
425 if (psi_builtin_exists(tok->text)) {
426 defined = true;
427 } else if (!zend_hash_exists(&cpp->expanding, tok->text)) {
428 defined = zend_hash_exists(&cpp->defs, tok->text);
429 }
430 #if PSI_CPP_DEBUG
431 PSI_DEBUG_LOCK(cpp->parser,
432 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP defined -> %s ", defined ? "true" : "false");
433 if (defined) {
434 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
435 if (macro) {
436 PSI_DEBUG_PRINT(cpp->parser, " @ %s:%u ", macro->token->file->val, macro->token->line);
437 }
438 } else {
439 zend_string *key;
440
441 PSI_DEBUG_PRINT(cpp->parser, " expanding=");
442 ZEND_HASH_FOREACH_STR_KEY(&cpp->expanding, key)
443 {
444 PSI_DEBUG_PRINT(cpp->parser, "%s,", key->val);
445 }
446 ZEND_HASH_FOREACH_END();
447 PSI_DEBUG_PRINT(cpp->parser, "\t");
448 }
449 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, tok);
450 );
451 #endif
452 }
453
454 return defined;
455 }
456
457 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
458 {
459 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
460
461 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
462 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
463 "'%s' redefined", decl->token->text->val);
464 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
465 "'%s' previously defined", old->token->text->val);
466 }
467 #if PSI_CPP_DEBUG
468 PSI_DEBUG_LOCK(cpp->parser,
469 if (decl->exp) {
470 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO num_exp -> ");
471 } else {
472 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO decl -> ");
473 }
474 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_macro_decl_dump, decl);
475 PSI_DEBUG_PRINT(cpp->parser, "\n");
476 );
477 #endif
478 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
479 }
480
481 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
482 {
483 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
484 }
485
486 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
487 {
488 struct psi_validate_scope scope = {0};
489
490 scope.cpp = cpp;
491 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
492 return false;
493 }
494 if (!psi_num_exp_get_long(exp->data.num, NULL, cpp)) {
495 return false;
496 }
497 return true;
498 }
499
500 bool psi_cpp_pragma(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
501 {
502 psi_cpp_pragma_func *fn;
503
504 fn = zend_hash_find_ptr(&psi_cpp_pragmas, decl->token->text);
505 if (!fn) {
506 return false;
507 }
508
509 return fn(cpp, decl);
510 }
511
512 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
513 {
514 bool parsed = false;
515 char path[PATH_MAX];
516 struct psi_plist *tokens;
517 struct psi_parser_input *include;
518
519 if (!psi_cpp_has_include(cpp, file, flags, path)) {
520 return false;
521 }
522
523 if (flags & PSI_CPP_INCLUDE_ONCE) {
524 if (zend_hash_str_exists(&cpp->once, path, strlen(path))) {
525 return true;
526 }
527 }
528
529 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s opening %s\n",
530 include_flavor[flags], path);
531
532 include = psi_parser_open_file(cpp->parser, path, false);
533 if (!include) {
534 return false;
535 }
536
537 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
538
539 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
540
541 tokens = psi_parser_scan(cpp->parser, include);
542 psi_parser_input_free(&include);
543
544 if (!tokens) {
545 return false;
546 }
547
548 ++cpp->include_level;
549 parsed = psi_cpp_process(cpp, &tokens, NULL);
550 --cpp->include_level;
551
552 if (!parsed) {
553 psi_plist_free(tokens);
554 return false;
555 }
556
557 psi_cpp_tokiter_add_range(cpp, psi_plist_count(tokens), psi_plist_eles(tokens));
558 free(tokens);
559
560 ++cpp->expanded;
561 return true;
562 }
563
564 #ifndef HAVE_EACCESS
565 # define eaccess access
566 #endif
567 bool psi_cpp_has_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags, char *path)
568 {
569 char temp[PATH_MAX];
570
571 if (!path) {
572 path = temp;
573 }
574
575 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
576 /* first try as is, full or relative path */
577 if (file->text->val[0] == '/') {
578 path = file->text->val;
579 } else {
580 char *dir;
581 size_t len;
582
583 strncpy(path, file->file->val, PATH_MAX);
584
585 dir = dirname(path);
586 len = strlen(dir);
587
588 assert(len + file->text->len + 1 < PATH_MAX);
589
590 memmove(path, dir, len);
591 path[len] = '/';
592 memcpy(&(path)[len + 1], file->text->val, file->text->len + 1);
593 }
594
595 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
596 include_flavor[flags], path);
597 if (0 == eaccess(path, R_OK)) {
598 return true;
599 }
600 }
601
602 /* look through search paths */
603 if (file->text->val[0] != '/') {
604 const char *sep;
605 int p_len;
606
607 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
608 if ((sep = strchr(cpp->search, ':'))) {
609 cpp->search = sep + 1;
610 } else {
611 /* point to end of string */
612 cpp->search += strlen(cpp->search);
613 }
614 }
615
616 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
617 cpp->search = PSI_G(search_path);
618 }
619
620 do {
621 int d_len;
622
623 sep = strchr(cpp->search, ':');
624 d_len = sep ? sep - cpp->search : strlen(cpp->search);
625
626 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
627 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
628 include_flavor[flags], path);
629 if (0 == eaccess(path, R_OK)) {
630 return true;
631 }
632 }
633
634 if (sep) {
635 cpp->search = sep + 1;
636 }
637 } while (sep);
638 }
639
640 return false;
641 }