#pragma lib
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include "php_psi.h"
33
34 #include <libgen.h>
35
36 #include "cpp.h"
37 #include "parser.h"
38 #include "debug.h"
39
40 #define PSI_CPP_SEARCH
41 #define PSI_CPP_PREDEF
42 #include "php_psi_predef.h"
43
44 static HashTable psi_cpp_defaults;
45 static HashTable psi_cpp_pragmas;
46
47 typedef bool (*psi_cpp_pragma_func)(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl);
48
49 static bool psi_cpp_pragma_once(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
50 {
51 return NULL != zend_hash_add_empty_element(&cpp->once, decl->token->file);
52 }
53
54 static bool psi_cpp_pragma_lib(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
55 {
56 struct psi_token *lib = NULL;
57 zend_string *libname;
58
59 if (!psi_plist_get(decl->tokens, 0, &lib)
60 || !lib || lib->type != PSI_T_QUOTED_STRING) {
61 return false;
62 }
63
64 libname = zend_string_copy(lib->text);
65 cpp->parser->file.libnames = psi_plist_add(cpp->parser->file.libnames,
66 &libname);
67 return true;
68 }
69
70 PHP_MINIT_FUNCTION(psi_cpp);
71 PHP_MINIT_FUNCTION(psi_cpp)
72 {
73 struct psi_parser parser;
74 struct psi_parser_input *predef;
75
76 PSI_G(search_path) = pemalloc(strlen(PSI_G(directory)) + strlen(psi_cpp_search) + 1 + 1, 1);
77 sprintf(PSI_G(search_path), "%s:%s", PSI_G(directory), psi_cpp_search);
78
79 if (!psi_parser_init(&parser, psi_error_wrapper, PSI_SILENT)) {
80 return FAILURE;
81 }
82
83 if (!(predef = psi_parser_open_string(&parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
84 psi_parser_dtor(&parser);
85 return FAILURE;
86 }
87
88 if (!psi_parser_parse(&parser, predef)) {
89 psi_parser_input_free(&predef);
90 psi_parser_dtor(&parser);
91 return FAILURE;
92 }
93 psi_parser_input_free(&predef);
94
95 zend_hash_init(&psi_cpp_defaults, 0, NULL, NULL, 1);
96 zend_hash_copy(&psi_cpp_defaults, &parser.preproc->defs, NULL);
97
98 psi_parser_dtor(&parser);
99
100 #define PSI_CPP_PRAGMA(name) \
101 zend_hash_str_add_ptr(&psi_cpp_pragmas, #name, strlen(#name), psi_cpp_pragma_ ## name)
102 zend_hash_init(&psi_cpp_pragmas, 0, NULL, NULL, 1);
103 PSI_CPP_PRAGMA(once);
104 PSI_CPP_PRAGMA(lib);
105
106 return SUCCESS;
107 }
108
109 PHP_MSHUTDOWN_FUNCTION(psi_cpp);
110 PHP_MSHUTDOWN_FUNCTION(psi_cpp)
111 {
112 struct psi_cpp_macro_decl *macro;
113
114 ZEND_HASH_FOREACH_PTR(&psi_cpp_defaults, macro)
115 {
116 psi_cpp_macro_decl_free(&macro);
117 }
118 ZEND_HASH_FOREACH_END();
119
120 zend_hash_destroy(&psi_cpp_defaults);
121
122 return SUCCESS;
123 }
124
125 static void free_cpp_def(zval *p)
126 {
127 if (Z_TYPE_P(p) == IS_PTR) {
128 struct psi_cpp_macro_decl *macro = Z_PTR_P(p);
129
130 if (!zend_hash_exists(&psi_cpp_defaults, macro->token->text)) {
131 psi_cpp_macro_decl_free(&macro);
132 }
133 }
134 }
135
136 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
137 {
138 struct psi_cpp *cpp = pecalloc(1, sizeof(*cpp), 1);
139
140 cpp->parser = P;
141 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
142 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
143 zend_hash_copy(&cpp->defs, &psi_cpp_defaults, NULL);
144 zend_hash_init(&cpp->expanding, 0, NULL, NULL, 1);
145
146 return cpp;
147 }
148
/*
 * Human readable names of the include variants, used in debug output only.
 * The table is indexed directly with the include `flags` value.
 * NOTE(review): this assumes flags is 0, 1 or 2 at every call site -- a
 * combination of flags would index past the end; confirm against the flag
 * definitions.
 */
static const char *const include_flavor[] = {
	"include",
	"include next",
	"include once"
};
154
155 void psi_cpp_free(struct psi_cpp **cpp_ptr)
156 {
157 if (*cpp_ptr) {
158 struct psi_cpp *cpp = *cpp_ptr;
159
160 *cpp_ptr = NULL;
161 zend_hash_destroy(&cpp->defs);
162 zend_hash_destroy(&cpp->once);
163 zend_hash_destroy(&cpp->expanding);
164 free(cpp);
165 }
166 }
167
168 static bool psi_cpp_stage1(struct psi_cpp *cpp)
169 {
170 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
171
172 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
173
174 psi_cpp_tokiter_reset(cpp);
175 while (psi_cpp_tokiter_valid(cpp)) {
176 struct psi_token *token = psi_cpp_tokiter_current(cpp);
177
178 /* strip comments and attributes */
179 if (token->type == PSI_T_COMMENT
180 || token->type == PSI_T_CPP_ATTRIBUTE) {
181 psi_cpp_tokiter_del_cur(cpp, true);
182 continue;
183 }
184
185 /* line continuations */
186 if (token->type == PSI_T_EOL) {
187 if (esc) {
188 psi_cpp_tokiter_del_prev(cpp, true);
189 psi_cpp_tokiter_del_cur(cpp, true);
190 esc = false;
191 continue;
192 }
193 } else if (token->type == PSI_T_BSLASH) {
194 esc = !esc;
195 } else {
196 esc = false;
197 }
198
199 /* this whole turf is needed to distinct between:
200 * #define foo (1,2,3)
201 * #define foo(a,b,c)
202 */
203
204 if (token->type == PSI_T_WHITESPACE) {
205 if (name) {
206 name = false;
207 }
208 ws = true;
209 psi_cpp_tokiter_del_cur(cpp, true);
210 continue;
211 }
212
213 switch (token->type) {
214 case PSI_T_EOL:
215 eol = true;
216 break;
217 case PSI_T_HASH:
218 if (eol) {
219 hash = true;
220 eol = false;
221 }
222 break;
223 case PSI_T_DEFINE:
224 if (hash) {
225 define = true;
226 hash = false;
227 }
228 break;
229 case PSI_T_NAME:
230 if (define) {
231 name = true;
232 define = false;
233 }
234 break;
235 case PSI_T_LPAREN:
236 if (name) {
237 name = false;
238 if (!ws) {
239 /* mask special token for parser */
240 struct psi_token *no_ws = psi_token_copy(token);
241
242 no_ws->type = PSI_T_NO_WHITESPACE;
243 zend_string_release(no_ws->text);
244 no_ws->text = psi_string_init_interned("\xA0", 1, 1);
245 psi_cpp_tokiter_add(cpp, no_ws);
246 continue;
247 }
248 }
249 /* no break */
250 default:
251 name = define = hash = eol = false;
252 break;
253 }
254
255 ws = false;
256 psi_cpp_tokiter_add_cur(cpp);
257 psi_cpp_tokiter_next(cpp);
258 }
259
260 return true;
261 }
262
263 static bool psi_cpp_stage2(struct psi_cpp *cpp)
264 {
265 bool is_eol = true, do_expansion = true, skip_paren = false, skip_all = false;
266
267 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
268
269 psi_cpp_tokiter_reset(cpp);
270 while (psi_cpp_tokiter_valid(cpp)) {
271 struct psi_token *current = psi_cpp_tokiter_current(cpp);
272
273 if (current->type == PSI_T_HASH) {
274 if (is_eol) {
275 cpp->do_cpp = true;
276 is_eol = false;
277 }
278 } else if (current->type == PSI_T_EOL) {
279 #if PSI_CPP_DEBUG
280 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
281 #endif
282 is_eol = true;
283 skip_all = false;
284 do_expansion = true;
285 if (!cpp->do_cpp) {
286 psi_cpp_tokiter_del_cur(cpp, true);
287 continue;
288 }
289 } else {
290 is_eol = false;
291
292 if (cpp->do_cpp) {
293 switch (current->type) {
294 case PSI_T_DEFINE:
295 #if PSI_CPP_DEBUG
296 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
297 #endif
298 do_expansion = false;
299 skip_all = true;
300 break;
301 case PSI_T_DEFINED:
302 skip_paren = true;
303 /* no break */
304 case PSI_T_IFDEF:
305 case PSI_T_IFNDEF:
306 case PSI_T_UNDEF:
307 #if PSI_CPP_DEBUG
308 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
309 #endif
310 do_expansion = false;
311 break;
312 case PSI_T_LPAREN:
313
314 if (!skip_all) {
315 if (skip_paren) {
316 skip_paren = false;
317 } else {
318 do_expansion = true;
319 #if PSI_CPP_DEBUG
320 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
321 #endif
322 }
323 }
324 break;
325 case PSI_T_NAME:
326 break;
327 default:
328 do_expansion = !skip_all;
329 #if PSI_CPP_DEBUG
330 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
331 #endif
332 }
333 }
334 }
335
336 if (cpp->skip) {
337 if (!cpp->do_cpp) {
338 #if PSI_CPP_DEBUG
339 PSI_DEBUG_LOCK(cpp->parser,
340 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP skip ");
341 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, current);
342 );
343 #endif
344 psi_cpp_tokiter_del_cur(cpp, true);
345 continue;
346 }
347 }
348
349 if (do_expansion && psi_cpp_defined(cpp, current)) {
350 if (psi_cpp_tokiter_expand(cpp)) {
351 continue;
352 }
353 }
354
355 psi_cpp_tokiter_add_cur(cpp);
356
357 if (cpp->do_cpp && is_eol) {
358 size_t processed = 0;
359 bool parsed;
360
361 cpp->do_cpp = false;
362 parsed = psi_parser_process(cpp->parser, cpp->tokens.exec, &processed);
363 psi_plist_clean(cpp->tokens.exec);
364
365 if (!parsed) {
366 psi_plist_free(cpp->tokens.exec);
367 return false;
368 }
369
370 #if PSI_CPP_DEBUG > 1
371 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_tokiter_dump, cpp);
372 #endif
373 }
374
375 psi_cpp_tokiter_next(cpp);
376 }
377
378 psi_plist_free(cpp->tokens.exec);
379 cpp->tokens.exec = NULL;
380
381 return true;
382 }
383
384 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens,
385 struct psi_token *expanding)
386 {
387 bool parsed = false;
388 struct psi_cpp temp = *cpp;
389
390 cpp->tokens.iter = *tokens;
391 cpp->tokens.next = NULL;
392 cpp->tokens.exec = NULL;
393
394 if (expanding) {
395 zend_hash_add_empty_element(&cpp->expanding, expanding->text);
396 }
397 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
398 parsed = true;
399 }
400 if (expanding) {
401 zend_hash_del(&cpp->expanding, expanding->text);
402 }
403
404 *tokens = cpp->tokens.next;
405 psi_plist_free(cpp->tokens.iter);
406 if (cpp->tokens.exec) {
407 assert(!psi_plist_count(cpp->tokens.exec));
408 psi_plist_free(cpp->tokens.exec);
409 }
410
411 cpp->tokens = temp.tokens;
412 cpp->index = temp.index;
413 cpp->skip = temp.skip;
414 cpp->level = temp.level;
415 cpp->seen = temp.seen;
416 cpp->do_cpp = temp.do_cpp;
417
418 return parsed;
419 }
420
421 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
422 {
423 bool defined = false;
424
425 if (tok->type == PSI_T_NAME) {
426 if (psi_builtin_exists(tok->text)) {
427 defined = true;
428 } else if (!zend_hash_exists(&cpp->expanding, tok->text)) {
429 defined = zend_hash_exists(&cpp->defs, tok->text);
430 }
431 #if PSI_CPP_DEBUG
432 PSI_DEBUG_LOCK(cpp->parser,
433 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP defined -> %s ", defined ? "true" : "false");
434 if (defined) {
435 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
436 if (macro) {
437 PSI_DEBUG_PRINT(cpp->parser, " @ %s:%u ", macro->token->file->val, macro->token->line);
438 }
439 } else {
440 zend_string *key;
441
442 PSI_DEBUG_PRINT(cpp->parser, " expanding=");
443 ZEND_HASH_FOREACH_STR_KEY(&cpp->expanding, key)
444 {
445 PSI_DEBUG_PRINT(cpp->parser, "%s,", key->val);
446 }
447 ZEND_HASH_FOREACH_END();
448 PSI_DEBUG_PRINT(cpp->parser, "\t");
449 }
450 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, tok);
451 );
452 #endif
453 }
454
455 return defined;
456 }
457
458 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
459 {
460 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
461
462 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
463 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
464 "'%s' redefined", decl->token->text->val);
465 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
466 "'%s' previously defined", old->token->text->val);
467 }
468 #if PSI_CPP_DEBUG
469 PSI_DEBUG_LOCK(cpp->parser,
470 if (decl->exp) {
471 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO num_exp -> ");
472 } else {
473 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO decl -> ");
474 }
475 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_macro_decl_dump, decl);
476 PSI_DEBUG_PRINT(cpp->parser, "\n");
477 );
478 #endif
479 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
480 }
481
482 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
483 {
484 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
485 }
486
487 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
488 {
489 struct psi_validate_scope scope = {0};
490
491 scope.cpp = cpp;
492 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
493 return false;
494 }
495 if (!psi_num_exp_get_long(exp->data.num, NULL, cpp)) {
496 return false;
497 }
498 return true;
499 }
500
501 bool psi_cpp_pragma(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
502 {
503 psi_cpp_pragma_func fn;
504
505 fn = zend_hash_find_ptr(&psi_cpp_pragmas, decl->token->text);
506 if (!fn) {
507 return false;
508 }
509
510 return fn(cpp, decl);
511 }
512
513 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
514 {
515 bool parsed = false;
516 char path[PATH_MAX];
517 struct psi_plist *tokens;
518 struct psi_parser_input *include;
519
520 if (!psi_cpp_has_include(cpp, file, flags, path)) {
521 return false;
522 }
523
524 if (flags & PSI_CPP_INCLUDE_ONCE) {
525 if (zend_hash_str_exists(&cpp->once, path, strlen(path))) {
526 return true;
527 }
528 }
529
530 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s opening %s\n",
531 include_flavor[flags], path);
532
533 include = psi_parser_open_file(cpp->parser, path, false);
534 if (!include) {
535 return false;
536 }
537
538 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
539
540 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
541
542 tokens = psi_parser_scan(cpp->parser, include);
543 psi_parser_input_free(&include);
544
545 if (!tokens) {
546 return false;
547 }
548
549 ++cpp->include_level;
550 parsed = psi_cpp_process(cpp, &tokens, NULL);
551 --cpp->include_level;
552
553 if (!parsed) {
554 psi_plist_free(tokens);
555 return false;
556 }
557
558 psi_cpp_tokiter_add_range(cpp, psi_plist_count(tokens), psi_plist_eles(tokens));
559 free(tokens);
560
561 ++cpp->expanded;
562 return true;
563 }
564
565 #ifndef HAVE_EACCESS
566 # define eaccess access
567 #endif
568 bool psi_cpp_has_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags, char *path)
569 {
570 char temp[PATH_MAX];
571
572 if (!path) {
573 path = temp;
574 }
575
576 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
577 /* first try as is, full or relative path */
578 if (file->text->val[0] == '/') {
579 path = file->text->val;
580 } else {
581 char *dir;
582 size_t len;
583
584 strncpy(path, file->file->val, PATH_MAX);
585
586 dir = dirname(path);
587 len = strlen(dir);
588
589 assert(len + file->text->len + 1 < PATH_MAX);
590
591 memmove(path, dir, len);
592 path[len] = '/';
593 memcpy(&(path)[len + 1], file->text->val, file->text->len + 1);
594 }
595
596 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
597 include_flavor[flags], path);
598 if (0 == eaccess(path, R_OK)) {
599 return true;
600 }
601 }
602
603 /* look through search paths */
604 if (file->text->val[0] != '/') {
605 const char *sep;
606 int p_len;
607
608 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
609 if ((sep = strchr(cpp->search, ':'))) {
610 cpp->search = sep + 1;
611 } else {
612 /* point to end of string */
613 cpp->search += strlen(cpp->search);
614 }
615 }
616
617 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
618 cpp->search = PSI_G(search_path);
619 }
620
621 do {
622 int d_len;
623
624 sep = strchr(cpp->search, ':');
625 d_len = sep ? sep - cpp->search : strlen(cpp->search);
626
627 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
628 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
629 include_flavor[flags], path);
630 if (0 == eaccess(path, R_OK)) {
631 return true;
632 }
633 }
634
635 if (sep) {
636 cpp->search = sep + 1;
637 }
638 } while (sep);
639 }
640
641 return false;
642 }