eaf43f800dce273f81c9724e73bb8236777d9d23
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include "php_psi.h"
33
34 #include <libgen.h>
35
36 #include "cpp.h"
37 #include "parser.h"
38 #include "debug.h"
39
40 #define PSI_CPP_SEARCH
41 #define PSI_CPP_PREDEF
42 #include "php_psi_predef.h"
43
/*
 * Macro declarations produced by parsing the predefined source
 * (psi_cpp_predef) during module startup.  The table is copied into every
 * preprocessor created by psi_cpp_init(); the declarations themselves are
 * released in the psi_cpp module shutdown handler.
 */
HashTable psi_cpp_defaults;
45
46 PHP_MINIT_FUNCTION(psi_cpp);
47 PHP_MINIT_FUNCTION(psi_cpp)
48 {
49 struct psi_parser parser;
50 struct psi_parser_input *predef;
51
52 PSI_G(search_path) = pemalloc(strlen(PSI_G(directory)) + strlen(psi_cpp_search) + 1 + 1, 1);
53 sprintf(PSI_G(search_path), "%s:%s", PSI_G(directory), psi_cpp_search);
54
55 if (!psi_parser_init(&parser, psi_error_wrapper, PSI_SILENT)) {
56 return FAILURE;
57 }
58
59 if (!(predef = psi_parser_open_string(&parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
60 psi_parser_dtor(&parser);
61 return FAILURE;
62 }
63
64 if (!psi_parser_parse(&parser, predef)) {
65 psi_parser_input_free(&predef);
66 psi_parser_dtor(&parser);
67 return FAILURE;
68 }
69 psi_parser_input_free(&predef);
70
71 zend_hash_init(&psi_cpp_defaults, 0, NULL, NULL, 1);
72 zend_hash_copy(&psi_cpp_defaults, &parser.preproc->defs, NULL);
73
74 psi_parser_dtor(&parser);
75
76 return SUCCESS;
77 }
78
79 PHP_MSHUTDOWN_FUNCTION(psi_cpp);
80 PHP_MSHUTDOWN_FUNCTION(psi_cpp)
81 {
82 struct psi_cpp_macro_decl *macro;
83
84 ZEND_HASH_FOREACH_PTR(&psi_cpp_defaults, macro)
85 {
86 psi_cpp_macro_decl_free(&macro);
87 }
88 ZEND_HASH_FOREACH_END();
89
90 zend_hash_destroy(&psi_cpp_defaults);
91
92 return SUCCESS;
93 }
94
95 static void free_cpp_def(zval *p)
96 {
97 if (Z_TYPE_P(p) == IS_PTR) {
98 struct psi_cpp_macro_decl *macro = Z_PTR_P(p);
99
100 if (!zend_hash_exists(&psi_cpp_defaults, macro->token->text)) {
101 psi_cpp_macro_decl_free(&macro);
102 }
103 }
104 }
105
106 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
107 {
108 struct psi_cpp *cpp = pecalloc(1, sizeof(*cpp), 1);
109
110 cpp->parser = P;
111 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
112 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
113 zend_hash_copy(&cpp->defs, &psi_cpp_defaults, NULL);
114
115 return cpp;
116 }
117
/* human readable directive names for debug output, indexed by include flags */
static char *include_flavor[] = {
	"include",
	"include next",
	"include once",
};
123
124 void psi_cpp_free(struct psi_cpp **cpp_ptr)
125 {
126 if (*cpp_ptr) {
127 struct psi_cpp *cpp = *cpp_ptr;
128
129 *cpp_ptr = NULL;
130 zend_hash_destroy(&cpp->defs);
131 zend_hash_destroy(&cpp->once);
132 free(cpp);
133 }
134 }
135
136 static bool psi_cpp_stage1(struct psi_cpp *cpp)
137 {
138 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
139
140 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
141
142 psi_cpp_tokiter_reset(cpp);
143 while (psi_cpp_tokiter_valid(cpp)) {
144 struct psi_token *token = psi_cpp_tokiter_current(cpp);
145
146 /* strip comments and attributes */
147 if (token->type == PSI_T_COMMENT
148 || token->type == PSI_T_CPP_ATTRIBUTE) {
149 psi_cpp_tokiter_del_cur(cpp, true);
150 continue;
151 }
152
153 /* line continuations */
154 if (token->type == PSI_T_EOL) {
155 if (esc) {
156 psi_cpp_tokiter_del_prev(cpp, true);
157 psi_cpp_tokiter_del_cur(cpp, true);
158 esc = false;
159 continue;
160 }
161 } else if (token->type == PSI_T_BSLASH) {
162 esc = !esc;
163 } else {
164 esc = false;
165 }
166
167 /* this whole turf is needed to distinct between:
168 * #define foo (1,2,3)
169 * #define foo(a,b,c)
170 */
171
172 if (token->type == PSI_T_WHITESPACE) {
173 if (name) {
174 name = false;
175 }
176 ws = true;
177 psi_cpp_tokiter_del_cur(cpp, true);
178 continue;
179 }
180
181 switch (token->type) {
182 case PSI_T_EOL:
183 eol = true;
184 break;
185 case PSI_T_HASH:
186 if (eol) {
187 hash = true;
188 eol = false;
189 }
190 break;
191 case PSI_T_DEFINE:
192 if (hash) {
193 define = true;
194 hash = false;
195 }
196 break;
197 case PSI_T_NAME:
198 if (define) {
199 name = true;
200 define = false;
201 }
202 break;
203 case PSI_T_LPAREN:
204 if (name) {
205 name = false;
206 if (!ws) {
207 /* mask special token for parser */
208 struct psi_token *no_ws = psi_token_copy(token);
209
210 no_ws->type = PSI_T_NO_WHITESPACE;
211 zend_string_release(no_ws->text);
212 no_ws->text = psi_string_init_interned("\xA0", 1, 1);
213 psi_cpp_tokiter_add(cpp, no_ws);
214 continue;
215 }
216 }
217 /* no break */
218 default:
219 name = define = hash = eol = false;
220 break;
221 }
222
223 ws = false;
224 psi_cpp_tokiter_add_cur(cpp);
225 psi_cpp_tokiter_next(cpp);
226 }
227
228 return true;
229 }
230
/*
 * Second preprocessor pass over the token stream.
 *
 * Tokens of a directive line (a '#' seen at the start of a line, up to and
 * including the following EOL) are collected in parser_tokens and handed to
 * psi_parser_process() once the EOL is reached.  Outside of directives,
 * EOL tokens are removed, tokens are dropped while cpp->skip is set, and
 * defined macro names are expanded.
 *
 * Returns false as soon as psi_parser_process() fails for a directive,
 * true otherwise.
 */
static bool psi_cpp_stage2(struct psi_cpp *cpp)
{
	/* tokens of the directive currently being collected */
	struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
	/*
	 * is_eol:       previous token was an EOL (or nothing was seen yet)
	 * do_cpp:       currently inside a directive line
	 * do_expansion: NAME tokens may be macro-expanded
	 * skip_paren:   set by `defined`; the next '(' must not re-enable expansion
	 * skip_all:     set by `define`; expansion stays off until the next EOL
	 */
	bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;

	PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");

	psi_cpp_tokiter_reset(cpp);
	while (psi_cpp_tokiter_valid(cpp)) {
		struct psi_token *current = psi_cpp_tokiter_current(cpp);

		if (current->type == PSI_T_HASH) {
			/* only a '#' at the start of a line opens a directive */
			if (is_eol) {
				do_cpp = true;
				is_eol = false;
			}
		} else if (current->type == PSI_T_EOL) {
#if PSI_CPP_DEBUG
			PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
#endif
			/* end of line resets the per-line expansion state */
			is_eol = true;
			skip_all = false;
			do_expansion = true;
			if (!do_cpp) {
				/* EOLs outside of directives are not passed on */
				psi_cpp_tokiter_del_cur(cpp, true);
				continue;
			}
		} else {
			is_eol = false;

			if (do_cpp) {
				/* inside a directive: decide whether names may be expanded */
				switch (current->type) {
				case PSI_T_DEFINE:
#if PSI_CPP_DEBUG
					PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
#endif
					/* nothing on a #define line is expanded */
					do_expansion = false;
					skip_all = true;
					break;
				case PSI_T_DEFINED:
					/* the '(' of defined(...) must keep expansion disabled */
					skip_paren = true;
					/* no break */
				case PSI_T_IFDEF:
				case PSI_T_IFNDEF:
				case PSI_T_UNDEF:
#if PSI_CPP_DEBUG
					PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
#endif
					/* the name operand of these must stay unexpanded */
					do_expansion = false;
					break;
				case PSI_T_LPAREN:

					if (!skip_all) {
						if (skip_paren) {
							/* consume the one-shot flag set by `defined` */
							skip_paren = false;
						} else {
							do_expansion = true;
#if PSI_CPP_DEBUG
							PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
#endif
						}
					}
					break;
				case PSI_T_NAME:
					/* a name leaves the expansion state untouched */
					break;
				default:
					/* any other token re-enables expansion unless on a #define line */
					do_expansion = !skip_all;
#if PSI_CPP_DEBUG
					PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
#endif
				}
			}
		}

		/* while cpp->skip is set, only directive tokens are kept */
		if (cpp->skip) {
			if (!do_cpp) {
#if PSI_CPP_DEBUG
				PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP skip ");
				PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, current);
#endif
				psi_cpp_tokiter_del_cur(cpp, true);
				continue;
			}
		}

		if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
			bool expanded = false;

			/* expand repeatedly until psi_cpp_tokiter_expand() reports no change */
			while (psi_cpp_tokiter_expand(cpp)) {
				expanded = true;
			}
			if (expanded) {
				/* re-examine whatever is at the current position now */
				continue;
			}
		}

		if (do_cpp) {
			/* collect the directive token for the parser */
			parser_tokens = psi_plist_add(parser_tokens, &current);

			if (is_eol) {
				/* the directive line is complete: hand it to the parser */
				size_t processed = 0;
				bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);

				/* EOL */
				/* take the EOL back off the list before the list is cleaned;
				 * presumably so that the token, which is still in the input
				 * stream, is not freed here -- TODO confirm */
				psi_plist_pop(parser_tokens, NULL);
				psi_plist_clean(parser_tokens);
				do_cpp = false;

				if (!parsed) {
					psi_plist_free(parser_tokens);
					return false;
				}
			} else {
				/* leave EOLs in the input stream, else we might end up
				 * with a hash not preceded with a new line after include */
				/* non-EOL directive tokens are removed from the stream; the
				 * `false` argument presumably keeps the token alive for
				 * parser_tokens -- TODO confirm */
				psi_cpp_tokiter_del_cur(cpp, false);
			}

#if PSI_CPP_DEBUG > 1
			PSI_DEBUG_DUMP(cpp->parser, psi_cpp_tokiter_dump, cpp);
#endif

			continue;
		}

		/* ordinary token: keep it and move on */
		psi_cpp_tokiter_add_cur(cpp);
		psi_cpp_tokiter_next(cpp);
	}

	psi_plist_free(parser_tokens);

	return true;
}
364
365 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
366 {
367 bool parsed = false;
368 struct psi_cpp temp = *cpp; cpp->level = temp.level;
369
370 cpp->tokens.iter = *tokens;
371 cpp->tokens.next = NULL;
372
373 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
374 parsed = true;
375 }
376
377 if (cpp->tokens.next) {
378 free(cpp->tokens.iter);
379 cpp->tokens.iter = cpp->tokens.next;
380 cpp->tokens.next = NULL;
381 }
382
383 *tokens = cpp->tokens.iter;
384
385 if (temp.tokens.iter) {
386 cpp->tokens.iter = temp.tokens.iter;
387 cpp->tokens.next = temp.tokens.next;
388 }
389 cpp->index = temp.index;
390 cpp->skip = temp.skip;
391 cpp->level = temp.level;
392 cpp->seen = temp.seen;
393
394 return parsed;
395 }
396
397 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
398 {
399 bool defined;
400
401 if (tok->type == PSI_T_NAME) {
402 defined = zend_hash_exists(&cpp->defs, tok->text)
403 || psi_builtin_exists(tok->text);
404 } else {
405 defined = false;
406 }
407
408 #if PSI_CPP_DEBUG
409 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP defined -> %s ", defined ? "true" : "false");
410 if (defined) {
411 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
412 if (macro) {
413 PSI_DEBUG_PRINT(cpp->parser, " @ %s:%u ", macro->token->file->val, macro->token->line);
414 }
415 }
416 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, tok);
417 #endif
418
419 return defined;
420 }
421
422 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
423 {
424 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
425
426 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
427 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
428 "'%s' redefined", decl->token->text->val);
429 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
430 "'%s' previously defined", old->token->text->val);
431 }
432 #if PSI_CPP_DEBUG
433 if (decl->exp) {
434 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO num_exp -> ");
435 } else {
436 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO decl -> ");
437 }
438 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_macro_decl_dump, decl);
439 PSI_DEBUG_PRINT(cpp->parser, "\n");
440 #endif
441 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
442 }
443
444 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
445 {
446 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
447 }
448
449 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
450 {
451 struct psi_validate_scope scope = {0};
452
453 scope.cpp = cpp;
454 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
455 return false;
456 }
457 if (!psi_num_exp_get_long(exp->data.num, NULL, cpp)) {
458 return false;
459 }
460 return true;
461 }
462
463 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
464 {
465 bool parsed = false;
466 char path[PATH_MAX];
467 struct psi_plist *tokens;
468 struct psi_parser_input *include;
469
470 if (!psi_cpp_has_include(cpp, file, flags, path)) {
471 return false;
472 }
473
474 if (flags & PSI_CPP_INCLUDE_ONCE) {
475 if (zend_hash_str_exists(&cpp->once, path, strlen(path))) {
476 return true;
477 }
478 }
479
480 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s opening %s\n",
481 include_flavor[flags], path);
482
483 include = psi_parser_open_file(cpp->parser, path, false);
484 if (!include) {
485 return false;
486 }
487
488 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
489
490 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
491
492 tokens = psi_parser_scan(cpp->parser, include);
493 psi_parser_input_free(&include);
494
495 if (!tokens) {
496 return false;
497 }
498
499 parsed = psi_cpp_process(cpp, &tokens);
500 if (!parsed) {
501 psi_plist_free(tokens);
502 return false;
503 }
504
505 psi_cpp_tokiter_add_range(cpp, psi_plist_count(tokens), psi_plist_eles(tokens));
506 free(tokens);
507
508 ++cpp->expanded;
509 return true;
510 }
511
512 #ifndef HAVE_EACCESS
513 # define eaccess access
514 #endif
515 bool psi_cpp_has_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags, char *path)
516 {
517 char temp[PATH_MAX];
518
519 if (!path) {
520 path = temp;
521 }
522
523 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
524 /* first try as is, full or relative path */
525 if (file->text->val[0] == '/') {
526 path = file->text->val;
527 } else {
528 char *dir;
529 size_t len;
530
531 strncpy(path, file->file->val, PATH_MAX);
532
533 dir = dirname(path);
534 len = strlen(dir);
535
536 assert(len + file->text->len + 1 < PATH_MAX);
537
538 memmove(path, dir, len);
539 path[len] = '/';
540 memcpy(&(path)[len + 1], file->text->val, file->text->len + 1);
541 }
542
543 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
544 include_flavor[flags], path);
545 if (0 == eaccess(path, R_OK)) {
546 return true;
547 }
548 }
549
550 /* look through search paths */
551 if (file->text->val[0] != '/') {
552 const char *sep;
553 int p_len;
554
555 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
556 if ((sep = strchr(cpp->search, ':'))) {
557 cpp->search = sep + 1;
558 } else {
559 /* point to end of string */
560 cpp->search += strlen(cpp->search);
561 }
562 }
563
564 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
565 cpp->search = PSI_G(search_path);
566 }
567
568 do {
569 int d_len;
570
571 sep = strchr(cpp->search, ':');
572 d_len = sep ? sep - cpp->search : strlen(cpp->search);
573
574 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
575 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
576 include_flavor[flags], path);
577 if (0 == eaccess(path, R_OK)) {
578 return true;
579 }
580 }
581
582 if (sep) {
583 cpp->search = sep + 1;
584 }
585 } while (sep);
586 }
587
588 return false;
589 }