basic support for builtins
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <libgen.h>
29
30 #include "cpp.h"
31 #include "parser.h"
32
33 #define PSI_CPP_SEARCH
34 #define PSI_CPP_PREDEF
35 #include "php_psi_cpp.h"
36
37 #include "php_psi.h"
38
39 HashTable psi_cpp_defaults;
40
41 PHP_MINIT_FUNCTION(psi_cpp)
42 {
43 struct psi_parser parser;
44 struct psi_parser_input *predef;
45
46 PSI_G(search_path) = pemalloc(strlen(PSI_G(directory)) + strlen(psi_cpp_search) + 1 + 1, 1);
47 sprintf(PSI_G(search_path), "%s:%s", PSI_G(directory), psi_cpp_search);
48
49 if (!psi_parser_init(&parser, psi_error_wrapper, PSI_SILENT)) {
50 return FAILURE;
51 }
52
53 if (!(predef = psi_parser_open_string(&parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
54 psi_parser_dtor(&parser);
55 return FAILURE;
56 }
57
58 if (!psi_parser_parse(&parser, predef)) {
59 psi_parser_input_free(&predef);
60 psi_parser_dtor(&parser);
61 return FAILURE;
62 }
63 psi_parser_input_free(&predef);
64
65 zend_hash_init(&psi_cpp_defaults, 0, NULL, NULL, 1);
66 zend_hash_copy(&psi_cpp_defaults, &parser.preproc->defs, NULL);
67
68 psi_parser_dtor(&parser);
69
70 return SUCCESS;
71 }
72
73 PHP_MSHUTDOWN_FUNCTION(psi_cpp)
74 {
75 struct psi_cpp_macro_decl *macro;
76
77 ZEND_HASH_FOREACH_PTR(&psi_cpp_defaults, macro)
78 {
79 psi_cpp_macro_decl_free(&macro);
80 }
81 ZEND_HASH_FOREACH_END();
82
83 zend_hash_destroy(&psi_cpp_defaults);
84
85 return SUCCESS;
86 }
87
88 static void free_cpp_def(zval *p)
89 {
90 if (Z_TYPE_P(p) == IS_PTR) {
91 struct psi_cpp_macro_decl *macro = Z_PTR_P(p);
92
93 if (!zend_hash_exists(&psi_cpp_defaults, macro->token->text)) {
94 psi_cpp_macro_decl_free(&macro);
95 }
96 }
97 }
98
99 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
100 {
101 struct psi_cpp *cpp = pecalloc(1, sizeof(*cpp), 1);
102
103 cpp->parser = P;
104 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
105 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
106 zend_hash_copy(&cpp->defs, &psi_cpp_defaults, NULL);
107
108 return cpp;
109 }
110
/*
 * Human readable directive names for debug output, indexed by the include
 * flags value passed to psi_cpp_include()/psi_cpp_has_include().
 * Read-only: neither the table nor the strings are ever modified.
 */
static const char *const include_flavor[] = {
	"include",
	"include next",
	"include once"
};
116
#if PSI_CPP_DEBUG > 1
/*
 * zend_hash_apply() callback: writes one macro declaration to file
 * descriptor 2 as a "#define" line and keeps the element in the table.
 */
static int dump_def(zval *p)
{
	struct psi_cpp_macro_decl *macro = Z_PTR_P(p);

	if (!macro) {
		return ZEND_HASH_APPLY_KEEP;
	}

	dprintf(2, "PSI: CPP decl -> #define ");
	psi_cpp_macro_decl_dump(2, macro);
	dprintf(2, "\n");

	return ZEND_HASH_APPLY_KEEP;
}
#endif
130
131 void psi_cpp_free(struct psi_cpp **cpp_ptr)
132 {
133 if (*cpp_ptr) {
134 struct psi_cpp *cpp = *cpp_ptr;
135
136 #if PSI_CPP_DEBUG > 1
137 zend_hash_apply(&cpp->defs, dump_def);
138 #endif
139 *cpp_ptr = NULL;
140 zend_hash_destroy(&cpp->defs);
141 zend_hash_destroy(&cpp->once);
142 free(cpp);
143 }
144 }
145
146 static bool psi_cpp_stage1(struct psi_cpp *cpp)
147 {
148 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
149
150 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
151
152 psi_cpp_tokiter_reset(cpp);
153 while (psi_cpp_tokiter_valid(cpp)) {
154 struct psi_token *token = psi_cpp_tokiter_current(cpp);
155
156 /* strip comments and attributes */
157 if (token->type == PSI_T_COMMENT
158 || token->type == PSI_T_CPP_ATTRIBUTE) {
159 psi_cpp_tokiter_del_cur(cpp, true);
160 continue;
161 }
162
163 /* line continuations */
164 if (token->type == PSI_T_EOL) {
165 if (esc) {
166 psi_cpp_tokiter_del_prev(cpp, true);
167 psi_cpp_tokiter_del_cur(cpp, true);
168 esc = false;
169 continue;
170 }
171 } else if (token->type == PSI_T_BSLASH) {
172 esc = !esc;
173 } else {
174 esc = false;
175 }
176
177 /* this whole turf is needed to distinct between:
178 * #define foo (1,2,3)
179 * #define foo(a,b,c)
180 */
181
182 if (token->type == PSI_T_WHITESPACE) {
183 if (name) {
184 name = false;
185 }
186 ws = true;
187 psi_cpp_tokiter_del_cur(cpp, true);
188 continue;
189 }
190
191 switch (token->type) {
192 case PSI_T_EOL:
193 eol = true;
194 break;
195 case PSI_T_HASH:
196 if (eol) {
197 hash = true;
198 eol = false;
199 }
200 break;
201 case PSI_T_DEFINE:
202 if (hash) {
203 define = true;
204 hash = false;
205 }
206 break;
207 case PSI_T_NAME:
208 if (define) {
209 name = true;
210 define = false;
211 }
212 break;
213 case PSI_T_LPAREN:
214 if (name) {
215 name = false;
216 if (!ws) {
217 /* mask special token for parser */
218 struct psi_token *no_ws = psi_token_copy(token);
219
220 no_ws->type = PSI_T_NO_WHITESPACE;
221 zend_string_release(no_ws->text);
222 no_ws->text = zend_string_init_interned("\xA0", 1, 1);
223 psi_cpp_tokiter_add(cpp, no_ws);
224 continue;
225 }
226 }
227 /* no break */
228 default:
229 name = define = hash = eol = false;
230 break;
231 }
232
233 ws = false;
234 psi_cpp_tokiter_add_cur(cpp);
235 psi_cpp_tokiter_next(cpp);
236 }
237
238 return true;
239 }
240
241 static bool psi_cpp_stage2(struct psi_cpp *cpp)
242 {
243 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
244 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
245
246 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
247
248 psi_cpp_tokiter_reset(cpp);
249 while (psi_cpp_tokiter_valid(cpp)) {
250 struct psi_token *current = psi_cpp_tokiter_current(cpp);
251
252 if (current->type == PSI_T_HASH) {
253 if (is_eol) {
254 do_cpp = true;
255 is_eol = false;
256 }
257 } else if (current->type == PSI_T_EOL) {
258 #if PSI_CPP_DEBUG
259 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
260 #endif
261 is_eol = true;
262 skip_all = false;
263 do_expansion = true;
264 if (!do_cpp) {
265 psi_cpp_tokiter_del_cur(cpp, true);
266 continue;
267 }
268 } else {
269 is_eol = false;
270
271 if (do_cpp) {
272 switch (current->type) {
273 case PSI_T_DEFINE:
274 #if PSI_CPP_DEBUG
275 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
276 #endif
277 do_expansion = false;
278 skip_all = true;
279 break;
280 case PSI_T_DEFINED:
281 skip_paren = true;
282 /* no break */
283 case PSI_T_IFDEF:
284 case PSI_T_IFNDEF:
285 case PSI_T_UNDEF:
286 #if PSI_CPP_DEBUG
287 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
288 #endif
289 do_expansion = false;
290 break;
291 case PSI_T_LPAREN:
292
293 if (!skip_all) {
294 if (skip_paren) {
295 skip_paren = false;
296 } else {
297 do_expansion = true;
298 #if PSI_CPP_DEBUG
299 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
300 #endif
301 }
302 }
303 break;
304 case PSI_T_NAME:
305 break;
306 default:
307 do_expansion = !skip_all;
308 #if PSI_CPP_DEBUG
309 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
310 #endif
311 }
312 }
313 }
314
315 if (cpp->skip) {
316 if (!do_cpp) {
317 #if PSI_CPP_DEBUG
318 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP skip ");
319 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, current);
320 #endif
321 psi_cpp_tokiter_del_cur(cpp, true);
322 continue;
323 }
324 }
325
326 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
327 bool expanded = false;
328
329 while (psi_cpp_tokiter_expand(cpp)) {
330 expanded = true;
331 }
332 if (expanded) {
333 continue;
334 }
335 }
336
337 if (do_cpp) {
338 parser_tokens = psi_plist_add(parser_tokens, &current);
339
340 if (is_eol) {
341 size_t processed = 0;
342 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
343
344 /* EOL */
345 psi_plist_pop(parser_tokens, NULL);
346 psi_plist_clean(parser_tokens);
347 do_cpp = false;
348
349 if (!parsed) {
350 psi_plist_free(parser_tokens);
351 return false;
352 }
353 } else {
354 /* leave EOLs in the input stream, else we might end up
355 * with a hash not preceded with a new line after include */
356 psi_cpp_tokiter_del_cur(cpp, false);
357 }
358
359 #if PSI_CPP_DEBUG > 1
360 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_tokiter_dump, cpp);
361 #endif
362
363 continue;
364 }
365
366 psi_cpp_tokiter_add_cur(cpp);
367 psi_cpp_tokiter_next(cpp);
368 }
369
370 psi_plist_free(parser_tokens);
371
372 return true;
373 }
374
375 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
376 {
377 bool parsed = false;
378 struct psi_cpp temp = *cpp; cpp->level = temp.level;
379
380 cpp->tokens.iter = *tokens;
381 cpp->tokens.next = NULL;
382
383 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
384 parsed = true;
385 }
386
387 if (cpp->tokens.next) {
388 free(cpp->tokens.iter);
389 cpp->tokens.iter = cpp->tokens.next;
390 cpp->tokens.next = NULL;
391 }
392
393 *tokens = cpp->tokens.iter;
394
395 if (temp.tokens.iter) {
396 cpp->tokens.iter = temp.tokens.iter;
397 cpp->tokens.next = temp.tokens.next;
398 }
399 cpp->index = temp.index;
400 cpp->skip = temp.skip;
401 cpp->level = temp.level;
402 cpp->seen = temp.seen;
403
404 return parsed;
405 }
406
407 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
408 {
409 bool defined;
410
411 if (tok->type == PSI_T_NAME) {
412 defined = zend_hash_exists(&cpp->defs, tok->text)
413 || psi_builtin_exists(tok->text);
414 } else {
415 defined = false;
416 }
417
418 #if PSI_CPP_DEBUG
419 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP defined -> %s ", defined ? "true" : "false");
420 if (defined) {
421 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
422 if (macro) {
423 PSI_DEBUG_PRINT(cpp->parser, " @ %s:%u ", macro->token->file->val, macro->token->line);
424 }
425 }
426 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, tok);
427 #endif
428
429 return defined;
430 }
431
432 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
433 {
434 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
435
436 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
437 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
438 "'%s' redefined", decl->token->text->val);
439 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
440 "'%s' previously defined", old->token->text->val);
441 }
442 #if PSI_CPP_DEBUG
443 if (decl->exp) {
444 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO num_exp -> ");
445 } else {
446 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO decl -> ");
447 }
448 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_macro_decl_dump, decl);
449 PSI_DEBUG_PRINT(cpp->parser, "\n");
450 #endif
451 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
452 }
453
454 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
455 {
456 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
457 }
458
459 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
460 {
461 struct psi_validate_scope scope = {0};
462
463 scope.cpp = cpp;
464 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
465 return false;
466 }
467 if (!psi_num_exp_get_long(exp->data.num, NULL, cpp)) {
468 return false;
469 }
470 return true;
471 }
472
473 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
474 {
475 bool parsed = false;
476 char path[PATH_MAX];
477 struct psi_plist *tokens;
478 struct psi_parser_input *include;
479
480 if (!psi_cpp_has_include(cpp, file, flags, path)) {
481 return false;
482 }
483
484 if (flags & PSI_CPP_INCLUDE_ONCE) {
485 if (zend_hash_str_exists(&cpp->once, path, strlen(path))) {
486 return true;
487 }
488 }
489
490 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s opening %s\n",
491 include_flavor[flags], path);
492
493 include = psi_parser_open_file(cpp->parser, path, false);
494 if (!include) {
495 return false;
496 }
497
498 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
499
500 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
501
502 tokens = psi_parser_scan(cpp->parser, include);
503 psi_parser_input_free(&include);
504
505 if (!tokens) {
506 return false;
507 }
508
509 parsed = psi_cpp_process(cpp, &tokens);
510 if (!parsed) {
511 psi_plist_free(tokens);
512 return false;
513 }
514
515 psi_cpp_tokiter_add_range(cpp, psi_plist_count(tokens), psi_plist_eles(tokens));
516 free(tokens);
517
518 ++cpp->expanded;
519 return true;
520 }
521
522 #ifndef HAVE_EACCESS
523 # define eaccess access
524 #endif
525 bool psi_cpp_has_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags, char *path)
526 {
527 char temp[PATH_MAX];
528
529 if (!path) {
530 path = temp;
531 }
532
533 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
534 /* first try as is, full or relative path */
535 if (file->text->val[0] == '/') {
536 path = file->text->val;
537 } else {
538 char *dir;
539 size_t len;
540
541 strncpy(path, file->file->val, PATH_MAX);
542
543 dir = dirname(path);
544 len = strlen(dir);
545
546 assert(len + file->text->len + 1 < PATH_MAX);
547
548 memmove(path, dir, len);
549 path[len] = '/';
550 memcpy(&(path)[len + 1], file->text->val, file->text->len + 1);
551 }
552
553 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
554 include_flavor[flags], path);
555 if (0 == eaccess(path, R_OK)) {
556 return true;
557 }
558 }
559
560 /* look through search paths */
561 if (file->text->val[0] != '/') {
562 const char *sep;
563 int p_len;
564
565 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
566 if ((sep = strchr(cpp->search, ':'))) {
567 cpp->search = sep + 1;
568 } else {
569 /* point to end of string */
570 cpp->search += strlen(cpp->search);
571 }
572 }
573
574 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
575 cpp->search = PSI_G(search_path);
576 }
577
578 do {
579 int d_len;
580
581 sep = strchr(cpp->search, ':');
582 d_len = sep ? sep - cpp->search : strlen(cpp->search);
583
584 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
585 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
586 include_flavor[flags], path);
587 if (0 == eaccess(path, R_OK)) {
588 return true;
589 }
590 }
591
592 if (sep) {
593 cpp->search = sep + 1;
594 }
595 } while (sep);
596 }
597
598 return false;
599 }