0f048b4209d0cfd01c554a1d61e9feffe098af30
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <libgen.h>
29
30 #include "cpp.h"
31 #include "parser.h"
32
33 #define PSI_CPP_SEARCH
34 #define PSI_CPP_PREDEF
35 #include "php_psi_cpp.h"
36
37 #include "php_psi.h"
38
39 HashTable psi_cpp_defaults;
40
41 PHP_MINIT_FUNCTION(psi_cpp)
42 {
43 struct psi_parser parser;
44 struct psi_parser_input *predef;
45
46 PSI_G(search_path) = pemalloc(strlen(PSI_G(directory)) + strlen(psi_cpp_search) + 1 + 1, 1);
47 sprintf(PSI_G(search_path), "%s:%s", PSI_G(directory), psi_cpp_search);
48
49 if (!psi_parser_init(&parser, NULL, 0)) {
50 return FAILURE;
51 }
52
53 if (!(predef = psi_parser_open_string(&parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
54 psi_parser_dtor(&parser);
55 return FAILURE;
56 }
57
58 if (!psi_parser_parse(&parser, predef)) {
59 psi_parser_input_free(&predef);
60 psi_parser_dtor(&parser);
61 return FAILURE;
62 }
63 psi_parser_input_free(&predef);
64
65 zend_hash_init(&psi_cpp_defaults, 0, NULL, NULL, 1);
66 zend_hash_copy(&psi_cpp_defaults, &parser.preproc->defs, NULL);
67
68 psi_parser_dtor(&parser);
69
70 return SUCCESS;
71 }
72
73 PHP_MSHUTDOWN_FUNCTION(psi_cpp)
74 {
75 struct psi_cpp_macro_decl *macro;
76
77 ZEND_HASH_FOREACH_PTR(&psi_cpp_defaults, macro)
78 {
79 psi_cpp_macro_decl_free(&macro);
80 }
81 ZEND_HASH_FOREACH_END();
82
83 zend_hash_destroy(&psi_cpp_defaults);
84
85 return SUCCESS;
86 }
87
88 static void free_cpp_def(zval *p)
89 {
90 if (Z_TYPE_P(p) == IS_PTR) {
91 struct psi_cpp_macro_decl *macro = Z_PTR_P(p);
92
93 if (!zend_hash_exists(&psi_cpp_defaults, macro->token->text)) {
94 psi_cpp_macro_decl_free(&macro);
95 }
96 }
97 }
98
99 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
100 {
101 struct psi_cpp *cpp = calloc(1, sizeof(*cpp));
102
103 cpp->parser = P;
104 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
105 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
106 zend_hash_copy(&cpp->defs, &psi_cpp_defaults, NULL);
107
108 return cpp;
109 }
110
#if PSI_CPP_DEBUG
/*
 * zend_hash_apply() callback: writes one macro declaration as a "#define"
 * line to file descriptor 2. Entries are always kept.
 */
static int dump_def(zval *p)
{
	struct psi_cpp_macro_decl *macro = Z_PTR_P(p);

	if (!macro) {
		return ZEND_HASH_APPLY_KEEP;
	}

	/* flush buffered stderr output before writing to the raw descriptor */
	fflush(stderr);
	dprintf(2, "PSI: CPP decl -> #define ");
	psi_cpp_macro_decl_dump(2, macro);
	dprintf(2, "\n");

	return ZEND_HASH_APPLY_KEEP;
}
#endif
125
126 void psi_cpp_free(struct psi_cpp **cpp_ptr)
127 {
128 if (*cpp_ptr) {
129 struct psi_cpp *cpp = *cpp_ptr;
130
131 #if PSI_CPP_DEBUG
132 zend_hash_apply(&cpp->defs, dump_def);
133 #endif
134 *cpp_ptr = NULL;
135 zend_hash_destroy(&cpp->defs);
136 zend_hash_destroy(&cpp->once);
137 free(cpp);
138 }
139 }
140
141 static bool psi_cpp_stage1(struct psi_cpp *cpp)
142 {
143 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
144
145 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
146
147 psi_cpp_tokiter_reset(cpp);
148 while (psi_cpp_tokiter_valid(cpp)) {
149 struct psi_token *token = psi_cpp_tokiter_current(cpp);
150
151 /* strip comments and attributes */
152 if (token->type == PSI_T_COMMENT
153 || token->type == PSI_T_CPP_ATTRIBUTE) {
154 psi_cpp_tokiter_del_cur(cpp, true);
155 continue;
156 }
157
158 /* line continuations */
159 if (token->type == PSI_T_EOL) {
160 if (esc) {
161 psi_cpp_tokiter_del_prev(cpp, true);
162 psi_cpp_tokiter_del_cur(cpp, true);
163 esc = false;
164 continue;
165 }
166 } else if (token->type == PSI_T_BSLASH) {
167 esc = !esc;
168 } else {
169 esc = false;
170 }
171
172 /* this whole turf is needed to distinct between:
173 * #define foo (1,2,3)
174 * #define foo(a,b,c)
175 */
176
177 if (token->type == PSI_T_WHITESPACE) {
178 if (name) {
179 name = false;
180 }
181 ws = true;
182 psi_cpp_tokiter_del_cur(cpp, true);
183 continue;
184 }
185
186 switch (token->type) {
187 case PSI_T_EOL:
188 eol = true;
189 break;
190 case PSI_T_HASH:
191 if (eol) {
192 hash = true;
193 eol = false;
194 }
195 break;
196 case PSI_T_DEFINE:
197 if (hash) {
198 define = true;
199 hash = false;
200 }
201 break;
202 case PSI_T_NAME:
203 if (define) {
204 name = true;
205 define = false;
206 }
207 break;
208 case PSI_T_LPAREN:
209 if (name) {
210 name = false;
211 if (!ws) {
212 /* mask special token for parser */
213 struct psi_token *no_ws = psi_token_copy(token);
214
215 no_ws->type = PSI_T_NO_WHITESPACE;
216 zend_string_release(no_ws->text);
217 no_ws->text = zend_string_init_interned("\xA0", 1, 1);
218 psi_cpp_tokiter_add(cpp, no_ws);
219 continue;
220 }
221 }
222 /* no break */
223 default:
224 name = define = hash = eol = false;
225 break;
226 }
227
228 ws = false;
229 psi_cpp_tokiter_add_cur(cpp);
230 psi_cpp_tokiter_next(cpp);
231 }
232
233 return true;
234 }
235
236 static bool psi_cpp_stage2(struct psi_cpp *cpp)
237 {
238 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
239 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
240
241 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
242
243 psi_cpp_tokiter_reset(cpp);
244 while (psi_cpp_tokiter_valid(cpp)) {
245 struct psi_token *current = psi_cpp_tokiter_current(cpp);
246
247 if (current->type == PSI_T_HASH) {
248 if (is_eol) {
249 do_cpp = true;
250 is_eol = false;
251 }
252 } else if (current->type == PSI_T_EOL) {
253 #if PSI_CPP_DEBUG
254 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
255 #endif
256 is_eol = true;
257 skip_all = false;
258 do_expansion = true;
259 if (!do_cpp) {
260 psi_cpp_tokiter_del_cur(cpp, true);
261 continue;
262 }
263 } else {
264 is_eol = false;
265
266 if (do_cpp) {
267 switch (current->type) {
268 case PSI_T_DEFINE:
269 #if PSI_CPP_DEBUG
270 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
271 #endif
272 do_expansion = false;
273 skip_all = true;
274 break;
275 case PSI_T_DEFINED:
276 skip_paren = true;
277 /* no break */
278 case PSI_T_IFDEF:
279 case PSI_T_IFNDEF:
280 case PSI_T_UNDEF:
281 #if PSI_CPP_DEBUG
282 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
283 #endif
284 do_expansion = false;
285 break;
286 case PSI_T_LPAREN:
287
288 if (!skip_all) {
289 if (skip_paren) {
290 skip_paren = false;
291 } else {
292 do_expansion = true;
293 #if PSI_CPP_DEBUG
294 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
295 #endif
296 }
297 }
298 break;
299 case PSI_T_NAME:
300 break;
301 default:
302 do_expansion = !skip_all;
303 #if PSI_CPP_DEBUG
304 fprintf(stderr, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
305 #endif
306 }
307 }
308 }
309
310 if (cpp->skip) {
311 if (!do_cpp) {
312 #if PSI_CPP_DEBUG
313 fprintf(stderr, "PSI: CPP skip ");
314 psi_token_dump(2, current);
315 #endif
316 psi_cpp_tokiter_del_cur(cpp, true);
317 continue;
318 }
319 }
320
321 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
322 bool expanded = false;
323
324 while (psi_cpp_tokiter_expand(cpp)) {
325 expanded = true;
326 }
327 if (expanded) {
328 continue;
329 }
330 }
331
332 if (do_cpp) {
333 parser_tokens = psi_plist_add(parser_tokens, &current);
334
335 if (is_eol) {
336 size_t processed = 0;
337 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
338
339 /* EOL */
340 psi_plist_pop(parser_tokens, NULL);
341 psi_plist_clean(parser_tokens);
342 do_cpp = false;
343
344 if (!parsed) {
345 psi_plist_free(parser_tokens);
346 return false;
347 }
348 } else {
349 /* leave EOLs in the input stream, else we might end up
350 * with a hash not preceded with a new line after include */
351 psi_cpp_tokiter_del_cur(cpp, false);
352 }
353
354 #if PSI_CPP_DEBUG > 1
355 psi_cpp_tokiter_dump(2, cpp);
356 #endif
357
358 continue;
359 }
360
361 psi_cpp_tokiter_add_cur(cpp);
362 psi_cpp_tokiter_next(cpp);
363 }
364
365 psi_plist_free(parser_tokens);
366
367 return true;
368 }
369
370 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
371 {
372 bool parsed = false;
373 struct psi_cpp temp = *cpp;
374
375 cpp->tokens.iter = *tokens;
376 cpp->tokens.next = NULL;
377
378 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
379 parsed = true;
380 }
381
382 if (cpp->tokens.next) {
383 free(cpp->tokens.iter);
384 cpp->tokens.iter = cpp->tokens.next;
385 cpp->tokens.next = NULL;
386 }
387
388 *tokens = cpp->tokens.iter;
389
390 if (temp.tokens.iter) {
391 cpp->tokens.iter = temp.tokens.iter;
392 cpp->tokens.next = temp.tokens.next;
393 cpp->index = temp.index;
394 }
395
396 return parsed;
397 }
398
399 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
400 {
401 bool defined;
402
403 if (tok->type == PSI_T_NAME) {
404 defined = zend_hash_exists(&cpp->defs, tok->text);
405 } else {
406 defined = false;
407 }
408
409 #if PSI_CPP_DEBUG
410 fprintf(stderr, "PSI: CPP defined -> %s ", defined ? "true" : "false");
411 if (defined) {
412 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
413 fprintf(stderr, " @ %s:%u ", macro->token->file->val, macro->token->line);
414 }
415 psi_token_dump(2, tok);
416 #endif
417
418 return defined;
419 }
420
421 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
422 {
423 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
424
425 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
426 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
427 "'%s' redefined", decl->token->text->val);
428 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
429 "'%s' previously defined", old->token->text->val);
430 }
431 #if PSI_CPP_DEBUG
432 if (decl->exp) {
433 fprintf(stderr, "PSI: CPP MACRO num_exp -> %s ", decl->token->text->val);
434 } else {
435 fprintf(stderr, "PSI: CPP MACRO decl -> %s ", decl->token->text->val);
436 }
437 psi_cpp_macro_decl_dump(2, decl);
438 fprintf(stderr, "\n");
439 #endif
440 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
441 }
442
443 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
444 {
445 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
446 }
447
448 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
449 {
450 struct psi_validate_scope scope = {0};
451
452 scope.defs = &cpp->defs;
453 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
454 return false;
455 }
456 if (!psi_num_exp_get_long(exp->data.num, NULL, &cpp->defs)) {
457 return false;
458 }
459 return true;
460 }
461
462 static inline bool try_include(struct psi_cpp *cpp, const char *path, bool *parsed)
463 {
464 struct psi_parser_input *include;
465
466 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include trying %s\n", path);
467
468 include = psi_parser_open_file(cpp->parser, path, false);
469 if (include) {
470 struct psi_plist *tokens;
471
472 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
473
474 tokens = psi_parser_scan(cpp->parser, include);
475 if (tokens) {
476 *parsed = psi_cpp_process(cpp, &tokens);
477
478 if (*parsed) {
479 size_t num_tokens = psi_plist_count(tokens);
480
481 ++cpp->expanded;
482 psi_cpp_tokiter_add_range(cpp, num_tokens, psi_plist_eles(tokens));
483 free(tokens);
484 } else {
485 psi_plist_free(tokens);
486 }
487 }
488 psi_parser_input_free(&include);
489
490 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
491 return true;
492 }
493 return false;
494 }
495
496 static inline void include_path(const struct psi_token *file, char **path)
497 {
498 if (file->text->val[0] == '/') {
499 *path = file->text->val;
500 } else {
501 char *dir;
502 size_t len;
503
504 strncpy(*path, file->file->val, PATH_MAX);
505
506 dir = dirname(*path);
507 len = strlen(dir);
508
509 assert(len + file->text->len + 1 < PATH_MAX);
510
511 memmove(*path, dir, len);
512 (*path)[len] = '/';
513 memcpy(&(*path)[len + 1], file->text->val, file->text->len + 1);
514 }
515 }
516
517 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
518 {
519 bool parsed = false;
520
521 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
522 /* first try as is, full or relative path */
523 char temp[PATH_MAX], *path = temp;
524
525 include_path(file, &path);
526
527 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, strlen(path))) {
528 return true;
529 }
530 if (try_include(cpp, path, &parsed)) {
531 /* found */
532 return parsed;
533 }
534 }
535
536 /* look through search paths */
537 if (file->text->val[0] != '/') {
538 char path[PATH_MAX];
539 const char *sep;
540 int p_len;
541
542 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
543 if ((sep = strchr(cpp->search, ':'))) {
544 cpp->search = sep + 1;
545 } else {
546 /* point to end of string */
547 cpp->search += strlen(cpp->search);
548 }
549 }
550
551 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
552 cpp->search = PSI_G(search_path);
553 }
554
555 do {
556 int d_len;
557
558 sep = strchr(cpp->search, ':');
559 d_len = sep ? sep - cpp->search : strlen(cpp->search);
560
561 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
562 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
563 return true;
564 }
565 if (try_include(cpp, path, &parsed)) {
566 break;
567 }
568 }
569
570 if (sep) {
571 cpp->search = sep + 1;
572 }
573 } while (sep);
574 }
575
576 return parsed;
577 }