05efac150ebaed144a05750338f6b49119e2b7e9
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <libgen.h>
29
30 #include "cpp.h"
31 #include "parser.h"
32
33 #define PSI_CPP_SEARCH
34 #define PSI_CPP_PREDEF
35 #include "php_psi_cpp.h"
36
37 #include "php_psi.h"
38
39 HashTable psi_cpp_defaults;
40
41 PHP_MINIT_FUNCTION(psi_cpp)
42 {
43 struct psi_parser parser;
44 struct psi_parser_input *predef;
45
46 if (!psi_parser_init(&parser, NULL, 0)) {
47 return FAILURE;
48 }
49
50 if (!(predef = psi_parser_open_string(&parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
51 psi_parser_dtor(&parser);
52 return FAILURE;
53 }
54
55 if (!psi_parser_parse(&parser, predef)) {
56 psi_parser_input_free(&predef);
57 psi_parser_dtor(&parser);
58 return FAILURE;
59 }
60 psi_parser_input_free(&predef);
61
62 zend_hash_init(&psi_cpp_defaults, 0, NULL, NULL, 1);
63 zend_hash_copy(&psi_cpp_defaults, &parser.preproc->defs, NULL);
64
65 psi_parser_dtor(&parser);
66
67 return SUCCESS;
68 }
69
70 PHP_MSHUTDOWN_FUNCTION(psi_cpp)
71 {
72 struct psi_cpp_macro_decl *macro;
73
74 ZEND_HASH_FOREACH_PTR(&psi_cpp_defaults, macro)
75 {
76 psi_cpp_macro_decl_free(&macro);
77 }
78 ZEND_HASH_FOREACH_END();
79
80 zend_hash_destroy(&psi_cpp_defaults);
81
82 return SUCCESS;
83 }
84
85 static void free_cpp_def(zval *p)
86 {
87 if (Z_TYPE_P(p) == IS_PTR) {
88 struct psi_cpp_macro_decl *macro = Z_PTR_P(p);
89
90 if (!zend_hash_exists(&psi_cpp_defaults, macro->token->text)) {
91 psi_cpp_macro_decl_free(&macro);
92 }
93 }
94 }
95
96 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
97 {
98 struct psi_cpp *cpp = calloc(1, sizeof(*cpp));
99
100 cpp->parser = P;
101 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
102 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
103 zend_hash_copy(&cpp->defs, &psi_cpp_defaults, NULL);
104
105 return cpp;
106 }
107
#if PSI_CPP_DEBUG
/**
 * zend_hash_apply() callback (debug builds only): prints the macro
 * declaration stored in @p as a "#define" line to file descriptor 2.
 *
 * @return always ZEND_HASH_APPLY_KEEP, i.e. the entry stays in the table.
 */
static int dump_def(zval *p)
{
	struct psi_cpp_macro_decl *macro = Z_PTR_P(p);

	if (!macro) {
		return ZEND_HASH_APPLY_KEEP;
	}

	/* flush buffered stderr output first, dprintf() writes to the fd directly */
	fflush(stderr);
	dprintf(2, "PSI: CPP decl -> #define ");
	psi_cpp_macro_decl_dump(2, macro);
	dprintf(2, "\n");

	return ZEND_HASH_APPLY_KEEP;
}
#endif
122
123 void psi_cpp_free(struct psi_cpp **cpp_ptr)
124 {
125 if (*cpp_ptr) {
126 struct psi_cpp *cpp = *cpp_ptr;
127
128 #if PSI_CPP_DEBUG
129 zend_hash_apply(&cpp->defs, dump_def);
130 #endif
131 *cpp_ptr = NULL;
132 zend_hash_destroy(&cpp->defs);
133 zend_hash_destroy(&cpp->once);
134 free(cpp);
135 }
136 }
137
138 static bool psi_cpp_stage1(struct psi_cpp *cpp)
139 {
140 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
141
142 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
143
144 psi_cpp_tokiter_reset(cpp);
145 while (psi_cpp_tokiter_valid(cpp)) {
146 struct psi_token *token = psi_cpp_tokiter_current(cpp);
147
148 /* strip comments and attributes */
149 if (token->type == PSI_T_COMMENT
150 || token->type == PSI_T_CPP_ATTRIBUTE) {
151 psi_cpp_tokiter_del_cur(cpp, true);
152 continue;
153 }
154
155 /* line continuations */
156 if (token->type == PSI_T_EOL) {
157 if (esc) {
158 psi_cpp_tokiter_del_prev(cpp, true);
159 psi_cpp_tokiter_del_cur(cpp, true);
160 esc = false;
161 continue;
162 }
163 } else if (token->type == PSI_T_BSLASH) {
164 esc = !esc;
165 } else {
166 esc = false;
167 }
168
169 /* this whole turf is needed to distinct between:
170 * #define foo (1,2,3)
171 * #define foo(a,b,c)
172 */
173
174 if (token->type == PSI_T_WHITESPACE) {
175 if (name) {
176 name = false;
177 }
178 ws = true;
179 psi_cpp_tokiter_del_cur(cpp, true);
180 continue;
181 }
182
183 switch (token->type) {
184 case PSI_T_EOL:
185 eol = true;
186 break;
187 case PSI_T_HASH:
188 if (eol) {
189 hash = true;
190 eol = false;
191 }
192 break;
193 case PSI_T_DEFINE:
194 if (hash) {
195 define = true;
196 hash = false;
197 }
198 break;
199 case PSI_T_NAME:
200 if (define) {
201 name = true;
202 define = false;
203 }
204 break;
205 case PSI_T_LPAREN:
206 if (name) {
207 name = false;
208 if (!ws) {
209 /* mask special token for parser */
210 struct psi_token *no_ws = psi_token_copy(token);
211
212 no_ws->type = PSI_T_NO_WHITESPACE;
213 zend_string_release(no_ws->text);
214 no_ws->text = zend_string_init_interned("\xA0", 1, 1);
215 psi_cpp_tokiter_add(cpp, no_ws);
216 continue;
217 }
218 }
219 /* no break */
220 default:
221 name = define = hash = eol = false;
222 break;
223 }
224
225 ws = false;
226 psi_cpp_tokiter_add_cur(cpp);
227 psi_cpp_tokiter_next(cpp);
228 }
229
230 return true;
231 }
232
233 static bool psi_cpp_stage2(struct psi_cpp *cpp)
234 {
235 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
236 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
237
238 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
239
240 psi_cpp_tokiter_reset(cpp);
241 while (psi_cpp_tokiter_valid(cpp)) {
242 struct psi_token *current = psi_cpp_tokiter_current(cpp);
243
244 if (current->type == PSI_T_HASH) {
245 if (is_eol) {
246 do_cpp = true;
247 is_eol = false;
248 }
249 } else if (current->type == PSI_T_EOL) {
250 #if PSI_CPP_DEBUG
251 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
252 #endif
253 is_eol = true;
254 skip_all = false;
255 do_expansion = true;
256 if (!do_cpp) {
257 psi_cpp_tokiter_del_cur(cpp, true);
258 continue;
259 }
260 } else {
261 is_eol = false;
262
263 if (do_cpp) {
264 switch (current->type) {
265 case PSI_T_DEFINE:
266 #if PSI_CPP_DEBUG
267 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
268 #endif
269 do_expansion = false;
270 skip_all = true;
271 break;
272 case PSI_T_DEFINED:
273 skip_paren = true;
274 /* no break */
275 case PSI_T_IFDEF:
276 case PSI_T_IFNDEF:
277 case PSI_T_UNDEF:
278 #if PSI_CPP_DEBUG
279 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
280 #endif
281 do_expansion = false;
282 break;
283 case PSI_T_LPAREN:
284
285 if (!skip_all) {
286 if (skip_paren) {
287 skip_paren = false;
288 } else {
289 do_expansion = true;
290 #if PSI_CPP_DEBUG
291 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
292 #endif
293 }
294 }
295 break;
296 case PSI_T_NAME:
297 break;
298 default:
299 do_expansion = !skip_all;
300 #if PSI_CPP_DEBUG
301 fprintf(stderr, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
302 #endif
303 }
304 }
305 }
306
307 if (cpp->skip) {
308 if (!do_cpp) {
309 #if PSI_CPP_DEBUG
310 fprintf(stderr, "PSI: CPP skip ");
311 psi_token_dump(2, current);
312 #endif
313 psi_cpp_tokiter_del_cur(cpp, true);
314 continue;
315 }
316 }
317
318 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
319 bool expanded = false;
320
321 while (psi_cpp_tokiter_expand(cpp)) {
322 expanded = true;
323 }
324 if (expanded) {
325 continue;
326 }
327 }
328
329 if (do_cpp) {
330 parser_tokens = psi_plist_add(parser_tokens, &current);
331
332 if (is_eol) {
333 size_t processed = 0;
334 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
335
336 /* EOL */
337 psi_plist_pop(parser_tokens, NULL);
338 psi_plist_clean(parser_tokens);
339 do_cpp = false;
340
341 if (!parsed) {
342 psi_plist_free(parser_tokens);
343 return false;
344 }
345 } else {
346 /* leave EOLs in the input stream, else we might end up
347 * with a hash not preceded with a new line after include */
348 psi_cpp_tokiter_del_cur(cpp, false);
349 }
350
351 #if PSI_CPP_DEBUG > 1
352 psi_cpp_tokiter_dump(2, cpp);
353 #endif
354
355 continue;
356 }
357
358 psi_cpp_tokiter_add_cur(cpp);
359 psi_cpp_tokiter_next(cpp);
360 }
361
362 psi_plist_free(parser_tokens);
363
364 return true;
365 }
366
367 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
368 {
369 bool parsed = false;
370 struct psi_cpp temp = *cpp;
371
372 cpp->tokens.iter = *tokens;
373 cpp->tokens.next = NULL;
374
375 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
376 parsed = true;
377 }
378
379 if (cpp->tokens.next) {
380 free(cpp->tokens.iter);
381 cpp->tokens.iter = cpp->tokens.next;
382 cpp->tokens.next = NULL;
383 }
384
385 *tokens = cpp->tokens.iter;
386
387 if (temp.tokens.iter) {
388 cpp->tokens.iter = temp.tokens.iter;
389 cpp->tokens.next = temp.tokens.next;
390 cpp->index = temp.index;
391 }
392
393 return parsed;
394 }
395
396 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
397 {
398 bool defined;
399
400 if (tok->type == PSI_T_NAME) {
401 defined = zend_hash_exists(&cpp->defs, tok->text);
402 } else {
403 defined = false;
404 }
405
406 #if PSI_CPP_DEBUG
407 fprintf(stderr, "PSI: CPP defined -> %s ", defined ? "true" : "false");
408 if (defined) {
409 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
410 fprintf(stderr, " @ %s:%u ", macro->token->file->val, macro->token->line);
411 }
412 psi_token_dump(2, tok);
413 #endif
414
415 return defined;
416 }
417
418 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
419 {
420 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
421
422 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
423 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
424 "'%s' redefined", decl->token->text->val);
425 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
426 "'%s' previously defined", old->token->text->val);
427 }
428 #if PSI_CPP_DEBUG
429 if (decl->exp) {
430 fprintf(stderr, "PSI: CPP MACRO num_exp -> %s ", decl->token->text->val);
431 } else {
432 fprintf(stderr, "PSI: CPP MACRO decl -> %s ", decl->token->text->val);
433 }
434 psi_cpp_macro_decl_dump(2, decl);
435 fprintf(stderr, "\n");
436 #endif
437 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
438 }
439
440 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
441 {
442 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
443 }
444
445 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
446 {
447 struct psi_validate_scope scope = {0};
448
449 scope.defs = &cpp->defs;
450 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
451 return false;
452 }
453 if (!psi_num_exp_get_long(exp->data.num, NULL, &cpp->defs)) {
454 return false;
455 }
456 return true;
457 }
458
459 static inline bool try_include(struct psi_cpp *cpp, const char *path, bool *parsed)
460 {
461 struct psi_parser_input *include;
462
463 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include trying %s\n", path);
464
465 include = psi_parser_open_file(cpp->parser, path, false);
466 if (include) {
467 struct psi_plist *tokens;
468
469 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
470
471 tokens = psi_parser_scan(cpp->parser, include);
472 if (tokens) {
473 *parsed = psi_cpp_process(cpp, &tokens);
474
475 if (*parsed) {
476 size_t num_tokens = psi_plist_count(tokens);
477
478 ++cpp->expanded;
479 psi_cpp_tokiter_add_range(cpp, num_tokens, psi_plist_eles(tokens));
480 free(tokens);
481 } else {
482 psi_plist_free(tokens);
483 }
484 }
485 psi_parser_input_free(&include);
486
487 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
488 return true;
489 }
490 return false;
491 }
492
493 static inline void include_path(const struct psi_token *file, char **path)
494 {
495 if (file->text->val[0] == '/') {
496 *path = file->text->val;
497 } else {
498 char *dir;
499 size_t len;
500
501 strncpy(*path, file->file->val, PATH_MAX);
502
503 dir = dirname(*path);
504 len = strlen(dir);
505
506 assert(len + file->text->len + 1 < PATH_MAX);
507
508 memmove(*path, dir, len);
509 (*path)[len] = '/';
510 memcpy(&(*path)[len + 1], file->text->val, file->text->len + 1);
511 }
512 }
513
514 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
515 {
516 bool parsed = false;
517
518 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
519 /* first try as is, full or relative path */
520 char temp[PATH_MAX], *path = temp;
521
522 include_path(file, &path);
523
524 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, strlen(path))) {
525 return true;
526 }
527 if (try_include(cpp, path, &parsed)) {
528 /* found */
529 return parsed;
530 }
531 }
532
533 /* look through search paths */
534 if (file->text->val[0] != '/') {
535 char path[PATH_MAX];
536 const char *sep;
537 int p_len;
538
539 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
540 if ((sep = strchr(cpp->search, ':'))) {
541 cpp->search = sep + 1;
542 } else {
543 /* point to end of string */
544 cpp->search += strlen(cpp->search);
545 }
546 }
547
548 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
549 cpp->search = PSI_G(search_path);
550 }
551
552 do {
553 int d_len;
554
555 sep = strchr(cpp->search, ':');
556 d_len = sep ? sep - cpp->search : strlen(cpp->search);
557
558 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
559 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
560 return true;
561 }
562 if (try_include(cpp, path, &parsed)) {
563 break;
564 }
565 }
566
567 if (sep) {
568 cpp->search = sep + 1;
569 }
570 } while (sep);
571 }
572
573 return parsed;
574 }