commit after reset fuckup
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <libgen.h>
29
30 #include "cpp.h"
31 #include "parser.h"
32
33 #define PSI_CPP_SEARCH
34 #define PSI_CPP_PREDEF
35 #include "php_psi_cpp.h"
36
37 #include "php_psi.h"
38
/*
 * Table of predefined macros shared by all preprocessor instances.
 * It is filled once at module startup from the macros parsed out of
 * psi_cpp_predef, copied (pointer-wise) into every psi_cpp created by
 * psi_cpp_init(), and its entries are released at module shutdown.
 */
39 HashTable psi_cpp_defaults;
40
41 PHP_MINIT_FUNCTION(psi_cpp)
42 {
43 struct psi_parser parser;
44 struct psi_parser_input *predef;
45
46 PSI_G(search_path) = pemalloc(strlen(PSI_G(directory)) + strlen(psi_cpp_search) + 1 + 1, 1);
47 sprintf(PSI_G(search_path), "%s:%s", PSI_G(directory), psi_cpp_search);
48
49 if (!psi_parser_init(&parser, psi_error_wrapper, PSI_SILENT)) {
50 return FAILURE;
51 }
52
53 if (!(predef = psi_parser_open_string(&parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
54 psi_parser_dtor(&parser);
55 return FAILURE;
56 }
57
58 if (!psi_parser_parse(&parser, predef)) {
59 psi_parser_input_free(&predef);
60 psi_parser_dtor(&parser);
61 return FAILURE;
62 }
63 psi_parser_input_free(&predef);
64
65 zend_hash_init(&psi_cpp_defaults, 0, NULL, NULL, 1);
66 zend_hash_copy(&psi_cpp_defaults, &parser.preproc->defs, NULL);
67
68 psi_parser_dtor(&parser);
69
70 return SUCCESS;
71 }
72
73 PHP_MSHUTDOWN_FUNCTION(psi_cpp)
74 {
75 struct psi_cpp_macro_decl *macro;
76
77 ZEND_HASH_FOREACH_PTR(&psi_cpp_defaults, macro)
78 {
79 psi_cpp_macro_decl_free(&macro);
80 }
81 ZEND_HASH_FOREACH_END();
82
83 zend_hash_destroy(&psi_cpp_defaults);
84
85 return SUCCESS;
86 }
87
88 static void free_cpp_def(zval *p)
89 {
90 if (Z_TYPE_P(p) == IS_PTR) {
91 struct psi_cpp_macro_decl *macro = Z_PTR_P(p);
92
93 if (!zend_hash_exists(&psi_cpp_defaults, macro->token->text)) {
94 psi_cpp_macro_decl_free(&macro);
95 }
96 }
97 }
98
99 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
100 {
101 struct psi_cpp *cpp = pecalloc(1, sizeof(*cpp), 1);
102
103 cpp->parser = P;
104 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
105 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
106 zend_hash_copy(&cpp->defs, &psi_cpp_defaults, NULL);
107
108 return cpp;
109 }
110
/*
 * Human readable directive names for debug output, indexed by the include
 * flags value passed to psi_cpp_include()/psi_cpp_has_include().
 * NOTE(review): assumes the flags value is always in the range 0..2 --
 * confirm against the PSI_CPP_INCLUDE_* definitions.
 */
static const char *const include_flavor[] = {
	"include",
	"include next",
	"include once"
};
116
117 void psi_cpp_free(struct psi_cpp **cpp_ptr)
118 {
119 if (*cpp_ptr) {
120 struct psi_cpp *cpp = *cpp_ptr;
121
122 *cpp_ptr = NULL;
123 zend_hash_destroy(&cpp->defs);
124 zend_hash_destroy(&cpp->once);
125 free(cpp);
126 }
127 }
128
129 static bool psi_cpp_stage1(struct psi_cpp *cpp)
130 {
131 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
132
133 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
134
135 psi_cpp_tokiter_reset(cpp);
136 while (psi_cpp_tokiter_valid(cpp)) {
137 struct psi_token *token = psi_cpp_tokiter_current(cpp);
138
139 /* strip comments and attributes */
140 if (token->type == PSI_T_COMMENT
141 || token->type == PSI_T_CPP_ATTRIBUTE) {
142 psi_cpp_tokiter_del_cur(cpp, true);
143 continue;
144 }
145
146 /* line continuations */
147 if (token->type == PSI_T_EOL) {
148 if (esc) {
149 psi_cpp_tokiter_del_prev(cpp, true);
150 psi_cpp_tokiter_del_cur(cpp, true);
151 esc = false;
152 continue;
153 }
154 } else if (token->type == PSI_T_BSLASH) {
155 esc = !esc;
156 } else {
157 esc = false;
158 }
159
160 /* this whole turf is needed to distinct between:
161 * #define foo (1,2,3)
162 * #define foo(a,b,c)
163 */
164
165 if (token->type == PSI_T_WHITESPACE) {
166 if (name) {
167 name = false;
168 }
169 ws = true;
170 psi_cpp_tokiter_del_cur(cpp, true);
171 continue;
172 }
173
174 switch (token->type) {
175 case PSI_T_EOL:
176 eol = true;
177 break;
178 case PSI_T_HASH:
179 if (eol) {
180 hash = true;
181 eol = false;
182 }
183 break;
184 case PSI_T_DEFINE:
185 if (hash) {
186 define = true;
187 hash = false;
188 }
189 break;
190 case PSI_T_NAME:
191 if (define) {
192 name = true;
193 define = false;
194 }
195 break;
196 case PSI_T_LPAREN:
197 if (name) {
198 name = false;
199 if (!ws) {
200 /* mask special token for parser */
201 struct psi_token *no_ws = psi_token_copy(token);
202
203 no_ws->type = PSI_T_NO_WHITESPACE;
204 zend_string_release(no_ws->text);
205 no_ws->text = zend_string_init_interned("\xA0", 1, 1);
206 psi_cpp_tokiter_add(cpp, no_ws);
207 continue;
208 }
209 }
210 /* no break */
211 default:
212 name = define = hash = eol = false;
213 break;
214 }
215
216 ws = false;
217 psi_cpp_tokiter_add_cur(cpp);
218 psi_cpp_tokiter_next(cpp);
219 }
220
221 return true;
222 }
223
224 static bool psi_cpp_stage2(struct psi_cpp *cpp)
225 {
226 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
227 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
228
229 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
230
231 psi_cpp_tokiter_reset(cpp);
232 while (psi_cpp_tokiter_valid(cpp)) {
233 struct psi_token *current = psi_cpp_tokiter_current(cpp);
234
235 if (current->type == PSI_T_HASH) {
236 if (is_eol) {
237 do_cpp = true;
238 is_eol = false;
239 }
240 } else if (current->type == PSI_T_EOL) {
241 #if PSI_CPP_DEBUG
242 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
243 #endif
244 is_eol = true;
245 skip_all = false;
246 do_expansion = true;
247 if (!do_cpp) {
248 psi_cpp_tokiter_del_cur(cpp, true);
249 continue;
250 }
251 } else {
252 is_eol = false;
253
254 if (do_cpp) {
255 switch (current->type) {
256 case PSI_T_DEFINE:
257 #if PSI_CPP_DEBUG
258 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
259 #endif
260 do_expansion = false;
261 skip_all = true;
262 break;
263 case PSI_T_DEFINED:
264 skip_paren = true;
265 /* no break */
266 case PSI_T_IFDEF:
267 case PSI_T_IFNDEF:
268 case PSI_T_UNDEF:
269 #if PSI_CPP_DEBUG
270 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
271 #endif
272 do_expansion = false;
273 break;
274 case PSI_T_LPAREN:
275
276 if (!skip_all) {
277 if (skip_paren) {
278 skip_paren = false;
279 } else {
280 do_expansion = true;
281 #if PSI_CPP_DEBUG
282 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
283 #endif
284 }
285 }
286 break;
287 case PSI_T_NAME:
288 break;
289 default:
290 do_expansion = !skip_all;
291 #if PSI_CPP_DEBUG
292 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
293 #endif
294 }
295 }
296 }
297
298 if (cpp->skip) {
299 if (!do_cpp) {
300 #if PSI_CPP_DEBUG
301 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP skip ");
302 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, current);
303 #endif
304 psi_cpp_tokiter_del_cur(cpp, true);
305 continue;
306 }
307 }
308
309 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
310 bool expanded = false;
311
312 while (psi_cpp_tokiter_expand(cpp)) {
313 expanded = true;
314 }
315 if (expanded) {
316 continue;
317 }
318 }
319
320 if (do_cpp) {
321 parser_tokens = psi_plist_add(parser_tokens, &current);
322
323 if (is_eol) {
324 size_t processed = 0;
325 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
326
327 /* EOL */
328 psi_plist_pop(parser_tokens, NULL);
329 psi_plist_clean(parser_tokens);
330 do_cpp = false;
331
332 if (!parsed) {
333 psi_plist_free(parser_tokens);
334 return false;
335 }
336 } else {
337 /* leave EOLs in the input stream, else we might end up
338 * with a hash not preceded with a new line after include */
339 psi_cpp_tokiter_del_cur(cpp, false);
340 }
341
342 #if PSI_CPP_DEBUG > 1
343 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_tokiter_dump, cpp);
344 #endif
345
346 continue;
347 }
348
349 psi_cpp_tokiter_add_cur(cpp);
350 psi_cpp_tokiter_next(cpp);
351 }
352
353 psi_plist_free(parser_tokens);
354
355 return true;
356 }
357
358 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
359 {
360 bool parsed = false;
361 struct psi_cpp temp = *cpp; cpp->level = temp.level;
362
363 cpp->tokens.iter = *tokens;
364 cpp->tokens.next = NULL;
365
366 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
367 parsed = true;
368 }
369
370 if (cpp->tokens.next) {
371 free(cpp->tokens.iter);
372 cpp->tokens.iter = cpp->tokens.next;
373 cpp->tokens.next = NULL;
374 }
375
376 *tokens = cpp->tokens.iter;
377
378 if (temp.tokens.iter) {
379 cpp->tokens.iter = temp.tokens.iter;
380 cpp->tokens.next = temp.tokens.next;
381 }
382 cpp->index = temp.index;
383 cpp->skip = temp.skip;
384 cpp->level = temp.level;
385 cpp->seen = temp.seen;
386
387 return parsed;
388 }
389
390 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
391 {
392 bool defined;
393
394 if (tok->type == PSI_T_NAME) {
395 defined = zend_hash_exists(&cpp->defs, tok->text)
396 || psi_builtin_exists(tok->text);
397 } else {
398 defined = false;
399 }
400
401 #if PSI_CPP_DEBUG
402 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP defined -> %s ", defined ? "true" : "false");
403 if (defined) {
404 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
405 if (macro) {
406 PSI_DEBUG_PRINT(cpp->parser, " @ %s:%u ", macro->token->file->val, macro->token->line);
407 }
408 }
409 PSI_DEBUG_DUMP(cpp->parser, psi_token_dump, tok);
410 #endif
411
412 return defined;
413 }
414
415 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
416 {
417 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
418
419 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
420 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
421 "'%s' redefined", decl->token->text->val);
422 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
423 "'%s' previously defined", old->token->text->val);
424 }
425 #if PSI_CPP_DEBUG
426 if (decl->exp) {
427 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO num_exp -> ");
428 } else {
429 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP MACRO decl -> ");
430 }
431 PSI_DEBUG_DUMP(cpp->parser, psi_cpp_macro_decl_dump, decl);
432 PSI_DEBUG_PRINT(cpp->parser, "\n");
433 #endif
434 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
435 }
436
437 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
438 {
439 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
440 }
441
442 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
443 {
444 struct psi_validate_scope scope = {0};
445
446 scope.cpp = cpp;
447 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
448 return false;
449 }
450 if (!psi_num_exp_get_long(exp->data.num, NULL, cpp)) {
451 return false;
452 }
453 return true;
454 }
455
456 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
457 {
458 bool parsed = false;
459 char path[PATH_MAX];
460 struct psi_plist *tokens;
461 struct psi_parser_input *include;
462
463 if (!psi_cpp_has_include(cpp, file, flags, path)) {
464 return false;
465 }
466
467 if (flags & PSI_CPP_INCLUDE_ONCE) {
468 if (zend_hash_str_exists(&cpp->once, path, strlen(path))) {
469 return true;
470 }
471 }
472
473 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s opening %s\n",
474 include_flavor[flags], path);
475
476 include = psi_parser_open_file(cpp->parser, path, false);
477 if (!include) {
478 return false;
479 }
480
481 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
482
483 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
484
485 tokens = psi_parser_scan(cpp->parser, include);
486 psi_parser_input_free(&include);
487
488 if (!tokens) {
489 return false;
490 }
491
492 parsed = psi_cpp_process(cpp, &tokens);
493 if (!parsed) {
494 psi_plist_free(tokens);
495 return false;
496 }
497
498 psi_cpp_tokiter_add_range(cpp, psi_plist_count(tokens), psi_plist_eles(tokens));
499 free(tokens);
500
501 ++cpp->expanded;
502 return true;
503 }
504
505 #ifndef HAVE_EACCESS
506 # define eaccess access
507 #endif
508 bool psi_cpp_has_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags, char *path)
509 {
510 char temp[PATH_MAX];
511
512 if (!path) {
513 path = temp;
514 }
515
516 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
517 /* first try as is, full or relative path */
518 if (file->text->val[0] == '/') {
519 path = file->text->val;
520 } else {
521 char *dir;
522 size_t len;
523
524 strncpy(path, file->file->val, PATH_MAX);
525
526 dir = dirname(path);
527 len = strlen(dir);
528
529 assert(len + file->text->len + 1 < PATH_MAX);
530
531 memmove(path, dir, len);
532 path[len] = '/';
533 memcpy(&(path)[len + 1], file->text->val, file->text->len + 1);
534 }
535
536 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
537 include_flavor[flags], path);
538 if (0 == eaccess(path, R_OK)) {
539 return true;
540 }
541 }
542
543 /* look through search paths */
544 if (file->text->val[0] != '/') {
545 const char *sep;
546 int p_len;
547
548 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
549 if ((sep = strchr(cpp->search, ':'))) {
550 cpp->search = sep + 1;
551 } else {
552 /* point to end of string */
553 cpp->search += strlen(cpp->search);
554 }
555 }
556
557 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
558 cpp->search = PSI_G(search_path);
559 }
560
561 do {
562 int d_len;
563
564 sep = strchr(cpp->search, ':');
565 d_len = sep ? sep - cpp->search : strlen(cpp->search);
566
567 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
568 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s trying %s\n",
569 include_flavor[flags], path);
570 if (0 == eaccess(path, R_OK)) {
571 return true;
572 }
573 }
574
575 if (sep) {
576 cpp->search = sep + 1;
577 }
578 } while (sep);
579 }
580
581 return false;
582 }