cpp: avoid erroneous multiple expansions
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include "cpp.h"
29 #include "parser.h"
30
31 #define PSI_CPP_SEARCH
32 #define PSI_CPP_PREDEF
33 #include "php_psi_cpp.h"
34
35 #include "php_psi.h"
36
37 static void free_cpp_def(zval *p)
38 {
39 if (Z_TYPE_P(p) == IS_PTR) {
40 psi_cpp_macro_decl_free((void *) &Z_PTR_P(p));
41 }
42 }
43
44 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
45 {
46 struct psi_cpp *cpp = calloc(1, sizeof(*cpp));
47
48 cpp->parser = P;
49 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
50 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
51
52 return cpp;
53 }
54
55 bool psi_cpp_load_defaults(struct psi_cpp *cpp)
56 {
57 struct psi_parser_input *predef;
58
59 if ((predef = psi_parser_open_string(cpp->parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
60 bool parsed = psi_parser_parse(cpp->parser, predef);
61 free(predef);
62 return parsed;
63 }
64
65 return false;
66 }
67
68 static int dump_def(zval *p)
69 {
70 struct psi_cpp_macro_decl *decl = Z_PTR_P(p);
71
72 if (decl) {
73 dprintf(2, "#define ");
74 psi_cpp_macro_decl_dump(2, decl);
75 dprintf(2, "\n");
76 }
77 return ZEND_HASH_APPLY_KEEP;
78 }
79
80 void psi_cpp_free(struct psi_cpp **cpp_ptr)
81 {
82 if (*cpp_ptr) {
83 struct psi_cpp *cpp = *cpp_ptr;
84
85 *cpp_ptr = NULL;
86 if (cpp->parser->flags & PSI_DEBUG) {
87 fprintf(stderr, "PSI: CPP decls:\n");
88 zend_hash_apply(&cpp->defs, dump_def);
89 }
90 zend_hash_destroy(&cpp->defs);
91 zend_hash_destroy(&cpp->once);
92 free(cpp);
93 }
94 }
95
/*
 * Stage 1: normalize the token stream of the preprocessor context.
 *
 * Walks all tokens once and
 *  - deletes comment and attribute tokens,
 *  - joins lines by deleting a backslash directly followed by an EOL,
 *  - deletes whitespace tokens,
 *  - inserts a PSI_T_NO_WHITESPACE marker token at a '(' that directly
 *    follows the macro name of a `#define`, so function-like macros can
 *    be told apart from object-like macros whose body starts with '('.
 *
 * State flags:
 *  eol    - the last kept token was an EOL (initially true: start of input)
 *  hash   - a '#' was seen at the start of a line
 *  define - `define` was seen right after such a '#'
 *  name   - a NAME was seen right after `#define`
 *  esc    - the previous token was an unpaired backslash
 *  ws     - whitespace was deleted since the last kept token
 *
 * Always returns true.
 */
static bool psi_cpp_stage1(struct psi_cpp *cpp)
{
	bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;

	PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");

	psi_cpp_tokiter_reset(cpp);
	while (psi_cpp_tokiter_valid(cpp)) {
		struct psi_token *token = psi_cpp_tokiter_current(cpp);

		/* strip comments and attributes; the state flags (including esc)
		 * are deliberately left untouched */
		if (token->type == PSI_T_COMMENT
				|| token->type == PSI_T_CPP_ATTRIBUTE) {
			psi_cpp_tokiter_del_cur(cpp, true);
			continue;
		}

		/* line continuations */
		if (token->type == PSI_T_EOL) {
			if (esc) {
				/* delete two tokens starting at the previous index, i.e.
				 * the backslash and this EOL, then step the iterator back */
				psi_cpp_tokiter_del_range(cpp, psi_cpp_tokiter_index(cpp) - 1, 2, true);
				psi_cpp_tokiter_prev(cpp);
				esc = false;
				continue;
			}
		} else if (token->type == PSI_T_BSLASH) {
			/* toggle, so that a pair of backslashes does not escape an EOL */
			esc = !esc;
		} else {
			/* any other token (including whitespace) ends the escape */
			esc = false;
		}

		/* all of this is needed to distinguish between:
		 * #define foo (1,2,3)
		 * #define foo(a,b,c)
		 */

		if (token->type == PSI_T_WHITESPACE) {
			/* whitespace after the macro name: a following '(' belongs
			 * to the macro body, not to a parameter list */
			if (name) {
				name = false;
			}
			ws = true;
			psi_cpp_tokiter_del_cur(cpp, true);
			continue;
		}

		switch (token->type) {
		case PSI_T_EOL:
			eol = true;
			break;
		case PSI_T_HASH:
			/* only a '#' at the start of a line opens a directive */
			if (eol) {
				hash = true;
				eol = false;
			}
			break;
		case PSI_T_DEFINE:
			if (hash) {
				define = true;
				hash = false;
			}
			break;
		case PSI_T_NAME:
			if (define) {
				name = true;
				define = false;
			}
			break;
		case PSI_T_LPAREN:
			if (name) {
				name = false;
				if (!ws) {
					/* mask special token for parser */
					struct psi_token *no_ws = psi_token_copy(token);

					no_ws->type = PSI_T_NO_WHITESPACE;
					no_ws->text[0] = '\xA0';
					psi_cpp_tokiter_ins_cur(cpp, no_ws);
					/* NOTE(review): the iterator is not advanced here;
					 * presumably the inserted marker becomes the current
					 * token and the '(' is handled by the default case on
					 * a later iteration - confirm against
					 * psi_cpp_tokiter_ins_cur() */
					continue;
				}
			}
			/* no break */
		default:
			/* any other token ends a potential `#define NAME` sequence */
			name = define = hash = eol = false;
			break;
		}

		ws = false;
		psi_cpp_tokiter_next(cpp);
	}

	return true;
}
188
/*
 * Stage 2: expand macros and hand preprocessor directives to the parser.
 *
 * Walks the token stream once:
 *  - a '#' at the start of a line opens a directive (do_cpp); every token
 *    of that line is moved into parser_tokens and the collected line is
 *    passed to psi_parser_process() when its EOL is reached,
 *  - EOL tokens outside of directives are deleted,
 *  - while cpp->skip is set, tokens outside of directives are deleted,
 *  - NAME tokens that are defined macros are expanded when do_expansion
 *    allows it.
 *
 * State flags:
 *  is_eol       - the current token is an EOL (initially true: start of input)
 *  do_cpp       - currently inside a directive line
 *  do_expansion - NAME tokens may currently be macro-expanded
 *  skip_paren   - set by `defined`; keeps the next '(' from re-enabling
 *                 expansion
 *  skip_all     - set by `define`; keeps expansion disabled until EOL
 *
 * Returns false as soon as a directive line fails to parse, true otherwise.
 */
static bool psi_cpp_stage2(struct psi_cpp *cpp)
{
	struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
	bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;

	PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");

	psi_cpp_tokiter_reset(cpp);
	while (psi_cpp_tokiter_valid(cpp)) {
		struct psi_token *current = psi_cpp_tokiter_current(cpp);

		if (current->type == PSI_T_HASH) {
			/* only a '#' directly after an EOL (or at the start) opens a directive */
			if (is_eol) {
				do_cpp = true;
				is_eol = false;
			}
		} else if (current->type == PSI_T_EOL) {
#if PSI_CPP_DEBUG
			fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
#endif
			/* a new line resets the expansion state; NOTE(review):
			 * skip_paren is not reset here - confirm this is intended */
			is_eol = true;
			skip_all = false;
			do_expansion = true;
			if (!do_cpp) {
				/* EOLs outside of directives are not kept */
				psi_cpp_tokiter_del_cur(cpp, true);
				continue;
			}
		} else {
			is_eol = false;

			if (do_cpp) {
				switch (current->type) {
				case PSI_T_DEFINE:
#if PSI_CPP_DEBUG
					fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
#endif
					/* nothing on a #define line is expanded */
					do_expansion = false;
					skip_all = true;
					break;
				case PSI_T_DEFINED:
					skip_paren = true;
					/* no break */
				case PSI_T_IFDEF:
				case PSI_T_IFNDEF:
				case PSI_T_UNDEF:
#if PSI_CPP_DEBUG
					fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
#endif
					/* the macro name operand must not be expanded */
					do_expansion = false;
					break;
				case PSI_T_LPAREN:

					if (!skip_all) {
						if (skip_paren) {
							/* '(' following `defined`: expansion stays off */
							skip_paren = false;
						} else {
							do_expansion = true;
#if PSI_CPP_DEBUG
							fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
#endif
						}
					}
					break;
				case PSI_T_NAME:
					/* a name keeps the current expansion state */
					break;
				default:
					do_expansion = !skip_all;
#if PSI_CPP_DEBUG
					fprintf(stderr, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
#endif
				}
			}
		}

		if (cpp->skip) {
			/* FIXME: del_range */
			/* drop everything but directive lines while skipping */
			if (!do_cpp) {
#if PSI_CPP_DEBUG
				fprintf(stderr, "PSI: CPP skip ");
				psi_token_dump(2, current);
#endif
				psi_cpp_tokiter_del_cur(cpp, true);
				continue;
			}
		}

		if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
			bool expanded = false;

			/* keep expanding for as long as psi_cpp_tokiter_expand() reports success */
			while (psi_cpp_tokiter_expand(cpp)) {
				expanded = true;
			}
			if (expanded) {
				/* re-examine whatever token is current after the expansion */
				continue;
			}
		}

		if (do_cpp) {
			/* collect the tokens of the directive line for the parser */
			parser_tokens = psi_plist_add(parser_tokens, &current);

			if (is_eol) {
				size_t processed = 0;
				bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);

				/* EOL */
				/* the EOL is popped before the list is cleaned; presumably
				 * so that it is not freed, as it remains in the input stream */
				psi_plist_pop(parser_tokens, NULL);
				psi_plist_clean(parser_tokens);
				do_cpp = false;

				if (!parsed) {
					psi_plist_free(parser_tokens);
					return false;
				}
			} else {
				/* leave EOLs in the input stream, else we might end up
				 * with a hash not preceded with a new line after include */
				/* second argument false: presumably removes the token without
				 * freeing it, since parser_tokens references it now */
				psi_cpp_tokiter_del_cur(cpp, false);
			}

#if PSI_CPP_DEBUG > 1
			psi_cpp_tokiter_dump(2, cpp);
#endif

			/* the iterator is not advanced: after del_cur, or with the
			 * directive's EOL left in place, the loop re-reads the current position */
			continue;
		}

		psi_cpp_tokiter_next(cpp);
	}

	psi_plist_free(parser_tokens);

	return true;
}
322
323 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
324 {
325 bool parsed = false;
326 struct psi_cpp temp = *cpp;
327
328 cpp->tokens = *tokens;
329 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
330 parsed = true;
331 }
332 *tokens = cpp->tokens;
333
334 if (temp.tokens) {
335 cpp->tokens = temp.tokens;
336 cpp->index = temp.index;
337 }
338
339 return parsed;
340 }
341
342 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
343 {
344 bool defined;
345
346 if (tok->type == PSI_T_NAME) {
347 defined = zend_hash_str_exists(&cpp->defs, tok->text, tok->size);
348 } else {
349 defined = false;
350 }
351
352 #if PSI_CPP_DEBUG
353 fprintf(stderr, "PSI: CPP defined -> %s ", defined ? "true" : "false");
354 if (defined) {
355 struct psi_cpp_macro_decl *macro = zend_hash_str_find_ptr(&cpp->defs, tok->text, tok->size);
356 fprintf(stderr, " @ %s:%u ", macro->token->file, macro->token->line);
357 }
358 psi_token_dump(2, tok);
359 #endif
360
361 return defined;
362 }
363
364 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
365 {
366 struct psi_cpp_macro_decl *old = zend_hash_str_find_ptr(&cpp->defs, decl->token->text, decl->token->size);
367
368 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
369 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
370 "'%s' redefined", decl->token->text);
371 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
372 "'%s' previously defined", old->token->text);
373 }
374 zend_hash_str_update_ptr(&cpp->defs, decl->token->text, decl->token->size, decl);
375 }
376
377 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
378 {
379 return SUCCESS == zend_hash_str_del(&cpp->defs, tok->text, tok->size);
380 }
381
382 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
383 {
384 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, NULL, NULL, NULL, NULL, NULL)) {
385 return false;
386 }
387 if (!psi_long_num_exp(exp->data.num, NULL, &cpp->defs)) {
388 return false;
389 }
390 return true;
391 }
392
393 static inline bool try_include(struct psi_cpp *cpp, const char *path, bool *parsed)
394 {
395 struct psi_parser_input *include;
396
397 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include trying %s\n", path);
398
399 include = psi_parser_open_file(cpp->parser, path, false);
400 if (include) {
401 struct psi_plist *tokens;
402
403 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
404
405 tokens = psi_parser_scan(cpp->parser, include);
406 if (tokens) {
407 *parsed = psi_cpp_process(cpp, &tokens);
408
409 if (*parsed) {
410 size_t num_tokens = psi_plist_count(tokens);
411
412 ++cpp->expanded;
413 psi_cpp_tokiter_ins_range(cpp, cpp->index,
414 num_tokens, psi_plist_eles(tokens));
415 /* skip already processed tokens */
416 cpp->index += num_tokens;
417 free(tokens);
418 } else {
419 psi_plist_free(tokens);
420 }
421 }
422 free(include);
423
424 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
425 return true;
426 }
427 return false;
428 }
429
430 bool psi_cpp_include(struct psi_cpp *cpp, const char *file, unsigned flags)
431 {
432 bool parsed = false;
433 int f_len = strlen(file);
434
435 if (!(flags & PSI_CPP_INCLUDE_NEXT) || *file == '/') {
436 /* first try as is, full or relative path */
437 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, file, f_len)) {
438 return true;
439 }
440 if (try_include(cpp, file, &parsed)) {
441 /* found */
442 return parsed;
443 }
444 }
445
446 /* look through search paths */
447 if (*file != '/') {
448 char path[PATH_MAX];
449 const char *sep;
450 int p_len;
451
452 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
453 if ((sep = strchr(cpp->search, ':'))) {
454 cpp->search = sep + 1;
455 } else {
456 /* point to end of string */
457 cpp->search += strlen(cpp->search);
458 }
459 }
460
461 if (!(flags & PSI_CPP_INCLUDE_NEXT) || !cpp->search) {
462 cpp->search = PSI_G(search_path);
463 }
464
465 do {
466 int d_len;
467
468 sep = strchr(cpp->search, ':');
469 d_len = sep ? sep - cpp->search : strlen(cpp->search);
470
471 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, f_len, file))) {
472 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
473 return true;
474 }
475 if (try_include(cpp, path, &parsed)) {
476 break;
477 }
478 }
479
480 if (sep) {
481 cpp->search = sep + 1;
482 }
483 } while (sep);
484 }
485
486 return parsed;
487 }