parser: accept trailing comma in enums; __restrict for arrays
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <libgen.h>
29
30 #include "cpp.h"
31 #include "parser.h"
32
33 #define PSI_CPP_SEARCH
34 #define PSI_CPP_PREDEF
35 #include "php_psi_cpp.h"
36
37 #include "php_psi.h"
38
39 static void free_cpp_def(zval *p)
40 {
41 if (Z_TYPE_P(p) == IS_PTR) {
42 psi_cpp_macro_decl_free((void *) &Z_PTR_P(p));
43 }
44 }
45
46 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
47 {
48 struct psi_cpp *cpp = calloc(1, sizeof(*cpp));
49
50 cpp->parser = P;
51 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
52 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
53
54 return cpp;
55 }
56
57 bool psi_cpp_load_defaults(struct psi_cpp *cpp)
58 {
59 struct psi_parser_input *predef;
60
61 if ((predef = psi_parser_open_string(cpp->parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
62 bool parsed = psi_parser_parse(cpp->parser, predef);
63 free(predef);
64 return parsed;
65 }
66
67 return false;
68 }
69
70 static int dump_def(zval *p)
71 {
72 struct psi_cpp_macro_decl *decl = Z_PTR_P(p);
73
74 if (decl) {
75 dprintf(2, "#define ");
76 psi_cpp_macro_decl_dump(2, decl);
77 dprintf(2, "\n");
78 }
79 return ZEND_HASH_APPLY_KEEP;
80 }
81
82 void psi_cpp_free(struct psi_cpp **cpp_ptr)
83 {
84 if (*cpp_ptr) {
85 struct psi_cpp *cpp = *cpp_ptr;
86
87 *cpp_ptr = NULL;
88 if (cpp->parser->flags & PSI_DEBUG) {
89 fprintf(stderr, "PSI: CPP decls:\n");
90 zend_hash_apply(&cpp->defs, dump_def);
91 }
92 zend_hash_destroy(&cpp->defs);
93 zend_hash_destroy(&cpp->once);
94 free(cpp);
95 }
96 }
97
98 static bool psi_cpp_stage1(struct psi_cpp *cpp)
99 {
100 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
101
102 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
103
104 psi_cpp_tokiter_reset(cpp);
105 while (psi_cpp_tokiter_valid(cpp)) {
106 struct psi_token *token = psi_cpp_tokiter_current(cpp);
107
108 /* strip comments and attributes */
109 if (token->type == PSI_T_COMMENT
110 || token->type == PSI_T_CPP_ATTRIBUTE) {
111 psi_cpp_tokiter_del_cur(cpp, true);
112 continue;
113 }
114
115 /* line continuations */
116 if (token->type == PSI_T_EOL) {
117 if (esc) {
118 psi_cpp_tokiter_del_range(cpp, psi_cpp_tokiter_index(cpp) - 1, 2, true);
119 psi_cpp_tokiter_prev(cpp);
120 esc = false;
121 continue;
122 }
123 } else if (token->type == PSI_T_BSLASH) {
124 esc = !esc;
125 } else {
126 esc = false;
127 }
128
129 /* this whole turf is needed to distinct between:
130 * #define foo (1,2,3)
131 * #define foo(a,b,c)
132 */
133
134 if (token->type == PSI_T_WHITESPACE) {
135 if (name) {
136 name = false;
137 }
138 ws = true;
139 psi_cpp_tokiter_del_cur(cpp, true);
140 continue;
141 }
142
143 switch (token->type) {
144 case PSI_T_EOL:
145 eol = true;
146 break;
147 case PSI_T_HASH:
148 if (eol) {
149 hash = true;
150 eol = false;
151 }
152 break;
153 case PSI_T_DEFINE:
154 if (hash) {
155 define = true;
156 hash = false;
157 }
158 break;
159 case PSI_T_NAME:
160 if (define) {
161 name = true;
162 define = false;
163 }
164 break;
165 case PSI_T_LPAREN:
166 if (name) {
167 name = false;
168 if (!ws) {
169 /* mask special token for parser */
170 struct psi_token *no_ws = psi_token_copy(token);
171
172 no_ws->type = PSI_T_NO_WHITESPACE;
173 no_ws->text[0] = '\xA0';
174 psi_cpp_tokiter_ins_cur(cpp, no_ws);
175 continue;
176 }
177 }
178 /* no break */
179 default:
180 name = define = hash = eol = false;
181 break;
182 }
183
184 ws = false;
185 psi_cpp_tokiter_next(cpp);
186 }
187
188 return true;
189 }
190
191 static bool psi_cpp_stage2(struct psi_cpp *cpp)
192 {
193 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
194 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
195
196 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
197
198 psi_cpp_tokiter_reset(cpp);
199 while (psi_cpp_tokiter_valid(cpp)) {
200 struct psi_token *current = psi_cpp_tokiter_current(cpp);
201
202 if (current->type == PSI_T_HASH) {
203 if (is_eol) {
204 do_cpp = true;
205 is_eol = false;
206 }
207 } else if (current->type == PSI_T_EOL) {
208 #if PSI_CPP_DEBUG
209 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
210 #endif
211 is_eol = true;
212 skip_all = false;
213 do_expansion = true;
214 if (!do_cpp) {
215 psi_cpp_tokiter_del_cur(cpp, true);
216 continue;
217 }
218 } else {
219 is_eol = false;
220
221 if (do_cpp) {
222 switch (current->type) {
223 case PSI_T_DEFINE:
224 #if PSI_CPP_DEBUG
225 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
226 #endif
227 do_expansion = false;
228 skip_all = true;
229 break;
230 case PSI_T_DEFINED:
231 skip_paren = true;
232 /* no break */
233 case PSI_T_IFDEF:
234 case PSI_T_IFNDEF:
235 case PSI_T_UNDEF:
236 #if PSI_CPP_DEBUG
237 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
238 #endif
239 do_expansion = false;
240 break;
241 case PSI_T_LPAREN:
242
243 if (!skip_all) {
244 if (skip_paren) {
245 skip_paren = false;
246 } else {
247 do_expansion = true;
248 #if PSI_CPP_DEBUG
249 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
250 #endif
251 }
252 }
253 break;
254 case PSI_T_NAME:
255 break;
256 default:
257 do_expansion = !skip_all;
258 #if PSI_CPP_DEBUG
259 fprintf(stderr, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
260 #endif
261 }
262 }
263 }
264
265 if (cpp->skip) {
266 /* FIXME: del_range */
267 if (!do_cpp) {
268 #if PSI_CPP_DEBUG
269 fprintf(stderr, "PSI: CPP skip ");
270 psi_token_dump(2, current);
271 #endif
272 psi_cpp_tokiter_del_cur(cpp, true);
273 continue;
274 }
275 }
276
277 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
278 bool expanded = false;
279
280 while (psi_cpp_tokiter_expand(cpp)) {
281 expanded = true;
282 }
283 if (expanded) {
284 continue;
285 }
286 }
287
288 if (do_cpp) {
289 parser_tokens = psi_plist_add(parser_tokens, &current);
290
291 if (is_eol) {
292 size_t processed = 0;
293 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
294
295 /* EOL */
296 psi_plist_pop(parser_tokens, NULL);
297 psi_plist_clean(parser_tokens);
298 do_cpp = false;
299
300 if (!parsed) {
301 psi_plist_free(parser_tokens);
302 return false;
303 }
304 } else {
305 /* leave EOLs in the input stream, else we might end up
306 * with a hash not preceded with a new line after include */
307 psi_cpp_tokiter_del_cur(cpp, false);
308 }
309
310 #if PSI_CPP_DEBUG > 1
311 psi_cpp_tokiter_dump(2, cpp);
312 #endif
313
314 continue;
315 }
316
317 psi_cpp_tokiter_next(cpp);
318 }
319
320 psi_plist_free(parser_tokens);
321
322 return true;
323 }
324
325 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
326 {
327 bool parsed = false;
328 struct psi_cpp temp = *cpp;
329
330 cpp->tokens = *tokens;
331 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
332 parsed = true;
333 }
334 *tokens = cpp->tokens;
335
336 if (temp.tokens) {
337 cpp->tokens = temp.tokens;
338 cpp->index = temp.index;
339 }
340
341 return parsed;
342 }
343
344 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
345 {
346 bool defined;
347
348 if (tok->type == PSI_T_NAME) {
349 defined = zend_hash_str_exists(&cpp->defs, tok->text, tok->size);
350 } else {
351 defined = false;
352 }
353
354 #if PSI_CPP_DEBUG
355 fprintf(stderr, "PSI: CPP defined -> %s ", defined ? "true" : "false");
356 if (defined) {
357 struct psi_cpp_macro_decl *macro = zend_hash_str_find_ptr(&cpp->defs, tok->text, tok->size);
358 fprintf(stderr, " @ %s:%u ", macro->token->file, macro->token->line);
359 }
360 psi_token_dump(2, tok);
361 #endif
362
363 return defined;
364 }
365
366 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
367 {
368 struct psi_cpp_macro_decl *old = zend_hash_str_find_ptr(&cpp->defs, decl->token->text, decl->token->size);
369
370 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
371 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
372 "'%s' redefined", decl->token->text);
373 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
374 "'%s' previously defined", old->token->text);
375 }
376 zend_hash_str_update_ptr(&cpp->defs, decl->token->text, decl->token->size, decl);
377 }
378
379 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
380 {
381 return SUCCESS == zend_hash_str_del(&cpp->defs, tok->text, tok->size);
382 }
383
384 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
385 {
386 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, NULL, NULL, NULL, NULL, NULL)) {
387 return false;
388 }
389 if (!psi_long_num_exp(exp->data.num, NULL, &cpp->defs)) {
390 return false;
391 }
392 return true;
393 }
394
395 static inline bool try_include(struct psi_cpp *cpp, const char *path, bool *parsed)
396 {
397 struct psi_parser_input *include;
398
399 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include trying %s\n", path);
400
401 include = psi_parser_open_file(cpp->parser, path, false);
402 if (include) {
403 struct psi_plist *tokens;
404
405 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
406
407 tokens = psi_parser_scan(cpp->parser, include);
408 if (tokens) {
409 *parsed = psi_cpp_process(cpp, &tokens);
410
411 if (*parsed) {
412 size_t num_tokens = psi_plist_count(tokens);
413
414 ++cpp->expanded;
415 psi_cpp_tokiter_ins_range(cpp, cpp->index,
416 num_tokens, psi_plist_eles(tokens));
417 /* skip already processed tokens */
418 cpp->index += num_tokens;
419 free(tokens);
420 } else {
421 psi_plist_free(tokens);
422 }
423 }
424 free(include);
425
426 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
427 return true;
428 }
429 return false;
430 }
431
432 static inline void include_path(const struct psi_token *file, char **path)
433 {
434 if (*file->text == '/') {
435 *path = file->text;
436 } else {
437 char *dir;
438 size_t len;
439
440 strncpy(*path, file->file, PATH_MAX);
441
442 dir = dirname(*path);
443 len = strlen(dir);
444
445 assert(len + file->size + 1 < PATH_MAX);
446
447 memmove(*path, dir, len);
448 (*path)[len] = '/';
449 memcpy(&(*path)[len + 1], file->text, file->size + 1);
450 }
451 }
452
453 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
454 {
455 bool parsed = false;
456 int f_len = strlen(file->text);
457
458 if (!(flags & PSI_CPP_INCLUDE_NEXT) || *file->text == '/') {
459 /* first try as is, full or relative path */
460 char temp[PATH_MAX], *path = temp;
461
462 include_path(file, &path);
463
464 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, f_len)) {
465 return true;
466 }
467 if (try_include(cpp, path, &parsed)) {
468 /* found */
469 return parsed;
470 }
471 }
472
473 /* look through search paths */
474 if (*file->text != '/') {
475 char path[PATH_MAX];
476 const char *sep;
477 int p_len;
478
479 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
480 if ((sep = strchr(cpp->search, ':'))) {
481 cpp->search = sep + 1;
482 } else {
483 /* point to end of string */
484 cpp->search += strlen(cpp->search);
485 }
486 }
487
488 if (!(flags & PSI_CPP_INCLUDE_NEXT) || !cpp->search) {
489 cpp->search = PSI_G(search_path);
490 }
491
492 do {
493 int d_len;
494
495 sep = strchr(cpp->search, ':');
496 d_len = sep ? sep - cpp->search : strlen(cpp->search);
497
498 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, f_len, file->text))) {
499 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
500 return true;
501 }
502 if (try_include(cpp, path, &parsed)) {
503 break;
504 }
505 }
506
507 if (sep) {
508 cpp->search = sep + 1;
509 }
510 } while (sep);
511 }
512
513 return parsed;
514 }