2a53df84339bbd0edfdaa5f750fd1ce316787075
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <libgen.h>
29
30 #include "cpp.h"
31 #include "parser.h"
32
33 #define PSI_CPP_SEARCH
34 #define PSI_CPP_PREDEF
35 #include "php_psi_cpp.h"
36
37 #include "php_psi.h"
38
39 static void free_cpp_def(zval *p)
40 {
41 if (Z_TYPE_P(p) == IS_PTR) {
42 psi_cpp_macro_decl_free((void *) &Z_PTR_P(p));
43 }
44 }
45
46 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
47 {
48 struct psi_cpp *cpp = calloc(1, sizeof(*cpp));
49
50 cpp->parser = P;
51 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
52 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
53
54 return cpp;
55 }
56
57 bool psi_cpp_load_defaults(struct psi_cpp *cpp)
58 {
59 struct psi_parser_input *predef;
60
61 if ((predef = psi_parser_open_string(cpp->parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
62 bool parsed = psi_parser_parse(cpp->parser, predef);
63 psi_parser_input_free(&predef);
64 return parsed;
65 }
66
67 return false;
68 }
69
#if PSI_CPP_DEBUG
/* zend_hash_apply() callback: print one macro definition as a "#define"
 * line on file descriptor 2 and keep the entry in the table. */
static int dump_def(zval *p)
{
	struct psi_cpp_macro_decl *macro = Z_PTR_P(p);

	if (!macro) {
		return ZEND_HASH_APPLY_KEEP;
	}

	dprintf(2, "#define ");
	psi_cpp_macro_decl_dump(2, macro);
	dprintf(2, "\n");

	return ZEND_HASH_APPLY_KEEP;
}
#endif
83
84 void psi_cpp_free(struct psi_cpp **cpp_ptr)
85 {
86 if (*cpp_ptr) {
87 struct psi_cpp *cpp = *cpp_ptr;
88
89 #if PSI_CPP_DEBUG
90 fprintf(stderr, "PSI: CPP decls:\n");
91 zend_hash_apply(&cpp->defs, dump_def);
92 #endif
93 *cpp_ptr = NULL;
94 zend_hash_destroy(&cpp->defs);
95 zend_hash_destroy(&cpp->once);
96 free(cpp);
97 }
98 }
99
/*
 * Stage 1: clean up the token stream held by cpp before directives are
 * evaluated.
 *
 * Walking the stream from the start, this pass
 *  - deletes PSI_T_COMMENT and PSI_T_CPP_ATTRIBUTE tokens,
 *  - removes line continuations: when an EOL is reached while `esc` is set
 *    (toggled by each PSI_T_BSLASH, cleared by any other token), the two
 *    tokens starting at the one before the EOL are deleted,
 *  - deletes PSI_T_WHITESPACE tokens, remembering in `ws` that one was seen,
 *  - follows the sequence HASH (only at line start), DEFINE, NAME and, when
 *    an LPAREN follows that NAME with no whitespace in between, inserts a
 *    PSI_T_NO_WHITESPACE token (text "\xA0") via psi_cpp_tokiter_ins_cur().
 *
 * `eol` starts out true, so a HASH as the very first token counts as being
 * at line start.
 *
 * NOTE(review): the meaning of the bool passed to psi_cpp_tokiter_del_cur()
 * and psi_cpp_tokiter_del_range() is not visible here -- presumably
 * "free the token"; confirm against the tokiter implementation.
 *
 * Always returns true.
 */
100 static bool psi_cpp_stage1(struct psi_cpp *cpp)
101 {
102 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
103
104 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
105
106 psi_cpp_tokiter_reset(cpp);
107 while (psi_cpp_tokiter_valid(cpp)) {
108 struct psi_token *token = psi_cpp_tokiter_current(cpp);
109
110 /* strip comments and attributes */
111 if (token->type == PSI_T_COMMENT
112 || token->type == PSI_T_CPP_ATTRIBUTE) {
113 psi_cpp_tokiter_del_cur(cpp, true);
114 continue;
115 }
116
117 /* line continuations */
118 if (token->type == PSI_T_EOL) {
119 if (esc) {
120 psi_cpp_tokiter_del_range(cpp, psi_cpp_tokiter_index(cpp) - 1, 2, true);
121 psi_cpp_tokiter_prev(cpp);
122 esc = false;
123 continue;
124 }
125 } else if (token->type == PSI_T_BSLASH) {
126 esc = !esc;
127 } else {
128 esc = false;
129 }
130
131 /* this whole turf is needed to distinguish between:
132 * #define foo (1,2,3)
133 * #define foo(a,b,c)
134 */
135
136 if (token->type == PSI_T_WHITESPACE) {
137 if (name) {
138 name = false;
139 }
140 ws = true;
141 psi_cpp_tokiter_del_cur(cpp, true);
142 continue;
143 }
144
145 switch (token->type) {
146 case PSI_T_EOL:
147 eol = true;
148 break;
149 case PSI_T_HASH:
150 if (eol) {
151 hash = true;
152 eol = false;
153 }
154 break;
155 case PSI_T_DEFINE:
156 if (hash) {
157 define = true;
158 hash = false;
159 }
160 break;
161 case PSI_T_NAME:
162 if (define) {
163 name = true;
164 define = false;
165 }
166 break;
167 case PSI_T_LPAREN:
168 if (name) {
169 name = false;
170 if (!ws) {
171 /* mask special token for parser */
172 struct psi_token *no_ws = psi_token_copy(token);
173
174 no_ws->type = PSI_T_NO_WHITESPACE;
175 zend_string_release(no_ws->text);
176 no_ws->text = zend_string_init("\xA0", 1, 1);
177 psi_cpp_tokiter_ins_cur(cpp, no_ws);
/* `name` is already cleared, so this branch cannot be taken a second
 * time for the same macro name */
178 continue;
179 }
180 }
181 /* no break */
182 default:
183 name = define = hash = eol = false;
184 break;
185 }
186
187 ws = false;
188 psi_cpp_tokiter_next(cpp);
189 }
190
191 return true;
192 }
193
/*
 * Stage 2: evaluate preprocessor directives and expand macros in the token
 * stream held by cpp.
 *
 * State kept while walking the stream:
 *  - is_eol:       the previous token was an EOL (true at start of input)
 *  - do_cpp:       currently inside a directive line (HASH seen at line start)
 *  - do_expansion: NAME tokens may be macro-expanded
 *  - skip_paren:   set by DEFINED; the next LPAREN does not re-enable expansion
 *  - skip_all:     set by DEFINE; expansion stays off until the next EOL
 *
 * Tokens of a directive line are collected in `parser_tokens` and taken out
 * of the iterator; when the EOL of the line is reached the collected tokens
 * are handed to psi_parser_process(). Outside directive lines, EOL tokens
 * are deleted, and every token is deleted while cpp->skip is set.
 *
 * Returns false as soon as psi_parser_process() fails for a directive,
 * true once the whole stream has been walked.
 */
194 static bool psi_cpp_stage2(struct psi_cpp *cpp)
195 {
196 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
197 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
198
199 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
200
201 psi_cpp_tokiter_reset(cpp);
202 while (psi_cpp_tokiter_valid(cpp)) {
203 struct psi_token *current = psi_cpp_tokiter_current(cpp);
204
205 if (current->type == PSI_T_HASH) {
206 if (is_eol) {
207 do_cpp = true;
208 is_eol = false;
209 }
210 } else if (current->type == PSI_T_EOL) {
211 #if PSI_CPP_DEBUG
212 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
213 #endif
214 is_eol = true;
215 skip_all = false;
216 do_expansion = true;
217 if (!do_cpp) {
218 psi_cpp_tokiter_del_cur(cpp, true);
219 continue;
220 }
221 } else {
222 is_eol = false;
223
224 if (do_cpp) {
225 switch (current->type) {
226 case PSI_T_DEFINE:
227 #if PSI_CPP_DEBUG
228 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
229 #endif
230 do_expansion = false;
231 skip_all = true;
232 break;
233 case PSI_T_DEFINED:
234 skip_paren = true;
235 /* no break */
236 case PSI_T_IFDEF:
237 case PSI_T_IFNDEF:
238 case PSI_T_UNDEF:
239 #if PSI_CPP_DEBUG
240 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
241 #endif
242 do_expansion = false;
243 break;
244 case PSI_T_LPAREN:
245
246 if (!skip_all) {
247 if (skip_paren) {
248 skip_paren = false;
249 } else {
250 do_expansion = true;
251 #if PSI_CPP_DEBUG
252 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
253 #endif
254 }
255 }
256 break;
257 case PSI_T_NAME:
/* a NAME leaves do_expansion as it is, so the name following
 * defined/ifdef/ifndef/undef/define is not expanded */
258 break;
259 default:
260 do_expansion = !skip_all;
261 #if PSI_CPP_DEBUG
262 fprintf(stderr, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
263 #endif
264 }
265 }
266 }
267
268 if (cpp->skip) {
269 /* FIXME: del_range */
270 if (!do_cpp) {
271 #if PSI_CPP_DEBUG
272 fprintf(stderr, "PSI: CPP skip ");
273 psi_token_dump(2, current);
274 #endif
275 psi_cpp_tokiter_del_cur(cpp, true);
276 continue;
277 }
278 }
279
280 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
281 bool expanded = false;
282
/* keep expanding until psi_cpp_tokiter_expand() reports nothing left to do */
283 while (psi_cpp_tokiter_expand(cpp)) {
284 expanded = true;
285 }
286 if (expanded) {
287 continue;
288 }
289 }
290
291 if (do_cpp) {
292 parser_tokens = psi_plist_add(parser_tokens, &current);
293
294 if (is_eol) {
/* `processed` is only passed to psi_parser_process(); its value is not
 * read afterwards in this function */
295 size_t processed = 0;
296 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
297
298 /* EOL */
299 psi_plist_pop(parser_tokens, NULL);
300 psi_plist_clean(parser_tokens);
301 do_cpp = false;
302
303 if (!parsed) {
304 psi_plist_free(parser_tokens);
305 return false;
306 }
307 } else {
308 /* leave EOLs in the input stream, else we might end up
309 * with a hash not preceded with a new line after include */
310 psi_cpp_tokiter_del_cur(cpp, false);
311 }
312
313 #if PSI_CPP_DEBUG > 1
314 psi_cpp_tokiter_dump(2, cpp);
315 #endif
316
317 continue;
318 }
319
320 psi_cpp_tokiter_next(cpp);
321 }
322
323 psi_plist_free(parser_tokens);
324
325 return true;
326 }
327
328 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
329 {
330 bool parsed = false;
331 struct psi_cpp temp = *cpp;
332
333 cpp->tokens = *tokens;
334 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
335 parsed = true;
336 }
337 *tokens = cpp->tokens;
338
339 if (temp.tokens) {
340 cpp->tokens = temp.tokens;
341 cpp->index = temp.index;
342 }
343
344 return parsed;
345 }
346
347 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
348 {
349 bool defined;
350
351 if (tok->type == PSI_T_NAME) {
352 defined = zend_hash_exists(&cpp->defs, tok->text);
353 } else {
354 defined = false;
355 }
356
357 #if PSI_CPP_DEBUG
358 fprintf(stderr, "PSI: CPP defined -> %s ", defined ? "true" : "false");
359 if (defined) {
360 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
361 fprintf(stderr, " @ %s:%u ", macro->token->file->val, macro->token->line);
362 }
363 psi_token_dump(2, tok);
364 #endif
365
366 return defined;
367 }
368
369 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
370 {
371 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
372
373 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
374 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
375 "'%s' redefined", decl->token->text->val);
376 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
377 "'%s' previously defined", old->token->text->val);
378 }
379 #if PSI_CPP_DEBUG
380 if (decl->exp) {
381 fprintf(stderr, "MACRO: num_exp: ", decl->token->text);
382 } else if (decl->tokens) {
383 fprintf(stderr, "MACRO: decl : ", decl->token->text);
384 }
385 psi_cpp_macro_decl_dump(2, decl);
386 fprintf(stderr, "\n");
387 #endif
388 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
389 }
390
391 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
392 {
393 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
394 }
395
396 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
397 {
398 struct psi_validate_scope scope = {0};
399
400 scope.defs = &cpp->defs;
401 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
402 return false;
403 }
404 if (!psi_num_exp_get_long(exp->data.num, NULL, &cpp->defs)) {
405 return false;
406 }
407 return true;
408 }
409
410 static inline bool try_include(struct psi_cpp *cpp, const char *path, bool *parsed)
411 {
412 struct psi_parser_input *include;
413
414 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include trying %s\n", path);
415
416 include = psi_parser_open_file(cpp->parser, path, false);
417 if (include) {
418 struct psi_plist *tokens;
419
420 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
421
422 tokens = psi_parser_scan(cpp->parser, include);
423 if (tokens) {
424 *parsed = psi_cpp_process(cpp, &tokens);
425
426 if (*parsed) {
427 size_t num_tokens = psi_plist_count(tokens);
428
429 ++cpp->expanded;
430 psi_cpp_tokiter_ins_range(cpp, cpp->index,
431 num_tokens, psi_plist_eles(tokens));
432 /* skip already processed tokens */
433 cpp->index += num_tokens;
434 free(tokens);
435 } else {
436 psi_plist_free(tokens);
437 }
438 }
439 psi_parser_input_free(&include);
440
441 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
442 return true;
443 }
444 return false;
445 }
446
447 static inline void include_path(const struct psi_token *file, char **path)
448 {
449 if (file->text->val[0] == '/') {
450 *path = file->text->val;
451 } else {
452 char *dir;
453 size_t len;
454
455 strncpy(*path, file->file->val, PATH_MAX);
456
457 dir = dirname(*path);
458 len = strlen(dir);
459
460 assert(len + file->text->len + 1 < PATH_MAX);
461
462 memmove(*path, dir, len);
463 (*path)[len] = '/';
464 memcpy(&(*path)[len + 1], file->text->val, file->text->len + 1);
465 }
466 }
467
468 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
469 {
470 bool parsed = false;
471
472 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
473 /* first try as is, full or relative path */
474 char temp[PATH_MAX], *path = temp;
475
476 include_path(file, &path);
477
478 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, strlen(path))) {
479 return true;
480 }
481 if (try_include(cpp, path, &parsed)) {
482 /* found */
483 return parsed;
484 }
485 }
486
487 /* look through search paths */
488 if (file->text->val[0] != '/') {
489 char path[PATH_MAX];
490 const char *sep;
491 int p_len;
492
493 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
494 if ((sep = strchr(cpp->search, ':'))) {
495 cpp->search = sep + 1;
496 } else {
497 /* point to end of string */
498 cpp->search += strlen(cpp->search);
499 }
500 }
501
502 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
503 cpp->search = PSI_G(search_path);
504 }
505
506 do {
507 int d_len;
508
509 sep = strchr(cpp->search, ':');
510 d_len = sep ? sep - cpp->search : strlen(cpp->search);
511
512 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
513 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
514 return true;
515 }
516 if (try_include(cpp, path, &parsed)) {
517 break;
518 }
519 }
520
521 if (sep) {
522 cpp->search = sep + 1;
523 }
524 } while (sep);
525 }
526
527 return parsed;
528 }