interned strings
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <libgen.h>
29
30 #include "cpp.h"
31 #include "parser.h"
32
33 #define PSI_CPP_SEARCH
34 #define PSI_CPP_PREDEF
35 #include "php_psi_cpp.h"
36
37 #include "php_psi.h"
38
39 static void free_cpp_def(zval *p)
40 {
41 if (Z_TYPE_P(p) == IS_PTR) {
42 psi_cpp_macro_decl_free((void *) &Z_PTR_P(p));
43 }
44 }
45
46 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
47 {
48 struct psi_cpp *cpp = calloc(1, sizeof(*cpp));
49
50 cpp->parser = P;
51 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
52 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
53
54 return cpp;
55 }
56
57 bool psi_cpp_load_defaults(struct psi_cpp *cpp)
58 {
59 struct psi_parser_input *predef;
60
61 if ((predef = psi_parser_open_string(cpp->parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
62 bool parsed = psi_parser_parse(cpp->parser, predef);
63 psi_parser_input_free(&predef);
64 return parsed;
65 }
66
67 return false;
68 }
69
70 #if PSI_CPP_DEBUG
71 static int dump_def(zval *p)
72 {
73 struct psi_cpp_macro_decl *decl = Z_PTR_P(p);
74
75 if (decl) {
76 fflush(stderr);
77 dprintf(2, "PSI: CPP decl -> #define ");
78 psi_cpp_macro_decl_dump(2, decl);
79 dprintf(2, "\n");
80 }
81 return ZEND_HASH_APPLY_KEEP;
82 }
83 #endif
84
85 void psi_cpp_free(struct psi_cpp **cpp_ptr)
86 {
87 if (*cpp_ptr) {
88 struct psi_cpp *cpp = *cpp_ptr;
89
90 #if PSI_CPP_DEBUG
91 zend_hash_apply(&cpp->defs, dump_def);
92 #endif
93 *cpp_ptr = NULL;
94 zend_hash_destroy(&cpp->defs);
95 zend_hash_destroy(&cpp->once);
96 free(cpp);
97 }
98 }
99
100 static bool psi_cpp_stage1(struct psi_cpp *cpp)
101 {
102 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
103
104 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage1");
105
106 psi_cpp_tokiter_reset(cpp);
107 while (psi_cpp_tokiter_valid(cpp)) {
108 struct psi_token *token = psi_cpp_tokiter_current(cpp);
109
110 /* strip comments and attributes */
111 if (token->type == PSI_T_COMMENT
112 || token->type == PSI_T_CPP_ATTRIBUTE) {
113 psi_cpp_tokiter_del_cur(cpp, true);
114 continue;
115 }
116
117 /* line continuations */
118 if (token->type == PSI_T_EOL) {
119 if (esc) {
120 psi_cpp_tokiter_del_prev(cpp, true);
121 psi_cpp_tokiter_del_cur(cpp, true);
122 esc = false;
123 continue;
124 }
125 } else if (token->type == PSI_T_BSLASH) {
126 esc = !esc;
127 } else {
128 esc = false;
129 }
130
131 /* this whole turf is needed to distinct between:
132 * #define foo (1,2,3)
133 * #define foo(a,b,c)
134 */
135
136 if (token->type == PSI_T_WHITESPACE) {
137 if (name) {
138 name = false;
139 }
140 ws = true;
141 psi_cpp_tokiter_del_cur(cpp, true);
142 continue;
143 }
144
145 switch (token->type) {
146 case PSI_T_EOL:
147 eol = true;
148 break;
149 case PSI_T_HASH:
150 if (eol) {
151 hash = true;
152 eol = false;
153 }
154 break;
155 case PSI_T_DEFINE:
156 if (hash) {
157 define = true;
158 hash = false;
159 }
160 break;
161 case PSI_T_NAME:
162 if (define) {
163 name = true;
164 define = false;
165 }
166 break;
167 case PSI_T_LPAREN:
168 if (name) {
169 name = false;
170 if (!ws) {
171 /* mask special token for parser */
172 struct psi_token *no_ws = psi_token_copy(token);
173
174 no_ws->type = PSI_T_NO_WHITESPACE;
175 zend_string_release(no_ws->text);
176 no_ws->text = zend_string_init_interned("\xA0", 1, 1);
177 psi_cpp_tokiter_add(cpp, no_ws);
178 continue;
179 }
180 }
181 /* no break */
182 default:
183 name = define = hash = eol = false;
184 break;
185 }
186
187 ws = false;
188 psi_cpp_tokiter_add_cur(cpp);
189 psi_cpp_tokiter_next(cpp);
190 }
191
192 return true;
193 }
194
195 static bool psi_cpp_stage2(struct psi_cpp *cpp)
196 {
197 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
198 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
199
200 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP %s\n", "stage2");
201
202 psi_cpp_tokiter_reset(cpp);
203 while (psi_cpp_tokiter_valid(cpp)) {
204 struct psi_token *current = psi_cpp_tokiter_current(cpp);
205
206 if (current->type == PSI_T_HASH) {
207 if (is_eol) {
208 do_cpp = true;
209 is_eol = false;
210 }
211 } else if (current->type == PSI_T_EOL) {
212 #if PSI_CPP_DEBUG
213 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
214 #endif
215 is_eol = true;
216 skip_all = false;
217 do_expansion = true;
218 if (!do_cpp) {
219 psi_cpp_tokiter_del_cur(cpp, true);
220 continue;
221 }
222 } else {
223 is_eol = false;
224
225 if (do_cpp) {
226 switch (current->type) {
227 case PSI_T_DEFINE:
228 #if PSI_CPP_DEBUG
229 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
230 #endif
231 do_expansion = false;
232 skip_all = true;
233 break;
234 case PSI_T_DEFINED:
235 skip_paren = true;
236 /* no break */
237 case PSI_T_IFDEF:
238 case PSI_T_IFNDEF:
239 case PSI_T_UNDEF:
240 #if PSI_CPP_DEBUG
241 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
242 #endif
243 do_expansion = false;
244 break;
245 case PSI_T_LPAREN:
246
247 if (!skip_all) {
248 if (skip_paren) {
249 skip_paren = false;
250 } else {
251 do_expansion = true;
252 #if PSI_CPP_DEBUG
253 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
254 #endif
255 }
256 }
257 break;
258 case PSI_T_NAME:
259 break;
260 default:
261 do_expansion = !skip_all;
262 #if PSI_CPP_DEBUG
263 fprintf(stderr, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
264 #endif
265 }
266 }
267 }
268
269 if (cpp->skip) {
270 if (!do_cpp) {
271 #if PSI_CPP_DEBUG
272 fprintf(stderr, "PSI: CPP skip ");
273 psi_token_dump(2, current);
274 #endif
275 psi_cpp_tokiter_del_cur(cpp, true);
276 continue;
277 }
278 }
279
280 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
281 bool expanded = false;
282
283 while (psi_cpp_tokiter_expand(cpp)) {
284 expanded = true;
285 }
286 if (expanded) {
287 continue;
288 }
289 }
290
291 if (do_cpp) {
292 parser_tokens = psi_plist_add(parser_tokens, &current);
293
294 if (is_eol) {
295 size_t processed = 0;
296 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
297
298 /* EOL */
299 psi_plist_pop(parser_tokens, NULL);
300 psi_plist_clean(parser_tokens);
301 do_cpp = false;
302
303 if (!parsed) {
304 psi_plist_free(parser_tokens);
305 return false;
306 }
307 } else {
308 /* leave EOLs in the input stream, else we might end up
309 * with a hash not preceded with a new line after include */
310 psi_cpp_tokiter_del_cur(cpp, false);
311 }
312
313 #if PSI_CPP_DEBUG > 1
314 psi_cpp_tokiter_dump(2, cpp);
315 #endif
316
317 continue;
318 }
319
320 psi_cpp_tokiter_add_cur(cpp);
321 psi_cpp_tokiter_next(cpp);
322 }
323
324 psi_plist_free(parser_tokens);
325
326 return true;
327 }
328
329 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
330 {
331 bool parsed = false;
332 struct psi_cpp temp = *cpp;
333
334 cpp->tokens.iter = *tokens;
335 cpp->tokens.next = NULL;
336
337 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
338 parsed = true;
339 }
340
341 if (cpp->tokens.next) {
342 free(cpp->tokens.iter);
343 cpp->tokens.iter = cpp->tokens.next;
344 cpp->tokens.next = NULL;
345 }
346
347 *tokens = cpp->tokens.iter;
348
349 if (temp.tokens.iter) {
350 cpp->tokens.iter = temp.tokens.iter;
351 cpp->tokens.next = temp.tokens.next;
352 cpp->index = temp.index;
353 }
354
355 return parsed;
356 }
357
358 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
359 {
360 bool defined;
361
362 if (tok->type == PSI_T_NAME) {
363 defined = zend_hash_exists(&cpp->defs, tok->text);
364 } else {
365 defined = false;
366 }
367
368 #if PSI_CPP_DEBUG
369 fprintf(stderr, "PSI: CPP defined -> %s ", defined ? "true" : "false");
370 if (defined) {
371 struct psi_cpp_macro_decl *macro = zend_hash_find_ptr(&cpp->defs, tok->text);
372 fprintf(stderr, " @ %s:%u ", macro->token->file->val, macro->token->line);
373 }
374 psi_token_dump(2, tok);
375 #endif
376
377 return defined;
378 }
379
380 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
381 {
382 struct psi_cpp_macro_decl *old = zend_hash_find_ptr(&cpp->defs, decl->token->text);
383
384 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
385 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
386 "'%s' redefined", decl->token->text->val);
387 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
388 "'%s' previously defined", old->token->text->val);
389 }
390 #if PSI_CPP_DEBUG
391 if (decl->exp) {
392 fprintf(stderr, "PSI: CPP MACRO num_exp -> %s ", decl->token->text->val);
393 } else {
394 fprintf(stderr, "PSI: CPP MACRO decl -> %s ", decl->token->text->val);
395 }
396 psi_cpp_macro_decl_dump(2, decl);
397 fprintf(stderr, "\n");
398 #endif
399 zend_hash_update_ptr(&cpp->defs, decl->token->text, decl);
400 }
401
402 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
403 {
404 return SUCCESS == zend_hash_del(&cpp->defs, tok->text);
405 }
406
407 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
408 {
409 struct psi_validate_scope scope = {0};
410
411 scope.defs = &cpp->defs;
412 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, &scope)) {
413 return false;
414 }
415 if (!psi_num_exp_get_long(exp->data.num, NULL, &cpp->defs)) {
416 return false;
417 }
418 return true;
419 }
420
421 static inline bool try_include(struct psi_cpp *cpp, const char *path, bool *parsed)
422 {
423 struct psi_parser_input *include;
424
425 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include trying %s\n", path);
426
427 include = psi_parser_open_file(cpp->parser, path, false);
428 if (include) {
429 struct psi_plist *tokens;
430
431 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
432
433 tokens = psi_parser_scan(cpp->parser, include);
434 if (tokens) {
435 *parsed = psi_cpp_process(cpp, &tokens);
436
437 if (*parsed) {
438 size_t num_tokens = psi_plist_count(tokens);
439
440 ++cpp->expanded;
441 psi_cpp_tokiter_add_range(cpp, num_tokens, psi_plist_eles(tokens));
442 free(tokens);
443 } else {
444 psi_plist_free(tokens);
445 }
446 }
447 psi_parser_input_free(&include);
448
449 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
450 return true;
451 }
452 return false;
453 }
454
455 static inline void include_path(const struct psi_token *file, char **path)
456 {
457 if (file->text->val[0] == '/') {
458 *path = file->text->val;
459 } else {
460 char *dir;
461 size_t len;
462
463 strncpy(*path, file->file->val, PATH_MAX);
464
465 dir = dirname(*path);
466 len = strlen(dir);
467
468 assert(len + file->text->len + 1 < PATH_MAX);
469
470 memmove(*path, dir, len);
471 (*path)[len] = '/';
472 memcpy(&(*path)[len + 1], file->text->val, file->text->len + 1);
473 }
474 }
475
476 bool psi_cpp_include(struct psi_cpp *cpp, const struct psi_token *file, unsigned flags)
477 {
478 bool parsed = false;
479
480 if (file->type == PSI_T_QUOTED_STRING && (!(flags & PSI_CPP_INCLUDE_NEXT) || file->text->val[0] == '/')) {
481 /* first try as is, full or relative path */
482 char temp[PATH_MAX], *path = temp;
483
484 include_path(file, &path);
485
486 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, strlen(path))) {
487 return true;
488 }
489 if (try_include(cpp, path, &parsed)) {
490 /* found */
491 return parsed;
492 }
493 }
494
495 /* look through search paths */
496 if (file->text->val[0] != '/') {
497 char path[PATH_MAX];
498 const char *sep;
499 int p_len;
500
501 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
502 if ((sep = strchr(cpp->search, ':'))) {
503 cpp->search = sep + 1;
504 } else {
505 /* point to end of string */
506 cpp->search += strlen(cpp->search);
507 }
508 }
509
510 if (!(flags & PSI_CPP_INCLUDE_NEXT)) {
511 cpp->search = PSI_G(search_path);
512 }
513
514 do {
515 int d_len;
516
517 sep = strchr(cpp->search, ':');
518 d_len = sep ? sep - cpp->search : strlen(cpp->search);
519
520 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, (int) file->text->len, file->text->val))) {
521 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
522 return true;
523 }
524 if (try_include(cpp, path, &parsed)) {
525 break;
526 }
527 }
528
529 if (sep) {
530 cpp->search = sep + 1;
531 }
532 } while (sep);
533 }
534
535 return parsed;
536 }