2cf4094e93efbd5ef9303c67e23ce75c97fefecc
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include "cpp.h"
29 #include "parser.h"
30
31 #define PSI_CPP_SEARCH
32 #define PSI_CPP_PREDEF
33 #include "php_psi_cpp.h"
34
35 static void free_cpp_def(zval *p)
36 {
37 if (Z_TYPE_P(p) == IS_PTR) {
38 psi_cpp_macro_decl_free((void *) &Z_PTR_P(p));
39 }
40 }
41
42 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
43 {
44 struct psi_cpp *cpp = calloc(1, sizeof(*cpp));
45
46 cpp->parser = P;
47 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
48 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
49
50 return cpp;
51 }
52
53 bool psi_cpp_load_defaults(struct psi_cpp *cpp)
54 {
55 struct psi_parser_input *predef;
56
57 if ((predef = psi_parser_open_string(cpp->parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
58 bool parsed = psi_parser_parse(cpp->parser, predef);
59 free(predef);
60 return parsed;
61 }
62
63 return false;
64 }
65
66 static int dump_def(zval *p)
67 {
68 struct psi_cpp_macro_decl *decl = Z_PTR_P(p);
69
70 if (decl) {
71 dprintf(2, "#define ");
72 psi_cpp_macro_decl_dump(2, decl);
73 dprintf(2, "\n");
74 }
75 return ZEND_HASH_APPLY_KEEP;
76 }
77
78 void psi_cpp_free(struct psi_cpp **cpp_ptr)
79 {
80 if (*cpp_ptr) {
81 struct psi_cpp *cpp = *cpp_ptr;
82
83 *cpp_ptr = NULL;
84 if (cpp->parser->flags & PSI_DEBUG) {
85 fprintf(stderr, "PSI: CPP decls:\n");
86 zend_hash_apply(&cpp->defs, dump_def);
87 }
88 zend_hash_destroy(&cpp->defs);
89 zend_hash_destroy(&cpp->once);
90 free(cpp);
91 }
92 }
93
94 static bool psi_cpp_stage1(struct psi_cpp *cpp)
95 {
96 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
97
98 psi_cpp_tokiter_reset(cpp);
99 while (psi_cpp_tokiter_valid(cpp)) {
100 struct psi_token *token = psi_cpp_tokiter_current(cpp);
101
102 /* strip comments and attributes */
103 if (token->type == PSI_T_COMMENT || token->type == PSI_T_CPP_ATTRIBUTE) {
104 psi_cpp_tokiter_del_cur(cpp, true);
105 continue;
106 }
107
108 /* line continuations */
109 if (token->type == PSI_T_EOL) {
110 if (esc) {
111 psi_cpp_tokiter_del_range(cpp, psi_cpp_tokiter_index(cpp) - 1, 2, true);
112 psi_cpp_tokiter_prev(cpp);
113 esc = false;
114 continue;
115 }
116 } else if (token->type == PSI_T_BSLASH) {
117 esc = !esc;
118 } else {
119 esc = false;
120 }
121
122 /* this whole turf is needed to distinct between:
123 * #define foo (1,2,3)
124 * #define foo(a,b,c)
125 */
126
127 if (token->type == PSI_T_WHITESPACE) {
128 ws = true;
129 psi_cpp_tokiter_del_cur(cpp, true);
130 continue;
131 }
132
133 switch (token->type) {
134 case PSI_T_EOL:
135 eol = true;
136 break;
137 case PSI_T_HASH:
138 if (eol) {
139 hash = true;
140 eol = false;
141 }
142 break;
143 case PSI_T_DEFINE:
144 if (hash) {
145 define = true;
146 hash = false;
147 }
148 break;
149 case PSI_T_NAME:
150 if (define) {
151 name = true;
152 define = false;
153 }
154 break;
155 case PSI_T_LPAREN:
156 if (name) {
157 name = false;
158 if (!ws) {
159 /* mask special token for parser */
160 struct psi_token *no_ws = psi_token_copy(token);
161
162 no_ws->type = PSI_T_NO_WHITESPACE;
163 no_ws->text[0] = '\xA0';
164 psi_cpp_tokiter_ins_cur(cpp, no_ws);
165 continue;
166 }
167 }
168 /* no break */
169 default:
170 name = define = hash = eol = false;
171 break;
172 }
173
174 ws = false;
175 psi_cpp_tokiter_next(cpp);
176 }
177
178 return true;
179 }
180
181 static bool psi_cpp_stage2(struct psi_cpp *cpp)
182 {
183 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
184
185 do {
186 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
187
188 psi_cpp_tokiter_reset(cpp);
189
190 while (psi_cpp_tokiter_valid(cpp)) {
191 struct psi_token *current = psi_cpp_tokiter_current(cpp);
192
193 if (current->type == PSI_T_HASH) {
194 if (is_eol) {
195 do_cpp = true;
196 is_eol = false;
197 }
198 } else if (current->type == PSI_T_EOL) {
199 #if PSI_CPP_DEBUG
200 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
201 #endif
202 is_eol = true;
203 skip_all = false;
204 do_expansion = true;
205 if (!do_cpp) {
206 psi_cpp_tokiter_del_cur(cpp, true);
207 continue;
208 }
209 } else {
210 is_eol = false;
211
212 if (do_cpp) {
213 switch (current->type) {
214 case PSI_T_DEFINE:
215 #if PSI_CPP_DEBUG
216 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
217 #endif
218 do_expansion = false;
219 skip_all = true;
220 break;
221 case PSI_T_DEFINED:
222 skip_paren = true;
223 /* no break */
224 case PSI_T_IFDEF:
225 case PSI_T_IFNDEF:
226 case PSI_T_UNDEF:
227 #if PSI_CPP_DEBUG
228 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
229 #endif
230 do_expansion = false;
231 break;
232 case PSI_T_LPAREN:
233
234 if (!skip_all) {
235 if (skip_paren) {
236 skip_paren = false;
237 } else {
238 do_expansion = true;
239 #if PSI_CPP_DEBUG
240 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
241 #endif
242 }
243 }
244 break;
245 case PSI_T_NAME:
246 break;
247 default:
248 do_expansion = !skip_all;
249 #if PSI_CPP_DEBUG
250 fprintf(stderr, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
251 #endif
252 }
253 }
254 }
255
256 if (cpp->skip) {
257 /* FIXME: del_range */
258 if (!do_cpp) {
259 #if PSI_CPP_DEBUG
260 fprintf(stderr, "PSI: CPP skip ");
261 psi_token_dump(2, current);
262 #endif
263 psi_cpp_tokiter_del_cur(cpp, true);
264 continue;
265 }
266 }
267
268 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
269 bool expanded = false;
270
271 while (psi_cpp_tokiter_expand(cpp)) {
272 expanded = true;
273 }
274 if (expanded) {
275 continue;
276 }
277 }
278
279 if (do_cpp) {
280 parser_tokens = psi_plist_add(parser_tokens, &current);
281
282 if (is_eol) {
283 size_t processed = 0;
284 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
285
286 /* EOL */
287 psi_plist_pop(parser_tokens, NULL);
288 psi_plist_clean(parser_tokens);
289 do_cpp = false;
290
291 if (!parsed) {
292 psi_plist_free(parser_tokens);
293 return false;
294 }
295 } else {
296 /* leave EOLs in the input stream, else we might end up
297 * with a hash not preceded with a new line after include */
298 psi_cpp_tokiter_del_cur(cpp, false);
299 }
300
301 #if PSI_CPP_DEBUG > 1
302 psi_cpp_tokiter_dump(2, cpp);
303 #endif
304
305 continue;
306 }
307
308 psi_cpp_tokiter_next(cpp);
309 }
310 } while (cpp->expanded);
311
312 psi_plist_free(parser_tokens);
313
314 return true;
315 }
316
317 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
318 {
319 bool parsed = false;
320 struct psi_cpp temp = *cpp;
321
322 cpp->tokens = *tokens;
323 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
324 parsed = true;
325 }
326 *tokens = cpp->tokens;
327
328 if (temp.tokens) {
329 cpp->tokens = temp.tokens;
330 cpp->index = temp.index;
331 }
332
333 return parsed;
334 }
335
336 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
337 {
338 bool defined;
339
340 if (tok->type == PSI_T_NAME) {
341 defined = zend_hash_str_exists(&cpp->defs, tok->text, tok->size);
342 } else {
343 defined = false;
344 }
345
346 #if PSI_CPP_DEBUG
347 fprintf(stderr, "PSI: CPP defined -> %s ", defined ? "true" : "false");
348 psi_token_dump(2, tok);
349 #endif
350
351 return defined;
352 }
353
354 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
355 {
356 struct psi_cpp_macro_decl *old = zend_hash_str_find_ptr(&cpp->defs, decl->token->text, decl->token->size);
357
358 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
359 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
360 "'%s' redefined", decl->token->text);
361 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
362 "'%s' previously defined", old->token->text);
363 }
364 zend_hash_str_update_ptr(&cpp->defs, decl->token->text, decl->token->size, decl);
365 }
366
367 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
368 {
369 return SUCCESS == zend_hash_str_del(&cpp->defs, tok->text, tok->size);
370 }
371
372 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
373 {
374 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, NULL, NULL, NULL, NULL, NULL)) {
375 return false;
376 }
377 if (!psi_long_num_exp(exp->data.num, NULL, &cpp->defs)) {
378 return false;
379 }
380 return true;
381 }
382
383 static inline bool try_include(struct psi_cpp *cpp, const char *path, bool *parsed)
384 {
385 struct psi_parser_input *include;
386
387 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include trying %s\n", path);
388
389 include = psi_parser_open_file(cpp->parser, path, false);
390 if (include) {
391 struct psi_plist *tokens;
392
393 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
394
395 tokens = psi_parser_scan(cpp->parser, include);
396 if (tokens) {
397 *parsed = psi_cpp_process(cpp, &tokens);
398
399 if (*parsed) {
400 ++cpp->expanded;
401 psi_cpp_tokiter_ins_range(cpp, cpp->index,
402 psi_plist_count(tokens), psi_plist_eles(tokens));
403 free(tokens);
404 } else {
405 psi_plist_free(tokens);
406 }
407 }
408 free(include);
409
410 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
411 return true;
412 }
413 return false;
414 }
415
416 bool psi_cpp_include(struct psi_cpp *cpp, const char *file, unsigned flags)
417 {
418 char path[PATH_MAX];
419 bool parsed = false;
420 int p_len, f_len = strlen(file) - 2;
421
422 if (file[1] == '/') {
423 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s", f_len, file + 1))) {
424 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
425 return true;
426 }
427 return try_include(cpp, path, &parsed) && parsed;
428 }
429 } else {
430 const char *sep;
431
432 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
433 if ((sep = strchr(cpp->search, ':'))) {
434 cpp->search = sep + 1;
435 } else {
436 /* point to end of string */
437 cpp->search += strlen(cpp->search);
438 }
439 }
440
441 if (!(flags & PSI_CPP_INCLUDE_NEXT) || !cpp->search) {
442 cpp->search = &psi_cpp_search[0];
443 }
444
445 do {
446 int d_len;
447
448 sep = strchr(cpp->search, ':');
449 d_len = sep ? sep - cpp->search : strlen(cpp->search);
450
451 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, f_len, file + 1))) {
452 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
453 return true;
454 }
455 if (try_include(cpp, path, &parsed)) {
456 break;
457 }
458 }
459 cpp->search = sep + 1;
460 } while (sep);
461 }
462
463 return parsed;
464 }