cpp
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include "cpp.h"
29 #include "parser.h"
30
31 #define PSI_CPP_SEARCH
32 #define PSI_CPP_PREDEF
33 #include "php_psi_cpp.h"
34
35 static void free_cpp_def(zval *p)
36 {
37 if (Z_TYPE_P(p) == IS_PTR) {
38 psi_cpp_macro_decl_free((void *) &Z_PTR_P(p));
39 }
40 }
41
42 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
43 {
44 struct psi_cpp *cpp = calloc(1, sizeof(*cpp));
45
46 cpp->parser = P;
47 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
48 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
49
50 return cpp;
51 }
52
53 bool psi_cpp_load_defaults(struct psi_cpp *cpp)
54 {
55 struct psi_parser_input *predef;
56
57 if ((predef = psi_parser_open_string(cpp->parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
58 bool parsed = psi_parser_parse(cpp->parser, predef);
59 free(predef);
60 return parsed;
61 }
62
63 return false;
64 }
65
66 static int dump_def(zval *p)
67 {
68 struct psi_cpp_macro_decl *decl = Z_PTR_P(p);
69
70 if (decl) {
71 dprintf(2, "#define ");
72 psi_cpp_macro_decl_dump(2, decl);
73 dprintf(2, "\n");
74 }
75 return ZEND_HASH_APPLY_KEEP;
76 }
77
78 void psi_cpp_free(struct psi_cpp **cpp_ptr)
79 {
80 if (*cpp_ptr) {
81 struct psi_cpp *cpp = *cpp_ptr;
82
83 *cpp_ptr = NULL;
84 if (cpp->parser->flags & PSI_DEBUG) {
85 fprintf(stderr, "PSI: CPP decls:\n");
86 zend_hash_apply(&cpp->defs, dump_def);
87 }
88 zend_hash_destroy(&cpp->defs);
89 zend_hash_destroy(&cpp->once);
90 free(cpp);
91 }
92 }
93
94 static bool psi_cpp_stage1(struct psi_cpp *cpp)
95 {
96 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
97
98 psi_cpp_tokiter_reset(cpp);
99 while (psi_cpp_tokiter_valid(cpp)) {
100 struct psi_token *token = psi_cpp_tokiter_current(cpp);
101
102 /* strip comments and attributes */
103 if (token->type == PSI_T_COMMENT || token->type == PSI_T_CPP_ATTRIBUTE) {
104 psi_cpp_tokiter_del_cur(cpp, true);
105 continue;
106 }
107
108 /* line continuations */
109 if (token->type == PSI_T_EOL) {
110 if (esc) {
111 psi_cpp_tokiter_del_range(cpp, psi_cpp_tokiter_index(cpp) - 1, 2, true);
112 psi_cpp_tokiter_prev(cpp);
113 esc = false;
114 continue;
115 }
116 } else if (token->type == PSI_T_BSLASH) {
117 esc = !esc;
118 } else {
119 esc = false;
120 }
121
122 /* this whole turf is needed to distinct between:
123 * #define foo (1,2,3)
124 * #define foo(a,b,c)
125 */
126
127 if (token->type == PSI_T_WHITESPACE) {
128 ws = true;
129 psi_cpp_tokiter_del_cur(cpp, true);
130 continue;
131 }
132
133 switch (token->type) {
134 case PSI_T_EOL:
135 eol = true;
136 break;
137 case PSI_T_HASH:
138 if (eol) {
139 hash = true;
140 eol = false;
141 }
142 break;
143 case PSI_T_DEFINE:
144 if (hash) {
145 define = true;
146 hash = false;
147 }
148 break;
149 case PSI_T_NAME:
150 if (define) {
151 name = true;
152 define = false;
153 }
154 break;
155 case PSI_T_LPAREN:
156 if (name) {
157 name = false;
158 if (!ws) {
159 /* mask special token for parser */
160 struct psi_token *no_ws = psi_token_copy(token);
161
162 no_ws->type = PSI_T_NO_WHITESPACE;
163 no_ws->text[0] = '\xA0';
164 psi_cpp_tokiter_ins_cur(cpp, no_ws);
165 continue;
166 }
167 }
168 /* no break */
169 default:
170 name = define = hash = eol = false;
171 break;
172 }
173
174 ws = false;
175 psi_cpp_tokiter_next(cpp);
176 }
177
178 return true;
179 }
180
181 static bool psi_cpp_stage2(struct psi_cpp *cpp)
182 {
183 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
184
185 do {
186 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
187
188 psi_cpp_tokiter_reset(cpp);
189
190 while (psi_cpp_tokiter_valid(cpp)) {
191 struct psi_token *current = psi_cpp_tokiter_current(cpp);
192
193 if (current->type == PSI_T_HASH) {
194 if (is_eol) {
195 do_cpp = true;
196 is_eol = false;
197 }
198 } else if (current->type == PSI_T_EOL) {
199 #if PSI_CPP_DEBUG
200 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
201 #endif
202 is_eol = true;
203 skip_all = false;
204 do_expansion = true;
205 if (!do_cpp) {
206 psi_cpp_tokiter_del_cur(cpp, true);
207 continue;
208 }
209 } else {
210 is_eol = false;
211
212 if (do_cpp) {
213 switch (current->type) {
214 case PSI_T_DEFINE:
215 #if PSI_CPP_DEBUG
216 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
217 #endif
218 do_expansion = false;
219 skip_all = true;
220 break;
221 case PSI_T_DEFINED:
222 skip_paren = true;
223 /* no break */
224 case PSI_T_IFDEF:
225 case PSI_T_IFNDEF:
226 case PSI_T_UNDEF:
227 #if PSI_CPP_DEBUG
228 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
229 #endif
230 do_expansion = false;
231 break;
232 case PSI_T_LPAREN:
233
234 if (!skip_all) {
235 if (skip_paren) {
236 skip_paren = false;
237 } else {
238 do_expansion = true;
239 #if PSI_CPP_DEBUG
240 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
241 #endif
242 }
243 }
244 break;
245 case PSI_T_NAME:
246 break;
247 default:
248 do_expansion = !skip_all;
249 #if PSI_CPP_DEBUG
250 fprintf(stderr, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
251 #endif
252 }
253 }
254 }
255
256 if (cpp->skip) {
257 /* FIXME: del_range */
258 if (!do_cpp) {
259 #if PSI_CPP_DEBUG
260 fprintf(stderr, "PSI: CPP skip ");
261 psi_token_dump(2, current);
262 #endif
263 psi_cpp_tokiter_del_cur(cpp, true);
264 continue;
265 }
266 }
267
268 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
269 bool expanded = false;
270
271 while (psi_cpp_tokiter_expand(cpp)) {
272 expanded = true;
273 }
274 if (expanded) {
275 continue;
276 }
277 }
278
279 if (do_cpp) {
280 parser_tokens = psi_plist_add(parser_tokens, &current);
281
282 if (is_eol) {
283 size_t processed = 0;
284
285 if (!psi_parser_process(cpp->parser, parser_tokens, &processed)) {
286 psi_plist_free(parser_tokens);
287 return false;
288 }
289 psi_plist_pop(parser_tokens, NULL);
290 psi_plist_clean(parser_tokens);
291 do_cpp = false;
292 } else {
293 /* leave EOLs in the input stream, else we might end up
294 * with a hash not preceeded with a new line after include */
295 psi_cpp_tokiter_del_cur(cpp, false);
296 }
297
298 #if PSI_CPP_DEBUG > 1
299 psi_cpp_tokiter_dump(2, cpp);
300 #endif
301
302 continue;
303 }
304
305 psi_cpp_tokiter_next(cpp);
306 }
307 } while (cpp->expanded);
308
309 psi_plist_free(parser_tokens);
310
311 return true;
312 }
313
314 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
315 {
316 bool parsed = false;
317 struct psi_cpp temp = *cpp;
318
319 cpp->tokens = *tokens;
320 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
321 parsed = true;
322 }
323 *tokens = cpp->tokens;
324
325 if (temp.tokens) {
326 cpp->tokens = temp.tokens;
327 cpp->index = temp.index;
328 }
329
330 return parsed;
331 }
332
333 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
334 {
335 bool defined;
336
337 if (tok->type == PSI_T_NAME) {
338 defined = zend_hash_str_exists(&cpp->defs, tok->text, tok->size);
339 } else {
340 defined = false;
341 }
342
343 #if PSI_CPP_DEBUG
344 fprintf(stderr, "PSI: CPP defined -> %s ", defined ? "true" : "false");
345 psi_token_dump(2, tok);
346 #endif
347
348 return defined;
349 }
350
351 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
352 {
353 struct psi_cpp_macro_decl *old = zend_hash_str_find_ptr(&cpp->defs, decl->token->text, decl->token->size);
354
355 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
356 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
357 "'%s' redefined", decl->token->text);
358 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
359 "'%s' previously defined", old->token->text);
360 }
361 zend_hash_str_update_ptr(&cpp->defs, decl->token->text, decl->token->size, decl);
362 }
363
364 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
365 {
366 return SUCCESS == zend_hash_str_del(&cpp->defs, tok->text, tok->size);
367 }
368
369 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
370 {
371 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, NULL, NULL, NULL, NULL, NULL)) {
372 return false;
373 }
374 if (!psi_long_num_exp(exp->data.num, NULL, &cpp->defs)) {
375 return false;
376 }
377 return true;
378 }
379
380 static inline bool try_include(struct psi_cpp *cpp, const char *path, bool *parsed)
381 {
382 struct psi_parser_input *include;
383
384 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include trying %s\n", path);
385
386 include = psi_parser_open_file(cpp->parser, path, false);
387 if (include) {
388 struct psi_plist *tokens;
389
390 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
391
392 tokens = psi_parser_scan(cpp->parser, include);
393 if (tokens) {
394 *parsed = psi_cpp_process(cpp, &tokens);
395
396 if (*parsed) {
397 ++cpp->expanded;
398 psi_cpp_tokiter_ins_range(cpp, cpp->index,
399 psi_plist_count(tokens), psi_plist_eles(tokens));
400 free(tokens);
401 } else {
402 psi_plist_free(tokens);
403 }
404 }
405 free(include);
406
407 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
408 return true;
409 }
410 return false;
411 }
412
413 bool psi_cpp_include(struct psi_cpp *cpp, const char *file, unsigned flags)
414 {
415 char path[PATH_MAX];
416 bool parsed = false;
417 int p_len, f_len = strlen(file) - 2;
418
419 if (file[1] == '/') {
420 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s", f_len, file + 1))) {
421 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
422 return true;
423 }
424 return try_include(cpp, path, &parsed) && parsed;
425 }
426 } else {
427 const char *sep;
428
429 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
430 if ((sep = strchr(cpp->search, ':'))) {
431 cpp->search = sep + 1;
432 } else {
433 /* point to end of string */
434 cpp->search += strlen(cpp->search);
435 }
436 }
437
438 if (!(flags & PSI_CPP_INCLUDE_NEXT) || !cpp->search) {
439 cpp->search = &psi_cpp_search[0];
440 }
441
442 do {
443 int d_len;
444
445 sep = strchr(cpp->search, ':');
446 d_len = sep ? sep - cpp->search : strlen(cpp->search);
447
448 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, f_len, file + 1))) {
449 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
450 return true;
451 }
452 if (try_include(cpp, path, &parsed)) {
453 break;
454 }
455 }
456 cpp->search = sep + 1;
457 } while (sep);
458 }
459
460 return parsed;
461 }