ffi: fix buffer overrun when padding struct storage
[m6w6/ext-psi] / src / cpp.c
1 /*******************************************************************************
2 Copyright (c) 2017, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include "cpp.h"
29 #include "parser.h"
30
31 #define PSI_CPP_SEARCH
32 #define PSI_CPP_PREDEF
33 #include "php_psi_cpp.h"
34
35 static void free_cpp_def(zval *p)
36 {
37 if (Z_TYPE_P(p) == IS_PTR) {
38 psi_cpp_macro_decl_free((void *) &Z_PTR_P(p));
39 }
40 }
41
42 struct psi_cpp *psi_cpp_init(struct psi_parser *P)
43 {
44 struct psi_cpp *cpp = calloc(1, sizeof(*cpp));
45
46 cpp->parser = P;
47 zend_hash_init(&cpp->defs, 0, NULL, free_cpp_def, 1);
48 zend_hash_init(&cpp->once, 0, NULL, NULL, 1);
49
50 return cpp;
51 }
52
53 bool psi_cpp_load_defaults(struct psi_cpp *cpp)
54 {
55 struct psi_parser_input *predef;
56
57 if ((predef = psi_parser_open_string(cpp->parser, psi_cpp_predef, sizeof(psi_cpp_predef) - 1))) {
58 bool parsed = psi_parser_parse(cpp->parser, predef);
59 free(predef);
60 return parsed;
61 }
62
63 return false;
64 }
65
66 static int dump_def(zval *p)
67 {
68 struct psi_cpp_macro_decl *decl = Z_PTR_P(p);
69
70 if (decl) {
71 dprintf(2, "#define ");
72 psi_cpp_macro_decl_dump(2, decl);
73 dprintf(2, "\n");
74 }
75 return ZEND_HASH_APPLY_KEEP;
76 }
77
78 void psi_cpp_free(struct psi_cpp **cpp_ptr)
79 {
80 if (*cpp_ptr) {
81 struct psi_cpp *cpp = *cpp_ptr;
82
83 *cpp_ptr = NULL;
84 if (cpp->parser->flags & PSI_DEBUG) {
85 fprintf(stderr, "PSI: CPP decls:\n");
86 zend_hash_apply(&cpp->defs, dump_def);
87 }
88 zend_hash_destroy(&cpp->defs);
89 zend_hash_destroy(&cpp->once);
90 free(cpp);
91 }
92 }
93
94 static bool psi_cpp_stage1(struct psi_cpp *cpp)
95 {
96 bool name = false, define = false, hash = false, eol = true, esc = false, ws = false;
97
98 psi_cpp_tokiter_reset(cpp);
99 while (psi_cpp_tokiter_valid(cpp)) {
100 struct psi_token *token = psi_cpp_tokiter_current(cpp);
101
102 /* strip comments and attributes */
103 if (token->type == PSI_T_COMMENT
104 || token->type == PSI_T_CPP_ATTRIBUTE) {
105 psi_cpp_tokiter_del_cur(cpp, true);
106 continue;
107 }
108
109 /* line continuations */
110 if (token->type == PSI_T_EOL) {
111 if (esc) {
112 psi_cpp_tokiter_del_range(cpp, psi_cpp_tokiter_index(cpp) - 1, 2, true);
113 psi_cpp_tokiter_prev(cpp);
114 esc = false;
115 continue;
116 }
117 } else if (token->type == PSI_T_BSLASH) {
118 esc = !esc;
119 } else {
120 esc = false;
121 }
122
123 /* this whole turf is needed to distinct between:
124 * #define foo (1,2,3)
125 * #define foo(a,b,c)
126 */
127
128 if (token->type == PSI_T_WHITESPACE) {
129 if (name) {
130 name = false;
131 }
132 ws = true;
133 psi_cpp_tokiter_del_cur(cpp, true);
134 continue;
135 }
136
137 switch (token->type) {
138 case PSI_T_EOL:
139 eol = true;
140 break;
141 case PSI_T_HASH:
142 if (eol) {
143 hash = true;
144 eol = false;
145 }
146 break;
147 case PSI_T_DEFINE:
148 if (hash) {
149 define = true;
150 hash = false;
151 }
152 break;
153 case PSI_T_NAME:
154 if (define) {
155 name = true;
156 define = false;
157 }
158 break;
159 case PSI_T_LPAREN:
160 if (name) {
161 name = false;
162 if (!ws) {
163 /* mask special token for parser */
164 struct psi_token *no_ws = psi_token_copy(token);
165
166 no_ws->type = PSI_T_NO_WHITESPACE;
167 no_ws->text[0] = '\xA0';
168 psi_cpp_tokiter_ins_cur(cpp, no_ws);
169 continue;
170 }
171 }
172 /* no break */
173 default:
174 name = define = hash = eol = false;
175 break;
176 }
177
178 ws = false;
179 psi_cpp_tokiter_next(cpp);
180 }
181
182 return true;
183 }
184
185 static bool psi_cpp_stage2(struct psi_cpp *cpp)
186 {
187 struct psi_plist *parser_tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
188
189 do {
190 bool is_eol = true, do_cpp = false, do_expansion = true, skip_paren = false, skip_all = false;
191
192 psi_cpp_tokiter_reset(cpp);
193
194 while (psi_cpp_tokiter_valid(cpp)) {
195 struct psi_token *current = psi_cpp_tokiter_current(cpp);
196
197 if (current->type == PSI_T_HASH) {
198 if (is_eol) {
199 do_cpp = true;
200 is_eol = false;
201 }
202 } else if (current->type == PSI_T_EOL) {
203 #if PSI_CPP_DEBUG
204 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_EOL\n");
205 #endif
206 is_eol = true;
207 skip_all = false;
208 do_expansion = true;
209 if (!do_cpp) {
210 psi_cpp_tokiter_del_cur(cpp, true);
211 continue;
212 }
213 } else {
214 is_eol = false;
215
216 if (do_cpp) {
217 switch (current->type) {
218 case PSI_T_DEFINE:
219 #if PSI_CPP_DEBUG
220 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_DEFINE, skip_all\n");
221 #endif
222 do_expansion = false;
223 skip_all = true;
224 break;
225 case PSI_T_DEFINED:
226 skip_paren = true;
227 /* no break */
228 case PSI_T_IFDEF:
229 case PSI_T_IFNDEF:
230 case PSI_T_UNDEF:
231 #if PSI_CPP_DEBUG
232 fprintf(stderr, "PSI: CPP do_expansion=false, PSI_T_{IF{,N},UN}DEF\n");
233 #endif
234 do_expansion = false;
235 break;
236 case PSI_T_LPAREN:
237
238 if (!skip_all) {
239 if (skip_paren) {
240 skip_paren = false;
241 } else {
242 do_expansion = true;
243 #if PSI_CPP_DEBUG
244 fprintf(stderr, "PSI: CPP do_expansion=true, PSI_T_LPAREN, !skip_all, !skip_paren\n");
245 #endif
246 }
247 }
248 break;
249 case PSI_T_NAME:
250 break;
251 default:
252 do_expansion = !skip_all;
253 #if PSI_CPP_DEBUG
254 fprintf(stderr, "PSI: CPP do_expansion=%s, <- !skip_all\n", do_expansion?"true":"false");
255 #endif
256 }
257 }
258 }
259
260 if (cpp->skip) {
261 /* FIXME: del_range */
262 if (!do_cpp) {
263 #if PSI_CPP_DEBUG
264 fprintf(stderr, "PSI: CPP skip ");
265 psi_token_dump(2, current);
266 #endif
267 psi_cpp_tokiter_del_cur(cpp, true);
268 continue;
269 }
270 }
271
272 if (do_expansion && current->type == PSI_T_NAME && psi_cpp_tokiter_defined(cpp)) {
273 bool expanded = false;
274
275 while (psi_cpp_tokiter_expand(cpp)) {
276 expanded = true;
277 }
278 if (expanded) {
279 continue;
280 }
281 }
282
283 if (do_cpp) {
284 parser_tokens = psi_plist_add(parser_tokens, &current);
285
286 if (is_eol) {
287 size_t processed = 0;
288 bool parsed = psi_parser_process(cpp->parser, parser_tokens, &processed);
289
290 /* EOL */
291 psi_plist_pop(parser_tokens, NULL);
292 psi_plist_clean(parser_tokens);
293 do_cpp = false;
294
295 if (!parsed) {
296 psi_plist_free(parser_tokens);
297 return false;
298 }
299 } else {
300 /* leave EOLs in the input stream, else we might end up
301 * with a hash not preceded with a new line after include */
302 psi_cpp_tokiter_del_cur(cpp, false);
303 }
304
305 #if PSI_CPP_DEBUG > 1
306 psi_cpp_tokiter_dump(2, cpp);
307 #endif
308
309 continue;
310 }
311
312 psi_cpp_tokiter_next(cpp);
313 }
314 } while (cpp->expanded);
315
316 psi_plist_free(parser_tokens);
317
318 return true;
319 }
320
321 bool psi_cpp_process(struct psi_cpp *cpp, struct psi_plist **tokens)
322 {
323 bool parsed = false;
324 struct psi_cpp temp = *cpp;
325
326 cpp->tokens = *tokens;
327 if (psi_cpp_stage1(cpp) && psi_cpp_stage2(cpp)) {
328 parsed = true;
329 }
330 *tokens = cpp->tokens;
331
332 if (temp.tokens) {
333 cpp->tokens = temp.tokens;
334 cpp->index = temp.index;
335 }
336
337 return parsed;
338 }
339
340 bool psi_cpp_defined(struct psi_cpp *cpp, struct psi_token *tok)
341 {
342 bool defined;
343
344 if (tok->type == PSI_T_NAME) {
345 defined = zend_hash_str_exists(&cpp->defs, tok->text, tok->size);
346 } else {
347 defined = false;
348 }
349
350 #if PSI_CPP_DEBUG
351 fprintf(stderr, "PSI: CPP defined -> %s ", defined ? "true" : "false");
352 psi_token_dump(2, tok);
353 #endif
354
355 return defined;
356 }
357
358 void psi_cpp_define(struct psi_cpp *cpp, struct psi_cpp_macro_decl *decl)
359 {
360 struct psi_cpp_macro_decl *old = zend_hash_str_find_ptr(&cpp->defs, decl->token->text, decl->token->size);
361
362 if (old && !psi_cpp_macro_decl_equal(old, decl)) {
363 cpp->parser->error(PSI_DATA(cpp->parser), decl->token, PSI_WARNING,
364 "'%s' redefined", decl->token->text);
365 cpp->parser->error(PSI_DATA(cpp->parser), old->token, PSI_WARNING,
366 "'%s' previously defined", old->token->text);
367 }
368 zend_hash_str_update_ptr(&cpp->defs, decl->token->text, decl->token->size, decl);
369 }
370
371 bool psi_cpp_undef(struct psi_cpp *cpp, struct psi_token *tok)
372 {
373 return SUCCESS == zend_hash_str_del(&cpp->defs, tok->text, tok->size);
374 }
375
376 bool psi_cpp_if(struct psi_cpp *cpp, struct psi_cpp_exp *exp)
377 {
378 if (!psi_num_exp_validate(PSI_DATA(cpp->parser), exp->data.num, NULL, NULL, NULL, NULL, NULL)) {
379 return false;
380 }
381 if (!psi_long_num_exp(exp->data.num, NULL, &cpp->defs)) {
382 return false;
383 }
384 return true;
385 }
386
387 static inline bool try_include(struct psi_cpp *cpp, const char *path, bool *parsed)
388 {
389 struct psi_parser_input *include;
390
391 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include trying %s\n", path);
392
393 include = psi_parser_open_file(cpp->parser, path, false);
394 if (include) {
395 struct psi_plist *tokens;
396
397 PSI_DEBUG_PRINT(cpp->parser, "PSI: CPP include scanning %s\n", path);
398
399 tokens = psi_parser_scan(cpp->parser, include);
400 if (tokens) {
401 *parsed = psi_cpp_process(cpp, &tokens);
402
403 if (*parsed) {
404 ++cpp->expanded;
405 psi_cpp_tokiter_ins_range(cpp, cpp->index,
406 psi_plist_count(tokens), psi_plist_eles(tokens));
407 free(tokens);
408 } else {
409 psi_plist_free(tokens);
410 }
411 }
412 free(include);
413
414 zend_hash_str_add_empty_element(&cpp->once, path, strlen(path));
415 return true;
416 }
417 return false;
418 }
419
420 bool psi_cpp_include(struct psi_cpp *cpp, const char *file, unsigned flags)
421 {
422 bool parsed = false;
423 int f_len = strlen(file);
424
425 if (!(flags & PSI_CPP_INCLUDE_NEXT) || *file == '/') {
426 /* first try as is, full or relative path */
427 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, file, f_len)) {
428 return true;
429 }
430 if (try_include(cpp, file, &parsed)) {
431 /* found */
432 return parsed;
433 }
434 }
435
436 /* look through search paths */
437 if (*file != '/') {
438 char path[PATH_MAX];
439 const char *sep;
440 int p_len;
441
442 if ((flags & PSI_CPP_INCLUDE_NEXT) && cpp->search) {
443 if ((sep = strchr(cpp->search, ':'))) {
444 cpp->search = sep + 1;
445 } else {
446 /* point to end of string */
447 cpp->search += strlen(cpp->search);
448 }
449 }
450
451 if (!(flags & PSI_CPP_INCLUDE_NEXT) || !cpp->search) {
452 cpp->search = &psi_cpp_search[0];
453 }
454
455 do {
456 int d_len;
457
458 sep = strchr(cpp->search, ':');
459 d_len = sep ? sep - cpp->search : strlen(cpp->search);
460
461 if (PATH_MAX > (p_len = snprintf(path, PATH_MAX, "%.*s/%.*s", d_len, cpp->search, f_len, file))) {
462 if ((flags & PSI_CPP_INCLUDE_ONCE) && zend_hash_str_exists(&cpp->once, path, p_len)) {
463 return true;
464 }
465 if (try_include(cpp, path, &parsed)) {
466 break;
467 }
468 }
469 cpp->search = sep + 1;
470 } while (sep);
471 }
472
473 return parsed;
474 }