1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
26 #include "php_psi_stdinc.h"
32 #include <Zend/zend_smart_str.h>
/* Number of padding bytes that psi_parser_open_file() and
 * psi_parser_open_string() allocate behind the input payload. */
38 # define YYMAXFILL 256
/*
 * Set up a parser instance.
 *
 * The structure is zeroed, its PSI_DATA part is constructed with the given
 * error callback and flags, and a preprocessor is created and loaded with
 * its defaults.
 *
 * NOTE(review): the malloc below presumably only runs when P is passed in as
 * NULL, and the function presumably returns P -- confirm.
 * NOTE(review): no NULL check of the malloc result is visible before the
 * memset -- confirm whether allocation failure is handled.
 */
41 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
44 P = malloc(sizeof(*P));
46 memset(P, 0, sizeof(*P));
48 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
/* The preprocessor keeps a reference to the parser it belongs to. */
50 P->preproc = psi_cpp_init(P);
52 psi_cpp_load_defaults(P->preproc);
/*
 * Read the file `filename` into a newly allocated psi_parser_input.
 *
 * The file is stat()ed for its size, a block of
 * sizeof(*fb) + st_size + YYMAXFILL bytes is allocated, and st_size bytes
 * are read into fb->buffer. On success fb->length is the file size and
 * fb->file is a zend_string copy of the file name. Every failing step
 * reports a PSI_WARNING through P->error.
 *
 * NOTE(review): `report_errors` presumably gates the P->error calls, and
 * the failure paths presumably release fb/fp and return NULL -- confirm.
 */
57 struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors)
61 struct psi_parser_input *fb;
63 if (stat(filename, &sb)) {
65 P->error(PSI_DATA(P), NULL, PSI_WARNING,
66 "Could not stat '%s': %s",
67 filename, strerror(errno));
/* One allocation holds the header, the file contents and the padding. */
72 if (!(fb = malloc(sizeof(*fb) + sb.st_size + YYMAXFILL))) {
/* NOTE(review): st_size is an off_t in POSIX, but the sum is printed with
 * %zu here and in the read error below -- confirm the argument type. */
74 P->error(PSI_DATA(P), NULL, PSI_WARNING,
75 "Could not allocate %zu bytes for reading '%s': %s",
76 sb.st_size + YYMAXFILL, filename, strerror(errno));
81 if (!(fp = fopen(filename, "r"))) {
84 P->error(PSI_DATA(P), NULL, PSI_WARNING,
85 "Could not open '%s' for reading: %s",
86 filename, strerror(errno));
/* A short read is treated as failure.
 * NOTE(review): this compares the signed st_size with the size_t result of
 * fread -- confirm this is intended. */
91 if (sb.st_size != fread(fb->buffer, 1, sb.st_size, fp)) {
/* NOTE(review): the message reports st_size + YYMAXFILL, while the fread
 * above only requested st_size bytes -- confirm the intended count. */
95 P->error(PSI_DATA(P), NULL, PSI_WARNING,
96 "Could not read %zu bytes from '%s': %s",
97 sb.st_size + YYMAXFILL, filename, strerror(errno));
102 fb->length = sb.st_size;
103 fb->file = zend_string_init(filename, strlen(filename), 1);
/*
 * Wrap an in-memory string in a newly allocated psi_parser_input.
 *
 * `length` bytes of `string` are copied into the input buffer and the
 * YYMAXFILL bytes behind them are zeroed. The input is labelled with the
 * file name "<stdin>". An allocation failure is reported as a PSI_WARNING
 * through P->error.
 *
 * NOTE(review): presumably returns NULL on allocation failure and the new
 * input otherwise -- confirm.
 */
108 struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
110 struct psi_parser_input *sb;
112 if (!(sb = malloc(sizeof(*sb) + length + YYMAXFILL))) {
113 P->error(PSI_DATA(P), NULL, PSI_WARNING,
114 "Could not allocate %zu bytes: %s",
115 length + YYMAXFILL, strerror(errno));
/* Payload first, then zeroed padding. */
119 memcpy(sb->buffer, string, length);
120 memset(sb->buffer + length, 0, YYMAXFILL);
123 sb->file = zend_string_init("<stdin>", strlen("<stdin>"), 1);
/*
 * Run the token list through the preprocessor (psi_cpp_process) of P.
 *
 * `tokens` is passed by address, so the preprocessor may replace the list.
 * psi_parser_parse() treats a NULL result as failure.
 *
 * NOTE(review): presumably returns *tokens when psi_cpp_process succeeds and
 * NULL otherwise -- confirm.
 */
128 struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens)
130 if (psi_cpp_process(P->preproc, tokens)) {
/*
 * Feed preprocessed tokens to the grammar parser.
 *
 * For a non-empty list the result is true exactly when
 * psi_parser_proc_parse() returns 0; `processed` is handed through to it.
 *
 * NOTE(review): the result for an empty token list is presumably true --
 * confirm.
 */
136 bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed)
138 if (psi_plist_count(tokens)) {
139 return 0 == psi_parser_proc_parse(P, tokens, processed);
/*
 * Turn preprocessor macros into PSI constants after parsing.
 *
 * Every entry of P->preproc->defs is visited. A macro whose expression
 * validates as a numeric expression is registered as a number constant; a
 * macro consisting of exactly one quoted-string token is registered as a
 * string constant. Constants are named "psi\" followed by the macro name and
 * appended to P->consts.
 *
 * NOTE(review): the string case is presumably the fallback for macros
 * without a valid numeric expression -- confirm the branch structure.
 */
144 void psi_parser_postprocess(struct psi_parser *P)
/* Validation scope that resolves names against the preprocessor defines. */
148 struct psi_validate_scope scope = {0};
150 psi_validate_scope_ctor(&scope);
151 scope.defs = &P->preproc->defs;
/* NOTE(review): PSI_SILENT is presumably set to suppress diagnostics while
 * probing macros and restored afterwards -- confirm. */
154 P->flags |= PSI_SILENT;
156 /* register const macros */
157 ZEND_HASH_FOREACH_STR_KEY_PTR(&P->preproc->defs, name, scope.macro)
/* Macros with a `sig` (presumably function-like macros) are not exported. */
159 if (scope.macro->sig) {
160 } else if (scope.macro->exp) {
161 if (psi_num_exp_validate(PSI_DATA(P), scope.macro->exp, &scope)) {
162 struct psi_impl_type *type;
163 struct psi_impl_def_val *def;
164 struct psi_const *cnst;
165 struct psi_num_exp *num;
166 smart_str ns_name = {0};
167 zend_string *name_str, *type_str;
/* Build the namespaced constant name "psi\<macro name>". */
169 smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1);
170 smart_str_append_ex(&ns_name, name, 1);
171 name_str = smart_str_extract(&ns_name);
172 type_str = zend_string_init(ZEND_STRL("<eval number>"), 1);
/* The constant gets its own copy of the macro expression. */
174 num = psi_num_exp_copy(scope.macro->exp);
175 def = psi_impl_def_val_init(PSI_T_NUMBER, num);
176 type = psi_impl_type_init(PSI_T_NUMBER, type_str);
177 cnst = psi_const_init(type, name_str, def);
178 P->consts = psi_plist_add(P->consts, &cnst);
/* Drop the local references to the temporary strings. */
179 zend_string_release(name_str);
180 zend_string_release(type_str);
/* String constants: only macros made of a single quoted-string token. */
183 if (psi_plist_count(scope.macro->tokens) == 1) {
186 if (psi_plist_get(scope.macro->tokens, 0, &t)) {
187 if (t->type == PSI_T_QUOTED_STRING) {
188 struct psi_impl_type *type;
189 struct psi_impl_def_val *def;
190 struct psi_const *cnst;
191 smart_str ns_name = {0};
192 zend_string *name_str, *type_str;
194 smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1);
195 smart_str_append_ex(&ns_name, name, 1);
196 name_str = smart_str_extract(&ns_name);
197 type_str = zend_string_init(ZEND_STRL("string"), 1);
199 type = psi_impl_type_init(PSI_T_STRING, type_str);
/* The token text itself becomes the default value. */
200 def = psi_impl_def_val_init(PSI_T_QUOTED_STRING, t->text);
201 cnst = psi_const_init(type, name_str, def);
202 P->consts = psi_plist_add(P->consts, &cnst);
203 zend_string_release(name_str);
204 zend_string_release(type_str);
210 ZEND_HASH_FOREACH_END();
214 psi_validate_scope_dtor(&scope);
/*
 * Run the complete pipeline on one input: scan, preprocess, parse, and
 * finally post-process the collected macros.
 *
 * Ownership of the token lists as visible here: the scanned list is freed
 * when preprocessing fails; the preprocessed list is freed both when parsing
 * fails and after post-processing.
 *
 * NOTE(review): presumably returns false from each failing stage and true at
 * the end -- confirm.
 */
217 bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
219 struct psi_plist *scanned, *preproc;
220 size_t processed = 0;
222 if (!(scanned = psi_parser_scan(P, I))) {
/* `scanned` is passed by address; see psi_parser_preprocess(). */
226 if (!(preproc = psi_parser_preprocess(P, &scanned))) {
227 psi_plist_free(scanned);
231 if (!psi_parser_process(P, preproc, &processed)) {
232 psi_plist_free(preproc);
236 psi_parser_postprocess(P);
238 psi_plist_free(preproc);
/*
 * Tear down a parser in place: free its preprocessor, destroy the PSI_DATA
 * part and zero the whole structure. P itself is not freed here.
 */
242 void psi_parser_dtor(struct psi_parser *P)
244 psi_cpp_free(&P->preproc);
245 psi_data_dtor(PSI_DATA(P));
/* Leave no stale pointers behind in the structure. */
247 memset(P, 0, sizeof(*P));
/*
 * Release a parser through a pointer-to-pointer handle.
 *
 * NOTE(review): presumably runs psi_parser_dtor() on *P, frees it and resets
 * *P to NULL -- confirm against the function body.
 */
250 void psi_parser_free(struct psi_parser **P)
/*
 * Scanner helper: create a token of type `t` from the current match and
 * append it to `tokens`.
 *
 * It relies on the scanner locals token, tokens, tok, cur, eol, I and P
 * being in scope. The match starts at tok and is cur - tok bytes long;
 * tok - eol + 1 is the value the scanner reports as the column in its
 * syntax error message; I->lines and I->file are passed along as well.
 * When PSI_DEBUG is set in P->flags, "PSI< " is written to stderr and the
 * token is dumped to descriptor 2.
 */
263 #define NEWTOKEN(t) \
264 token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
265 tokens = psi_plist_add(tokens, &token); \
266 if (P->flags & PSI_DEBUG) { \
267 fprintf(stderr, "PSI< "); \
268 psi_token_dump(2, token); \
/*
 * Tokenise the input I into a plist of psi_token, using re2c rules.
 *
 * YYFILL is defined as "if (cur >= lim) goto done;", so reaching the end of
 * the buffer leaves the rule matching through the `done` label.
 *
 * Rule groups of the main state, in order:
 *  - integer and float literals with an optional sign; suffixes (u, l, ul,
 *    f, df, ...) are matched as trailing context, recorded in token->flags
 *    and then skipped by advancing cur past them;
 *  - the quote characters, which enter the `character` and `string` states,
 *    and the u8, u, U and L prefixes, which only set char_width;
 *  - the two comment openers, punctuation and operators;
 *  - keywords: double-quoted patterns are case-sensitive and single-quoted
 *    patterns case-insensitive in re2c, so "typedef" must match exactly
 *    while 'LET' matches in any case;
 *  - NAME, NSNAME, DOLLAR_NAME, CPP_HEADER (cursors are adjusted so the
 *    token text excludes the angle brackets), __attribute__((...)), EOL and
 *    whitespace;
 *  - any other input creates a token of type -2 or -1 and jumps to `error`.
 *
 * The `character` and `string` states toggle `escaped` on backslashes and
 * emit PSI_T_QUOTED_CHAR and PSI_T_QUOTED_STRING tokens whose flags carry
 * char_width. The `comment` state emits PSI_T_COMMENT at the closing
 * sequence, the single-line comment state at EOL. The `cpp_attribute` state
 * starts with parens = 2, counts nested parentheses and emits
 * PSI_T_CPP_ATTRIBUTE when a ")" is seen at parens == 1.
 *
 * On a syntax error a PSI_WARNING with the token type, text and column is
 * reported and the token list is freed.
 *
 * NOTE(review): presumably returns the token list on success and NULL after
 * an error -- confirm.
 * NOTE(review): the L prefix sets char_width to sizeof(wchar_t)/8. sizeof
 * already yields bytes, like the 1, 2 and 4 used for the other prefixes, so
 * the division gives 0 for any wchar_t narrower than 8 bytes. This probably
 * should be sizeof(wchar_t) -- confirm against the consumers of the flag.
 */
276 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
278 struct psi_plist *tokens;
279 struct psi_token *token;
280 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
285 PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file->val);
/* All cursors start at the beginning of the buffer; lim marks the end of
 * the payload. */
287 tok = mrk = eol = cur = I->buffer;
288 lim = I->buffer + I->length;
/* The list owns its tokens and frees them with psi_token_free. */
290 tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
302 re2c:define:YYCTYPE = "unsigned char";
303 re2c:define:YYCURSOR = cur;
304 re2c:define:YYLIMIT = lim;
305 re2c:define:YYMARKER = mrk;
306 re2c:define:YYCTXMARKER = ctxmrk;
307 re2c:define:YYFILL = "if (cur >= lim) goto done;";
308 re2c:yyfill:parameter = 0;
310 W = [a-zA-Z0-9_\x80-\xff];
313 NAME = [a-zA-Z_\x80-\xff] W*;
314 NSNAME = (NAME)? ("\\" NAME)+;
315 DOLLAR_NAME = '$' W+;
316 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
317 CPP_ATTRIBUTE = "__attribute__" SP* "((";
319 DEC_CONST = [1-9] [0-9]*;
320 OCT_CONST = "0" [0-7]*;
321 HEX_CONST = '0x' [0-9a-fA-F]+;
322 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
324 FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
325 FLT_DEC_NUM = "0" | DEC_CONST;
326 FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
327 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
329 [+-]? INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
330 [+-]? INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
331 [+-]? INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
332 [+-]? INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
333 [+-]? INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
335 [+-]? FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
336 [+-]? FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
337 [+-]? FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
338 [+-]? FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
339 [+-]? FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
340 [+-]? FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
342 "'" { escaped = false; tok += 1; goto character; }
343 "\"" { escaped = false; tok += 1; goto string; }
344 "u8" / "\"" { char_width = 1; }
345 "u" / ['"] { char_width = 2; }
346 "U" / ['"] { char_width = 4; }
347 "L" / ['"] { char_width = sizeof(wchar_t)/8; }
349 "/*" { goto comment; }
350 "//" { goto comment_sl; }
352 "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
353 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
354 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
355 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
356 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
357 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
358 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
359 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
360 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
361 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
362 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
363 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
364 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
365 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
366 "||" { NEWTOKEN(PSI_T_OR); goto start; }
367 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
368 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
369 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
370 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
371 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
372 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
373 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
374 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
375 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
376 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
377 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
378 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
379 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
380 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
381 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
382 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
383 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
384 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
385 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
386 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
387 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
388 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
389 "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
390 "__inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
391 "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
392 "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
393 "__asm__" { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
394 "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; }
395 "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; }
396 "line" { NEWTOKEN(PSI_T_LINE); goto start; }
397 "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
398 "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; }
399 "union" { NEWTOKEN(PSI_T_UNION); goto start; }
400 "enum" { NEWTOKEN(PSI_T_ENUM); goto start; }
401 "const" { NEWTOKEN(PSI_T_CONST); goto start; }
402 "void" { NEWTOKEN(PSI_T_VOID); goto start; }
403 "bool" { NEWTOKEN(PSI_T_BOOL); goto start; }
404 "char" { NEWTOKEN(PSI_T_CHAR); goto start; }
405 "short" { NEWTOKEN(PSI_T_SHORT); goto start; }
406 "int" { NEWTOKEN(PSI_T_INT); goto start; }
407 "long" { NEWTOKEN(PSI_T_LONG); goto start; }
408 "float" { NEWTOKEN(PSI_T_FLOAT); goto start; }
409 "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; }
410 "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
411 "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; }
412 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
413 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
414 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
415 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
416 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
417 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
418 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
419 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
420 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
421 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
422 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
423 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
424 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
425 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
426 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
427 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
428 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
429 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
430 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
431 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
432 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
433 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
434 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
435 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
436 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
437 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
438 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
439 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
440 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
441 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
442 'AS' { NEWTOKEN(PSI_T_AS); goto start; }
443 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
444 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
445 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
446 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
447 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
448 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
449 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
450 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
451 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
452 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
453 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
454 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
455 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
456 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
457 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
458 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
459 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
460 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
461 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
462 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
463 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
464 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
465 CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
466 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
467 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
468 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
469 [^] { NEWTOKEN(-2); goto error; }
470 * { NEWTOKEN(-1); goto error; }
477 EOL { NEWLINE(); goto character; }
478 "\\" { escaped = !escaped; goto character; }
485 NEWTOKEN(PSI_T_QUOTED_CHAR);
487 token->flags = char_width;
490 * { escaped = false; goto character; }
497 EOL { NEWLINE(); goto string; }
498 "\\" { escaped = !escaped; goto string; }
505 NEWTOKEN(PSI_T_QUOTED_STRING);
507 token->flags = char_width;
510 * { escaped = false; goto string; }
517 EOL { NEWLINE(); goto comment; }
518 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
526 EOL { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
527 * { goto comment_sl; }
535 "(" { ++parens; goto cpp_attribute; }
536 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
537 EOL { NEWLINE(); goto cpp_attribute; }
538 * { goto cpp_attribute; }
543 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
544 token->type, token->text->len, token->text->val, tok - eol + 1);
545 psi_plist_free(tokens);
550 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);