1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
26 #include "php_psi_stdinc.h"
32 #include <Zend/zend_smart_str.h>
38 # define YYMAXFILL 256
/*
 * Set up a parser instance.
 *
 * The parser struct is zero-filled, its psi_data part is constructed with the
 * given error callback and flags, and a preprocessor is created for it and
 * loaded with its defaults.
 *
 * P      parser storage; the visible code can allocate it with malloc
 * error  error callback handed to psi_data_ctor_with_dtors
 * flags  flags handed to psi_data_ctor_with_dtors
 *
 * NOTE(review): this listing omits short lines of the function, so the
 * condition guarding the allocation and the return statement are not visible.
 * NOTE(review): no NULL check of the malloc result is visible before the
 * memset below - confirm against the full source.
 */
41 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
44 P = malloc(sizeof(*P));
/* start from an all-zero struct before any member is constructed */
46 memset(P, 0, sizeof(*P));
48 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
/* the preprocessor is created with a reference to this parser */
50 P->preproc = psi_cpp_init(P);
52 psi_cpp_load_defaults(P->preproc);
/*
 * Read a whole file into a freshly allocated parser input.
 *
 * Visible steps: stat the file to learn its size, allocate the input header
 * plus st_size + YYMAXFILL bytes, open the file in mode r, read st_size bytes
 * into the buffer, then record the length and the file name.
 * Each failing step reports a PSI_WARNING through P->error.
 *
 * P              parser whose error callback receives the warnings
 * filename       path of the file to read
 * report_errors  not used in any visible line; presumably guards the
 *                P->error calls - TODO confirm against the full source
 *
 * NOTE(review): the cleanup and return statements of the failure paths are
 * not visible in this listing.
 */
57 struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors)
61 struct psi_parser_input *fb;
63 if (stat(filename, &sb)) {
65 P->error(PSI_DATA(P), NULL, PSI_WARNING,
66 "Could not stat '%s': %s",
67 filename, strerror(errno));
/* header and file contents plus YYMAXFILL extra bytes in one allocation */
72 if (!(fb = malloc(sizeof(*fb) + sb.st_size + YYMAXFILL))) {
/*
 * NOTE(review): the size argument is derived from sb.st_size but printed
 * with %zu; assuming sb is a struct stat, st_size is an off_t - confirm the
 * argument type matches the conversion.
 */
74 P->error(PSI_DATA(P), NULL, PSI_WARNING,
75 "Could not allocate %zu bytes for reading '%s': %s",
76 sb.st_size + YYMAXFILL, filename, strerror(errno));
81 if (!(fp = fopen(filename, "r"))) {
84 P->error(PSI_DATA(P), NULL, PSI_WARNING,
85 "Could not open '%s' for reading: %s",
86 filename, strerror(errno));
/* a short read is treated as failure: exactly st_size bytes are expected */
91 if (sb.st_size != fread(fb->buffer, 1, sb.st_size, fp)) {
/*
 * NOTE(review): only sb.st_size bytes were requested from fread, yet the
 * message reports sb.st_size + YYMAXFILL - looks copied from the allocation
 * message; confirm and correct the reported count.
 */
95 P->error(PSI_DATA(P), NULL, PSI_WARNING,
96 "Could not read %zu bytes from '%s': %s",
97 sb.st_size + YYMAXFILL, filename, strerror(errno));
/* length counts the file contents only, not the YYMAXFILL padding */
102 fb->length = sb.st_size;
103 fb->file = zend_string_init(filename, strlen(filename), 1);
/*
 * Build a parser input from an in-memory string.
 *
 * The input header and length + YYMAXFILL bytes are allocated in one block,
 * the string is copied into the buffer, and the YYMAXFILL bytes following it
 * are zero-filled. The input is labelled with the file name <stdin>.
 * An allocation failure is reported as PSI_WARNING through P->error.
 *
 * P       parser whose error callback receives the warning
 * string  bytes to copy; length bytes are read from it
 * length  number of bytes to copy
 *
 * NOTE(review): the statement storing the length in the input and the return
 * statements are not visible in this listing.
 */
108 struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
110 struct psi_parser_input *sb;
112 if (!(sb = malloc(sizeof(*sb) + length + YYMAXFILL))) {
113 P->error(PSI_DATA(P), NULL, PSI_WARNING,
114 "Could not allocate %zu bytes: %s",
115 length + YYMAXFILL, strerror(errno));
119 memcpy(sb->buffer, string, length);
/* zero the padding that follows the copied bytes */
120 memset(sb->buffer + length, 0, YYMAXFILL);
123 sb->file = zend_string_init("<stdin>", strlen("<stdin>"), 1);
/*
 * Run the preprocessor of parser P over the token list.
 *
 * tokens is passed by reference to psi_cpp_process, so the callee is able to
 * replace the list.
 *
 * NOTE(review): what is returned on success and on failure is not visible in
 * this listing.
 */
128 struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens)
130 if (psi_cpp_process(P->preproc, tokens)) {
/*
 * Feed a token list to the grammar parser.
 *
 * For a non-empty list the result is true exactly when psi_parser_proc_parse
 * returns 0; processed is forwarded to that call.
 *
 * NOTE(review): the result for an empty token list is not visible in this
 * listing.
 */
136 bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed)
138 if (psi_plist_count(tokens)) {
139 return 0 == psi_parser_proc_parse(P, tokens, processed);
/*
 * Turn preprocessor macros into constants after parsing.
 *
 * Every entry of P->preproc->defs is visited:
 *  - macros with a signature get no visible handling;
 *  - macros whose expression validates as a numeric expression are added to
 *    P->consts as a PSI_T_NUMBER constant holding a copy of the expression;
 *  - macros consisting of exactly one PSI_T_QUOTED_STRING token are added to
 *    P->consts as a string constant holding the token text.
 * Constant names are the macro name prefixed with the psi namespace.
 *
 * NOTE(review): this listing omits short lines, so the exact nesting between
 * the numeric branch and the single-token branch is not visible, nor is any
 * restoring of P->flags after PSI_SILENT has been set.
 * NOTE(review): both branches build the namespaced name with the same three
 * statements; a shared helper would remove the duplication.
 */
144 void psi_parser_postprocess(struct psi_parser *P)
148 struct psi_validate_scope scope = {0};
150 psi_validate_scope_ctor(&scope);
/* validation resolves names against the macro table of the preprocessor */
151 scope.defs = &P->preproc->defs;
/* presumably mutes diagnostics while macros are probed - TODO confirm */
154 P->flags |= PSI_SILENT;
156 /* register const macros */
157 ZEND_HASH_FOREACH_STR_KEY_PTR(&P->preproc->defs, name, scope.macro)
159 if (scope.macro->sig) {
160 } else if (scope.macro->exp) {
161 if (psi_num_exp_validate(PSI_DATA(P), scope.macro->exp, &scope)) {
162 struct psi_impl_type *type;
163 struct psi_impl_def_val *def;
164 struct psi_const *cnst;
165 struct psi_num_exp *num;
166 smart_str ns_name = {0};
167 zend_string *name_str;
/* constant name: namespace prefix followed by the macro name */
169 smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1);
170 smart_str_append_ex(&ns_name, name, 1);
171 name_str = smart_str_extract(&ns_name);
/* the constant gets its own copy of the macro expression */
173 num = psi_num_exp_copy(scope.macro->exp);
174 def = psi_impl_def_val_init(PSI_T_NUMBER, num);
175 type = psi_impl_type_init(PSI_T_NUMBER,
176 zend_string_init(ZEND_STRL("<eval number>"), 1));
177 cnst = psi_const_init(type, name_str, def);
178 P->consts = psi_plist_add(P->consts, &cnst);
/* drop the local reference; assumes psi_const_init keeps its own - TODO confirm */
179 zend_string_release(name_str);
/* string constants: only macros made of a single quoted string token */
182 if (psi_plist_count(scope.macro->tokens) == 1) {
185 if (psi_plist_get(scope.macro->tokens, 0, &t)) {
186 if (t->type == PSI_T_QUOTED_STRING) {
187 struct psi_impl_type *type;
188 struct psi_impl_def_val *def;
189 struct psi_const *cnst;
190 smart_str ns_name = {0};
191 zend_string *name_str;
193 smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1);
194 smart_str_append_ex(&ns_name, name, 1);
195 name_str = smart_str_extract(&ns_name);
197 type = psi_impl_type_init(PSI_T_STRING,
198 zend_string_init(ZEND_STRL("string"), 1));
/* the token text is handed over as the default value */
199 def = psi_impl_def_val_init(PSI_T_QUOTED_STRING, t->text);
200 cnst = psi_const_init(type, name_str, def);
201 P->consts = psi_plist_add(P->consts, &cnst);
202 zend_string_release(name_str);
208 ZEND_HASH_FOREACH_END();
212 psi_validate_scope_dtor(&scope);
/*
 * Run the full pipeline on one input: scan, preprocess, process, postprocess.
 *
 * The scanned list is freed here when preprocessing fails. The preprocessed
 * list is freed both when processing fails and after postprocessing.
 *
 * P  parser to run
 * I  input to scan
 *
 * NOTE(review): the return statements of each path are not visible in this
 * listing.
 */
215 bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
217 struct psi_plist *scanned, *preproc;
218 size_t processed = 0;
220 if (!(scanned = psi_parser_scan(P, I))) {
/* scanned is passed by reference so the preprocessor is able to replace it */
224 if (!(preproc = psi_parser_preprocess(P, &scanned))) {
225 psi_plist_free(scanned);
229 if (!psi_parser_process(P, preproc, &processed)) {
230 psi_plist_free(preproc);
/* macros become constants only after a successful parse */
234 psi_parser_postprocess(P);
236 psi_plist_free(preproc);
/*
 * Release what the parser owns without freeing the struct itself: the
 * preprocessor is freed, the psi_data part is destructed, and the struct is
 * zero-filled afterwards.
 */
240 void psi_parser_dtor(struct psi_parser *P)
242 psi_cpp_free(&P->preproc);
243 psi_data_dtor(PSI_DATA(P));
/* leave an all-zero struct behind */
245 memset(P, 0, sizeof(*P));
248 void psi_parser_free(struct psi_parser **P)
/*
 * Create a token of type t from the current match and append it to the list.
 *
 * The token text spans tok up to cur, its column is tok - eol + 1, and its
 * line and file come from the input I. When PSI_DEBUG is set in P->flags the
 * token is also dumped to stderr (descriptor 2).
 *
 * The macro relies on these names in the expanding scope: token, tokens, tok,
 * cur, eol, I and P.
 *
 * NOTE(review): the closing lines of the macro are not visible in this
 * listing.
 */
261 #define NEWTOKEN(t) \
262 token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
263 tokens = psi_plist_add(tokens, &token); \
264 if (P->flags & PSI_DEBUG) { \
265 fprintf(stderr, "PSI< "); \
266 psi_token_dump(2, token); \
/*
 * Tokenize the buffer of input I with re2c rules and collect the tokens in a
 * psi_plist.
 *
 * Scanning runs from I->buffer up to I->buffer + I->length. The YYFILL hook
 * jumps to the done label once the cursor reaches the limit. Besides the main
 * rule set there are separate rule sets for character literals, string
 * literals, block comments, single line comments and attribute clauses.
 * Numeric rules with a suffix match the suffix as trailing context and then
 * advance the cursor past it by hand. Unexpected input is reported as a
 * PSI_WARNING syntax error and the token list is freed.
 *
 * P  parser providing the flags and the error callback
 * I  input whose buffer, length, line counter and file name are read
 *
 * NOTE(review): this listing omits short lines (labels, local declarations,
 * the EOL and SP definitions, return statements), so the control flow between
 * the rule sets is only partly visible.
 * NOTE(review): the rule for the wide literal prefix L sets char_width to
 * sizeof(wchar_t)/8, which is 0 whenever wchar_t is smaller than 8 bytes; the
 * sibling prefix rules use byte widths 1, 2 and 4 - confirm whether
 * sizeof(wchar_t) was intended.
 * NOTE(review): the visible integer suffix rules cover u, l, lu, ul, llu and
 * ull; no rule for a plain ll suffix is visible - confirm this is intended.
 */
274 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
276 struct psi_plist *tokens;
277 struct psi_token *token;
278 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
283 PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file->val);
/* token start, marker, line start and cursor all begin at the buffer start */
285 tok = mrk = eol = cur = I->buffer;
286 lim = I->buffer + I->length;
/* psi_token_free is installed as the element destructor of the list */
288 tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
300 re2c:define:YYCTYPE = "unsigned char";
301 re2c:define:YYCURSOR = cur;
302 re2c:define:YYLIMIT = lim;
303 re2c:define:YYMARKER = mrk;
304 re2c:define:YYCTXMARKER = ctxmrk;
305 re2c:define:YYFILL = "if (cur >= lim) goto done;";
306 re2c:yyfill:parameter = 0;
308 W = [a-zA-Z0-9_\x80-\xff];
311 NAME = [a-zA-Z_\x80-\xff] W*;
312 NSNAME = (NAME)? ("\\" NAME)+;
313 DOLLAR_NAME = '$' W+;
314 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
315 CPP_ATTRIBUTE = "__attribute__" SP* "((";
317 DEC_CONST = [1-9] [0-9]*;
318 OCT_CONST = "0" [0-7]*;
319 HEX_CONST = '0x' [0-9a-fA-F]+;
320 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
322 FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
323 FLT_DEC_NUM = "0" | DEC_CONST;
324 FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
325 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
327 [+-]? INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
328 [+-]? INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
329 [+-]? INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
330 [+-]? INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
331 [+-]? INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
333 [+-]? FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
334 [+-]? FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
335 [+-]? FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
336 [+-]? FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
337 [+-]? FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
338 [+-]? FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
340 "'" { escaped = false; tok += 1; goto character; }
341 "\"" { escaped = false; tok += 1; goto string; }
342 "u8" / "\"" { char_width = 1; }
343 "u" / ['"] { char_width = 2; }
344 "U" / ['"] { char_width = 4; }
345 "L" / ['"] { char_width = sizeof(wchar_t)/8; }
347 "/*" { goto comment; }
348 "//" { goto comment_sl; }
350 "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
351 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
352 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
353 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
354 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
355 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
356 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
357 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
358 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
359 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
360 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
361 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
362 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
363 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
364 "||" { NEWTOKEN(PSI_T_OR); goto start; }
365 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
366 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
367 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
368 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
369 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
370 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
371 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
372 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
373 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
374 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
375 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
376 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
377 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
378 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
379 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
380 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
381 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
382 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
383 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
384 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
385 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
386 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
387 "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
388 "__inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
389 "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
390 "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
391 "__asm__" { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
392 "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; }
393 "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; }
394 "line" { NEWTOKEN(PSI_T_LINE); goto start; }
395 "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
396 "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; }
397 "union" { NEWTOKEN(PSI_T_UNION); goto start; }
398 "enum" { NEWTOKEN(PSI_T_ENUM); goto start; }
399 "const" { NEWTOKEN(PSI_T_CONST); goto start; }
400 "void" { NEWTOKEN(PSI_T_VOID); goto start; }
401 "bool" { NEWTOKEN(PSI_T_BOOL); goto start; }
402 "char" { NEWTOKEN(PSI_T_CHAR); goto start; }
403 "short" { NEWTOKEN(PSI_T_SHORT); goto start; }
404 "int" { NEWTOKEN(PSI_T_INT); goto start; }
405 "long" { NEWTOKEN(PSI_T_LONG); goto start; }
406 "float" { NEWTOKEN(PSI_T_FLOAT); goto start; }
407 "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; }
408 "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
409 "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; }
410 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
411 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
412 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
413 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
414 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
415 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
416 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
417 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
418 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
419 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
420 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
421 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
422 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
423 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
424 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
425 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
426 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
427 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
428 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
429 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
430 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
431 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
432 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
433 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
434 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
435 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
436 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
437 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
438 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
439 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
440 'AS' { NEWTOKEN(PSI_T_AS); goto start; }
441 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
442 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
443 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
444 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
445 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
446 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
447 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
448 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
449 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
450 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
451 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
452 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
453 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
454 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
455 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
456 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
457 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
458 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
459 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
460 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
461 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
462 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
463 CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
464 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
465 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
466 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
467 [^] { NEWTOKEN(-2); goto error; }
468 * { NEWTOKEN(-1); goto error; }
475 EOL { NEWLINE(); goto character; }
476 "\\" { escaped = !escaped; goto character; }
483 NEWTOKEN(PSI_T_QUOTED_CHAR);
485 token->flags = char_width;
488 * { escaped = false; goto character; }
495 EOL { NEWLINE(); goto string; }
496 "\\" { escaped = !escaped; goto string; }
503 NEWTOKEN(PSI_T_QUOTED_STRING);
505 token->flags = char_width;
508 * { escaped = false; goto string; }
515 EOL { NEWLINE(); goto comment; }
516 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
524 EOL { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
525 * { goto comment_sl; }
533 "(" { ++parens; goto cpp_attribute; }
534 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
535 EOL { NEWLINE(); goto cpp_attribute; }
536 * { goto cpp_attribute; }
541 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
542 token->type, token->text->len, token->text->val, tok - eol + 1);
543 psi_plist_free(tokens);
548 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);