1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
26 #include "php_psi_stdinc.h"
32 #include <Zend/zend_smart_str.h>
/* Number of padding bytes reserved behind the payload of every input buffer
 * allocated in this file (see the malloc() size computations in
 * psi_parser_open_file() and psi_parser_open_string()). */
38 # define YYMAXFILL 256
/**
 * Set up a parser instance.
 *
 * Visible steps: obtain storage for a struct psi_parser with malloc(), zero
 * the struct, construct its PSI_DATA() part with the given error callback and
 * flags through psi_data_ctor_with_dtors(), and create the preprocessor with
 * psi_cpp_init().
 *
 * @param P      parser storage
 * @param error  error callback handed to psi_data_ctor_with_dtors()
 * @param flags  flags handed to psi_data_ctor_with_dtors()
 *
 * NOTE(review): lines are elided from this excerpt. Presumably malloc() only
 * runs when P is NULL and the initialised P is returned - confirm. No check of
 * the malloc() result is visible before the memset().
 */
41 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
44 P = malloc(sizeof(*P));
46 memset(P, 0, sizeof(*P));
48 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
50 P->preproc = psi_cpp_init(P);
/**
 * Read a whole file into a newly allocated parser input.
 *
 * The file size is taken from stat(); header, payload and YYMAXFILL padding
 * bytes live in one malloc() block. The payload is read with a single fread()
 * and a short read is treated as failure. On success the padding is zeroed,
 * fb->length is set to the file size and fb->file to the interned file name.
 *
 * Each failure (stat, malloc, fopen, fread) is reported as a PSI_WARNING
 * through P->error.
 *
 * @param P              parser whose error callback receives the warnings
 * @param filename       path of the file to load
 * @param report_errors  NOTE(review): its uses are on lines elided from this
 *                       excerpt; presumably it gates the warnings - confirm
 *
 * NOTE(review): the return statements and the fclose()/free() calls are not
 * visible here; verify that fp is closed on the success path as well.
 */
55 struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors)
59 struct psi_parser_input *fb;
61 if (stat(filename, &sb)) {
63 P->error(PSI_DATA(P), NULL, PSI_WARNING,
64 "Could not stat '%s': %s",
65 filename, strerror(errno));
70 if (!(fb = malloc(sizeof(*fb) + sb.st_size + YYMAXFILL))) {
/* st_size is an off_t; convert it explicitly so the argument matches %zu. */
72 P->error(PSI_DATA(P), NULL, PSI_WARNING,
73 "Could not allocate %zu bytes for reading '%s': %s",
74 (size_t) sb.st_size + YYMAXFILL, filename, strerror(errno));
79 if (!(fp = fopen(filename, "r"))) {
82 P->error(PSI_DATA(P), NULL, PSI_WARNING,
83 "Could not open '%s' for reading: %s",
84 filename, strerror(errno));
89 if (sb.st_size != fread(fb->buffer, 1, sb.st_size, fp)) {
/* Only st_size bytes were requested from fread(); report that count, not the
 * padded allocation size. */
93 P->error(PSI_DATA(P), NULL, PSI_WARNING,
94 "Could not read %zu bytes from '%s': %s",
95 (size_t) sb.st_size, filename, strerror(errno));
/* After a successful read: zero the padding behind the payload, exactly as
 * psi_parser_open_string() does, so the scanner never looks at uninitialised
 * bytes when it reads past the end of the payload. */
memset(fb->buffer + sb.st_size, 0, YYMAXFILL);
100 fb->length = sb.st_size;
101 fb->file = zend_string_init_interned(filename, strlen(filename), 1);
/**
 * Wrap an in-memory string as a parser input.
 *
 * Header, `length` payload bytes and YYMAXFILL padding bytes are obtained with
 * one malloc(); if that fails a PSI_WARNING is reported through P->error. The
 * string is copied into sb->buffer, the YYMAXFILL bytes behind it are zeroed,
 * and the input file name is set to the interned string "<stdin>".
 *
 * @param P       parser whose error callback receives the warning
 * @param string  bytes to copy; `length` bytes are read from it
 * @param length  number of payload bytes
 *
 * NOTE(review): the return statements and any assignment of sb->length sit on
 * lines elided from this excerpt - confirm.
 */
106 struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
108 struct psi_parser_input *sb;
110 if (!(sb = malloc(sizeof(*sb) + length + YYMAXFILL))) {
111 P->error(PSI_DATA(P), NULL, PSI_WARNING,
112 "Could not allocate %zu bytes: %s",
113 length + YYMAXFILL, strerror(errno));
/* Copy the payload, then clear the padding that follows it. */
117 memcpy(sb->buffer, string, length);
118 memset(sb->buffer + length, 0, YYMAXFILL);
121 sb->file = zend_string_init_interned("<stdin>", strlen("<stdin>"), 1);
/**
 * Run the preprocessor over a token list.
 *
 * Hands the parser preprocessor (P->preproc) and the token list pointer to
 * psi_cpp_process() and branches on its result.
 *
 * NOTE(review): both return statements are elided from this excerpt.
 * psi_parser_parse() treats a NULL result as failure; presumably *tokens is
 * returned when psi_cpp_process() succeeds - confirm.
 */
126 struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens)
128 if (psi_cpp_process(P->preproc, tokens)) {
/**
 * Feed a token list to the grammar.
 *
 * When the list is non-empty, psi_parser_proc_parse() is invoked and a return
 * value of 0 from it is reported as success (true).
 *
 * @param processed  passed through to psi_parser_proc_parse()
 *
 * NOTE(review): the result for an empty list is on an elided line - confirm.
 */
134 bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed)
136 if (psi_plist_count(tokens)) {
137 return 0 == psi_parser_proc_parse(P, tokens, processed);
/**
 * Register constants for preprocessor macros after parsing.
 *
 * Walks P->preproc->defs with a validation scope whose `defs` points at the
 * same table. For every macro:
 *  - a macro that has a `sig` takes the empty branch and is skipped;
 *  - a macro with an `exp` that passes psi_num_exp_validate() becomes a
 *    PSI_T_NUMBER constant holding a copy of that expression;
 *  - a macro made of exactly one PSI_T_QUOTED_STRING token becomes a
 *    PSI_T_STRING constant holding the token text.
 * Constant names are the macro name prefixed with the "psi" namespace and a
 * backslash. New constants are appended to P->consts.
 *
 * NOTE(review): several lines are elided from this excerpt (declarations, the
 * else branches, the restore of P->flags); the description above covers only
 * what is visible.
 */
142 void psi_parser_postprocess(struct psi_parser *P)
146 struct psi_validate_scope scope = {0};
148 psi_validate_scope_ctor(&scope);
149 scope.defs = &P->preproc->defs;
/* PSI_SILENT is set for the macro evaluation below.
 * NOTE(review): presumably the previous flags are saved before and restored
 * after the loop on elided lines - confirm. */
152 P->flags |= PSI_SILENT;
154 /* register const macros */
155 ZEND_HASH_FOREACH_STR_KEY_PTR(&P->preproc->defs, name, scope.macro)
157 if (scope.macro->sig) {
158 } else if (scope.macro->exp) {
159 if (psi_num_exp_validate(PSI_DATA(P), scope.macro->exp, &scope)) {
160 struct psi_impl_type *type;
161 struct psi_impl_def_val *def;
162 struct psi_const *cnst;
163 struct psi_num_exp *num;
164 smart_str ns_name = {0};
165 zend_string *name_str, *type_str;
/* Build the namespaced constant name from the macro name. */
167 smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1);
168 smart_str_append_ex(&ns_name, name, 1);
169 name_str = smart_str_extract(&ns_name);
170 type_str = zend_string_init_interned(ZEND_STRL("<eval number>"), 1);
/* The constant gets its own copy of the macro expression. */
172 num = psi_num_exp_copy(scope.macro->exp);
173 def = psi_impl_def_val_init(PSI_T_NUMBER, num);
174 type = psi_impl_type_init(PSI_T_NUMBER, type_str);
175 cnst = psi_const_init(type, name_str, def);
176 P->consts = psi_plist_add(P->consts, &cnst);
/* Drop the local references to the two strings. */
177 zend_string_release(name_str);
178 zend_string_release(type_str);
/* Single-token macros: only a quoted string literal is turned into a constant. */
181 if (psi_plist_count(scope.macro->tokens) == 1) {
184 if (psi_plist_get(scope.macro->tokens, 0, &t)) {
185 if (t->type == PSI_T_QUOTED_STRING) {
186 struct psi_impl_type *type;
187 struct psi_impl_def_val *def;
188 struct psi_const *cnst;
189 smart_str ns_name = {0};
190 zend_string *name_str, *type_str;
192 smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1);
193 smart_str_append_ex(&ns_name, name, 1);
194 name_str = smart_str_extract(&ns_name);
195 type_str = zend_string_init_interned(ZEND_STRL("string"), 1);
197 type = psi_impl_type_init(PSI_T_STRING, type_str);
198 def = psi_impl_def_val_init(PSI_T_QUOTED_STRING, t->text);
199 cnst = psi_const_init(type, name_str, def);
200 P->consts = psi_plist_add(P->consts, &cnst);
201 zend_string_release(name_str);
202 zend_string_release(type_str);
208 ZEND_HASH_FOREACH_END();
212 psi_validate_scope_dtor(&scope);
/**
 * Run the whole pipeline on one input.
 *
 * Order of the visible calls: psi_parser_scan(), psi_parser_preprocess(),
 * psi_parser_process(), psi_parser_postprocess(). When preprocessing fails the
 * scanned list is freed; when processing fails the preprocessed list is freed;
 * after post-processing the preprocessed list is freed as well.
 *
 * NOTE(review): the return statements are elided from this excerpt;
 * presumably false is returned after each failed stage and true at the end -
 * confirm.
 */
215 bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
217 struct psi_plist *scanned, *preproc;
218 size_t processed = 0;
220 if (!(scanned = psi_parser_scan(P, I))) {
/* The preprocessor receives the address of the scanned list. */
224 if (!(preproc = psi_parser_preprocess(P, &scanned))) {
225 psi_plist_free(scanned);
229 if (!psi_parser_process(P, preproc, &processed)) {
230 psi_plist_free(preproc);
234 psi_parser_postprocess(P);
236 psi_plist_free(preproc);
/**
 * Tear down a parser in place.
 *
 * Frees the preprocessor through psi_cpp_free(), destructs the PSI_DATA() part
 * with psi_data_dtor() and finally zeroes the whole struct. No call that
 * releases P itself is visible in this function.
 */
240 void psi_parser_dtor(struct psi_parser *P)
242 psi_cpp_free(&P->preproc);
243 psi_data_dtor(PSI_DATA(P));
245 memset(P, 0, sizeof(*P));
/**
 * Release a parser through a pointer to its handle.
 *
 * NOTE(review): only the signature is visible in this excerpt. Presumably the
 * parser behind *P is destructed and freed and *P is reset - confirm against
 * the body.
 */
248 void psi_parser_free(struct psi_parser **P)
/*
 * NEWTOKEN(t): create a token of type t for the current lexeme and append it
 * to `tokens`.
 *
 * PSI_T_COMMENT and PSI_T_WHITESPACE tokens are created with empty text; the
 * other psi_token_init() call passes the lexeme text from tok to cur. The
 * column is computed as tok - eol + 1, line and file come from I->lines and
 * I->file. When PSI_DEBUG is set in P->flags, "PSI< " is printed to stderr and
 * the token is dumped with psi_token_dump(2, token).
 *
 * The macro relies on the names token, tokens, tok, cur, eol, I and P being in
 * scope where it is expanded.
 *
 * NOTE(review): some continuation lines are elided from this excerpt;
 * presumably the two psi_token_init() calls are the branches of an if/else.
 */
261 #define NEWTOKEN(t) \
262 if (t == PSI_T_COMMENT || t == PSI_T_WHITESPACE) { \
263 token = psi_token_init(t, "", 0, tok - eol + 1, I->lines, I->file); \
265 token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
267 tokens = psi_plist_add(tokens, &token); \
268 if (P->flags & PSI_DEBUG) { \
269 fprintf(stderr, "PSI< "); \
270 psi_token_dump(2, token); \
/**
 * Split a parser input into a list of tokens with the re2c rules below.
 *
 * Scanning starts at I->buffer and is limited to I->buffer + I->length. The
 * tokens are collected in a psi_plist whose element destructor is
 * psi_token_free. YYFILL is defined as "if (cur >= lim) goto done;", so the
 * scanner stops once the cursor has reached the limit.
 *
 * Rule groups, in order of appearance:
 *  - integer and floating point literals; a type suffix is matched as trailing
 *    context, recorded in token->flags, and then skipped by advancing cur;
 *  - quote characters that enter the character and string states, and the
 *    u8 / u / U / L literal prefixes, which store the width of one code unit
 *    in bytes in char_width (1, 2, 4 and sizeof(wchar_t) respectively);
 *  - comment openers, punctuation and operators;
 *  - case-sensitive C keywords (double-quoted patterns) and case-insensitive
 *    PSI and preprocessor keywords (single-quoted patterns);
 *  - "pragma once", recognised as one token when "pragma" and "once" are
 *    separated by SP characters;
 *  - names, namespaced names, dollar names, <header> names (the token text
 *    excludes the angle brackets), __attribute__(( ... )) blocks, line ends
 *    and whitespace;
 *  - everything else is turned into a token of type -2 or -1 and routed to
 *    the error handling.
 *
 * The character, string, comment and cpp_attribute states consume their
 * content until the terminating delimiter and emit a single token; the string
 * and character states copy char_width into token->flags.
 *
 * On a syntax error a PSI_WARNING naming the offending token is reported
 * through P->error and the token list is freed. The precision argument of
 * "%.*s" is converted to int because that is the type the conversion reads.
 *
 * NOTE(review): state labels, the remaining declarations and the return
 * statements are on lines elided from this excerpt; presumably NULL is
 * returned after an error and the token list otherwise - confirm.
 */
276 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
278 struct psi_plist *tokens;
279 struct psi_token *token;
280 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
285 PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file->val);
/* All cursors start at the beginning of the buffer; lim marks the payload end. */
287 tok = mrk = eol = cur = I->buffer;
288 lim = I->buffer + I->length;
290 tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
302 re2c:define:YYCTYPE = "unsigned char";
303 re2c:define:YYCURSOR = cur;
304 re2c:define:YYLIMIT = lim;
305 re2c:define:YYMARKER = mrk;
306 re2c:define:YYCTXMARKER = ctxmrk;
307 re2c:define:YYFILL = "if (cur >= lim) goto done;";
308 re2c:yyfill:parameter = 0;
310 W = [a-zA-Z0-9_\x80-\xff];
313 NAME = [a-zA-Z_\x80-\xff] W*;
314 NSNAME = (NAME)? ("\\" NAME)+;
315 DOLLAR_NAME = '$' W+;
316 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
317 CPP_ATTRIBUTE = "__attribute__" SP* "((";
319 DEC_CONST = [1-9] [0-9]*;
320 OCT_CONST = "0" [0-7]*;
321 HEX_CONST = '0x' [0-9a-fA-F]+;
322 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
324 FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
325 FLT_DEC_NUM = "0" | DEC_CONST;
326 FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
327 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
329 [+-]? INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
330 [+-]? INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
331 [+-]? INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
332 [+-]? INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
333 [+-]? INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
335 [+-]? FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
336 [+-]? FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
337 [+-]? FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
338 [+-]? FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
339 [+-]? FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
340 [+-]? FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
342 "'" { escaped = false; tok += 1; goto character; }
343 "\"" { escaped = false; tok += 1; goto string; }
344 "u8" / "\"" { char_width = 1; }
345 "u" / ['"] { char_width = 2; }
346 "U" / ['"] { char_width = 4; }
347 "L" / ['"] { char_width = sizeof(wchar_t); }
349 "/*" { goto comment; }
350 "//" { goto comment_sl; }
352 "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
353 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
354 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
355 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
356 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
357 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
358 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
359 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
360 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
361 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
362 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
363 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
364 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
365 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
366 "||" { NEWTOKEN(PSI_T_OR); goto start; }
367 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
368 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
369 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
370 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
371 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
372 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
373 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
374 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
375 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
376 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
377 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
378 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
379 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
380 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
381 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
382 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
383 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
384 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
385 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
386 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
387 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
388 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
389 "pragma" SP+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
390 "__inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
391 "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
392 "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
393 "__asm__" { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
394 "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; }
395 "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; }
396 "line" { NEWTOKEN(PSI_T_LINE); goto start; }
397 "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
398 "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; }
399 "union" { NEWTOKEN(PSI_T_UNION); goto start; }
400 "enum" { NEWTOKEN(PSI_T_ENUM); goto start; }
401 "const" { NEWTOKEN(PSI_T_CONST); goto start; }
402 "void" { NEWTOKEN(PSI_T_VOID); goto start; }
403 "bool" { NEWTOKEN(PSI_T_BOOL); goto start; }
404 "char" { NEWTOKEN(PSI_T_CHAR); goto start; }
405 "short" { NEWTOKEN(PSI_T_SHORT); goto start; }
406 "int" { NEWTOKEN(PSI_T_INT); goto start; }
407 "long" { NEWTOKEN(PSI_T_LONG); goto start; }
408 "float" { NEWTOKEN(PSI_T_FLOAT); goto start; }
409 "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; }
410 "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
411 "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; }
412 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
413 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
414 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
415 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
416 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
417 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
418 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
419 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
420 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
421 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
422 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
423 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
424 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
425 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
426 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
427 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
428 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
429 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
430 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
431 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
432 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
433 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
434 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
435 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
436 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
437 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
438 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
439 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
440 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
441 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
442 'AS' { NEWTOKEN(PSI_T_AS); goto start; }
443 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
444 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
445 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
446 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
447 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
448 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
449 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
450 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
451 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
452 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
453 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
454 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
455 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
456 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
457 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
458 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
459 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
460 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
461 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
462 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
463 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
464 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
465 CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
466 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
467 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
468 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
469 [^] { NEWTOKEN(-2); goto error; }
470 * { NEWTOKEN(-1); goto error; }
477 EOL { NEWLINE(); goto character; }
478 "\\" { escaped = !escaped; goto character; }
485 NEWTOKEN(PSI_T_QUOTED_CHAR);
487 token->flags = char_width;
490 * { escaped = false; goto character; }
497 EOL { NEWLINE(); goto string; }
498 "\\" { escaped = !escaped; goto string; }
505 NEWTOKEN(PSI_T_QUOTED_STRING);
507 token->flags = char_width;
510 * { escaped = false; goto string; }
517 EOL { NEWLINE(); goto comment; }
518 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
526 EOL { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
527 * { goto comment_sl; }
535 "(" { ++parens; goto cpp_attribute; }
536 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
537 EOL { NEWLINE(); goto cpp_attribute; }
538 * { goto cpp_attribute; }
543 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
544 token->type, (int) token->text->len, token->text->val, tok - eol + 1);
545 psi_plist_free(tokens);
550 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);