1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
26 #include "php_psi_stdinc.h"
/* Size, in bytes, of the zero-filled padding that psi_parser_open_file() and
 * psi_parser_open_string() append directly after the input data. */
35 # define YYMAXFILL 256
/*
 * Initialize a parser instance.
 *
 * The visible steps are: obtain storage with malloc(), zero the whole struct,
 * run psi_data_ctor_with_dtors() with the caller's error callback and flags,
 * then create the preprocessor with psi_cpp_init() and load its defaults.
 *
 * NOTE(review): this excerpt does not show whether the malloc() is conditional
 * on the incoming P, whether its result is checked before the memset(), or
 * what the function returns -- confirm against the complete function.
 */
38 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
41 P = malloc(sizeof(*P));
/* Start from an all-zero struct before the constructor fills it in. */
43 memset(P, 0, sizeof(*P));
45 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
/* The preprocessor is created from the parser and gets its defaults loaded
 * right away. */
47 P->preproc = psi_cpp_init(P);
49 psi_cpp_load_defaults(P->preproc);
/*
 * Read a whole file into a freshly allocated struct psi_parser_input.
 *
 * One malloc() covers the struct, the file name including its NUL terminator,
 * st_size bytes of file content and YYMAXFILL bytes of padding. After a
 * successful read the padding behind the content is zeroed, fb->length is set
 * to the file size (padding excluded) and fb->file points just past the
 * padding, where a copy of filename is stored.
 *
 * Failures of stat(), malloc(), fopen() and fread() are reported through
 * P->error() as PSI_WARNING together with strerror(errno).
 *
 * NOTE(review): how report_errors gates these messages, the cleanup on the
 * failure paths and the return statements are not visible in this excerpt.
 */
54 struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors)
58 struct psi_parser_input *fb;
/* stat() supplies the file size used to dimension the buffer below. */
60 if (stat(filename, &sb)) {
62 P->error(PSI_DATA(P), NULL, PSI_WARNING,
63 "Could not stat '%s': %s",
64 filename, strerror(errno));
/* Single allocation: struct + file name (with NUL) + content + padding. */
69 if (!(fb = malloc(sizeof(*fb) + strlen(filename) + 1 + sb.st_size + YYMAXFILL))) {
/* NOTE(review): "%zu" expects a size_t, but the argument has the type of
 * sb.st_size (off_t if sb is a struct stat; its declaration is not visible
 * here) -- confirm and cast if needed. */
71 P->error(PSI_DATA(P), NULL, PSI_WARNING,
72 "Could not allocate %zu bytes for reading '%s': %s",
73 sb.st_size + YYMAXFILL, filename, strerror(errno));
/* NOTE(review): the file is opened in text mode ("r") while the expected byte
 * count comes from stat(); on platforms that translate line endings the two
 * can disagree -- confirm this is acceptable. */
78 if (!(fp = fopen(filename, "r"))) {
81 P->error(PSI_DATA(P), NULL, PSI_WARNING,
82 "Could not open '%s' for reading: %s",
83 filename, strerror(errno));
/* Anything other than exactly st_size bytes read is treated as a failure. */
88 if (sb.st_size != fread(fb->buffer, 1, sb.st_size, fp)) {
/* NOTE(review): the message reports st_size + YYMAXFILL bytes although only
 * st_size bytes were requested from fread(); same "%zu" concern as above. */
92 P->error(PSI_DATA(P), NULL, PSI_WARNING,
93 "Could not read %zu bytes from '%s': %s",
94 sb.st_size + YYMAXFILL, filename, strerror(errno));
/* Zero the YYMAXFILL padding bytes that follow the file content. */
99 memset(fb->buffer + sb.st_size, 0, YYMAXFILL);
100 fb->length = sb.st_size;
/* The file name copy lives behind the padding, inside the same allocation. */
101 fb->file = &fb->buffer[sb.st_size + YYMAXFILL];
102 memcpy(fb->file, filename, strlen(filename) + 1);
/*
 * Wrap an in-memory string in a freshly allocated struct psi_parser_input.
 *
 * One malloc() covers the struct, a copy of the length bytes of string,
 * YYMAXFILL zero bytes of padding and the literal name "<stdin>"
 * (sizeof("<stdin>") includes its NUL terminator). sb->file points at that
 * name, which is stored behind the padding.
 *
 * An allocation failure is reported through P->error() as PSI_WARNING.
 *
 * NOTE(review): the assignment of the input length and the return statements
 * are not visible in this excerpt.
 */
107 struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
109 struct psi_parser_input *sb;
111 if (!(sb = malloc(sizeof(*sb) + sizeof("<stdin>") + length + YYMAXFILL))) {
112 P->error(PSI_DATA(P), NULL, PSI_WARNING,
113 "Could not allocate %zu bytes: %s",
114 length + YYMAXFILL, strerror(errno));
/* Copy the caller's data, then zero the padding that follows it. */
118 memcpy(sb->buffer, string, length);
119 memset(sb->buffer + length, 0, YYMAXFILL);
/* The pseudo file name is placed after the padding. */
122 sb->file = &sb->buffer[length + YYMAXFILL];
123 memcpy(sb->file, "<stdin>", sizeof("<stdin>"));
/*
 * Turn the entries of the P->cpp.defs hash table into PSI constants and
 * append them to P->consts.
 *
 * For every string key / value pair the value's zval type selects how tmp is
 * filled (boolean, integer, double or string). A psi_impl_def_val, a
 * psi_const_type and finally a psi_const named after the key are then created
 * and the constant is added to the list.
 *
 * NOTE(review): the case labels, the assignments to ctt / ctn and the way tmp
 * is attached to iv are not visible in this excerpt.
 * NOTE(review): this function reads P->cpp.defs, whereas the other visible
 * code reaches the preprocessor through the pointer P->preproc -- confirm
 * that this function is still compiled and in use.
 */
129 static void psi_parser_register_constants(struct psi_parser *P)
134 ZEND_HASH_FOREACH_STR_KEY_VAL(&P->cpp.defs, key, val)
136 struct psi_impl_def_val *iv;
137 struct psi_const_type *ct;
145 switch (Z_TYPE_P(val)) {
/* The boolean value is derived from the type tag being IS_TRUE. */
150 tmp.zend.bval = Z_TYPE_P(val) == IS_TRUE;
155 tmp.zend.lval = Z_LVAL_P(val);
160 tmp.dval = Z_DVAL_P(val);
/* Get a string for the value, keep a duplicate of it and release the string
 * obtained from zval_get_string(). The second argument of zend_string_dup()
 * presumably requests a persistent copy -- confirm against the Zend API. */
165 str = zval_get_string(val);
166 tmp.zend.str = zend_string_dup(str, 1);
167 zend_string_release(str);
171 iv = psi_impl_def_val_init(ctt, NULL);
173 ct = psi_const_type_init(ctt, ctn);
174 c = psi_const_init(ct, key->val, iv);
/* NOTE(review): presumably guarded so that the list is created only once; the
 * guard is not visible in this excerpt. */
176 P->consts = psi_plist_init((psi_plist_dtor) psi_const_free);
178 P->consts = psi_plist_add(P->consts, &c);
180 ZEND_HASH_FOREACH_END();
/*
 * Run the preprocessor over the token list referenced by tokens by calling
 * psi_cpp_process() on P->preproc.
 *
 * psi_parser_parse() treats a NULL result of this function as failure.
 *
 * NOTE(review): only the call itself is visible here; which list is returned
 * on success is not shown in this excerpt.
 */
184 struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens)
186 if (psi_cpp_process(P->preproc, tokens)) {
/*
 * Feed a token list to the generated parser.
 *
 * When the list is not empty, the result is true exactly if
 * psi_parser_proc_parse() returns 0; processed is passed through to it.
 *
 * NOTE(review): the result for an empty list is not visible in this excerpt.
 */
192 bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed)
194 if (psi_plist_count(tokens)) {
195 return 0 == psi_parser_proc_parse(P, tokens, processed);
/*
 * Parse one input: scan it into tokens, preprocess the tokens and hand the
 * result to the parser.
 *
 * Each stage is checked in turn. If preprocessing fails the scanned list is
 * freed; the preprocessed list is freed both when processing fails and after
 * it has succeeded.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
200 bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
202 struct psi_plist *scanned, *preproc;
203 size_t processed = 0;
/* Stage 1: tokenize the raw input. */
205 if (!(scanned = psi_parser_scan(P, I))) {
/* Stage 2: preprocess; on failure the scanned tokens are released here. */
209 if (!(preproc = psi_parser_preprocess(P, &scanned))) {
210 psi_plist_free(scanned);
/* Stage 3: parse the preprocessed tokens. */
214 if (!psi_parser_process(P, preproc, &processed)) {
215 psi_plist_free(preproc);
219 psi_plist_free(preproc);
/*
 * Tear down a parser in place: free its preprocessor, run the psi_data
 * destructor and finally zero the whole struct. The struct's own storage is
 * not released by the visible code.
 */
223 void psi_parser_dtor(struct psi_parser *P)
225 psi_cpp_free(&P->preproc);
226 psi_data_dtor(PSI_DATA(P));
/* Leave no stale pointers behind in the caller's struct. */
228 memset(P, 0, sizeof(*P));
231 void psi_parser_free(struct psi_parser **P)
/*
 * Create a token of type t and append it to the token list.
 *
 * The macro relies on names from the expansion site: tok and cur delimit the
 * token text (cur - tok is passed as its size), tok - eol + 1 is passed as
 * the column, I->lines and I->file as line and file, and token / tokens / P
 * are the scanner's locals. When P->flags has PSI_DEBUG set, the token is
 * also dumped after a "PSI< " prefix written to stderr.
 *
 * This is a multi-statement macro without a do { } while (0) wrapper, so it
 * must only be used where a sequence of statements is valid; the visible
 * uses are all inside braced rule actions.
 */
244 #define NEWTOKEN(t) \
245 token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
246 tokens = psi_plist_add(tokens, &token); \
247 if (P->flags & PSI_DEBUG) { \
248 fprintf(stderr, "PSI< "); \
249 psi_token_dump(2, token); \
253 char s[SIZEOF_UINT32_T];
/*
 * Scan the input I into a list of tokens.
 *
 * Setup: tok, mrk, eol and cur all start at I->buffer, lim is
 * I->buffer + I->length, and the result list is created with psi_token_free
 * as its element destructor.
 *
 * The re2c rules work on unsigned char input with cur as cursor, lim as
 * limit, mrk / ctxmrk as markers, and a YYFILL hook that jumps to done once
 * cur has reached lim.
 *
 * Rule groups of the start state, in source order:
 *  - integer and floating point constants; the suffixed forms match the
 *    suffix as trailing context, emit the token without it and then advance
 *    cur past the suffix by hand;
 *  - a single or double quote switches to the character / string state; the
 *    u8, u, U and L prefixes only set char_width;
 *  - the two comment openers switch to the comment / comment_sl states;
 *  - punctuation and operators, C keywords (double-quoted patterns) and PSI
 *    keywords (single-quoted patterns, which re2c matches
 *    case-insensitively);
 *  - NAME, NSNAME, DOLLAR_NAME, CPP_HEADER (token text excludes the angle
 *    brackets), the __attribute__ opener, EOL and runs of whitespace;
 *  - any other input produces a token of type -2 or -1 and jumps to error.
 *
 * Other states:
 *  - character / string: a backslash toggles escaped, any other byte clears
 *    it, EOL bumps the line bookkeeping; the resulting QUOTED_CHAR /
 *    QUOTED_STRING token gets flags = char_width;
 *  - comment ends at the closing star-slash, comment_sl at EOL; both emit
 *    PSI_T_COMMENT;
 *  - cpp_attribute starts with parens = 2, counts nested parentheses and
 *    emits PSI_T_CPP_ATTRIBUTE when a ")" arrives while parens == 1;
 *  - error reports a "PSI syntax error" warning through P->error() and
 *    frees the token list.
 *
 * NOTE(review): the PRAGMA_ONCE rule is "pragma" W+ "once", and W is the
 * word-character class, so it cannot match "pragma once" separated by
 * blanks -- a whitespace class was probably intended; confirm.
 * NOTE(review): the L prefix sets char_width = SIZEOF_WCHAR_T/8; if that
 * macro is a size in bytes the result is 0 -- confirm the unit.
 * NOTE(review): the actions of the u8 / u / U / L prefix rules contain no
 * goto; where control continues after them is not visible in this excerpt.
 * NOTE(review): state labels, re2c block delimiters, the handling at done
 * and the return statements are not visible in this excerpt.
 */
257 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
259 struct psi_plist *tokens;
260 struct psi_token *token;
/* tok: start of the token being matched; cur: scan cursor; lim: end of the
 * input; mrk / ctxmrk: re2c backtracking and trailing-context markers;
 * eol: reference point for the column computation (tok - eol + 1). */
261 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
266 PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file);
268 tok = mrk = eol = cur = I->buffer;
/* lim excludes the YYMAXFILL padding that the open functions append. */
269 lim = I->buffer + I->length;
271 tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
281 re2c:define:YYCTYPE = "unsigned char";
282 re2c:define:YYCURSOR = cur;
283 re2c:define:YYLIMIT = lim;
284 re2c:define:YYMARKER = mrk;
285 re2c:define:YYCTXMARKER = ctxmrk;
286 re2c:define:YYFILL = "if (cur >= lim) goto done;";
287 re2c:yyfill:parameter = 0;
289 W = [a-zA-Z0-9_\x80-\xff];
292 NAME = [a-zA-Z_\x80-\xff] W*;
293 NSNAME = (NAME)? ("\\" NAME)+;
294 DOLLAR_NAME = '$' W+;
295 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
296 CPP_ATTRIBUTE = "__attribute__" SP* "((";
298 DEC_CONST = [1-9] [0-9]*;
299 OCT_CONST = "0" [0-7]*;
300 HEX_CONST = '0x' [0-9a-fA-F]+;
301 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
303 FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
304 FLT_DEC_NUM = "0" | DEC_CONST;
305 FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
306 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
308 [+-]? INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
309 [+-]? INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
310 [+-]? INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
311 [+-]? INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
312 [+-]? INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
314 [+-]? FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
315 [+-]? FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
316 [+-]? FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
317 [+-]? FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
318 [+-]? FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
319 [+-]? FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
321 "'" { escaped = false; tok += 1; goto character; }
322 "\"" { escaped = false; tok += 1; goto string; }
323 "u8" / "\"" { char_width = 1; }
324 "u" / ['"] { char_width = 2; }
325 "U" / ['"] { char_width = 4; }
326 "L" / ['"] { char_width = SIZEOF_WCHAR_T/8; }
328 "/*" { goto comment; }
329 "//" { goto comment_sl; }
331 "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
332 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
333 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
334 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
335 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
336 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
337 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
338 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
339 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
340 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
341 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
342 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
343 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
344 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
345 "||" { NEWTOKEN(PSI_T_OR); goto start; }
346 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
347 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
348 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
349 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
350 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
351 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
352 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
353 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
354 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
355 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
356 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
357 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
358 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
359 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
360 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
361 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
362 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
363 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
364 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
365 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
366 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
367 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
368 "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
369 "__inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
370 "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
371 "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
372 "__asm__" { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
373 "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; }
374 "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; }
375 "line" { NEWTOKEN(PSI_T_LINE); goto start; }
376 "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
377 "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; }
378 "union" { NEWTOKEN(PSI_T_UNION); goto start; }
379 "enum" { NEWTOKEN(PSI_T_ENUM); goto start; }
380 "const" { NEWTOKEN(PSI_T_CONST); goto start; }
381 "void" { NEWTOKEN(PSI_T_VOID); goto start; }
382 "bool" { NEWTOKEN(PSI_T_BOOL); goto start; }
383 "char" { NEWTOKEN(PSI_T_CHAR); goto start; }
384 "short" { NEWTOKEN(PSI_T_SHORT); goto start; }
385 "int" { NEWTOKEN(PSI_T_INT); goto start; }
386 "long" { NEWTOKEN(PSI_T_LONG); goto start; }
387 "float" { NEWTOKEN(PSI_T_FLOAT); goto start; }
388 "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; }
389 "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
390 "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; }
391 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
392 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
393 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
394 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
395 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
396 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
397 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
398 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
399 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
400 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
401 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
402 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
403 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
404 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
405 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
406 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
407 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
408 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
409 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
410 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
411 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
412 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
413 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
414 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
415 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
416 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
417 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
418 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
419 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
420 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
421 'AS' { NEWTOKEN(PSI_T_AS); goto start; }
422 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
423 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
424 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
425 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
426 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
427 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
428 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
429 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
430 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
431 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
432 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
433 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
434 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
435 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
436 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
437 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
438 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
439 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
440 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
441 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
442 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
443 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
444 CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
445 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
446 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
447 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
448 [^] { NEWTOKEN(-2); goto error; }
449 * { NEWTOKEN(-1); goto error; }
456 EOL { NEWLINE(); goto character; }
457 "\\" { escaped = !escaped; goto character; }
464 NEWTOKEN(PSI_T_QUOTED_CHAR);
466 token->flags = char_width;
469 * { escaped = false; goto character; }
476 EOL { NEWLINE(); goto string; }
477 "\\" { escaped = !escaped; goto string; }
484 NEWTOKEN(PSI_T_QUOTED_STRING);
486 token->flags = char_width;
489 * { escaped = false; goto string; }
496 EOL { NEWLINE(); goto comment; }
497 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
505 EOL { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
506 * { goto comment_sl; }
514 "(" { ++parens; goto cpp_attribute; }
515 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
516 EOL { NEWLINE(); goto cpp_attribute; }
517 * { goto cpp_attribute; }
522 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
523 token->type, token->size, token->text, tok - eol + 1);
524 psi_plist_free(tokens);
529 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);