1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
26 #include "php_psi_stdinc.h"
/* Size of the zero-filled padding placed after the input data: both
 * psi_parser_open_file() and psi_parser_open_string() allocate YYMAXFILL
 * extra bytes and memset them to 0 behind the buffer contents. */
35 # define YYMAXFILL 256
/*
 * Initialize a parser instance.
 *
 * P     - parser storage; the visible code can allocate it with malloc().
 * error - error callback handed to psi_data_ctor_with_dtors().
 * flags - flags handed to psi_data_ctor_with_dtors().
 *
 * The struct is zeroed, its PSI_DATA part is constructed, and a
 * preprocessor is created via psi_cpp_init() and loaded with its defaults.
 *
 * NOTE(review): in the lines visible here the malloc() result is passed
 * straight to memset(); confirm an allocation failure is handled.
 */
38 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
41 P = malloc(sizeof(*P));
/* start from an all-zero struct before any member is set up */
43 memset(P, 0, sizeof(*P));
45 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
/* the preprocessor keeps a reference to its owning parser (P is passed in) */
47 P->preproc = psi_cpp_init(P);
49 psi_cpp_load_defaults(P->preproc);
/*
 * Read a whole file into a freshly allocated psi_parser_input.
 *
 * P             - parser whose error callback receives PSI_WARNING messages.
 * filename      - path handed to stat() and fopen().
 * report_errors - NOTE(review): not referenced in the visible lines;
 *                 presumably gates the P->error() calls - confirm.
 *
 * A single malloc() holds, in this order: the input header, st_size bytes
 * of file data, YYMAXFILL zero bytes, and a copy of the file name
 * (fb->file points just past the padding).
 *
 * NOTE(review): "%zu" is used with `sb.st_size + YYMAXFILL`; POSIX declares
 * st_size as off_t, so confirm the argument type matches the conversion.
 * NOTE(review): the read-failure message prints `sb.st_size + YYMAXFILL`,
 * while the fread() above it requests only sb.st_size bytes.
 * NOTE(review): fread() returns size_t and is compared against sb.st_size;
 * check for a signed/unsigned comparison.
 * NOTE(review): the file is opened in "r" mode while the expected length
 * comes from stat(); confirm this holds on platforms with text-mode
 * translation.
 */
54 struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors)
58 struct psi_parser_input *fb;
/* stat() supplies the file size used for the allocation and the read */
60 if (stat(filename, &sb)) {
62 P->error(PSI_DATA(P), NULL, PSI_WARNING,
63 "Could not stat '%s': %s",
64 filename, strerror(errno));
/* header + file name (with NUL) + file data + YYMAXFILL padding */
69 if (!(fb = malloc(sizeof(*fb) + strlen(filename) + 1 + sb.st_size + YYMAXFILL))) {
71 P->error(PSI_DATA(P), NULL, PSI_WARNING,
72 "Could not allocate %zu bytes for reading '%s': %s",
73 sb.st_size + YYMAXFILL, filename, strerror(errno));
78 if (!(fp = fopen(filename, "r"))) {
81 P->error(PSI_DATA(P), NULL, PSI_WARNING,
82 "Could not open '%s' for reading: %s",
83 filename, strerror(errno));
/* anything other than a complete read of st_size bytes is an error */
88 if (sb.st_size != fread(fb->buffer, 1, sb.st_size, fp)) {
92 P->error(PSI_DATA(P), NULL, PSI_WARNING,
93 "Could not read %zu bytes from '%s': %s",
94 sb.st_size + YYMAXFILL, filename, strerror(errno));
/* zero the padding that follows the file data */
99 memset(fb->buffer + sb.st_size, 0, YYMAXFILL);
100 fb->length = sb.st_size;
/* the file name lives in the same allocation, right after the padding */
101 fb->file = &fb->buffer[sb.st_size + YYMAXFILL];
102 memcpy(fb->file, filename, strlen(filename) + 1);
/*
 * Wrap an in-memory string in a freshly allocated psi_parser_input.
 *
 * P      - parser whose error callback receives a PSI_WARNING when the
 *          allocation fails.
 * string - source bytes; `length` bytes are copied, so no NUL terminator
 *          is read from it here.
 * length - number of bytes to copy from `string`.
 *
 * A single malloc() holds the input header, `length` data bytes, YYMAXFILL
 * zero bytes, and the literal name "<stdin>" (sb->file points just past
 * the padding).
 */
107 struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
109 struct psi_parser_input *sb;
/* sizeof("<stdin>") includes the terminating NUL of the pseudo file name */
111 if (!(sb = malloc(sizeof(*sb) + sizeof("<stdin>") + length + YYMAXFILL))) {
112 P->error(PSI_DATA(P), NULL, PSI_WARNING,
113 "Could not allocate %zu bytes: %s",
114 length + YYMAXFILL, strerror(errno));
118 memcpy(sb->buffer, string, length);
/* zero the padding that follows the copied data */
119 memset(sb->buffer + length, 0, YYMAXFILL);
122 sb->file = &sb->buffer[length + YYMAXFILL];
123 memcpy(sb->file, "<stdin>", sizeof("<stdin>"));
/*
 * Turn every string-keyed entry of the P->cpp.defs hash table into a PSI
 * constant and append it to P->consts.
 *
 * Each zval is dispatched on its type; the boolean, integer, double or
 * string value is stored in `tmp`, then wrapped with
 * psi_impl_def_val_init(), psi_const_type_init() and psi_const_init().
 *
 * NOTE(review): this reads P->cpp.defs, whereas psi_parser_init() and
 * psi_parser_dtor() use P->preproc; confirm this function matches the
 * current struct layout and is still compiled.
 */
129 static void psi_parser_register_constants(struct psi_parser *P)
134 ZEND_HASH_FOREACH_STR_KEY_VAL(&P->cpp.defs, key, val)
136 struct psi_impl_def_val *iv;
137 struct psi_const_type *ct;
145 switch (Z_TYPE_P(val)) {
/* true exactly when the zval's type is IS_TRUE */
150 tmp.zend.bval = Z_TYPE_P(val) == IS_TRUE;
155 tmp.zend.lval = Z_LVAL_P(val);
160 tmp.dval = Z_DVAL_P(val);
/* take a string view of the value, keep a duplicate, drop the temporary */
165 str = zval_get_string(val);
166 tmp.zend.str = zend_string_dup(str, 1);
167 zend_string_release(str);
171 iv = psi_impl_def_val_init(ctt, NULL);
173 ct = psi_const_type_init(ctt, ctn);
174 c = psi_const_init(ct, key->val, iv);
/* the constants list releases its elements with psi_const_free */
176 P->consts = psi_plist_init((psi_plist_dtor) psi_const_free);
178 P->consts = psi_plist_add(P->consts, &c);
180 ZEND_HASH_FOREACH_END();
/*
 * Run the token list through the parser's preprocessor.
 *
 * P      - parser; its P->preproc instance is handed to psi_cpp_process().
 * tokens - address of the token list, passed on to psi_cpp_process().
 *
 * The result depends on whether psi_cpp_process() returns non-zero.
 */
184 struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens)
186 if (psi_cpp_process(P->preproc, tokens)) {
/*
 * Parse an already preprocessed token list.
 *
 * P         - parser handed to psi_parser_proc_parse().
 * tokens    - token list to parse.
 * processed - passed through to psi_parser_proc_parse().
 *
 * For a non-empty list the result is true exactly when
 * psi_parser_proc_parse() returns 0.
 */
192 bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed)
/* psi_parser_proc_parse() is only called when there is at least one token */
194 if (psi_plist_count(tokens)) {
195 return 0 == psi_parser_proc_parse(P, tokens, processed);
/*
 * Scan, preprocess and parse one input in sequence.
 *
 * P - parser instance.
 * I - input to scan with psi_parser_scan().
 *
 * The three stages are psi_parser_scan(), psi_parser_preprocess() and
 * psi_parser_process(); each is checked for failure before the next runs.
 */
200 bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
202 struct psi_plist *scanned, *preproc;
203 size_t processed = 0;
205 if (!(scanned = psi_parser_scan(P, I))) {
/* when preprocessing fails, the scanned list is released here */
209 if (!(preproc = psi_parser_preprocess(P, &scanned))) {
210 psi_plist_free(scanned);
/* the preprocessed list is released in the failure branch as well as after it */
214 if (!psi_parser_process(P, preproc, &processed)) {
215 psi_plist_free(preproc);
219 psi_plist_free(preproc);
/*
 * Release what a parser owns without freeing the struct itself.
 *
 * Frees the preprocessor through psi_cpp_free(), destroys the PSI_DATA
 * part, and finally zeroes the whole struct.
 */
223 void psi_parser_dtor(struct psi_parser *P)
225 psi_cpp_free(&P->preproc);
226 psi_data_dtor(PSI_DATA(P));
/* leave no stale pointers behind in the caller's storage */
228 memset(P, 0, sizeof(*P));
231 void psi_parser_free(struct psi_parser **P)
/*
 * NEWTOKEN(t): create a token of type `t` and append it to `tokens`.
 *
 * Relies on these names being in scope at the expansion site: token,
 * tokens, tok, cur, eol, I and P. The token is built by psi_token_init()
 * from tok, `cur - tok`, `tok - eol + 1`, I->lines and I->file. When
 * P->flags has PSI_DEBUG set, "PSI< " is written to stderr and the token
 * is dumped with psi_token_dump(2, token).
 *
 * NOTE(review): the macro expands to several statements and the visible
 * lines show no do { } while (0) wrapper; every use in this file sits
 * inside a braced rule action, so confirm before using it elsewhere.
 */
244 #define NEWTOKEN(t) \
245 token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
246 tokens = psi_plist_add(tokens, &token); \
247 if (P->flags & PSI_DEBUG) { \
248 fprintf(stderr, "PSI< "); \
249 psi_token_dump(2, token); \
253 char s[SIZEOF_UINT32_T];
/*
 * Tokenize an input buffer with the re2c rules below.
 *
 * P - parser; supplies the debug flag used by NEWTOKEN and the error
 *     callback used for unexpected input.
 * I - input; I->buffer and I->length delimit the text to scan.
 *
 * Tokens are collected in a psi_plist whose elements are released with
 * psi_token_free. Besides the main rule set there are separate scanner
 * states for quoted characters, quoted strings, block comments, single
 * line comments and __attribute__(( ... )) groups (the `character`,
 * `string`, `comment`, `comment_sl` and `cpp_attribute` goto targets).
 * On unexpected input the offending token is reported as a PSI_WARNING
 * and the token list is freed.
 */
257 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
259 struct psi_plist *tokens;
260 struct psi_token *token;
261 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
/* all cursors start at the beginning of the buffer; lim is its end */
266 tok = mrk = eol = cur = I->buffer;
267 lim = I->buffer + I->length;
269 tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
279 re2c:define:YYCTYPE = "unsigned char";
280 re2c:define:YYCURSOR = cur;
281 re2c:define:YYLIMIT = lim;
282 re2c:define:YYMARKER = mrk;
283 re2c:define:YYCTXMARKER = ctxmrk;
284 re2c:define:YYFILL = "if (cur >= lim) goto done;";
285 re2c:yyfill:parameter = 0;
287 W = [a-zA-Z0-9_\x80-\xff];
290 NAME = [a-zA-Z_\x80-\xff] W*;
291 NSNAME = (NAME)? ("\\" NAME)+;
292 DOLLAR_NAME = '$' W+;
293 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
294 CPP_ATTRIBUTE = "__attribute__" SP* "((";
296 DEC_CONST = [1-9] [0-9]*;
297 OCT_CONST = "0" [0-7]*;
298 HEX_CONST = '0x' [0-9a-fA-F]+;
299 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
301 FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
302 FLT_DEC_NUM = "0" | DEC_CONST;
303 FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
304 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
306 [+-]? INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
307 [+-]? INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
308 [+-]? INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
309 [+-]? INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
310 [+-]? INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
312 [+-]? FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
313 [+-]? FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
314 [+-]? FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
315 [+-]? FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
316 [+-]? FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
317 [+-]? FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
319 "'" { escaped = false; tok += 1; goto character; }
320 "\"" { escaped = false; tok += 1; goto string; }
321 "u8" / "\"" { char_width = 1; }
322 "u" / ['"] { char_width = 2; }
323 "U" / ['"] { char_width = 4; }
324 "L" / ['"] { char_width = SIZEOF_WCHAR_T/8; }
326 "/*" { goto comment; }
327 "//" { goto comment_sl; }
329 "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
330 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
331 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
332 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
333 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
334 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
335 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
336 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
337 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
338 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
339 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
340 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
341 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
342 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
343 "||" { NEWTOKEN(PSI_T_OR); goto start; }
344 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
345 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
346 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
347 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
348 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
349 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
350 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
351 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
352 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
353 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
354 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
355 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
356 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
357 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
358 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
359 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
360 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
361 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
362 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
363 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
364 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
365 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
366 "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
367 "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
368 "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
369 "__asm__" { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
370 "line" { NEWTOKEN(PSI_T_LINE); goto start; }
371 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
372 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
373 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
374 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
375 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
376 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
377 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
378 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
379 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
380 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
381 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
382 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
383 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
384 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
385 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
386 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
387 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
388 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
389 'VOID' { NEWTOKEN(PSI_T_VOID); goto start; }
390 'BOOL' { NEWTOKEN(PSI_T_BOOL); goto start; }
391 'CHAR' { NEWTOKEN(PSI_T_CHAR); goto start; }
392 'SHORT' { NEWTOKEN(PSI_T_SHORT); goto start; }
393 'INT' { NEWTOKEN(PSI_T_INT); goto start; }
394 'LONG' { NEWTOKEN(PSI_T_LONG); goto start; }
395 'FLOAT' { NEWTOKEN(PSI_T_FLOAT); goto start; }
396 'DOUBLE' { NEWTOKEN(PSI_T_DOUBLE); goto start; }
397 'INT8_T' { NEWTOKEN(PSI_T_INT8); goto start; }
398 'UINT8_T' { NEWTOKEN(PSI_T_UINT8); goto start; }
399 'INT16_T' { NEWTOKEN(PSI_T_INT16); goto start; }
400 'UINT16_T' { NEWTOKEN(PSI_T_UINT16); goto start; }
401 'INT32_T' { NEWTOKEN(PSI_T_INT32); goto start; }
402 'UINT32_T' { NEWTOKEN(PSI_T_UINT32); goto start; }
403 'INT64_T' { NEWTOKEN(PSI_T_INT64); goto start; }
404 'UINT64_T' { NEWTOKEN(PSI_T_UINT64); goto start; }
405 'UNSIGNED' { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
406 'SIGNED' { NEWTOKEN(PSI_T_SIGNED); goto start; }
407 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
408 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
409 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
410 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
411 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
412 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
413 'TYPEDEF' { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
414 'STRUCT' { NEWTOKEN(PSI_T_STRUCT); goto start; }
415 'UNION' { NEWTOKEN(PSI_T_UNION); goto start; }
416 'ENUM' { NEWTOKEN(PSI_T_ENUM); goto start; }
417 'CONST' { NEWTOKEN(PSI_T_CONST); goto start; }
418 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
419 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
420 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
421 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
422 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
423 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
424 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
425 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
426 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
427 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
428 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
429 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
430 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
431 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
432 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
433 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
434 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
435 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
436 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
437 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
438 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
439 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
440 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
441 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
442 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
443 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
444 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
445 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
446 CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
447 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
448 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
449 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
450 [^] { NEWTOKEN(-2); goto error; }
451 * { NEWTOKEN(-1); goto error; }
458 EOL { NEWLINE(); goto character; }
459 "\\" { escaped = !escaped; }
466 NEWTOKEN(PSI_T_QUOTED_CHAR);
468 token->flags = char_width;
471 * { escaped = false; goto character; }
478 EOL { NEWLINE(); goto string; }
479 "\\" { escaped = !escaped; goto string; }
486 NEWTOKEN(PSI_T_QUOTED_STRING);
488 token->flags = char_width;
491 * { escaped = false; goto string; }
498 EOL { NEWLINE(); goto comment; }
499 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
507 EOL { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
508 * { goto comment_sl; }
516 "(" { ++parens; goto cpp_attribute; }
517 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
518 EOL { NEWLINE(); goto cpp_attribute; }
519 * { goto cpp_attribute; }
/* Unexpected input: report the token created by the failing rule, with its
 * column (tok - eol + 1), then drop every token collected so far.
 * NOTE(review): "%.*s" expects an int precision; confirm the type of
 * token->size matches. */
524 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
525 token->type, token->size, token->text, tok - eol + 1);
526 psi_plist_free(tokens);
/* Debug trace printing the cursor and the limit.
 * NOTE(review): cur and lim are const char * but are formatted with "%p";
 * confirm whether PSI_DEBUG_PRINT needs a cast to void *. */
531 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);