1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
26 #include "php_psi_stdinc.h"
/* Number of zero bytes the input loaders in this file append behind the buffer contents. */
35 # define YYMAXFILL 256
/*
 * Initialise a parser instance.
 *
 * Zeroes *P, constructs the embedded psi_data with the given error callback
 * and flags, then creates the preprocessor and loads its defaults.
 *
 * NOTE(review): this listing shows P being overwritten by malloc() and then
 * handed straight to memset(). The guard around the allocation and any NULL
 * check are not visible here -- confirm that a failed allocation cannot reach
 * the memset() below. The return statement is not visible either; presumably
 * P is returned.
 */
38 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
41 P = malloc(sizeof(*P));
43 memset(P, 0, sizeof(*P));
45 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
/* the preprocessor created here is released again by psi_parser_dtor() */
47 P->preproc = psi_cpp_init(P);
49 psi_cpp_load_defaults(P->preproc);
/*
 * Load a file into a newly allocated parser input.
 *
 * One allocation holds the input header, st_size bytes of file content,
 * YYMAXFILL zero bytes of padding and, behind the padding, a copy of the
 * file name (fb->file points at buffer[st_size + YYMAXFILL]).
 *
 * A failing stat(), malloc(), fopen() or fread() has a PSI_WARNING report
 * through P->error().
 *
 * NOTE(review): report_errors presumably gates those reports; the guards,
 * the cleanup statements and the return statements are not visible in this
 * listing -- confirm every failure path releases fb and fp, and that fp is
 * closed on the success path as well (no fclose() is visible there).
 */
54 struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors)
58 struct psi_parser_input *fb;
/* sb receives the file metadata; only st_size is used below */
60 if (stat(filename, &sb)) {
62 P->error(PSI_DATA(P), NULL, PSI_WARNING,
63 "Could not stat '%s': %s",
64 filename, strerror(errno));
/* header + file name with its NUL + file content + padding */
69 if (!(fb = malloc(sizeof(*fb) + strlen(filename) + 1 + sb.st_size + YYMAXFILL))) {
/*
 * NOTE(review): st_size is an off_t in POSIX, so the argument below may not
 * have the size_t type that %zu requires; the reported amount also leaves
 * out the header and file name bytes that are part of the allocation.
 */
71 P->error(PSI_DATA(P), NULL, PSI_WARNING,
72 "Could not allocate %zu bytes for reading '%s': %s",
73 sb.st_size + YYMAXFILL, filename, strerror(errno));
/*
 * NOTE(review): the file is opened in text mode; on platforms that translate
 * line endings fread() can then deliver fewer bytes than st_size -- confirm.
 */
78 if (!(fp = fopen(filename, "r"))) {
81 P->error(PSI_DATA(P), NULL, PSI_WARNING,
82 "Could not open '%s' for reading: %s",
83 filename, strerror(errno));
/*
 * NOTE(review): compares the signed st_size with the size_t result of
 * fread(). The message below reports st_size + YYMAXFILL although only
 * st_size bytes were requested, has the same %zu concern as above, and
 * errno is not necessarily set by a short read.
 */
88 if (sb.st_size != fread(fb->buffer, 1, sb.st_size, fp)) {
92 P->error(PSI_DATA(P), NULL, PSI_WARNING,
93 "Could not read %zu bytes from '%s': %s",
94 sb.st_size + YYMAXFILL, filename, strerror(errno));
/* zero the padding that follows the content */
99 memset(fb->buffer + sb.st_size, 0, YYMAXFILL);
100 fb->length = sb.st_size;
/* the file name lives in the same allocation, right behind the padding */
101 fb->file = &fb->buffer[sb.st_size + YYMAXFILL];
102 memcpy(fb->file, filename, strlen(filename) + 1);
/*
 * Wrap an in-memory string in a newly allocated parser input.
 *
 * Copies `length` bytes of `string`, zero-fills YYMAXFILL bytes behind them
 * and stores the fixed name "<stdin>" after the padding. An allocation
 * failure has a PSI_WARNING report through P->error().
 *
 * Note that the local named sb is the new input here, not file metadata.
 *
 * NOTE(review): the assignment of the input length and the return statements
 * are not visible in this listing.
 */
107 struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
109 struct psi_parser_input *sb;
/* header + name (sizeof counts the terminating NUL) + content + padding */
111 if (!(sb = malloc(sizeof(*sb) + sizeof("<stdin>") + length + YYMAXFILL))) {
112 P->error(PSI_DATA(P), NULL, PSI_WARNING,
113 "Could not allocate %zu bytes: %s",
114 length + YYMAXFILL, strerror(errno));
118 memcpy(sb->buffer, string, length);
/* zero the padding that follows the content */
119 memset(sb->buffer + length, 0, YYMAXFILL);
/* the name lives in the same allocation, right behind the padding */
122 sb->file = &sb->buffer[length + YYMAXFILL];
123 memcpy(sb->file, "<stdin>", sizeof("<stdin>"));
/*
 * Turn preprocessor definitions into PSI constants.
 *
 * Walks the P->cpp.defs hash table. For every entry the zval type selects
 * how the value is copied into a temporary; from that a psi_impl_def_val, a
 * psi_const_type and finally a psi_const named after the hash key are built,
 * and the constant is appended to P->consts.
 *
 * NOTE(review): the other functions in this file reach the preprocessor
 * through the pointer P->preproc, while this one uses P->cpp.defs -- confirm
 * that member exists and that this function is still compiled and used.
 * NOTE(review): the case labels of the switch and the declarations of tmp,
 * ctt, ctn, c, str, key and val are not visible in this listing.
 */
129 static void psi_parser_register_constants(struct psi_parser *P)
134 ZEND_HASH_FOREACH_STR_KEY_VAL(&P->cpp.defs, key, val)
136 struct psi_impl_def_val *iv;
137 struct psi_const_type *ct;
145 switch (Z_TYPE_P(val)) {
/* reduce the zval type to a boolean: set only when the type is IS_TRUE */
150 tmp.zend.bval = Z_TYPE_P(val) == IS_TRUE;
155 tmp.zend.lval = Z_LVAL_P(val);
160 tmp.dval = Z_DVAL_P(val);
/*
 * take the string form of the value, keep a duplicate (the second argument 1
 * requests a persistent copy in the Zend API) and drop the temporary
 */
165 str = zval_get_string(val);
166 tmp.zend.str = zend_string_dup(str, 1);
167 zend_string_release(str);
171 iv = psi_impl_def_val_init(ctt, NULL);
173 ct = psi_const_type_init(ctt, ctn);
174 c = psi_const_init(ct, key->val, iv);
/* NOTE(review): presumably guarded so the list is only created once; the guard is not visible here */
176 P->consts = psi_plist_init((psi_plist_dtor) psi_const_free);
/* the list pointer is re-assigned from the result of psi_plist_add() */
178 P->consts = psi_plist_add(P->consts, &c);
180 ZEND_HASH_FOREACH_END();
/*
 * Run the preprocessor over the token list referenced by tokens.
 *
 * NOTE(review): only the psi_cpp_process() condition is visible in this
 * listing; which list is returned on success and what is returned on failure
 * is not shown. psi_parser_parse() treats a false result as failure and
 * frees the scanned list itself in that case.
 */
184 struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens)
186 if (psi_cpp_process(P->preproc, tokens)) {
/*
 * Feed a token list to the grammar parser.
 *
 * For a non-empty list the result is true exactly when
 * psi_parser_proc_parse() returns 0.
 *
 * NOTE(review): the result for an empty list is not visible in this listing.
 */
192 bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed)
194 if (psi_plist_count(tokens)) {
195 return 0 == psi_parser_proc_parse(P, tokens, processed);
/*
 * Scan, preprocess and parse one input.
 *
 * The three stages run in order; a false result of any stage ends the
 * function early.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably false on every early exit and true at the end.
 */
200 bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
202 struct psi_plist *scanned, *preproc;
203 size_t processed = 0;
205 if (!(scanned = psi_parser_scan(P, I))) {
/* preprocessing failed: the scanned tokens are released here */
209 if (!(preproc = psi_parser_preprocess(P, &scanned))) {
210 psi_plist_free(scanned);
/* the preprocessed list is released on the failure path and on the success path */
214 if (!psi_parser_process(P, preproc, &processed)) {
215 psi_plist_free(preproc);
219 psi_plist_free(preproc);
/*
 * Tear down a parser in place: release the preprocessor, destruct the
 * embedded psi_data and wipe the structure.
 *
 * No free() of P itself is visible in this listing.
 */
223 void psi_parser_dtor(struct psi_parser *P)
225 psi_cpp_free(&P->preproc);
226 psi_data_dtor(PSI_DATA(P));
/* leave no stale pointers behind */
228 memset(P, 0, sizeof(*P));
/*
 * Takes the address of the caller's parser pointer.
 *
 * NOTE(review): the body is not visible in this listing; presumably it runs
 * psi_parser_dtor(), frees *P and clears the caller's pointer -- confirm.
 */
231 void psi_parser_free(struct psi_parser **P)
/*
 * Create a token of type t and append it to the token list.
 *
 * The token text starts at tok and is cur - tok bytes long; its column is
 * tok - eol + 1 and its line and file come from the input I. When the
 * PSI_DEBUG flag is set on the parser, the token is also dumped to file
 * descriptor 2 behind a "PSI< " prefix.
 *
 * The macro relies on the locals token, tokens, tok, cur, eol, I and P of
 * the function it is expanded in.
 *
 * NOTE(review): as far as visible this expands to several statements without
 * a do/while(0) wrapper, so it must only be used inside a braced block; the
 * end of the macro is not visible in this listing.
 */
244 #define NEWTOKEN(t) \
245 token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
246 tokens = psi_plist_add(tokens, &token); \
247 if (P->flags & PSI_DEBUG) { \
248 fprintf(stderr, "PSI< "); \
249 psi_token_dump(2, token); \
/*
 * Character view of SIZEOF_UINT32_T bytes.
 * NOTE(review): the enclosing declaration is not visible in this listing.
 */
253 char s[SIZEOF_UINT32_T];
/*
 * Tokenise the input I into a list of psi_token.
 *
 * Cursor variables: tok is the start of the current token, cur the scan
 * position, lim the end of the content (buffer + length), eol the origin
 * used for column numbers (see NEWTOKEN), mrk and ctxmrk the backtracking
 * and trailing-context markers handed to re2c.
 *
 * On a syntax error the problem is reported through P->error() and the
 * token list built so far is freed.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
257 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
259 struct psi_plist *tokens;
260 struct psi_token *token;
261 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
264 tok = mrk = eol = cur = I->buffer;
265 lim = I->buffer + I->length;
/* psi_token_free is registered as the element destructor of the list */
267 tokens = psi_plist_init((void (*)(void *)) psi_token_free);
/* re2c configuration: the generated scanner works directly on the locals above */
276 re2c:define:YYCTYPE = "unsigned char";
277 re2c:define:YYCURSOR = cur;
278 re2c:define:YYLIMIT = lim;
279 re2c:define:YYMARKER = mrk;
280 re2c:define:YYCTXMARKER = ctxmrk;
/*
 * No refilling: once the cursor has reached lim, scanning ends at the done
 * label. NOTE(review): presumably this relies on the YYMAXFILL zero bytes
 * that the input loaders append behind the content -- confirm.
 */
281 re2c:define:YYFILL = "if (cur >= lim) goto done;";
282 re2c:yyfill:parameter = 0;
/*
 * Named re2c definitions used by the rules of the scanner.
 *
 * W is the word-character class (ASCII letters, digits, underscore and all
 * bytes from 0x80 upwards); NAME is a word that does not start with a digit;
 * NSNAME is a backslash-separated name with an optional leading component.
 * The *_CONST definitions describe integer and floating point literals
 * without their type suffix.
 *
 * NOTE(review): QUOTED_STRING needs at least one character between the
 * double quotes, so an empty string literal does not match, and it has no
 * escape handling: the first double quote ends the string even when it
 * follows a backslash -- confirm both are intended.
 * NOTE(review): CPP_ATTRIBUTE and CPP_PRAGMA_ONCE use the word class W
 * between their fixed parts; if optional or required whitespace was meant
 * there, the whitespace class should be used instead -- confirm.
 * NOTE(review): INT_SUFFIX, INT_NUMBER, FLT_SUFFIX, FLT_NUMBER and
 * CPP_PRAGMA_ONCE are not referenced by any rule visible in this listing.
 */
284 W = [a-zA-Z0-9_\x80-\xff];
287 NAME = [a-zA-Z_\x80-\xff]W*;
288 NSNAME = (NAME)? ("\\" NAME)+;
289 DOLLAR_NAME = '$' W+;
290 QUOTED_STRING = "L"? "\"" ([^"])+ "\"";
291 QUOTED_CHAR = "L"? "'" ([^']+ "\\'"?)+ "'";
292 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
293 CPP_ATTRIBUTE = "__attribute__" W* "((";
294 CPP_PRAGMA_ONCE = "pragma" W+ "once";
296 DEC_CONST = [1-9] [0-9]*;
297 OCT_CONST = "0" [0-7]*;
298 HEX_CONST = '0x' [0-9a-fA-F]+;
299 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
300 INT_SUFFIX = 'u'('l' 'l'? )? | 'l'('l'? 'u')?;
301 INT_NUMBER = (DEC_CONST | OCT_CONST | HEX_CONST) INT_SUFFIX?;
303 FLT_HEX_FRAC = [0-9a-fA-F]*;
304 FLT_HEX_SIG = HEX_CONST ("." FLT_HEX_FRAC)?;
305 FLT_HEX_EXPO = 'p' [+-]? [0-9]+;
306 FLT_HEX_CONST = FLT_HEX_SIG FLT_HEX_EXPO;
307 FLT_DEC_NUM = "0" | DEC_CONST;
308 FLT_DEC_FRAC = [0-9]*;
309 FLT_DEC_SIG = FLT_DEC_NUM ("." FLT_DEC_FRAC)?;
310 FLT_DEC_EXPO = 'e' [+-]? [0-9]+;
311 FLT_DEC_CONST = (FLT_DEC_SIG FLT_DEC_EXPO) | (FLT_DEC_NUM "." FLT_DEC_FRAC) | ("." [0-9]+);
312 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
313 FLT_SUFFIX = 'f' | 'l' | ('d' ('f' | 'd' | 'l'));
314 FLT_NUMBER = (FLT_DEC_CONST | FLT_HEX_CONST) FLT_SUFFIX?;
/*
 * Numeric literals, with an optional leading sign as part of the token.
 *
 * The type suffix is matched as trailing context, so the token text holds
 * only the digits; the suffix is recorded in token->flags and then skipped
 * by advancing cur past it after the token has been created. Suffix letters
 * are single-quoted and therefore match in either case.
 *
 * NOTE(review): there is no integer rule for a plain double-l suffix; such
 * input matches the single-l rule and leaves the second letter to be scanned
 * as the start of the next token -- confirm whether that is intended.
 */
316 [+-]? INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
317 [+-]? INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
318 [+-]? INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
319 [+-]? INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
320 [+-]? INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
322 [+-]? FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
323 [+-]? FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
324 [+-]? FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
325 [+-]? FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
326 [+-]? FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
327 [+-]? FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
/*
 * Comment openers, punctuation and operators.
 *
 * The two comment openers emit no token here; they jump to the sub-states
 * that consume the comment body. Every other rule emits one token for the
 * matched text and returns to the start state. Where one operator is a
 * prefix of another, the longest match of re2c selects the longer one.
 */
329 "/*" { goto comment; }
330 "//" { goto comment_sl; }
331 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
332 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
333 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
334 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
335 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
336 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
337 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
338 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
339 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
340 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
341 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
342 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
343 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
344 "||" { NEWTOKEN(PSI_T_OR); goto start; }
345 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
346 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
347 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
348 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
349 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
350 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
351 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
352 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
353 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
354 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
355 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
356 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
357 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
358 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
359 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
360 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
361 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
362 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
363 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
364 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
365 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
/*
 * Preprocessor keywords.
 *
 * The two double-quoted words match in lower case only; the single-quoted
 * ones are case-insensitive in re2c. These rules precede the generic NAME
 * rule, so on a match of equal length the keyword wins, while a longer
 * identifier that merely starts with a keyword is still scanned as a name.
 */
366 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
367 "once" { NEWTOKEN(PSI_T_ONCE); goto start; }
368 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
369 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
370 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
371 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
372 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
373 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
374 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
375 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
376 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
377 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
378 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
379 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
380 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
/*
 * PSI language keywords: literals, type names, declaration keywords,
 * statement keywords and the built-in let/set function names.
 *
 * All of them are single-quoted and therefore case-insensitive in re2c.
 * Each rule emits one token of the matching type and returns to the start
 * state. They precede the generic NAME rule, so a keyword wins over NAME on
 * a match of equal length.
 */
381 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
382 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
383 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
384 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
385 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
386 'VOID' { NEWTOKEN(PSI_T_VOID); goto start; }
387 'BOOL' { NEWTOKEN(PSI_T_BOOL); goto start; }
388 'CHAR' { NEWTOKEN(PSI_T_CHAR); goto start; }
389 'SHORT' { NEWTOKEN(PSI_T_SHORT); goto start; }
390 'INT' { NEWTOKEN(PSI_T_INT); goto start; }
391 'LONG' { NEWTOKEN(PSI_T_LONG); goto start; }
392 'FLOAT' { NEWTOKEN(PSI_T_FLOAT); goto start; }
393 'DOUBLE' { NEWTOKEN(PSI_T_DOUBLE); goto start; }
394 'INT8_T' { NEWTOKEN(PSI_T_INT8); goto start; }
395 'UINT8_T' { NEWTOKEN(PSI_T_UINT8); goto start; }
396 'INT16_T' { NEWTOKEN(PSI_T_INT16); goto start; }
397 'UINT16_T' { NEWTOKEN(PSI_T_UINT16); goto start; }
398 'INT32_T' { NEWTOKEN(PSI_T_INT32); goto start; }
399 'UINT32_T' { NEWTOKEN(PSI_T_UINT32); goto start; }
400 'INT64_T' { NEWTOKEN(PSI_T_INT64); goto start; }
401 'UINT64_T' { NEWTOKEN(PSI_T_UINT64); goto start; }
402 'UNSIGNED' { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
403 'SIGNED' { NEWTOKEN(PSI_T_SIGNED); goto start; }
404 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
405 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
406 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
407 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
408 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
409 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
410 'TYPEDEF' { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
411 'STRUCT' { NEWTOKEN(PSI_T_STRUCT); goto start; }
412 'UNION' { NEWTOKEN(PSI_T_UNION); goto start; }
413 'ENUM' { NEWTOKEN(PSI_T_ENUM); goto start; }
414 'CONST' { NEWTOKEN(PSI_T_CONST); goto start; }
415 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
416 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
417 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
418 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
419 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
420 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
421 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
422 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
423 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
424 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
425 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
426 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
427 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
428 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
429 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
430 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
431 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
432 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
433 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
434 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
435 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
436 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
437 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
438 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
439 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
/*
 * Generic tokens and fallbacks of the start state.
 *
 * Names, quoted literals and header names each become one token. The
 * attribute opener emits nothing yet: it sets the parenthesis depth to 2
 * (both opening parentheses are part of the match) and enters the sub-state
 * that collects the whole attribute. An end of line emits a token and then
 * runs NEWLINE(); a run of whitespace becomes a single token.
 *
 * The last two rules are the error paths: any other single character yields
 * a token of type -2, the default rule one of type -1, and both jump to the
 * error label where the token is reported.
 */
440 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
441 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
442 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
443 QUOTED_STRING { NEWTOKEN(PSI_T_QUOTED_STRING); goto start; }
444 QUOTED_CHAR { NEWTOKEN(PSI_T_QUOTED_CHAR); goto start; }
445 CPP_HEADER { NEWTOKEN(PSI_T_CPP_HEADER); goto start; }
446 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
447 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
448 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
449 [^] { NEWTOKEN(-2); goto error; }
450 * { NEWTOKEN(-1); goto error; }
/*
 * Sub-states for block comments, single-line comments and attributes.
 * NOTE(review): the labels and re2c block delimiters that separate the three
 * states are not visible in this listing; the grouping below follows the
 * goto targets.
 *
 * Block comment: line ends only run NEWLINE(); the closing delimiter emits
 * one PSI_T_COMMENT token. The token starts at tok, which presumably still
 * points at the comment opener -- confirm.
 *
 * Single-line comment: the end of line emits the PSI_T_COMMENT token and
 * then runs NEWLINE(); everything else is skipped.
 *
 * Attribute: parens tracks the nesting depth and starts at 2. An opening
 * parenthesis increments it; a closing one decrements it unless the depth is
 * 1, in which case the PSI_T_CPP_ATTRIBUTE token is emitted. Line ends run
 * NEWLINE(); everything else is skipped.
 */
457 EOL { NEWLINE(); goto comment; }
458 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
466 EOL { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
467 * { goto comment_sl; }
475 "(" { ++parens; goto cpp_attribute; }
476 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
477 EOL { NEWLINE(); goto cpp_attribute; }
478 * { goto cpp_attribute; }
/*
 * Error path: report the offending token as a PSI_WARNING, then discard
 * every token scanned so far. The reported token was appended to the list by
 * NEWTOKEN, so it has to be reported before the list is freed.
 *
 * NOTE(review): the precision of %.*s must be an int -- confirm the type of
 * token->size; %tu is paired with the signed pointer difference
 * tok - eol + 1.
 * NOTE(review): the labels and return statements of the error and
 * end-of-input paths are not visible in this listing.
 */
483 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
484 token->type, token->size, token->text, tok - eol + 1);
485 psi_plist_free(tokens);
/* end of input, presumably under the done label: only a debug trace is emitted here */
490 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);