1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
26 #include "php_psi_stdinc.h"
/*
 * Prototypes of the "parser proc" that this file creates (psi_parser_init),
 * feeds with tokens (psi_parser_parse), optionally traces, and releases
 * (psi_parser_dtor). Only the declarations are given here.
 */
33 void *psi_parser_proc_init(void);
34 void psi_parser_proc_free(void **parser_proc);
35 void psi_parser_proc_parse(void *parser_proc, token_t r, struct psi_token *token, struct psi_parser *parser);
36 void psi_parser_proc_trace(FILE *out, char *prefix);
/*
 * Set up a parser instance.
 *
 * P     - parser to initialise. The malloc() below suggests that a missing
 *         P is replaced by a fresh allocation; the guarding condition is
 *         not visible here -- TODO confirm.
 * error - error callback, handed to psi_data_ctor_with_dtors().
 * flags - flag bits, handed to psi_data_ctor_with_dtors(). When PSI_DEBUG
 *         is set, parser proc tracing to stderr with the prefix "PSI> " is
 *         switched on as well.
 *
 * The instance is zero-filled, receives a parser proc, and its cpp.defs
 * symbol table is initialised and seeded with the entry "PHP_OS".
 */
38 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
41 P = malloc(sizeof(*P));
/* NOTE(review): no check of the malloc() result is visible before this memset() -- confirm. */
43 memset(P, 0, sizeof(*P));
45 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
49 P->proc = psi_parser_proc_init();
/* Table of preprocessor definitions, consulted by the cpp_*() helpers. */
51 ZEND_INIT_SYMTABLE(&P->cpp.defs);
/* Wrap the table in a temporary zval so that add_assoc_string() can be used on it. */
53 ZVAL_ARR(&tmp, &P->cpp.defs);
54 add_assoc_string(&tmp, "PHP_OS", PHP_OS);
56 if (flags & PSI_DEBUG) {
57 psi_parser_proc_trace(stderr, "PSI> ");
/*
 * Attach a file as the parser's input.
 *
 * P        - parser whose input is configured.
 * filename - path that is opened with fopen(..., "r").
 *
 * Failures to open, stat or map the file are reported through P->error()
 * as PSI_WARNING together with strerror(errno). The buffer is either an
 * mmap() of the whole file (read-only, shared) or a malloc()ed block of
 * BSIZE bytes; which of the two applies is decided on lines not visible
 * here -- presumably a build-time switch, TODO confirm. The file name is
 * duplicated into P->file.fn.
 */
63 bool psi_parser_open_file(struct psi_parser *P, const char *filename)
65 FILE *fp = fopen(filename, "r");
68 P->error(PSI_DATA(P), NULL, PSI_WARNING,
69 "Could not open '%s' for reading: %s",
70 filename, strerror(errno));
74 P->input.type = PSI_PARSE_FILE;
75 P->input.data.file.handle = fp;
82 P->error(PSI_DATA(P), NULL, PSI_WARNING,
83 "Could not stat '%s': %s",
84 filename, strerror(errno));
/* Map the complete file (sb.st_size bytes) for reading. */
88 P->input.data.file.buffer = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
89 if (MAP_FAILED == P->input.data.file.buffer) {
90 P->error(PSI_DATA(P), NULL, PSI_WARNING,
91 "Could not map '%s' for reading: %s",
92 filename, strerror(errno));
95 P->input.data.file.length = sb.st_size;
/* Alternative buffer: a fixed BSIZE block instead of a mapping. */
97 P->input.data.file.buffer = malloc(BSIZE);
/* NOTE(review): no check of the strdup() result is visible here. */
100 P->file.fn = strdup(filename);
/*
 * Attach an in-memory string as the parser's input.
 *
 * P      - parser whose input is configured.
 * string - start of the text to parse.
 * length - number of bytes of `string` to use.
 *
 * The text is copied with strndup(), so the caller's buffer is not
 * referenced afterwards; a failed copy is detected by the condition below
 * (its handling is not visible here). The input is labelled "<input>" in
 * P->file.fn.
 */
105 bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
107 P->input.type = PSI_PARSE_STRING;
108 P->input.data.string.length = length;
109 if (!(P->input.data.string.buffer = strndup(string, length))) {
113 P->file.fn = strdup("<input>");
/*
 * Make input available to the scanner.
 *
 * P - parser whose scan pointers (cur, tok, mrk, lim, eof) are maintained.
 * n - number of bytes requested by the scanner; in the visible code it is
 *     only used for the debug trace.
 *
 * Returns P->lim - P->cur, i.e. the number of bytes between cursor and
 * limit. The scanner's YYFILL treats a zero result as end of input.
 *
 * The first switch points cur/tok/mrk at the start of the file or string
 * buffer and derives eof (or lim) from it; the condition under which this
 * initialisation runs is not visible here. The second switch handles
 * refilling: already consumed bytes are dropped by moving the unconsumed
 * tail to the buffer start, then fread() tops the buffer up.
 *
 * NOTE(review): the refill code uses P->buf and P->fp, whereas the rest of
 * this file uses P->input.data.file.buffer / .handle -- confirm that these
 * members exist or that this code is compiled out.
 */
118 static ssize_t psi_parser_fill(struct psi_parser *P, size_t n)
120 PSI_DEBUG_PRINT(P, "PSI< Fill: n=%zu (input.type=%d)\n", n, P->input.type);
124 switch (P->input.type) {
126 P->cur = P->tok = P->mrk = P->input.data.file.buffer;
128 P->eof = P->input.data.file.buffer + P->input.data.file.length;
132 P->lim = P->input.data.file.buffer;
136 case PSI_PARSE_STRING:
137 P->cur = P->tok = P->mrk = P->input.data.string.buffer;
138 P->eof = P->input.data.string.buffer + P->input.data.string.length;
143 PSI_DEBUG_PRINT(P, "PSI< Fill: cur=%p lim=%p eof=%p\n", P->cur, P->lim, P->eof);
146 switch (P->input.type) {
147 case PSI_PARSE_STRING:
/* consumed: bytes before the current token; reserved: bytes from the token start up to the limit. */
153 size_t consumed = P->tok - P->buf;
154 size_t reserved = P->lim - P->tok;
155 size_t available = BSIZE - reserved;
/* Source and destination may overlap, hence memmove(). */
159 memmove(P->buf, P->tok, reserved);
166 didread = fread(P->lim, 1, available, P->fp);
/* A short read: end of file or a read error (handling not visible here). */
168 if (didread < available) {
171 PSI_DEBUG_PRINT(P, "PSI< Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n",
172 consumed, reserved, available, didread);
178 PSI_DEBUG_PRINT(P, "PSI< Fill: avail=%td\n", P->lim - P->cur);
180 return P->lim - P->cur;
/*
 * Hand a token to the parser proc.
 *
 * P - parser owning the parser proc.
 * T - token to process; its type and the token itself are passed on.
 *
 * The second call passes token type 0 and a NULL token. The condition that
 * selects between the two calls is not visible here -- presumably T being
 * NULL signals the end of input, TODO confirm.
 */
183 void psi_parser_parse(struct psi_parser *P, struct psi_token *T)
186 psi_parser_proc_parse(P->proc, T->type, T, P);
188 psi_parser_proc_parse(P->proc, 0, NULL, P);
/*
 * Release everything a parser instance owns, without freeing P itself.
 *
 * P - parser to tear down.
 *
 * Frees the parser proc, then the input: for file input the buffer is
 * either unmapped or free()d (the choice is made on lines not visible
 * here, mirroring psi_parser_open_file()) and the FILE handle is closed;
 * for string input the copied buffer is free()d. Afterwards the PSI data
 * part and the cpp.defs table are destroyed and the whole structure is
 * zero-filled, so stale pointers do not survive.
 */
192 void psi_parser_dtor(struct psi_parser *P)
194 psi_parser_proc_free(&P->proc);
196 switch (P->input.type) {
198 if (P->input.data.file.buffer) {
200 munmap(P->input.data.file.buffer, P->input.data.file.length);
202 free(P->input.data.file.buffer);
205 if (P->input.data.file.handle) {
206 fclose(P->input.data.file.handle);
210 case PSI_PARSE_STRING:
211 if (P->input.data.string.buffer) {
212 free(P->input.data.string.buffer);
217 psi_data_dtor(PSI_DATA(P));
219 zend_hash_destroy(&P->cpp.defs);
221 memset(P, 0, sizeof(*P));
/*
 * Dispose of a parser that is referenced through a pointer to pointer.
 *
 * NOTE(review): the body is not visible here; presumably it runs
 * psi_parser_dtor() on *P, frees it and resets *P -- confirm.
 */
224 void psi_parser_free(struct psi_parser **P)
/*
 * Forward declarations of the preprocessor helpers defined at the end of
 * this file. All of them operate on the text between P->tok and P->cur and
 * on the P->cpp.defs table; cpp_error() reports a warning for the current
 * file and line.
 */
233 static bool cpp_truth(struct psi_parser *P);
234 static bool cpp_defined(struct psi_parser *P);
235 static zval *cpp_define_var(struct psi_parser *P);
236 static void cpp_define_val(struct psi_parser *P, token_t typ, zval *val);
237 static void cpp_undefine(struct psi_parser *P);
238 static void cpp_error(struct psi_parser *P, const char *msg, ...);
/*
 * Scanner support macros.
 *
 * The #if rejects, at compile time, a BSIZE that is smaller than
 * YYMAXFILL. NOTE(review): BSIZE == YYMAXFILL passes the check although
 * the message says "greater than" -- confirm which one is intended.
 *
 * RETURN(t): token-return helper used by the scanner rules. The visible
 * part traces the token number P->num, the token text (P->tok up to
 * P->cur), whether it is PSI_T_EOF, and the file name, line and column.
 *
 * NEWLINE(label): takes a label name; its body is not visible here.
 */
241 #if BSIZE < YYMAXFILL
242 # error BSIZE must be greater than YYMAXFILL
245 #define RETURN(t) do { \
247 PSI_DEBUG_PRINT(P, "PSI< TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \
248 P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \
249 P->file.fn, P->line, P->col); \
253 #define NEWLINE(label) \
/*
 * Scan the next token from the parser's input.
 *
 * P - parser holding the scan state (cur, tok, mrk, lim, line, col).
 *
 * The scanner is an re2c specification. P->cur, P->lim and P->mrk act as
 * YYCURSOR, YYLIMIT and YYMARKER, input characters are unsigned char, and
 * YYFILL calls psi_parser_fill(); when that returns zero the scanner
 * returns PSI_T_EOF.
 *
 * Rules, in the order they are listed:
 *  - "#" steps the cursor back by one and continues at the preprocessor
 *    rules, so that those see the '#' again;
 *  - a block comment start continues at the comment rules, which count
 *    newlines via NEWLINE(comment) until the closing delimiter is found;
 *  - a line comment and a bare CR/LF go through NEWLINE(nextline);
 *  - punctuation and operators map to their PSI_T_* tokens;
 *  - keywords are written as single-quoted re2c strings, which re2c
 *    matches case-insensitively;
 *  - NUMBER, NAME, NSNAME, DOLLAR_NAME and QUOTED_STRING come last.
 *
 * QUOTED_STRING needs at least one character between the quotes and has
 * no escape for an embedded double quote.
 */
258 token_t psi_parser_scan(struct psi_parser *P)
261 psi_parser_fill(P, 0);
265 /* we might come from EOL, so just go to cpp instead of cpp_skip */
/* Advance the column by the length of the token scanned previously. */
269 P->col += P->cur - P->tok;
275 re2c:define:YYCTYPE = "unsigned char";
276 re2c:define:YYCURSOR = P->cur;
277 re2c:define:YYLIMIT = P->lim;
278 re2c:define:YYMARKER = P->mrk;
279 re2c:define:YYFILL = "{ if (!psi_parser_fill(P,@@)) RETURN(PSI_T_EOF); }";
280 re2c:yyfill:parameter = 0;
285 NSNAME = (NAME)? ("\\" NAME)+;
286 DOLLAR_NAME = '$' W+;
287 QUOTED_STRING = "\"" ([^\"])+ "\"";
288 NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;
290 "#" { --P->cur; goto cpp; }
291 "/*" { goto comment; }
292 "//" .* "\n" { NEWLINE(nextline); }
293 "(" {RETURN(PSI_T_LPAREN);}
294 ")" {RETURN(PSI_T_RPAREN);}
295 ";" {RETURN(PSI_T_EOS);}
296 "," {RETURN(PSI_T_COMMA);}
297 ":" {RETURN(PSI_T_COLON);}
298 "{" {RETURN(PSI_T_LBRACE);}
299 "}" {RETURN(PSI_T_RBRACE);}
300 "[" {RETURN(PSI_T_LBRACKET);}
301 "]" {RETURN(PSI_T_RBRACKET);}
302 "!=" {RETURN(PSI_T_CMP_NE);}
303 "==" {RETURN(PSI_T_CMP_EQ);}
304 "&&" {RETURN(PSI_T_AND);}
305 "||" {RETURN(PSI_T_OR);}
306 "=" {RETURN(PSI_T_EQUALS);}
307 "*" {RETURN(PSI_T_ASTERISK);}
308 "~" {RETURN(PSI_T_TILDE);}
309 "!" {RETURN(PSI_T_NOT);}
310 "%" {RETURN(PSI_T_MODULO);}
311 "&" {RETURN(PSI_T_AMPERSAND);}
312 "+" {RETURN(PSI_T_PLUS);}
313 "-" {RETURN(PSI_T_MINUS);}
314 "/" {RETURN(PSI_T_SLASH);}
315 "|" {RETURN(PSI_T_PIPE);}
316 "^" {RETURN(PSI_T_CARET);}
317 "<<" {RETURN(PSI_T_LSHIFT);}
318 ">>" {RETURN(PSI_T_RSHIFT);}
319 "<=" {RETURN(PSI_T_CMP_LE);}
320 ">=" {RETURN(PSI_T_CMP_GE);}
321 "<" {RETURN(PSI_T_LCHEVR);}
322 ">" {RETURN(PSI_T_RCHEVR);}
323 "..." {RETURN(PSI_T_ELLIPSIS);}
324 [\r\n] { NEWLINE(nextline); }
326 'TRUE' {RETURN(PSI_T_TRUE);}
327 'FALSE' {RETURN(PSI_T_FALSE);}
328 'NULL' {RETURN(PSI_T_NULL);}
329 'MIXED' {RETURN(PSI_T_MIXED);}
330 'CALLABLE' {RETURN(PSI_T_CALLABLE);}
331 'VOID' {RETURN(PSI_T_VOID);}
332 'BOOL' {RETURN(PSI_T_BOOL);}
333 'CHAR' {RETURN(PSI_T_CHAR);}
334 'SHORT' {RETURN(PSI_T_SHORT);}
335 'INT' {RETURN(PSI_T_INT);}
336 'LONG' {RETURN(PSI_T_LONG);}
337 'FLOAT' {RETURN(PSI_T_FLOAT);}
338 'DOUBLE' {RETURN(PSI_T_DOUBLE);}
339 'INT8_T' {RETURN(PSI_T_INT8);}
340 'UINT8_T' {RETURN(PSI_T_UINT8);}
341 'INT16_T' {RETURN(PSI_T_INT16);}
342 'UINT16_T' {RETURN(PSI_T_UINT16);}
343 'INT32_T' {RETURN(PSI_T_INT32);}
344 'UINT32_T' {RETURN(PSI_T_UINT32);}
345 'INT64_T' {RETURN(PSI_T_INT64);}
346 'UINT64_T' {RETURN(PSI_T_UINT64);}
347 'UNSIGNED' {RETURN(PSI_T_UNSIGNED);}
348 'SIGNED' {RETURN(PSI_T_SIGNED);}
349 'STRING' {RETURN(PSI_T_STRING);}
350 'ARRAY' {RETURN(PSI_T_ARRAY);}
351 'OBJECT' {RETURN(PSI_T_OBJECT);}
352 'CALLBACK' {RETURN(PSI_T_CALLBACK);}
353 'STATIC' {RETURN(PSI_T_STATIC);}
354 'FUNCTION' {RETURN(PSI_T_FUNCTION);}
355 'TYPEDEF' {RETURN(PSI_T_TYPEDEF);}
356 'STRUCT' {RETURN(PSI_T_STRUCT);}
357 'UNION' {RETURN(PSI_T_UNION);}
358 'ENUM' {RETURN(PSI_T_ENUM);}
359 'CONST' {RETURN(PSI_T_CONST);}
360 'LIB' {RETURN(PSI_T_LIB);}
361 'LET' {RETURN(PSI_T_LET);}
362 'SET' {RETURN(PSI_T_SET);}
363 'PRE_ASSERT' {RETURN(PSI_T_PRE_ASSERT);}
364 'POST_ASSERT' {RETURN(PSI_T_POST_ASSERT);}
365 'RETURN' {RETURN(PSI_T_RETURN);}
366 'FREE' {RETURN(PSI_T_FREE);}
367 'TEMP' {RETURN(PSI_T_TEMP);}
368 'STRLEN' {RETURN(PSI_T_STRLEN);}
369 'STRVAL' {RETURN(PSI_T_STRVAL);}
370 'PATHVAL' {RETURN(PSI_T_PATHVAL);}
371 'INTVAL' {RETURN(PSI_T_INTVAL);}
372 'FLOATVAL' {RETURN(PSI_T_FLOATVAL);}
373 'BOOLVAL' {RETURN(PSI_T_BOOLVAL);}
374 'ARRVAL' {RETURN(PSI_T_ARRVAL);}
375 'OBJVAL' {RETURN(PSI_T_OBJVAL);}
376 'ZVAL' {RETURN(PSI_T_ZVAL);}
377 'COUNT' {RETURN(PSI_T_COUNT);}
378 'CALLOC' {RETURN(PSI_T_CALLOC);}
379 'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);}
380 'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);}
381 'TO_STRING' {RETURN(PSI_T_TO_STRING);}
382 'TO_INT' {RETURN(PSI_T_TO_INT);}
383 'TO_FLOAT' {RETURN(PSI_T_TO_FLOAT);}
384 'TO_BOOL' {RETURN(PSI_T_TO_BOOL);}
385 NUMBER {RETURN(PSI_T_NUMBER);}
386 NAME {RETURN(PSI_T_NAME);}
387 NSNAME {RETURN(PSI_T_NSNAME);}
388 DOLLAR_NAME {RETURN(PSI_T_DOLLAR_NAME);}
389 QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);}
396 "\n" { NEWLINE(comment); }
397 "*" "/" { continue; }
398 [^] { goto comment; }
/*
 * Preprocessor part of psi_parser_scan().
 *
 * PSI_DEBUG_CPP(P, msg, ...): when PSI_DEBUG is set in the parser's flags,
 * prints the directive at P->tok (up to the end of its line) together with
 * P->line, P->cpp.level and P->cpp.skip to stderr, followed by the
 * printf-style message supplied by the caller.
 *
 * The first rule set dispatches "#if", "#ifdef", "#ifndef", "#else",
 * "#endif", "#define", "#undef" and "#error" (blanks and tabs are allowed
 * after the '#') to their handler labels; any other input continues at
 * cpp_default.
 *
 * A condition that does not hold stores the current nesting level
 * P->cpp.level in P->cpp.skip; "#else" and "#endif" compare P->cpp.skip
 * with P->cpp.level to find out whether they belong to the conditional
 * that started the skipping.
 *
 * Malformed directives are reported through cpp_error(); each message
 * names the directive that is actually being handled.
 */
401 #define PSI_DEBUG_CPP(P, msg, ...) do { \
402 if (PSI_DATA(P)->flags & PSI_DEBUG) { \
403 fprintf(stderr, "PSI> CPP %.*s line=%u level=%u skip=%u ", \
404 (int) strcspn(P->tok, "\r\n"), P->tok, \
405 P->line, P->cpp.level, P->cpp.skip); \
406 fprintf(stderr, msg, __VA_ARGS__); \
414 "#" [\t ]* "if" { goto cpp_if; }
415 "#" [\t ]* "ifdef" { goto cpp_ifdef; }
416 "#" [\t ]* "ifndef" { goto cpp_ifndef; }
417 "#" [\t ]* "else" { goto cpp_else; }
418 "#" [\t ]* "endif" { goto cpp_endif; }
419 "#" [\t ]* "define" { goto cpp_define; }
420 "#" [\t ]* "undef" { goto cpp_undef; }
421 "#" [\t ]* "error" { goto cpp_error; }
422 [^] { goto cpp_default; }
436 [\r\n] { goto cpp_skip_eol; }
437 [^] { goto cpp_skip; }
441 PSI_DEBUG_PRINT(P, "PSI> CPP skip line %u\n", P->line);
445 PSI_DEBUG_CPP(P, "%s\n", "");
457 [\t ]+ { goto cpp_if_cont; }
458 "!" [\t ]* "defined" [\t ]* "("? { goto cpp_ifndef_cont; }
459 "defined" [ \t]* "("? { goto cpp_ifdef_cont; }
460 NAME [\t ]* [\r\n] { goto cpp_if_name_eol; }
461 [^] { goto cpp_if_default; }
465 cpp_error(P, "PSI syntax error: invalid #if");
/* Condition not met: remember the level at which skipping started. */
472 P->cpp.skip = P->cpp.level;
477 PSI_DEBUG_CPP(P, "%s\n", "");
489 [\t ]+ { goto cpp_ifdef_cont; }
490 NAME [\t ]* ")"? [\t ]* [\r\n] { goto cpp_ifdef_name_eol; }
491 [^] { goto cpp_ifdef_default; }
495 cpp_error(P, "PSI syntax error: invalid #ifdef");
498 cpp_ifdef_name_eol: ;
499 if (cpp_defined(P)) {
502 P->cpp.skip = P->cpp.level;
507 PSI_DEBUG_CPP(P, "%s\n", "");
519 [\t ]+ { goto cpp_ifndef_cont; }
520 NAME [\t ]* ")"? [\t ]* [\r\n] { goto cpp_ifndef_name_eol; }
521 [^] { goto cpp_ifndef_default; }
524 cpp_ifndef_default: ;
525 cpp_error(P, "PSI syntax error: invalid #ifndef");
528 cpp_ifndef_name_eol: ;
529 if (!cpp_defined(P)) {
532 P->cpp.skip = P->cpp.level;
537 PSI_DEBUG_CPP(P, "%s\n", "");
542 cpp_error(P, "PSI syntax error: ignoring lone #else");
546 P->cpp.skip = P->cpp.level;
548 } else if (P->cpp.skip == P->cpp.level) {
554 PSI_DEBUG_CPP(P, "%s\n", "");
/* Same "PSI syntax error:" prefix as every other diagnostic in this function. */
558 cpp_error(P, "PSI syntax error: ignoring lone #endif");
560 } else if (P->cpp.skip == P->cpp.level) {
567 PSI_DEBUG_CPP(P, "%s\n", "");
579 [\t ]+ { goto cpp_define_cont; }
580 [\r\n] { goto cpp_define_eol; }
581 NAME { goto cpp_define_name; }
582 QUOTED_STRING { goto cpp_define_quoted_string; }
583 NUMBER { goto cpp_define_number; }
584 [^] { goto cpp_define_default; }
587 cpp_define_default: ;
/* Fallback of the #define rules, so the message names #define. */
588 cpp_error(P, "PSI syntax error: invalid #define");
593 cpp_error(P, "PSI syntax error: ignoring lone #define");
/* A value may only be assigned while the entry still holds IS_TRUE. */
600 if (Z_TYPE_P(val) != IS_TRUE) {
601 cpp_error(P, "PSI syntax error: invalid #define");
604 cpp_define_val(P, PSI_T_NAME, val);
606 val = cpp_define_var(P);
608 goto cpp_define_cont;
610 cpp_define_quoted_string: ;
612 cpp_error(P, "PSI syntax error: invalid quoted string in #define");
615 cpp_define_val(P, PSI_T_QUOTED_STRING, val);
617 goto cpp_define_cont;
/* Number operand of #define: the diagnostic names a number, not a quoted string. */
621 cpp_error(P, "PSI syntax error: invalid number in #define");
624 cpp_define_val(P, PSI_T_NUMBER, val);
626 goto cpp_define_cont;
629 PSI_DEBUG_CPP(P, "%s\n", "");
639 [\t ]+ { goto cpp_undef_cont; }
640 NAME [\t ]* [\r\n] { goto cpp_undef_name_eol; }
641 [^] { goto cpp_undef_default; }
645 cpp_error(P, "PSI syntax error: invalid #undef");
648 cpp_undef_name_eol: ;
/* #error: the rest of the line is the message; scanning resumes at the line end. */
653 size_t len = strcspn(P->cur, "\r\n");
656 P->tok = P->cur + len;
659 cpp_error(P, "%.*s", (int) len, P->cur);
/*
 * Evaluate the name between P->tok and P->cur as a preprocessor condition.
 *
 * The length of the name is P->cur - P->tok, reduced while its last
 * character is whitespace. The name is looked up in P->cpp.defs: an
 * unknown name is false, a known one is judged with zend_is_true(). The
 * result is traced when debugging; the return statement is not visible
 * here -- presumably `truth` is returned.
 */
668 static bool cpp_truth(struct psi_parser *P)
670 size_t len = P->cur - P->tok;
672 while (len && isspace(P->tok[len - 1])) {
676 zval *val = zend_symtable_str_find(&P->cpp.defs, P->tok, len);
677 bool truth = val ? zend_is_true(val) : false;
679 PSI_DEBUG_PRINT(P, "PSI> CPP truth(%.*s)=%s\n",
680 (int) len, P->tok, truth ? "true" : "false");
/*
 * Check whether the name between P->tok and P->cur has an entry in
 * P->cpp.defs.
 *
 * Trailing whitespace is excluded from the name before the lookup. The
 * result is traced when debugging; the return statement is not visible
 * here -- presumably `defined` is returned.
 */
685 static bool cpp_defined(struct psi_parser *P)
687 size_t len = P->cur - P->tok;
689 while (len && isspace(P->tok[len - 1])) {
694 bool defined = zend_symtable_str_exists(&P->cpp.defs, P->tok, len);
695 PSI_DEBUG_PRINT(P, "PSI> CPP defined(%.*s)=%s\n",
696 (int) len, P->tok, defined ? "true" : "false");
/*
 * Create (or overwrite) the definition named by the text between P->tok
 * and P->cur and return the zval stored in P->cpp.defs.
 *
 * Trailing whitespace is excluded from the name. An existing entry is
 * reported as "Redefinition of <name>" and then updated. The initial value
 * `val` is prepared on lines not visible here.
 *
 * NOTE(review): the first check warns "Unexpected end of input" when the
 * name is already defined, which does not describe that situation and
 * duplicates the redefinition check further down -- confirm the intent.
 */
700 static zval *cpp_define_var(struct psi_parser *P)
702 if (cpp_defined(P)) {
703 psi_error(PSI_WARNING, P->file.fn, P->line, "PSI syntax error: Unexpected end of input");
705 size_t len = P->cur - P->tok;
707 while (len && isspace(P->tok[len - 1])) {
711 PSI_DEBUG_PRINT(P, "PSI> CPP define %.*s\n", (int) len, P->tok);
713 if (zend_symtable_str_exists(&P->cpp.defs, P->tok, len)) {
714 cpp_error(P, "Redefinition of %.*s", (int) len, P->tok);
719 return zend_symtable_str_update(&P->cpp.defs, P->tok, len, &val);
/*
 * Store the text between P->tok and P->cur as the value of a definition.
 *
 * P   - parser providing the token text.
 * typ - token type of the value; selects how the text is converted.
 * val - zval that receives the value.
 *
 * Trailing whitespace is excluded first. A quoted string is stored without
 * its first and last character (the quotes). The other visible branch
 * stores the text as a string and then converts it with
 * convert_scalar_to_number(); the case labels selecting it are not visible
 * here.
 */
722 static void cpp_define_val(struct psi_parser *P, token_t typ, zval *val) {
723 size_t len = P->cur - P->tok;
725 while (len && isspace(P->tok[len - 1])) {
729 PSI_DEBUG_PRINT(P, "PSI> define = %.*s\n", (int) len, P->tok);
732 case PSI_T_QUOTED_STRING:
733 ZVAL_STRINGL(val, P->tok + 1, len - 2);
736 ZVAL_STRINGL(val, P->tok, len);
737 convert_scalar_to_number(val);
/*
 * Remove the definition named by the text between P->tok and P->cur from
 * P->cpp.defs. Trailing whitespace is excluded from the name before the
 * entry is deleted.
 */
744 static void cpp_undefine(struct psi_parser *P)
746 size_t len = P->cur - P->tok;
748 while (len && isspace(P->tok[len - 1])) {
752 zend_symtable_str_del(&P->cpp.defs, P->tok, len);
/*
 * Report a preprocessor problem as a PSI_WARNING for the current file
 * (P->file.fn) and line (P->line).
 *
 * msg - printf-style format string; the variable arguments are forwarded
 *       to psi_verror() through `argv`, which is set up on lines not
 *       visible here.
 */
755 static void cpp_error(struct psi_parser *P, const char *msg, ...)
760 psi_verror(PSI_WARNING, P->file.fn, P->line, msg, argv);