1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
26 #include "php_psi_stdinc.h"
/* Number of zero bytes appended after the payload of every input buffer
 * built in this file (see the memset() calls in psi_parser_open_file() and
 * psi_parser_open_string()).
 * NOTE(review): presumably a fallback for the value re2c would generate;
 * the surrounding preprocessor guard is not visible here -- confirm. */
35 # define YYMAXFILL 256
/*
 * Set up a parser instance.
 *
 * The instance is zeroed, its psi_data part is constructed with the given
 * error callback and flags, and a preprocessor is created for it and loaded
 * with its defaults.
 *
 * P:     parser storage to initialize.
 *        NOTE(review): the malloc() below is presumably only reached when
 *        P is NULL; the guard is not visible in this view -- confirm.
 * error: error callback handed to psi_data_ctor_with_dtors().
 * flags: flags handed to psi_data_ctor_with_dtors().
 */
38 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
41 P = malloc(sizeof(*P));
/* NOTE(review): no NULL check on the malloc() result is visible before
 * this memset() -- an allocation failure would be dereferenced here. */
43 memset(P, 0, sizeof(*P));
45 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
/* The preprocessor is created with a reference to this parser and then
 * populated with its default definitions. */
47 P->preproc = psi_cpp_init(P);
49 psi_cpp_load_defaults(P->preproc);
/*
 * Read a whole file into a freshly allocated parser input.
 *
 * The file size is taken from stat(); a single allocation then holds the
 * psi_parser_input header, the file contents, YYMAXFILL zero bytes of
 * padding and, behind the padding, a copy of the file name.
 *
 * P:             parser whose error callback is used for diagnostics.
 * filename:      path of the file to read.
 * report_errors: NOTE(review): presumably gates the P->error() calls below;
 *                the guarding conditions are not visible in this view.
 *
 * NOTE(review): the return statements and the cleanup on the failure paths
 * are not visible here -- confirm that fb and fp are released on every
 * error path and that fp is closed after a successful read.
 */
54 struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors)
58 struct psi_parser_input *fb;
60 if (stat(filename, &sb)) {
62 P->error(PSI_DATA(P), NULL, PSI_WARNING,
63 "Could not stat '%s': %s",
64 filename, strerror(errno));
/* Header + file name (with NUL) + file contents + YYMAXFILL padding. */
69 if (!(fb = malloc(sizeof(*fb) + strlen(filename) + 1 + sb.st_size + YYMAXFILL))) {
/* NOTE(review): "%zu" expects a size_t, but the argument has the type of
 * sb.st_size (off_t per POSIX) -- mismatch where the two differ in width. */
71 P->error(PSI_DATA(P), NULL, PSI_WARNING,
72 "Could not allocate %zu bytes for reading '%s': %s",
73 sb.st_size + YYMAXFILL, filename, strerror(errno));
78 if (!(fp = fopen(filename, "r"))) {
81 P->error(PSI_DATA(P), NULL, PSI_WARNING,
82 "Could not open '%s' for reading: %s",
83 filename, strerror(errno));
/* A short read is treated as failure.
 * NOTE(review): compares st_size (signed) with fread()'s size_t result. */
88 if (sb.st_size != fread(fb->buffer, 1, sb.st_size, fp)) {
/* NOTE(review): the message reports st_size + YYMAXFILL although only
 * st_size bytes were requested from fread(); same "%zu" concern as above. */
92 P->error(PSI_DATA(P), NULL, PSI_WARNING,
93 "Could not read %zu bytes from '%s': %s",
94 sb.st_size + YYMAXFILL, filename, strerror(errno));
/* Zero the padding that follows the file contents. */
99 memset(fb->buffer + sb.st_size, 0, YYMAXFILL);
100 fb->length = sb.st_size;
/* The file name copy lives directly behind the padding. */
101 fb->file = &fb->buffer[sb.st_size + YYMAXFILL];
102 memcpy(fb->file, filename, strlen(filename) + 1);
/*
 * Build a parser input from an in-memory string.
 *
 * A single allocation holds the psi_parser_input header, a copy of the
 * string, YYMAXFILL zero bytes of padding and, behind the padding, the
 * fixed name "<stdin>".
 *
 * P:      parser whose error callback is used if the allocation fails.
 * string: bytes to copy; exactly `length` bytes are read.
 * length: number of bytes in `string`.
 *
 * NOTE(review): the assignment of the input's length and the return
 * statements are not visible in this view.
 */
107 struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
109 struct psi_parser_input *sb;
/* sizeof("<stdin>") already includes the terminating NUL. */
111 if (!(sb = malloc(sizeof(*sb) + sizeof("<stdin>") + length + YYMAXFILL))) {
112 P->error(PSI_DATA(P), NULL, PSI_WARNING,
113 "Could not allocate %zu bytes: %s",
114 length + YYMAXFILL, strerror(errno));
118 memcpy(sb->buffer, string, length);
/* Zero the padding that follows the copied string. */
119 memset(sb->buffer + length, 0, YYMAXFILL);
/* The name is stored directly behind the padding. */
122 sb->file = &sb->buffer[length + YYMAXFILL];
123 memcpy(sb->file, "<stdin>", sizeof("<stdin>"));
/*
 * Walk the string-keyed entries of P->cpp.defs and add one psi_const per
 * entry to P->consts, converting the zval payload according to its type.
 *
 * NOTE(review): this reads P->cpp.defs, while the other functions in this
 * file use P->preproc -- confirm this function is still compiled and that
 * the member exists.
 * NOTE(review): the case labels, the declarations of key/val/tmp/ctt/ctn/
 * str/c and the place where tmp is stored into iv are not visible here.
 */
129 static void psi_parser_register_constants(struct psi_parser *P)
134 ZEND_HASH_FOREACH_STR_KEY_VAL(&P->cpp.defs, key, val)
136 struct psi_impl_def_val *iv;
137 struct psi_const_type *ct;
145 switch (Z_TYPE_P(val)) {
/* Boolean payload: true exactly when the zval type is IS_TRUE. */
150 tmp.zend.bval = Z_TYPE_P(val) == IS_TRUE;
155 tmp.zend.lval = Z_LVAL_P(val);
160 tmp.dval = Z_DVAL_P(val);
/* The value is converted to a string, duplicated, and the temporary
 * string reference is released again. */
165 str = zval_get_string(val);
166 tmp.zend.str = zend_string_dup(str, 1);
167 zend_string_release(str);
171 iv = psi_impl_def_val_init(ctt, NULL);
173 ct = psi_const_type_init(ctt, ctn);
174 c = psi_const_init(ct, key->val, iv);
/* NOTE(review): presumably only executed while P->consts is still unset;
 * the guard is not visible -- otherwise the list would be replaced on
 * every iteration. */
176 P->consts = psi_plist_init((psi_plist_dtor) psi_const_free);
178 P->consts = psi_plist_add(P->consts, &c);
180 ZEND_HASH_FOREACH_END();
/*
 * Run the parser's preprocessor over a token list.
 *
 * The list is passed by address to psi_cpp_process(), so the callee may
 * replace it. psi_parser_parse() treats a NULL result of this function as
 * failure.
 * NOTE(review): the return statements are not visible in this view.
 */
184 struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist *tokens)
186 if (psi_cpp_process(P->preproc, &tokens)) {
/*
 * Feed a token list to the generated parser.
 *
 * Parsing is only attempted when the list is non-empty. While parsing,
 * the global psi_parser_proc_debug is switched on if the parser has
 * PSI_DEBUG set, and switched off again afterwards.
 *
 * processed: forwarded unchanged to psi_parser_proc_parse().
 * NOTE(review): the declaration of rc and the return statements are not
 * visible in this view.
 */
192 bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed)
194 if (psi_plist_count(tokens)) {
197 if (P->flags & PSI_DEBUG) {
198 psi_parser_proc_debug = 1;
200 rc = psi_parser_proc_parse(P, tokens, processed);
201 if (P->flags & PSI_DEBUG) {
202 psi_parser_proc_debug = 0;
/*
 * Parse one input: scan it into tokens, preprocess the tokens, then hand
 * the preprocessed list to the parser.
 *
 * NOTE(review): the return statements are not visible in this view.
 */
209 bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
211 struct psi_plist *scanned, *preproc;
212 size_t processed = 0;
214 if (!(scanned = psi_parser_scan(P, I))) {
/* The scanned list is released here only when preprocessing fails.
 * NOTE(review): presumably psi_parser_preprocess() takes over the list
 * on success -- confirm ownership. */
218 if (!(preproc = psi_parser_preprocess(P, scanned))) {
219 psi_plist_free(scanned);
/* The preprocessed list is released on both the failure and the
 * success path of psi_parser_process(). */
223 if (!psi_parser_process(P, preproc, &processed)) {
224 psi_plist_free(preproc);
228 psi_plist_free(preproc);
/*
 * Tear down a parser instance without releasing its storage: the
 * preprocessor is freed, the psi_data part is destroyed and the whole
 * struct is zeroed afterwards.
 */
232 void psi_parser_dtor(struct psi_parser *P)
234 psi_cpp_free(&P->preproc);
235 psi_data_dtor(PSI_DATA(P));
237 memset(P, 0, sizeof(*P));
/*
 * Release a parser given by address.
 * NOTE(review): the body is not visible in this view; presumably it
 * destroys and frees *P and resets the caller's pointer -- confirm.
 */
240 void psi_parser_free(struct psi_parser **P)
/*
 * Create a token of type `t` and append it to the token list.
 *
 * Expands to several statements and relies on these names from the
 * expansion site: token, tokens, tok, cur, eol, P and I. The token is
 * created by psi_token_init() from tok, cur - tok, tok - eol + 1,
 * I->lines and I->file. If the parser has PSI_DEBUG set, "PSI< " is
 * written to stderr and the token is dumped via psi_token_dump(2, ...).
 *
 * NOTE(review): this is a bare statement sequence, not wrapped in
 * do { } while (0) as far as visible -- do not use it as the unbraced
 * body of an if/else.
 */
253 #define NEWTOKEN(t) \
254 token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
255 tokens = psi_plist_add(tokens, &token); \
256 if (P->flags & PSI_DEBUG) { \
257 fprintf(stderr, "PSI< "); \
258 psi_token_dump(2, token); \
262 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
264 struct psi_plist *tokens;
265 struct psi_token *token;
266 const char *tok, *cur, *lim, *mrk, *eol;
268 tok = mrk = eol = cur = I->buffer;
269 lim = I->buffer + I->length;
271 tokens = psi_plist_init((void (*)(void *)) psi_token_free);
279 re2c:define:YYCTYPE = "unsigned char";
280 re2c:define:YYCURSOR = cur;
281 re2c:define:YYLIMIT = lim;
282 re2c:define:YYMARKER = mrk;
283 re2c:define:YYFILL = "if (cur >= lim) goto done;";
284 re2c:yyfill:parameter = 0;
286 W = [a-zA-Z0-9_\x80-\xff];
289 NAME = [a-zA-Z_\x80-\xff]W*;
290 NSNAME = (NAME)? ("\\" NAME)+;
291 DOLLAR_NAME = '$' W+;
292 QUOTED_STRING = "L"? "\"" ([^"])+ "\"";
293 QUOTED_CHAR = "L"? "'" ([^']+ "\\'"?)+ "'";
294 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
296 DEC_CONST = [1-9] [0-9]*;
297 OCT_CONST = "0" [0-7]*;
298 HEX_CONST = '0x' [0-9a-fA-F]+;
299 INT_SUFFIX = 'u'('l' 'l'? )? | 'l'('l'? 'u')?;
300 INT_NUMBER = (DEC_CONST | OCT_CONST | HEX_CONST) INT_SUFFIX?;
302 FLT_HEX_FRAC = [0-9a-fA-F]*;
303 FLT_HEX_SIG = HEX_CONST ("." FLT_HEX_FRAC)?;
304 FLT_HEX_EXPO = 'p' [+-]? [0-9]+;
305 FLT_HEX_CONST = FLT_HEX_SIG FLT_HEX_EXPO;
306 FLT_DEC_NUM = "0" | DEC_CONST;
307 FLT_DEC_FRAC = [0-9]*;
308 FLT_DEC_SIG = FLT_DEC_NUM ("." FLT_DEC_FRAC)?;
309 FLT_DEC_EXPO = 'e' [+-]? [0-9]+;
310 FLT_DEC_CONST = (FLT_DEC_SIG FLT_DEC_EXPO) | (FLT_DEC_NUM? "." FLT_DEC_FRAC);
311 FLT_SUFFIX = 'f' | 'l' | ('d' ('f' | 'd' | 'l'));
312 FLT_NUMBER = (FLT_DEC_CONST | FLT_HEX_CONST) FLT_SUFFIX?;
314 NUMBER = [+-]? (INT_NUMBER | FLT_NUMBER);
316 "/*" { goto comment; }
317 "//" { goto comment_sl; }
318 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
319 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
320 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
321 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
322 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
323 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
324 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
325 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
326 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
327 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
328 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
329 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
330 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
331 "||" { NEWTOKEN(PSI_T_OR); goto start; }
332 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
333 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
334 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
335 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
336 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
337 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
338 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
339 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
340 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
341 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
342 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
343 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
344 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
345 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
346 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
347 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
348 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
349 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
350 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
351 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
352 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
353 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
354 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
355 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
356 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
357 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
358 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
359 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
360 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
361 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
362 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
363 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
364 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
365 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
366 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
367 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
368 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
369 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
370 'VOID' { NEWTOKEN(PSI_T_VOID); goto start; }
371 'BOOL' { NEWTOKEN(PSI_T_BOOL); goto start; }
372 'CHAR' { NEWTOKEN(PSI_T_CHAR); goto start; }
373 'SHORT' { NEWTOKEN(PSI_T_SHORT); goto start; }
374 'INT' { NEWTOKEN(PSI_T_INT); goto start; }
375 'LONG' { NEWTOKEN(PSI_T_LONG); goto start; }
376 'FLOAT' { NEWTOKEN(PSI_T_FLOAT); goto start; }
377 'DOUBLE' { NEWTOKEN(PSI_T_DOUBLE); goto start; }
378 'INT8_T' { NEWTOKEN(PSI_T_INT8); goto start; }
379 'UINT8_T' { NEWTOKEN(PSI_T_UINT8); goto start; }
380 'INT16_T' { NEWTOKEN(PSI_T_INT16); goto start; }
381 'UINT16_T' { NEWTOKEN(PSI_T_UINT16); goto start; }
382 'INT32_T' { NEWTOKEN(PSI_T_INT32); goto start; }
383 'UINT32_T' { NEWTOKEN(PSI_T_UINT32); goto start; }
384 'INT64_T' { NEWTOKEN(PSI_T_INT64); goto start; }
385 'UINT64_T' { NEWTOKEN(PSI_T_UINT64); goto start; }
386 'UNSIGNED' { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
387 'SIGNED' { NEWTOKEN(PSI_T_SIGNED); goto start; }
388 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
389 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
390 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
391 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
392 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
393 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
394 'TYPEDEF' { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
395 'STRUCT' { NEWTOKEN(PSI_T_STRUCT); goto start; }
396 'UNION' { NEWTOKEN(PSI_T_UNION); goto start; }
397 'ENUM' { NEWTOKEN(PSI_T_ENUM); goto start; }
398 'CONST' { NEWTOKEN(PSI_T_CONST); goto start; }
399 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
400 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
401 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
402 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
403 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
404 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
405 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
406 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
407 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
408 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
409 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
410 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
411 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
412 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
413 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
414 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
415 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
416 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
417 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
418 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
419 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
420 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
421 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
422 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
423 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
424 NUMBER { NEWTOKEN(PSI_T_NUMBER); goto start; }
425 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
426 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
427 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
428 QUOTED_STRING { NEWTOKEN(PSI_T_QUOTED_STRING); goto start; }
429 QUOTED_CHAR { NEWTOKEN(PSI_T_QUOTED_CHAR); goto start; }
430 CPP_HEADER { NEWTOKEN(PSI_T_CPP_HEADER); goto start; }
431 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
432 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
433 [^] { NEWTOKEN(-2); goto error; }
434 * { NEWTOKEN(-1); goto error; }
441 EOL { NEWLINE(); goto comment; }
442 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
450 EOL { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
451 * { goto comment_sl; }
456 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
457 token->type, token->size, token->text, tok - eol + 1);
458 psi_plist_free(tokens);
463 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);