X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-psi;a=blobdiff_plain;f=src%2Fparser.re;h=3fdd8660905258b11b281b0d4ca8d5b3a386c913;hp=707d201764ef36e6a8e9a50b95c1d9dce6dbf7cc;hb=ad2c22df5e451fffecc7b7ebdc9c5a4bb36e543e;hpb=6509a2053456d0e63b6f383b757289d3016ed1a5 diff --git a/src/parser.re b/src/parser.re index 707d201..3fdd866 100644 --- a/src/parser.re +++ b/src/parser.re @@ -34,36 +34,6 @@ #ifndef YYMAXFILL # define YYMAXFILL 256 #endif -/*!re2c - -re2c:indent:top = 2; -re2c:define:YYCTYPE = "unsigned char"; -re2c:define:YYCURSOR = P->cur; -re2c:define:YYLIMIT = P->lim; -re2c:define:YYMARKER = P->mrk; -re2c:define:YYFILL = "if (P->cur >= P->lim) goto done;"; -re2c:yyfill:parameter = 0; - -B = [^a-zA-Z0-9_]; -W = [a-zA-Z0-9_]; -SP = [ \t]; -EOL = [\r\n]; -NAME = [a-zA-Z_]W*; -NSNAME = (NAME)? ("\\" NAME)+; -DOLLAR_NAME = '$' W+; -QUOTED_STRING = "\"" ([^"])+ "\""; -NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?; - -*/ - -static void free_cpp_def(zval *p) -{ - if (Z_TYPE_P(p) == IS_PTR) { - psi_cpp_macro_decl_free((void *) &Z_PTR_P(p)); - } else if (Z_REFCOUNTED_P(p)) { - zval_ptr_dtor(p); - } -} struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags) { @@ -74,104 +44,85 @@ struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, uns psi_data_ctor_with_dtors(PSI_DATA(P), error, flags); - P->col = 1; - P->line = 1; - P->proc = psi_parser_proc_init(); + P->preproc = psi_cpp_init(P); - zend_hash_init(&P->cpp.defs, 0, NULL, free_cpp_def, 1); - zval tmp; - ZVAL_ARR(&tmp, &P->cpp.defs); - add_assoc_string(&tmp, "PHP_OS", PHP_OS); - - if (flags & PSI_DEBUG) { - psi_parser_proc_trace(stderr, "PSI> "); - } + psi_cpp_load_defaults(P->preproc); return P; } -void psi_parser_reset(struct psi_parser *P) -{ - P->cur = P->tok = P->mrk = P->input.buffer; - P->lim = P->input.buffer + P->input.length; -} - -bool psi_parser_open_file(struct psi_parser *P, const char *filename) +struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors) { struct stat sb; FILE *fp; - char *fb; + struct psi_parser_input *fb; if (stat(filename, &sb)) { - P->error(PSI_DATA(P), NULL, PSI_WARNING, - "Could not stat '%s': %s", - filename, strerror(errno)); - return false; + if (report_errors) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not stat '%s': %s", + filename, strerror(errno)); + } + return NULL; } - if (!(fb = malloc(sb.st_size + YYMAXFILL))) { - P->error(PSI_DATA(P), NULL, PSI_WARNING, - "Could not allocate %zu bytes for reading '%s': %s", - sb.st_size + YYMAXFILL, filename, strerror(errno)); - return false; + if (!(fb = malloc(sizeof(*fb) + strlen(filename) + 1 + sb.st_size + YYMAXFILL))) { + if (report_errors) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not allocate %zu bytes for reading '%s': %s", + sb.st_size + YYMAXFILL, filename, strerror(errno)); + } + return NULL; } if (!(fp = fopen(filename, "r"))) { free(fb); - P->error(PSI_DATA(P), NULL, PSI_WARNING, - "Could not open '%s' for reading: %s", - filename, strerror(errno)); - return false; + if (report_errors) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not open '%s' for reading: %s", + filename, strerror(errno)); + } + return NULL; } - if (sb.st_size != fread(fb, 1, sb.st_size, fp)) { + if (sb.st_size != fread(fb->buffer, 1, sb.st_size, fp)) { free(fb); fclose(fp); - P->error(PSI_DATA(P), NULL, PSI_WARNING, - "Could not read %zu bytes from '%s': %s", - sb.st_size + YYMAXFILL, filename, strerror(errno)); - return false; - } - memset(fb + sb.st_size, 0, YYMAXFILL); - - if (P->input.buffer) { - free(P->input.buffer); + if (report_errors) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not read %zu bytes from '%s': %s", + sb.st_size + YYMAXFILL, filename, strerror(errno)); + } + return NULL; } - P->input.buffer = fb; - P->input.length = sb.st_size; - - P->file.fn = strdup(filename); - psi_parser_reset(P); + memset(fb->buffer + sb.st_size, 0, YYMAXFILL); + fb->length = sb.st_size; + fb->file = &fb->buffer[sb.st_size + YYMAXFILL]; + memcpy(fb->file, filename, strlen(filename) + 1); - return true; + return fb; } -bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length) +struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length) { - char *sb; + struct psi_parser_input *sb; - if (!(sb = malloc(length + YYMAXFILL))) { + if (!(sb = malloc(sizeof(*sb) + sizeof("") + length + YYMAXFILL))) { P->error(PSI_DATA(P), NULL, PSI_WARNING, "Could not allocate %zu bytes: %s", length + YYMAXFILL, strerror(errno)); - return false; - } - - memcpy(sb, string, length); - memset(sb + length, 0, YYMAXFILL); - - if (P->input.buffer) { - free(P->input.buffer); + return NULL; } - P->input.buffer = sb; - P->input.length = length; - P->file.fn = strdup(""); + memcpy(sb->buffer, string, length); + memset(sb->buffer + length, 0, YYMAXFILL); - psi_parser_reset(P); + sb->length = length; + sb->file = &sb->buffer[length + YYMAXFILL]; + memcpy(sb->file, "", sizeof("")); - return true; + return sb; } #if 0 @@ -230,43 +181,50 @@ static void psi_parser_register_constants(struct psi_parser *P) } #endif -void psi_parser_parse(struct psi_parser *P) +struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens) { - size_t i = 0; - struct psi_token *T; + if (psi_cpp_process(P->preproc, tokens)) { + return *tokens; + } + return NULL; +} - P->cpp.tokens = psi_parser_scan(P); +bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed) +{ + if (psi_plist_count(tokens)) { + return 0 == psi_parser_proc_parse(P, tokens, processed); + } + return true; +} - psi_cpp_preprocess(P, &P->cpp); +bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I) +{ + struct psi_plist *scanned, *preproc; + size_t processed = 0; - if (psi_plist_count(P->cpp.tokens)) { - while (psi_plist_get(P->cpp.tokens, i++, &T)) { - if (P->flags & PSI_DEBUG) { - fprintf(stderr, "PSI> "); - psi_token_dump(2, T); - } - psi_parser_proc_parse(P->proc, T->type, T, P); - } - psi_parser_proc_parse(P->proc, 0, NULL, P); + if (!(scanned = psi_parser_scan(P, I))) { + return false; + } + + if (!(preproc = psi_parser_preprocess(P, &scanned))) { + psi_plist_free(scanned); + return false; } - psi_plist_free(P->cpp.tokens); - P->cpp.tokens = NULL; + if (!psi_parser_process(P, preproc, &processed)) { + psi_plist_free(preproc); + return false; + } + + psi_plist_free(preproc); + return true; } void psi_parser_dtor(struct psi_parser *P) { - psi_parser_proc_free(&P->proc); - - if (P->input.buffer) { - free(P->input.buffer); - P->input.buffer = NULL; - } - + psi_cpp_free(&P->preproc); psi_data_dtor(PSI_DATA(P)); - zend_hash_destroy(&P->cpp.defs); - memset(P, 0, sizeof(*P)); } @@ -280,39 +238,99 @@ void psi_parser_free(struct psi_parser **P) } #define NEWLINE() \ - P->col = 1; \ - ++P->line + eol = cur; \ + ++I->lines #define NEWTOKEN(t) \ - P->num = t; \ - token = psi_token_alloc(P); \ + token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \ tokens = psi_plist_add(tokens, &token); \ - P->col += P->cur - P->tok; \ if (P->flags & PSI_DEBUG) { \ fprintf(stderr, "PSI< "); \ psi_token_dump(2, token); \ - } \ - token = NULL + } +union int_suffix { + char s[SIZEOF_UINT32_T]; + uint32_t i; +}; -struct psi_plist *psi_parser_scan(struct psi_parser *P) +struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I) { struct psi_plist *tokens; struct psi_token *token; + const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk; + unsigned parens; + bool escaped; + token_t char_width; - if (!P->cur) { - return NULL; - } + PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file); - tokens = psi_plist_init(NULL); + tok = mrk = eol = cur = I->buffer; + lim = I->buffer + I->length; + I->lines = 1; + tokens = psi_plist_init((psi_plist_dtor) psi_token_free); start: ; - P->tok = P->cur; + char_width = 1; + ctxmrk = NULL; + tok = cur; + + (void) ctxmrk; /*!re2c + re2c:indent:top = 2; + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = cur; + re2c:define:YYLIMIT = lim; + re2c:define:YYMARKER = mrk; + re2c:define:YYCTXMARKER = ctxmrk; + re2c:define:YYFILL = "if (cur >= lim) goto done;"; + re2c:yyfill:parameter = 0; + + W = [a-zA-Z0-9_\x80-\xff]; + SP = [ \t\f]; + EOL = [\r\n]; + NAME = [a-zA-Z_\x80-\xff] W*; + NSNAME = (NAME)? ("\\" NAME)+; + DOLLAR_NAME = '$' W+; + CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">"; + CPP_ATTRIBUTE = "__attribute__" SP* "(("; + + DEC_CONST = [1-9] [0-9]*; + OCT_CONST = "0" [0-7]*; + HEX_CONST = '0x' [0-9a-fA-F]+; + INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST); + + FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+; + FLT_DEC_NUM = "0" | DEC_CONST; + FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+); + FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST); + + [+-]? INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; } + [+-]? INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; } + [+-]? INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; } + [+-]? INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; } + [+-]? INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; } + + [+-]? FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; } + [+-]? FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; } + [+-]? FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; } + [+-]? FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; } + [+-]? FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; } + [+-]? FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; } + + "'" { escaped = false; tok += 1; goto character; } + "\"" { escaped = false; tok += 1; goto string; } + "u8" / "\"" { char_width = 1; } + "u" / ['"] { char_width = 2; } + "U" / ['"] { char_width = 4; } + "L" / ['"] { char_width = SIZEOF_WCHAR_T/8; } + "/*" { goto comment; } "//" { goto comment_sl; } + + "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; } "#" { NEWTOKEN(PSI_T_HASH); goto start; } "(" { NEWTOKEN(PSI_T_LPAREN); goto start; } ")" { NEWTOKEN(PSI_T_RPAREN); goto start; } @@ -345,7 +363,33 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P) ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; } "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; } ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; } + "." { NEWTOKEN(PSI_T_PERIOD); goto start; } "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; } + "?" { NEWTOKEN(PSI_T_IIF); goto start; } + "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; } + "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; } + "__inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; } + "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; } + "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; } + "__asm__" { NEWTOKEN(PSI_T_CPP_ASM); goto start; } + "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; } + "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; } + "line" { NEWTOKEN(PSI_T_LINE); goto start; } + "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; } + "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; } + "union" { NEWTOKEN(PSI_T_UNION); goto start; } + "enum" { NEWTOKEN(PSI_T_ENUM); goto start; } + "const" { NEWTOKEN(PSI_T_CONST); goto start; } + "void" { NEWTOKEN(PSI_T_VOID); goto start; } + "bool" { NEWTOKEN(PSI_T_BOOL); goto start; } + "char" { NEWTOKEN(PSI_T_CHAR); goto start; } + "short" { NEWTOKEN(PSI_T_SHORT); goto start; } + "int" { NEWTOKEN(PSI_T_INT); goto start; } + "long" { NEWTOKEN(PSI_T_LONG); goto start; } + "float" { NEWTOKEN(PSI_T_FLOAT); goto start; } + "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; } + "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; } + "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; } 'IF' { NEWTOKEN(PSI_T_IF); goto start; } 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; } 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; } @@ -357,46 +401,26 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P) 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; } 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; } 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; } + 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; } + 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; } 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; } 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; } 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; } 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; } 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; } - 'VOID' { NEWTOKEN(PSI_T_VOID); goto start; } - 'BOOL' { NEWTOKEN(PSI_T_BOOL); goto start; } - 'CHAR' { NEWTOKEN(PSI_T_CHAR); goto start; } - 'SHORT' { NEWTOKEN(PSI_T_SHORT); goto start; } - 'INT' { NEWTOKEN(PSI_T_INT); goto start; } - 'LONG' { NEWTOKEN(PSI_T_LONG); goto start; } - 'FLOAT' { NEWTOKEN(PSI_T_FLOAT); goto start; } - 'DOUBLE' { NEWTOKEN(PSI_T_DOUBLE); goto start; } - 'INT8_T' { NEWTOKEN(PSI_T_INT8); goto start; } - 'UINT8_T' { NEWTOKEN(PSI_T_UINT8); goto start; } - 'INT16_T' { NEWTOKEN(PSI_T_INT16); goto start; } - 'UINT16_T' { NEWTOKEN(PSI_T_UINT16); goto start; } - 'INT32_T' { NEWTOKEN(PSI_T_INT32); goto start; } - 'UINT32_T' { NEWTOKEN(PSI_T_UINT32); goto start; } - 'INT64_T' { NEWTOKEN(PSI_T_INT64); goto start; } - 'UINT64_T' { NEWTOKEN(PSI_T_UINT64); goto start; } - 'UNSIGNED' { NEWTOKEN(PSI_T_UNSIGNED); goto start; } - 'SIGNED' { NEWTOKEN(PSI_T_SIGNED); goto start; } 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; } 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; } 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; } 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; } 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; } 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; } - 'TYPEDEF' { NEWTOKEN(PSI_T_TYPEDEF); goto start; } - 'STRUCT' { NEWTOKEN(PSI_T_STRUCT); goto start; } - 'UNION' { NEWTOKEN(PSI_T_UNION); goto start; } - 'ENUM' { NEWTOKEN(PSI_T_ENUM); goto start; } - 'CONST' { NEWTOKEN(PSI_T_CONST); goto start; } 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; } 'LET' { NEWTOKEN(PSI_T_LET); goto start; } 'SET' { NEWTOKEN(PSI_T_SET); goto start; } 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; } 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; } 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; } + 'AS' { NEWTOKEN(PSI_T_AS); goto start; } 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; } 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; } 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; } @@ -416,14 +440,55 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P) 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; } 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; } 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; } - NUMBER { NEWTOKEN(PSI_T_NUMBER); goto start; } NAME { NEWTOKEN(PSI_T_NAME); goto start; } NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; } DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; } - QUOTED_STRING { NEWTOKEN(PSI_T_QUOTED_STRING); goto start; } + CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; } + CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; } EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; } SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; } - * { goto error; } + [^] { NEWTOKEN(-2); goto error; } + * { NEWTOKEN(-1); goto error; } + + */ + + character: ; + /*!re2c + + EOL { NEWLINE(); goto character; } + "\\" { escaped = !escaped; goto character; } + "'" { + if (escaped) { + escaped = false; + goto character; + } + cur -= 1; + NEWTOKEN(PSI_T_QUOTED_CHAR); + cur += 1; + token->flags = char_width; + goto start; + } + * { escaped = false; goto character; } + + */ + + string: ; + /*!re2c + + EOL { NEWLINE(); goto string; } + "\\" { escaped = !escaped; goto string; } + "\"" { + if (escaped) { + escaped = false; + goto string; + } + cur -= 1; + NEWTOKEN(PSI_T_QUOTED_STRING); + cur += 1; + token->flags = char_width; + goto start; + } + * { escaped = false; goto string; } */ @@ -443,9 +508,27 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P) * { goto comment_sl; } */ -error: + + cpp_attribute: ; + + /*!re2c + + "(" { ++parens; goto cpp_attribute; } + ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } } + EOL { NEWLINE(); goto cpp_attribute; } + * { goto cpp_attribute; } + + */ +error: ; + + P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu", + token->type, token->size, token->text, tok - eol + 1); psi_plist_free(tokens); return NULL; + done: + + PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim); + return tokens; }