X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-psi;a=blobdiff_plain;f=src%2Fparser.re;h=7892b14d544f15eed503fb45f735fcdbbc7ac716;hp=59c8bdaf141d12136b62afe2b794d884166fb2e9;hb=02e801eabbe26a129ea05e6723c94e10bb653dab;hpb=ba906e039ffe9e57842ce5135aa43efa00b8a4c6 diff --git a/src/parser.re b/src/parser.re index 59c8bda..7892b14 100644 --- a/src/parser.re +++ b/src/parser.re @@ -26,6 +26,7 @@ #include "php_psi_stdinc.h" #include #include +#include #include #include "parser.h" @@ -181,10 +182,10 @@ static void psi_parser_register_constants(struct psi_parser *P) } #endif -struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist *tokens) +struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens) { - if (psi_cpp_process(P->preproc, &tokens)) { - return tokens; + if (psi_cpp_process(P->preproc, tokens)) { + return *tokens; } return NULL; } @@ -197,6 +198,67 @@ bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t * return true; } +void psi_parser_postprocess(struct psi_parser *P) +{ + unsigned flags; + zend_string *name; + struct psi_validate_scope scope = {0}; + + psi_validate_scope_ctor(&scope); + scope.defs = &P->preproc->defs; + + flags = P->flags; + P->flags |= PSI_SILENT; + + /* register const macros */ + ZEND_HASH_FOREACH_STR_KEY_PTR(&P->preproc->defs, name, scope.macro) + { + if (scope.macro->sig) { + } else if (scope.macro->exp) { + if (psi_num_exp_validate(PSI_DATA(P), scope.macro->exp, &scope)) { + struct psi_impl_type *type; + struct psi_impl_def_val *def; + struct psi_const *cnst; + struct psi_num_exp *num; + char *name_str = malloc(name->len + sizeof("psi\\")); + + strcat(strcpy(name_str, "psi\\"), name->val); + num = psi_num_exp_copy(scope.macro->exp); + def = psi_impl_def_val_init(PSI_T_NUMBER, num); + type = psi_impl_type_init(PSI_T_NUMBER, ""); + cnst = psi_const_init(type, name_str, def); + P->consts = psi_plist_add(P->consts, &cnst); + free(name_str); + } + } else { + if (psi_plist_count(scope.macro->tokens) == 1) { + struct psi_token *t; + + if (psi_plist_get(scope.macro->tokens, 0, &t)) { + if (t->type == PSI_T_QUOTED_STRING) { + struct psi_impl_type *type; + struct psi_impl_def_val *def; + struct psi_const *cnst; + char *name_str = malloc(name->len + sizeof("psi\\")); + + strcat(strcpy(name_str, "psi\\"), name->val); + type = psi_impl_type_init(PSI_T_STRING, "string"); + def = psi_impl_def_val_init(PSI_T_QUOTED_STRING, t->text); + cnst = psi_const_init(type, name_str, def); + P->consts = psi_plist_add(P->consts, &cnst); + free(name_str); + } + } + } + } + } + ZEND_HASH_FOREACH_END(); + + P->flags = flags; + + psi_validate_scope_dtor(&scope); +} + bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I) { struct psi_plist *scanned, *preproc; @@ -206,7 +268,7 @@ bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I) return false; } - if (!(preproc = psi_parser_preprocess(P, scanned))) { + if (!(preproc = psi_parser_preprocess(P, &scanned))) { psi_plist_free(scanned); return false; } @@ -216,6 +278,8 @@ bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I) return false; } + psi_parser_postprocess(P); + psi_plist_free(preproc); return true; } @@ -250,7 +314,7 @@ void psi_parser_free(struct psi_parser **P) } union int_suffix { - char s[SIZEOF_UINT32_T]; + char s[4]; uint32_t i; }; @@ -260,16 +324,23 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input struct psi_token *token; const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk; unsigned parens; + bool escaped; + token_t char_width; + + PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file); tok = mrk = eol = cur = I->buffer; lim = I->buffer + I->length; I->lines = 1; - tokens = psi_plist_init((void (*)(void *)) psi_token_free); + tokens = psi_plist_init((psi_plist_dtor) psi_token_free); start: ; + char_width = 1; ctxmrk = NULL; tok = cur; + (void) ctxmrk; + /*!re2c re2c:indent:top = 2; @@ -282,36 +353,23 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input re2c:yyfill:parameter = 0; W = [a-zA-Z0-9_\x80-\xff]; - SP = [ \t]; + SP = [ \t\f]; EOL = [\r\n]; - NAME = [a-zA-Z_\x80-\xff]W*; + NAME = [a-zA-Z_\x80-\xff] W*; NSNAME = (NAME)? ("\\" NAME)+; DOLLAR_NAME = '$' W+; - QUOTED_STRING = "L"? "\"" ([^"])+ "\""; - QUOTED_CHAR = "L"? "'" ([^']+ "\\'"?)+ "'"; CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">"; - CPP_ATTRIBUTE = "__attribute__" W* "(("; - CPP_PRAGMA_ONCE = "pragma" W+ "once"; + CPP_ATTRIBUTE = "__attribute__" SP* "(("; DEC_CONST = [1-9] [0-9]*; OCT_CONST = "0" [0-7]*; HEX_CONST = '0x' [0-9a-fA-F]+; INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST); - INT_SUFFIX = 'u'('l' 'l'? )? | 'l'('l'? 'u')?; - INT_NUMBER = (DEC_CONST | OCT_CONST | HEX_CONST) INT_SUFFIX?; - FLT_HEX_FRAC = [0-9a-fA-F]*; - FLT_HEX_SIG = HEX_CONST ("." FLT_HEX_FRAC)?; - FLT_HEX_EXPO = 'p' [+-]? [0-9]+; - FLT_HEX_CONST = FLT_HEX_SIG FLT_HEX_EXPO; + FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+; FLT_DEC_NUM = "0" | DEC_CONST; - FLT_DEC_FRAC = [0-9]*; - FLT_DEC_SIG = FLT_DEC_NUM ("." FLT_DEC_FRAC)?; - FLT_DEC_EXPO = 'e' [+-]? [0-9]+; - FLT_DEC_CONST = (FLT_DEC_SIG FLT_DEC_EXPO) | (FLT_DEC_NUM "." FLT_DEC_FRAC) | ("." [0-9]+); + FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+); FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST); - FLT_SUFFIX = 'f' | 'l' | ('d' ('f' | 'd' | 'l')); - FLT_NUMBER = (FLT_DEC_CONST | FLT_HEX_CONST) FLT_SUFFIX?; [+-]? INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; } [+-]? INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; } @@ -326,8 +384,17 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input [+-]? FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; } [+-]? FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; } + "'" { escaped = false; tok += 1; goto character; } + "\"" { escaped = false; tok += 1; goto string; } + "u8" / "\"" { char_width = 1; } + "u" / ['"] { char_width = 2; } + "U" / ['"] { char_width = 4; } + "L" / ['"] { char_width = sizeof(wchar_t)/8; } + "/*" { goto comment; } "//" { goto comment_sl; } + + "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; } "#" { NEWTOKEN(PSI_T_HASH); goto start; } "(" { NEWTOKEN(PSI_T_LPAREN); goto start; } ")" { NEWTOKEN(PSI_T_RPAREN); goto start; } @@ -364,7 +431,29 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; } "?" { NEWTOKEN(PSI_T_IIF); goto start; } "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; } - "once" { NEWTOKEN(PSI_T_ONCE); goto start; } + "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; } + "__inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; } + "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; } + "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; } + "__asm__" { NEWTOKEN(PSI_T_CPP_ASM); goto start; } + "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; } + "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; } + "line" { NEWTOKEN(PSI_T_LINE); goto start; } + "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; } + "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; } + "union" { NEWTOKEN(PSI_T_UNION); goto start; } + "enum" { NEWTOKEN(PSI_T_ENUM); goto start; } + "const" { NEWTOKEN(PSI_T_CONST); goto start; } + "void" { NEWTOKEN(PSI_T_VOID); goto start; } + "bool" { NEWTOKEN(PSI_T_BOOL); goto start; } + "char" { NEWTOKEN(PSI_T_CHAR); goto start; } + "short" { NEWTOKEN(PSI_T_SHORT); goto start; } + "int" { NEWTOKEN(PSI_T_INT); goto start; } + "long" { NEWTOKEN(PSI_T_LONG); goto start; } + "float" { NEWTOKEN(PSI_T_FLOAT); goto start; } + "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; } + "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; } + "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; } 'IF' { NEWTOKEN(PSI_T_IF); goto start; } 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; } 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; } @@ -383,41 +472,19 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; } 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; } 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; } - 'VOID' { NEWTOKEN(PSI_T_VOID); goto start; } - 'BOOL' { NEWTOKEN(PSI_T_BOOL); goto start; } - 'CHAR' { NEWTOKEN(PSI_T_CHAR); goto start; } - 'SHORT' { NEWTOKEN(PSI_T_SHORT); goto start; } - 'INT' { NEWTOKEN(PSI_T_INT); goto start; } - 'LONG' { NEWTOKEN(PSI_T_LONG); goto start; } - 'FLOAT' { NEWTOKEN(PSI_T_FLOAT); goto start; } - 'DOUBLE' { NEWTOKEN(PSI_T_DOUBLE); goto start; } - 'INT8_T' { NEWTOKEN(PSI_T_INT8); goto start; } - 'UINT8_T' { NEWTOKEN(PSI_T_UINT8); goto start; } - 'INT16_T' { NEWTOKEN(PSI_T_INT16); goto start; } - 'UINT16_T' { NEWTOKEN(PSI_T_UINT16); goto start; } - 'INT32_T' { NEWTOKEN(PSI_T_INT32); goto start; } - 'UINT32_T' { NEWTOKEN(PSI_T_UINT32); goto start; } - 'INT64_T' { NEWTOKEN(PSI_T_INT64); goto start; } - 'UINT64_T' { NEWTOKEN(PSI_T_UINT64); goto start; } - 'UNSIGNED' { NEWTOKEN(PSI_T_UNSIGNED); goto start; } - 'SIGNED' { NEWTOKEN(PSI_T_SIGNED); goto start; } 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; } 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; } 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; } 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; } 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; } 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; } - 'TYPEDEF' { NEWTOKEN(PSI_T_TYPEDEF); goto start; } - 'STRUCT' { NEWTOKEN(PSI_T_STRUCT); goto start; } - 'UNION' { NEWTOKEN(PSI_T_UNION); goto start; } - 'ENUM' { NEWTOKEN(PSI_T_ENUM); goto start; } - 'CONST' { NEWTOKEN(PSI_T_CONST); goto start; } 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; } 'LET' { NEWTOKEN(PSI_T_LET); goto start; } 'SET' { NEWTOKEN(PSI_T_SET); goto start; } 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; } 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; } 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; } + 'AS' { NEWTOKEN(PSI_T_AS); goto start; } 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; } 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; } 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; } @@ -440,9 +507,7 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input NAME { NEWTOKEN(PSI_T_NAME); goto start; } NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; } DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; } - QUOTED_STRING { NEWTOKEN(PSI_T_QUOTED_STRING); goto start; } - QUOTED_CHAR { NEWTOKEN(PSI_T_QUOTED_CHAR); goto start; } - CPP_HEADER { NEWTOKEN(PSI_T_CPP_HEADER); goto start; } + CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; } CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; } EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; } SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; } @@ -451,6 +516,46 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input */ + character: ; + /*!re2c + + EOL { NEWLINE(); goto character; } + "\\" { escaped = !escaped; goto character; } + "'" { + if (escaped) { + escaped = false; + goto character; + } + cur -= 1; + NEWTOKEN(PSI_T_QUOTED_CHAR); + cur += 1; + token->flags = char_width; + goto start; + } + * { escaped = false; goto character; } + + */ + + string: ; + /*!re2c + + EOL { NEWLINE(); goto string; } + "\\" { escaped = !escaped; goto string; } + "\"" { + if (escaped) { + escaped = false; + goto string; + } + cur -= 1; + NEWTOKEN(PSI_T_QUOTED_STRING); + cur += 1; + token->flags = char_width; + goto start; + } + * { escaped = false; goto string; } + + */ + comment: ; /*!re2c