X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-psi;a=blobdiff_plain;f=src%2Fparser.re;h=b56385d49e0f42c5e7a4d4ed32b6485f9eeb6c22;hp=69e76c8f124244f9c7920844569dfa5da11c3a80;hb=ff0048963fcbcddc9de14a3e1c370fcc81d12af7;hpb=e16c15d5936c3b57b05e49570fb9526920de8188 diff --git a/src/parser.re b/src/parser.re index 69e76c8..b56385d 100644 --- a/src/parser.re +++ b/src/parser.re @@ -1,236 +1,228 @@ -#include +#include "php_psi_stdinc.h" +#include #include #include "parser.h" -#include "parser_proc.h" -void *PSI_ParserProcAlloc(void*(unsigned long)); -void PSI_ParserProcFree(void*, void(*)(void*)); -void PSI_ParserProc(void *, token_t, PSI_Token *, PSI_Parser *); -void PSI_ParserProcTrace(FILE *, const char*); +void *psi_parser_proc_init(void); +void psi_parser_proc_free(void **parser_proc); +void psi_parser_proc_parse(void *parser_proc, token_t r, struct psi_token *token, struct psi_parser *parser); +void psi_parser_proc_trace(FILE *out, char *prefix); -PSI_Parser *PSI_ParserInit(PSI_Parser *P, const char *filename, psi_error_cb error, unsigned flags) +struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags) { - FILE *fp; - if (!P) { P = malloc(sizeof(*P)); } memset(P, 0, sizeof(*P)); - fp = fopen(filename, "r"); - - if (!fp) { - perror(filename); - return NULL; - } - - if (!P) { - P = malloc(sizeof(*P)); - } - memset(P, 0, sizeof(*P)); + psi_data_ctor_with_dtors(PSI_DATA(P), error, flags); - P->psi.file.fn = strdup(filename); - P->fp = fp; P->col = 1; P->line = 1; - P->error = error; - P->flags = flags; + P->proc = psi_parser_proc_init(); - P->proc = PSI_ParserProcAlloc(malloc); - if (flags & PSI_PARSER_DEBUG) { - PSI_ParserProcTrace(stderr, "PSI> "); + if (flags & PSI_DEBUG) { + psi_parser_proc_trace(stderr, "PSI> "); } - PSI_ParserFill(P, 0); - return P; } -void PSI_ParserSyntaxError(PSI_Parser *P, const char *fn, size_t ln, const char *msg, ...) { - char buf[0x1000] = {0}; - va_list argv; +bool psi_parser_open_file(struct psi_parser *P, const char *filename) +{ + FILE *fp = fopen(filename, "r"); - va_start(argv, msg); - vsnprintf(buf, 0x1000-1, msg, argv); - va_end(argv); + if (!fp) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not open '%s' for reading: %s", + filename, strerror(errno)); + return false; + } + + P->input.type = PSI_PARSE_FILE; + P->input.data.file.handle = fp; + +#if HAVE_MMAP + struct stat sb; + int fd = fileno(fp); - P->error(PSI_WARNING, "PSI syntax error on line %zu in '%s'%s%s", - ln, fn, msg ? ": ": "", buf); + if (fstat(fd, &sb)) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not stat '%s': %s", + filename, strerror(errno)); + return false; + } - ++P->errors; + P->input.data.file.buffer = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (MAP_FAILED == P->input.data.file.buffer) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not map '%s' for reading: %s", + filename, strerror(errno)); + return false; + } + P->input.data.file.length = sb.st_size; +#else + P->input.data.file.buffer = malloc(BSIZE); +#endif + + P->file.fn = strdup(filename); + + return true; } -size_t PSI_ParserFill(PSI_Parser *P, size_t n) +bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length) { - if (P->flags & PSI_PARSER_DEBUG) { - fprintf(stderr, "PSI> Fill: n=%zu\n", n); + P->input.type = PSI_PARSE_STRING; + P->input.data.string.length = length; + if (!(P->input.data.string.buffer = strndup(string, length))) { + return false; } + + P->file.fn = strdup(""); + + return true; +} + +static ssize_t psi_parser_fill(struct psi_parser *P, size_t n) +{ + PSI_DEBUG_PRINT(P, "PSI< Fill: n=%zu (input.type=%d)\n", n, P->input.type); + + /* init if n==0 */ if (!n) { - P->cur = P->tok = P->lim = P->mrk = P->buf; - P->eof = NULL; - } + switch (P->input.type) { + case PSI_PARSE_FILE: + P->cur = P->tok = P->mrk = P->input.data.file.buffer; +#if HAVE_MMAP + P->eof = P->input.data.file.buffer + P->input.data.file.length; + P->lim = P->eof; +#else + P->eof = NULL; + P->lim = P->input.data.file.buffer; +#endif + break; - if (!P->eof) { - size_t consumed = P->tok - P->buf; - size_t reserved = P->lim - P->tok; - size_t available = BSIZE - reserved; - size_t didread; - - if (consumed) { - memmove(P->buf, P->tok, reserved); - P->tok -= consumed; - P->cur -= consumed; - P->lim -= consumed; - P->mrk -= consumed; + case PSI_PARSE_STRING: + P->cur = P->tok = P->mrk = P->input.data.string.buffer; + P->eof = P->input.data.string.buffer + P->input.data.string.length; + P->lim = P->eof; + break; } - didread = fread(P->lim, 1, available, P->fp); - P->lim += didread; - if (didread < available) { - P->eof = P->lim; - } + PSI_DEBUG_PRINT(P, "PSI< Fill: cur=%p lim=%p eof=%p\n", P->cur, P->lim, P->eof); + } - if (P->flags & PSI_PARSER_DEBUG) { - fprintf(stderr, "PSI> Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n", - consumed, reserved, available, didread); + switch (P->input.type) { + case PSI_PARSE_STRING: + break; + + case PSI_PARSE_FILE: +#if !HAVE_MMAP + if (!P->eof) { + size_t consumed = P->tok - P->buf; + size_t reserved = P->lim - P->tok; + size_t available = BSIZE - reserved; + size_t didread; + + if (consumed) { + memmove(P->buf, P->tok, reserved); + P->tok -= consumed; + P->cur -= consumed; + P->lim -= consumed; + P->mrk -= consumed; + } + + didread = fread(P->lim, 1, available, P->fp); + P->lim += didread; + if (didread < available) { + P->eof = P->lim; + } + PSI_DEBUG_PRINT(P, "PSI< Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n", + consumed, reserved, available, didread); } +#endif + break; } - if (P->flags & PSI_PARSER_DEBUG) { - fprintf(stderr, "PSI> Fill: avail=%zu\n", P->lim - P->cur); - } + + PSI_DEBUG_PRINT(P, "PSI< Fill: avail=%td\n", P->lim - P->cur); + return P->lim - P->cur; } -void PSI_ParserParse(PSI_Parser *P, PSI_Token *T) +void psi_parser_parse(struct psi_parser *P, struct psi_token *T) { if (T) { - PSI_ParserProc(P->proc, T->type, T, P); + psi_parser_proc_parse(P->proc, T->type, T, P); } else { - PSI_ParserProc(P->proc, 0, NULL, P); + psi_parser_proc_parse(P->proc, 0, NULL, P); } } -void PSI_ParserDtor(PSI_Parser *P) +void psi_parser_dtor(struct psi_parser *P) { - PSI_ParserProcFree(P->proc, free); + psi_parser_proc_free(&P->proc); + + switch (P->input.type) { + case PSI_PARSE_FILE: + if (P->input.data.file.buffer) { +#if HAVE_MMAP + munmap(P->input.data.file.buffer, P->input.data.file.length); +#else + free(P->input.data.file.buffer); +#endif + } + if (P->input.data.file.handle) { + fclose(P->input.data.file.handle); + } + break; - if (P->fp) { - fclose(P->fp); + case PSI_PARSE_STRING: + if (P->input.data.string.buffer) { + free(P->input.data.string.buffer); + } + break; } - PSI_DataDtor((PSI_Data *) P); + psi_data_dtor(PSI_DATA(P)); memset(P, 0, sizeof(*P)); } -void PSI_ParserFree(PSI_Parser **P) +void psi_parser_free(struct psi_parser **P) { if (*P) { - PSI_ParserDtor(*P); + psi_parser_dtor(*P); free(*P); *P = NULL; } } /*!max:re2c*/ -#define BSIZE 256 - #if BSIZE < YYMAXFILL # error BSIZE must be greater than YYMAXFILL #endif -#define PSI_T(n) \ -(n) == PSI_T_NAME ? "NAME" : \ -(n) == PSI_T_PLUS ? "PLUS" : \ -(n) == PSI_T_MINUS ? "MINUS" : \ -(n) == PSI_T_SLASH ? "SLASH" : \ -(n) == PSI_T_ASTERISK ? "ASTERISK" : \ -(n) == PSI_T_TEMP ? "TEMP" : \ -(n) == PSI_T_FREE ? "FREE" : \ -(n) == PSI_T_SET ? "SET" : \ -(n) == PSI_T_LET ? "LET" : \ -(n) == PSI_T_RETURN ? "RETURN" : \ -(n) == PSI_T_LIB ? "LIB" : \ -(n) == PSI_T_INT ? "INT" : \ -(n) == PSI_T_UNSIGNED ? "UNSIGNED" : \ -(n) == PSI_T_EOF ? "EOF" : \ -(n) == PSI_T_QUOTED_STRING ? "QUOTED_STRING" : \ -(n) == PSI_T_EOS ? "EOS" : \ -(n) == PSI_T_STRUCT ? "STRUCT" : \ -(n) == PSI_T_LBRACE ? "LBRACE" : \ -(n) == PSI_T_RBRACE ? "RBRACE" : \ -(n) == PSI_T_COLON ? "COLON" : \ -(n) == PSI_T_LPAREN ? "LPAREN" : \ -(n) == PSI_T_NUMBER ? "NUMBER" : \ -(n) == PSI_T_RPAREN ? "RPAREN" : \ -(n) == PSI_T_BOOL ? "BOOL" : \ -(n) == PSI_T_FLOAT ? "FLOAT" : \ -(n) == PSI_T_STRING ? "STRING" : \ -(n) == PSI_T_CONST ? "CONST" : \ -(n) == PSI_T_NSNAME ? "NSNAME" : \ -(n) == PSI_T_EQUALS ? "EQUALS" : \ -(n) == PSI_T_TYPEDEF ? "TYPEDEF" : \ -(n) == PSI_T_VOID ? "VOID" : \ -(n) == PSI_T_LBRACKET ? "LBRACKET" : \ -(n) == PSI_T_RBRACKET ? "RBRACKET" : \ -(n) == PSI_T_COMMA ? "COMMA" : \ -(n) == PSI_T_ELLIPSIS ? "ELLIPSIS" : \ -(n) == PSI_T_DOUBLE ? "DOUBLE" : \ -(n) == PSI_T_INT8 ? "INT8" : \ -(n) == PSI_T_UINT8 ? "UINT8" : \ -(n) == PSI_T_INT16 ? "INT16" : \ -(n) == PSI_T_UINT16 ? "UINT16" : \ -(n) == PSI_T_INT32 ? "INT32" : \ -(n) == PSI_T_UINT32 ? "UINT32" : \ -(n) == PSI_T_INT64 ? "INT64" : \ -(n) == PSI_T_UINT64 ? "UINT64" : \ -(n) == PSI_T_FUNCTION ? "FUNCTION" : \ -(n) == PSI_T_NULL ? "NULL" : \ -(n) == PSI_T_TRUE ? "TRUE" : \ -(n) == PSI_T_FALSE ? "FALSE" : \ -(n) == PSI_T_DOLLAR ? "DOLLAR" : \ -(n) == PSI_T_CALLOC ? "CALLOC" : \ -(n) == PSI_T_OBJVAL ? "OBJVAL" : \ -(n) == PSI_T_ARRVAL ? "ARRVAL" : \ -(n) == PSI_T_PATHVAL ? "PATHVAL" : \ -(n) == PSI_T_STRLEN ? "STRLEN" : \ -(n) == PSI_T_STRVAL ? "STRVAL" : \ -(n) == PSI_T_FLOATVAL ? "FLOATVAL" : \ -(n) == PSI_T_INTVAL ? "INTVAL" : \ -(n) == PSI_T_BOOLVAL ? "BOOLVAL" : \ -(n) == PSI_T_TO_OBJECT ? "TO_OBJECT" : \ -(n) == PSI_T_TO_ARRAY ? "TO_ARRAY" : \ -(n) == PSI_T_TO_STRING ? "TO_STRING" : \ -(n) == PSI_T_TO_INT ? "TO_INT" : \ -(n) == PSI_T_TO_FLOAT ? "TO_FLOAT" : \ -(n) == PSI_T_TO_BOOL ? "TO_BOOL" : \ -(n) == PSI_T_MIXED ? "MIXED" : \ -(n) == PSI_T_ARRAY ? "ARRAY" : \ -(n) == PSI_T_OBJECT ? "OBJECT" : \ -(n) == PSI_T_AMPERSAND ? "AMPERSAND" : \ - - #define RETURN(t) do { \ P->num = t; \ - if (P->flags & PSI_PARSER_DEBUG) { \ - fprintf(stderr, "PSI> TOKEN: %d %.*s (EOF=%d %s:%zu:%zu)\n", \ + PSI_DEBUG_PRINT(P, "PSI< TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \ P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \ - P->psi.file.fn, P->line, P->col); \ - } \ + P->file.fn, P->line, P->col); \ return t; \ } while(1) #define ADDCOLS \ P->col += P->cur - P->tok -#define NEWLINE \ +#define NEWLINE(label) \ P->col = 1; \ ++P->line; \ - goto nextline + goto label -token_t PSI_ParserScan(PSI_Parser *P) +token_t psi_parser_scan(struct psi_parser *P) { + if (!P->cur) { + psi_parser_fill(P, 0); + } for (;;) { ADDCOLS; nextline: @@ -241,17 +233,19 @@ token_t PSI_ParserScan(PSI_Parser *P) re2c:define:YYCURSOR = P->cur; re2c:define:YYLIMIT = P->lim; re2c:define:YYMARKER = P->mrk; - re2c:define:YYFILL = "{ if (!PSI_ParserFill(P,@@)) RETURN(PSI_T_EOF); }"; + re2c:define:YYFILL = "{ if (!psi_parser_fill(P,@@)) RETURN(PSI_T_EOF); }"; re2c:yyfill:parameter = 0; B = [^a-zA-Z0-9_]; W = [a-zA-Z0-9_]; NAME = [a-zA-Z_]W*; NSNAME = (NAME)? ("\\" NAME)+; + DOLLAR_NAME = '$' W+; QUOTED_STRING = "\"" ([^\"])+ "\""; NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?; - ("#"|"//") .* "\n" { NEWLINE; } + "/*" { goto comment; } + ("#"|"//") .* "\n" { NEWLINE(nextline); } "(" {RETURN(PSI_T_LPAREN);} ")" {RETURN(PSI_T_RPAREN);} ";" {RETURN(PSI_T_EOS);} @@ -262,22 +256,32 @@ token_t PSI_ParserScan(PSI_Parser *P) "[" {RETURN(PSI_T_LBRACKET);} "]" {RETURN(PSI_T_RBRACKET);} "=" {RETURN(PSI_T_EQUALS);} - "$" {RETURN(PSI_T_DOLLAR);} "*" {RETURN(PSI_T_ASTERISK);} + "~" {RETURN(PSI_T_TILDE);} + "!" {RETURN(PSI_T_NOT);} + "%" {RETURN(PSI_T_MODULO);} "&" {RETURN(PSI_T_AMPERSAND);} "+" {RETURN(PSI_T_PLUS);} "-" {RETURN(PSI_T_MINUS);} "/" {RETURN(PSI_T_SLASH);} + "|" {RETURN(PSI_T_PIPE);} + "^" {RETURN(PSI_T_CARET);} + "<<" {RETURN(PSI_T_LSHIFT);} + ">>" {RETURN(PSI_T_RSHIFT);} "..." {RETURN(PSI_T_ELLIPSIS);} - [\r\n] { NEWLINE; } + [\r\n] { NEWLINE(nextline); } [\t ]+ { continue; } 'TRUE' {RETURN(PSI_T_TRUE);} 'FALSE' {RETURN(PSI_T_FALSE);} 'NULL' {RETURN(PSI_T_NULL);} 'MIXED' {RETURN(PSI_T_MIXED);} + 'CALLABLE' {RETURN(PSI_T_CALLABLE);} 'VOID' {RETURN(PSI_T_VOID);} 'BOOL' {RETURN(PSI_T_BOOL);} + 'CHAR' {RETURN(PSI_T_CHAR);} + 'SHORT' {RETURN(PSI_T_SHORT);} 'INT' {RETURN(PSI_T_INT);} + 'LONG' {RETURN(PSI_T_LONG);} 'FLOAT' {RETURN(PSI_T_FLOAT);} 'DOUBLE' {RETURN(PSI_T_DOUBLE);} 'INT8_T' {RETURN(PSI_T_INT8);} @@ -289,12 +293,17 @@ token_t PSI_ParserScan(PSI_Parser *P) 'INT64_T' {RETURN(PSI_T_INT64);} 'UINT64_T' {RETURN(PSI_T_UINT64);} 'UNSIGNED' {RETURN(PSI_T_UNSIGNED);} + 'SIGNED' {RETURN(PSI_T_SIGNED);} 'STRING' {RETURN(PSI_T_STRING);} 'ARRAY' {RETURN(PSI_T_ARRAY);} 'OBJECT' {RETURN(PSI_T_OBJECT);} + 'CALLBACK' {RETURN(PSI_T_CALLBACK);} + 'STATIC' {RETURN(PSI_T_STATIC);} 'FUNCTION' {RETURN(PSI_T_FUNCTION);} 'TYPEDEF' {RETURN(PSI_T_TYPEDEF);} 'STRUCT' {RETURN(PSI_T_STRUCT);} + 'UNION' {RETURN(PSI_T_UNION);} + 'ENUM' {RETURN(PSI_T_ENUM);} 'CONST' {RETURN(PSI_T_CONST);} 'LIB' {RETURN(PSI_T_LIB);} 'LET' {RETURN(PSI_T_LET);} @@ -310,6 +319,8 @@ token_t PSI_ParserScan(PSI_Parser *P) 'BOOLVAL' {RETURN(PSI_T_BOOLVAL);} 'ARRVAL' {RETURN(PSI_T_ARRVAL);} 'OBJVAL' {RETURN(PSI_T_OBJVAL);} + 'ZVAL' {RETURN(PSI_T_ZVAL);} + 'COUNT' {RETURN(PSI_T_COUNT);} 'CALLOC' {RETURN(PSI_T_CALLOC);} 'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);} 'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);} @@ -320,9 +331,18 @@ token_t PSI_ParserScan(PSI_Parser *P) NUMBER {RETURN(PSI_T_NUMBER);} NAME {RETURN(PSI_T_NAME);} NSNAME {RETURN(PSI_T_NSNAME);} + DOLLAR_NAME {RETURN(PSI_T_DOLLAR_NAME);} QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);} [^] {break;} */ + + comment: + P->tok = P->cur; + /*!re2c + "\n" { NEWLINE(comment); } + "*" "/" { continue; } + [^] { goto comment; } + */ } return -1; }