From 410881ab37d993b0a15d295293a005aa0500ce8d Mon Sep 17 00:00:00 2001 From: Michael Wallner Date: Mon, 28 Nov 2016 13:01:09 +0100 Subject: [PATCH] parser: add mmap and string parser --- config.m4 | 1 + src/context.c | 7 +- src/module.c | 49 ++++++++- src/parser.h | 34 +++++-- src/parser.re | 187 ++++++++++++++++++++++++---------- src/parser_def.h | 6 +- src/parser_proc.y | 6 +- tests/parser/validate001.phpt | 20 ++-- 8 files changed, 228 insertions(+), 82 deletions(-) diff --git a/config.m4 b/config.m4 index 56635fa..db96d5f 100644 --- a/config.m4 +++ b/config.m4 @@ -83,6 +83,7 @@ if test "$PHP_PSI" != no; then AC_PATH_PROG(NM, nm) AC_FUNC_FNMATCH + AC_FUNC_MMAP PSI_CONFIG_INIT PSI_CHECK_STD_TYPES diff --git a/src/context.c b/src/context.c index 3ac26c7..1090e98 100644 --- a/src/context.c +++ b/src/context.c @@ -250,11 +250,16 @@ void psi_context_build(struct psi_context *C, const char *paths) C->error(PSI_DATA(C), NULL, PSI_WARNING, "Path to PSI file too long: %s/%s", ptr, entries[i]->d_name); } - if (!psi_parser_init(&P, psi, C->error, C->flags)) { + if (!psi_parser_init(&P, C->error, C->flags)) { C->error(PSI_DATA(C), NULL, PSI_WARNING, "Failed to init PSI parser (%s): %s", psi, strerror(errno)); continue; } + if (!psi_parser_open_file(&P, psi)) { + C->error(PSI_DATA(C), NULL, PSI_WARNING, "Failed to open PSI file (%s): %s", + psi, strerror(errno)); + continue; + } while (0 < psi_parser_scan(&P)) { psi_parser_parse(&P, psi_token_alloc(&P)); diff --git a/src/module.c b/src/module.c index 8201d5a..28c0414 100644 --- a/src/module.c +++ b/src/module.c @@ -101,7 +101,8 @@ zend_object *psi_object_init(zend_class_entry *ce) ZEND_BEGIN_ARG_INFO_EX(ai_psi_dump, 0, 0, 0) ZEND_ARG_INFO(0, stream) ZEND_END_ARG_INFO(); -static PHP_FUNCTION(psi_dump) { +static PHP_FUNCTION(psi_dump) +{ php_stream *s; zval *r = NULL; int fd = STDOUT_FILENO; @@ -122,7 +123,8 @@ static PHP_FUNCTION(psi_dump) { ZEND_BEGIN_ARG_INFO_EX(ai_psi_validate, 0, 0, 1) ZEND_ARG_INFO(0, file) ZEND_END_ARG_INFO(); -static PHP_FUNCTION(psi_validate) { +static PHP_FUNCTION(psi_validate) +{ zend_string *file; struct psi_parser P; struct psi_data D = {0}; @@ -131,7 +133,47 @@ static PHP_FUNCTION(psi_validate) { return; } - if (!psi_parser_init(&P, file->val, psi_error_wrapper, 0)) { + if (!psi_parser_init(&P, psi_error_wrapper, 0)) { + RETURN_FALSE; + } + if (!psi_parser_open_file(&P, file->val)) { + psi_parser_dtor(&P); + RETURN_FALSE; + } + + while (0 < psi_parser_scan(&P)) { + psi_parser_parse(&P, psi_token_alloc(&P)); + if (P.num == PSI_T_EOF) { + break; + } + } + psi_parser_parse(&P, NULL); + + psi_data_ctor(&D, P.error, P.flags); + RETVAL_BOOL(psi_data_validate(&D, PSI_DATA(&P)) && !P.errors); + psi_data_dtor(&D); + + psi_parser_dtor(&P); +} + +ZEND_BEGIN_ARG_INFO_EX(ai_psi_validate_string, 0, 0, 1) + ZEND_ARG_INFO(0, string) +ZEND_END_ARG_INFO(); +static PHP_FUNCTION(psi_validate_string) +{ + zend_string *string; + struct psi_parser P; + struct psi_data D = {0}; + + if (SUCCESS != zend_parse_parameters(ZEND_NUM_ARGS(), "S", &string)) { + return; + } + + if (!psi_parser_init(&P, psi_error_wrapper, 0)) { + RETURN_FALSE; + } + if (!psi_parser_open_string(&P, string->val, string->len)) { + psi_parser_dtor(&P); RETURN_FALSE; } @@ -247,6 +289,7 @@ static PHP_MINFO_FUNCTION(psi) static const zend_function_entry psi_functions[] = { PHP_FE(psi_dump, ai_psi_dump) PHP_FE(psi_validate, ai_psi_validate) + PHP_FE(psi_validate_string, ai_psi_validate_string) PHP_FE_END }; diff --git a/src/parser.h b/src/parser.h index 7c64b25..03484a8 100644 --- a/src/parser.h +++ b/src/parser.h @@ -36,16 +36,38 @@ struct psi_parser { PSI_DATA_MEMBERS; - FILE *fp; token_t num; - void *proc; unsigned line, col; - char *cur, *tok, *lim, *eof, *ctx, *mrk, buf[BSIZE]; + char *cur, *tok, *lim, *eof, *ctx, *mrk; + + /* internals */ + void *proc; + + struct { + enum psi_parser_input_type { + PSI_PARSE_FILE = 1, + PSI_PARSE_STRING + } type; + + union { + struct { + FILE *handle; + char *buffer; +#if HAVE_MMAP + size_t length; +#endif + } file; + struct { + char *buffer; + size_t length; + } string; + } data; + } input; }; -struct psi_parser *psi_parser_init(struct psi_parser *P, const char *filename, psi_error_cb error, unsigned flags); -void psi_parser_syntax_error(struct psi_parser *P, const char *fn, size_t ln, const char *msg, ...); -ssize_t psi_parser_fill(struct psi_parser *P, size_t n); +struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags); +bool psi_parser_open_file(struct psi_parser *P, const char *filename); +bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length); token_t psi_parser_scan(struct psi_parser *P); void psi_parser_parse(struct psi_parser *P, struct psi_token *src); void psi_parser_dtor(struct psi_parser *P); diff --git a/src/parser.re b/src/parser.re index d2edd5e..ddeaacc 100644 --- a/src/parser.re +++ b/src/parser.re @@ -1,4 +1,5 @@ #include "php_psi_stdinc.h" +#include #include #include "parser.h" @@ -8,28 +9,15 @@ void psi_parser_proc_Free(void*, void(*)(void*)); void psi_parser_proc_(void *, token_t, struct psi_token *, struct psi_parser *); void psi_parser_proc_Trace(FILE *, const char*); -struct psi_parser *psi_parser_init(struct psi_parser *P, const char *filename, psi_error_cb error, unsigned flags) +struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags) { - FILE *fp; - - fp = fopen(filename, "r"); - - if (!fp) { - if (!(flags & PSI_SILENT)) { - error(NULL, NULL, PSI_WARNING, "Could not open '%s' for reading: %s", - filename, strerror(errno)); - } - return NULL; - } - if (!P) { P = malloc(sizeof(*P)); } memset(P, 0, sizeof(*P)); psi_data_ctor_with_dtors(PSI_DATA(P), error, flags); - P->file.fn = strdup(filename); - P->fp = fp; + P->col = 1; P->line = 1; P->proc = psi_parser_proc_Alloc(malloc); @@ -38,49 +26,126 @@ struct psi_parser *psi_parser_init(struct psi_parser *P, const char *filename, p psi_parser_proc_Trace(stderr, "PSI> "); } - psi_parser_fill(P, 0); - return P; } -ssize_t psi_parser_fill(struct psi_parser *P, size_t n) +bool psi_parser_open_file(struct psi_parser *P, const char *filename) { - if (P->flags & PSI_DEBUG) { - fprintf(stderr, "PSI> Fill: n=%zu\n", n); + FILE *fp = fopen(filename, "r"); + + if (!fp) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not open '%s' for reading: %s", + filename, strerror(errno)); + return false; } - if (!n) { - P->cur = P->tok = P->lim = P->mrk = P->buf; - P->eof = NULL; + + P->input.type = PSI_PARSE_FILE; + P->input.data.file.handle = fp; + +#if HAVE_MMAP + struct stat sb; + int fd = fileno(fp); + + if (fstat(fd, &sb)) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not stat '%s': %s", + filename, strerror(errno)); + return false; } - if (!P->eof) { - size_t consumed = P->tok - P->buf; - size_t reserved = P->lim - P->tok; - size_t available = BSIZE - reserved; - size_t didread; - - if (consumed) { - memmove(P->buf, P->tok, reserved); - P->tok -= consumed; - P->cur -= consumed; - P->lim -= consumed; - P->mrk -= consumed; - } + P->input.data.file.buffer = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (MAP_FAILED == P->input.data.file.buffer) { + P->error(PSI_DATA(P), NULL, PSI_WARNING, + "Could not map '%s' for reading: %s", + filename, strerror(errno)); + return false; + } + P->input.data.file.length = sb.st_size; +#else + P->input.data.file.buffer = malloc(BSIZE); +#endif - didread = fread(P->lim, 1, available, P->fp); - P->lim += didread; - if (didread < available) { - P->eof = P->lim; - } + P->file.fn = strdup(filename); + + return true; +} + +bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length) +{ + P->input.type = PSI_PARSE_STRING; + P->input.data.string.length = length; + if (!(P->input.data.string.buffer = strndup(string, length))) { + return false; + } - if (P->flags & PSI_DEBUG) { - fprintf(stderr, "PSI> Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n", - consumed, reserved, available, didread); + P->file.fn = strdup(""); + + return true; +} + +static ssize_t psi_parser_fill(struct psi_parser *P, size_t n) +{ + PSI_DEBUG_PRINT(P, "PSI> Fill: n=%zu (input.type=%d)\n", n, P->input.type); + + /* init if n==0 */ + if (!n) { + switch (P->input.type) { + case PSI_PARSE_FILE: + P->cur = P->tok = P->mrk = P->input.data.file.buffer; +#if HAVE_MMAP + P->eof = P->input.data.file.buffer + P->input.data.file.length; + P->lim = P->eof; +#else + P->eof = NULL; + P->lim = P->input.data.file.buffer; +#endif + break; + + case PSI_PARSE_STRING: + P->cur = P->tok = P->mrk = P->input.data.string.buffer; + P->eof = P->input.data.string.buffer + P->input.data.string.length; + P->lim = P->eof; + break; } + + PSI_DEBUG_PRINT(P, "PSI> Fill: cur=%p lim=%p eof=%p\n", P->cur, P->lim, P->eof); } - if (P->flags & PSI_DEBUG) { - fprintf(stderr, "PSI> Fill: avail=%td\n", P->lim - P->cur); + + switch (P->input.type) { + case PSI_PARSE_STRING: + break; + + case PSI_PARSE_FILE: +#if !HAVE_MMAP + if (!P->eof) { + size_t consumed = P->tok - P->buf; + size_t reserved = P->lim - P->tok; + size_t available = BSIZE - reserved; + size_t didread; + + if (consumed) { + memmove(P->buf, P->tok, reserved); + P->tok -= consumed; + P->cur -= consumed; + P->lim -= consumed; + P->mrk -= consumed; + } + + didread = fread(P->lim, 1, available, P->fp); + P->lim += didread; + if (didread < available) { + P->eof = P->lim; + } + PSI_DEBUG_PRINT(P, "PSI> Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n", + consumed, reserved, available, didread); + } +#endif + break; } + + PSI_DEBUG_PRINT(P, "PSI> Fill: avail=%td\n", P->lim - P->cur); + return P->lim - P->cur; } @@ -97,8 +162,25 @@ void psi_parser_dtor(struct psi_parser *P) { psi_parser_proc_Free(P->proc, free); - if (P->fp) { - fclose(P->fp); + switch (P->input.type) { + case PSI_PARSE_FILE: + if (P->input.data.file.buffer) { +#if HAVE_MMAP + munmap(P->input.data.file.buffer, P->input.data.file.length); +#else + free(P->input.data.file.buffer); +#endif + } + if (P->input.data.file.handle) { + fclose(P->input.data.file.handle); + } + break; + + case PSI_PARSE_STRING: + if (P->input.data.string.buffer) { + free(P->input.data.string.buffer); + } + break; } psi_data_dtor(PSI_DATA(P)); @@ -116,19 +198,15 @@ void psi_parser_free(struct psi_parser **P) } /*!max:re2c*/ -#define BSIZE 256 - #if BSIZE < YYMAXFILL # error BSIZE must be greater than YYMAXFILL #endif #define RETURN(t) do { \ P->num = t; \ - if (P->flags & PSI_DEBUG) { \ - fprintf(stderr, "PSI> TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \ + PSI_DEBUG_PRINT(P, "PSI> TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \ P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \ P->file.fn, P->line, P->col); \ - } \ return t; \ } while(1) @@ -142,6 +220,9 @@ void psi_parser_free(struct psi_parser **P) token_t psi_parser_scan(struct psi_parser *P) { + if (!P->cur) { + psi_parser_fill(P, 0); + } for (;;) { ADDCOLS; nextline: diff --git a/src/parser_def.h b/src/parser_def.h index e57f51e..de5afe9 100644 --- a/src/parser_def.h +++ b/src/parser_def.h @@ -199,7 +199,7 @@ TOKEN_TYPE(free_exp, struct psi_free_exp*) TOKEN_DTOR(free_exp, psi_free_exp_free(&$$);) TOKEN_TYPE(impl_type, struct psi_impl_type*) TOKEN_DTOR(impl_type, psi_impl_type_free(&$$);) -TOKEN_TYPE(reference, char) +TOKEN_TYPE(reference, bool) TOKEN_TYPE(indirection, unsigned) TOKEN_TYPE(pointers, unsigned) @@ -1823,7 +1823,7 @@ PARSE_TYPED(free_exp, call, * reference: */ PARSE_TYPED(reference, r, ) { - r = 0; + r = false; } /* @@ -1831,7 +1831,7 @@ PARSE_TYPED(reference, r, ) { */ PARSE_TYPED(reference, r, TOKEN(AMPERSAND)) { - r = 1; + r = true; } /* diff --git a/src/parser_proc.y b/src/parser_proc.y index 8ada20f..f72be6e 100644 --- a/src/parser_proc.y +++ b/src/parser_proc.y @@ -132,7 +132,7 @@ %destructor free_exp {psi_free_exp_free(&$$);} %type impl_type {struct psi_impl_type*} %destructor impl_type {psi_impl_type_free(&$$);} -%type reference {char} +%type reference {bool} %type indirection {unsigned} %type pointers {unsigned} file ::= blocks. @@ -850,10 +850,10 @@ free_exp(call) ::= NAME(F) LPAREN decl_vars(vars) RPAREN. { call->token = F; } reference(r) ::= . { - r = 0; + r = false; } reference(r) ::= AMPERSAND. { - r = 1; + r = true; } indirection(i) ::= .{ i = 0; diff --git a/tests/parser/validate001.phpt b/tests/parser/validate001.phpt index 1f6113a..5774d77 100644 --- a/tests/parser/validate001.phpt +++ b/tests/parser/validate001.phpt @@ -9,9 +9,7 @@ extension_loaded("psi") or die("skip - need ext/psi"); ===DONE=== ---CLEAN-- - --EXPECTF-- ===TEST=== -Warning: PSI syntax error: Unexpected token ';' at pos 17 in %s001.psi on line 1 +Warning: PSI syntax error: Unexpected token ';' at pos 17 in %s on line 1 -Warning: PSI syntax error: Unexpected token 'int' at pos 14 in %s001.psi on line 1 +Warning: PSI syntax error: Unexpected token 'int' at pos 14 in %s on line 1 -Warning: Cannot compute size of empty struct 'a' in %s001.psi on line 1 +Warning: Cannot compute size of empty struct 'a' in %s on line 1 -Warning: Unknown variable 'X' in numeric expression in %s001.psi on line 4 +Warning: Unknown variable 'X' in numeric expression in %s on line 4 -Warning: PSI syntax error: Unexpected token '(' at pos 26 in %s001.psi on line 1 +Warning: PSI syntax error: Unexpected token '(' at pos 26 in %s on line 1 -Warning: PSI syntax error: Unexpected token '(' at pos 32 in %s001.psi on line 1 +Warning: PSI syntax error: Unexpected token '(' at pos 32 in %s on line 1 ===DONE=== -- 2.30.2