#include "php_psi_stdinc.h"
#include <sys/mman.h>
#include <assert.h>
+#include <errno.h>
#include <stdarg.h>
+#include <Zend/zend_smart_str.h>
+
#include "parser.h"
/*!max:re2c*/
struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
{
if (!P) {
- P = malloc(sizeof(*P));
+ P = pemalloc(sizeof(*P), 1);
}
memset(P, 0, sizeof(*P));
P->preproc = psi_cpp_init(P);
- psi_cpp_load_defaults(P->preproc);
-
return P;
}
return NULL;
}
- if (!(fb = malloc(sizeof(*fb) + strlen(filename) + 1 + sb.st_size + YYMAXFILL))) {
+ if (!(fb = pemalloc(sizeof(*fb) + sb.st_size + YYMAXFILL, 1))) {
if (report_errors) {
P->error(PSI_DATA(P), NULL, PSI_WARNING,
"Could not allocate %zu bytes for reading '%s': %s",
return NULL;
}
- memset(fb->buffer + sb.st_size, 0, YYMAXFILL);
fb->length = sb.st_size;
- fb->file = &fb->buffer[sb.st_size + YYMAXFILL];
- memcpy(fb->file, filename, strlen(filename) + 1);
+ fb->file = zend_string_init_interned(filename, strlen(filename), 1);
return fb;
}
{
struct psi_parser_input *sb;
- if (!(sb = malloc(sizeof(*sb) + sizeof("<stdin>") + length + YYMAXFILL))) {
+ if (!(sb = pemalloc(sizeof(*sb) + length + YYMAXFILL, 1))) {
P->error(PSI_DATA(P), NULL, PSI_WARNING,
"Could not allocate %zu bytes: %s",
length + YYMAXFILL, strerror(errno));
memset(sb->buffer + length, 0, YYMAXFILL);
sb->length = length;
- sb->file = &sb->buffer[length + YYMAXFILL];
- memcpy(sb->file, "<stdin>", sizeof("<stdin>"));
+ sb->file = zend_string_init_interned("<stdin>", strlen("<stdin>"), 1);
return sb;
}
-#if 0
-static void psi_parser_register_constants(struct psi_parser *P)
-{
- zend_string *key;
- zval *val;
-
- ZEND_HASH_FOREACH_STR_KEY_VAL(&P->cpp.defs, key, val)
- {
- struct psi_impl_def_val *iv;
- struct psi_const_type *ct;
- struct psi_const *c;
- const char *ctn;
- token_t ctt;
- impl_val tmp;
- zend_string *str;
-
- ZVAL_DEREF(val);
- switch (Z_TYPE_P(val)) {
- case IS_TRUE:
- case IS_FALSE:
- ctt = PSI_T_BOOL;
- ctn = "bool";
- tmp.zend.bval = Z_TYPE_P(val) == IS_TRUE;
- break;
- case IS_LONG:
- ctt = PSI_T_INT;
- ctn = "int";
- tmp.zend.lval = Z_LVAL_P(val);
- break;
- case IS_DOUBLE:
- ctt = PSI_T_FLOAT;
- ctn = "float";
- tmp.dval = Z_DVAL_P(val);
- break;
- default:
- ctt = PSI_T_STRING;
- ctn = "string";
- str = zval_get_string(val);
- tmp.zend.str = zend_string_dup(str, 1);
- zend_string_release(str);
- break;
- }
-
- iv = psi_impl_def_val_init(ctt, NULL);
- iv->ival = tmp;
- ct = psi_const_type_init(ctt, ctn);
- c = psi_const_init(ct, key->val, iv);
- if (!P->consts) {
- P->consts = psi_plist_init((psi_plist_dtor) psi_const_free);
- }
- P->consts = psi_plist_add(P->consts, &c);
- }
- ZEND_HASH_FOREACH_END();
-}
-#endif
-
-struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist *tokens)
+struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens) /* Run the preprocessor over a token list.
+ *
+ * Passes the caller's list pointer, unchanged, to psi_cpp_process() together
+ * with P->preproc.
+ *
+ * Returns the list found behind tokens once psi_cpp_process() has reported
+ * success, or NULL when it has not.
+ * NOTE(review): psi_cpp_process() presumably may replace the list through the
+ * pointer, which is why the result is re-read from it -- confirm.
+ */
{
- if (psi_cpp_process(P->preproc, &tokens)) {
- return tokens;
+ if (psi_cpp_process(P->preproc, tokens)) {
+ return *tokens; /* read back through the pointer after processing */
}
return NULL;
}
return true;
}
+void psi_parser_postprocess(struct psi_parser *P) /* Register eligible preprocessor macros of P as constants.
+ *
+ * Walks P->preproc->defs and appends one constant to P->consts for each macro
+ * that has no signature and either
+ *   - carries a numeric expression accepted by psi_num_exp_validate(), or
+ *   - consists of exactly one token of type PSI_T_QUOTED_STRING.
+ * The constant name is the macro name prefixed with "psi" and a backslash.
+ *
+ * PSI_SILENT is OR-ed into P->flags for the duration of the walk and the
+ * previous flag value is written back afterwards.
+ */
+{
+ unsigned flags;
+ zend_string *name;
+ struct psi_validate_scope scope = {0};
+
+ psi_validate_scope_ctor(&scope);
+ scope.defs = &P->preproc->defs; /* same macro table that is iterated below */
+
+ flags = P->flags; /* remember the current flags so they can be restored */
+ P->flags |= PSI_SILENT; /* NOTE(review): presumably mutes diagnostics from failed validations -- confirm */
+
+ /* register const macros */
+ ZEND_HASH_FOREACH_STR_KEY_PTR(&P->preproc->defs, name, scope.macro) /* scope.macro doubles as the loop cursor */
+ {
+ if (scope.macro->sig) { /* macro with a signature: nothing is registered */
+ } else if (scope.macro->exp) {
+ if (psi_num_exp_validate(PSI_DATA(P), scope.macro->exp, &scope)) { /* only validated expressions become constants */
+ struct psi_impl_type *type;
+ struct psi_impl_def_val *def;
+ struct psi_const *cnst;
+ struct psi_num_exp *num;
+ smart_str ns_name = {0};
+ zend_string *name_str, *type_str;
+
+ smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1); /* namespace prefix first ... */
+ smart_str_append_ex(&ns_name, name, 1); /* ... then the macro name */
+ name_str = smart_str_extract(&ns_name);
+ type_str = zend_string_init_interned(ZEND_STRL("<eval number>"), 1);
+
+ num = psi_num_exp_copy(scope.macro->exp); /* the def val gets the copy, not the macro's own expression */
+ def = psi_impl_def_val_init(PSI_T_NUMBER, num);
+ type = psi_impl_type_init(PSI_T_NUMBER, type_str);
+ cnst = psi_const_init(type, name_str, def);
+ P->consts = psi_plist_add(P->consts, &cnst);
+ zend_string_release(name_str); /* NOTE(review): assumes psi_const_init() kept its own reference -- confirm */
+ zend_string_release(type_str); /* NOTE(review): assumes psi_impl_type_init() kept its own reference -- confirm */
+ }
+ } else {
+ if (psi_plist_count(scope.macro->tokens) == 1) { /* only single-token macros are considered here */
+ struct psi_token *t;
+
+ if (psi_plist_get(scope.macro->tokens, 0, &t)) {
+ if (t->type == PSI_T_QUOTED_STRING) { /* ... and only when that token is a quoted string */
+ struct psi_impl_type *type;
+ struct psi_impl_def_val *def;
+ struct psi_const *cnst;
+ smart_str ns_name = {0};
+ zend_string *name_str, *type_str;
+
+ smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1); /* same naming scheme as the numeric branch */
+ smart_str_append_ex(&ns_name, name, 1);
+ name_str = smart_str_extract(&ns_name);
+ type_str = zend_string_init_interned(ZEND_STRL("string"), 1);
+
+ type = psi_impl_type_init(PSI_T_STRING, type_str);
+ def = psi_impl_def_val_init(PSI_T_QUOTED_STRING, t->text); /* value is the token's text */
+ cnst = psi_const_init(type, name_str, def);
+ P->consts = psi_plist_add(P->consts, &cnst);
+ zend_string_release(name_str); /* NOTE(review): assumes psi_const_init() kept its own reference -- confirm */
+ zend_string_release(type_str); /* NOTE(review): assumes psi_impl_type_init() kept its own reference -- confirm */
+ }
+ }
+ }
+ }
+ }
+ ZEND_HASH_FOREACH_END();
+
+ P->flags = flags; /* put back the flags saved before the loop */
+
+ psi_validate_scope_dtor(&scope);
+}
+
bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
{
struct psi_plist *scanned, *preproc;
return false;
}
- if (!(preproc = psi_parser_preprocess(P, scanned))) {
+ if (!(preproc = psi_parser_preprocess(P, &scanned))) {
psi_plist_free(scanned);
return false;
}
return false;
}
+ psi_parser_postprocess(P);
+
psi_plist_free(preproc);
return true;
}
++I->lines
#define NEWTOKEN(t) /* build a token of type t, append it to tokens; expects token, tokens, tok, cur, eol, I and P in the expanding scope */ \
- token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
+ if (t == PSI_T_COMMENT || t == PSI_T_WHITESPACE) { /* these two kinds are created with empty text and length 0 */ \
+ token = psi_token_init(t, "", 0, tok - eol + 1, I->lines, I->file); \
+ } else { /* every other kind carries the bytes from tok up to cur */ \
+ token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
+ } \
tokens = psi_plist_add(tokens, &token); \
if (P->flags & PSI_DEBUG) { /* with PSI_DEBUG set, announce each new token on stderr */ \
fprintf(stderr, "PSI< "); \
psi_token_dump(2, token); \
}
-union int_suffix {
- char s[SIZEOF_UINT32_T];
- uint32_t i;
-};
+
+
struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
{
struct psi_token *token;
const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
unsigned parens;
+ bool escaped;
+ token_t char_width;
+
+ PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file->val);
tok = mrk = eol = cur = I->buffer;
lim = I->buffer + I->length;
I->lines = 1;
- tokens = psi_plist_init((void (*)(void *)) psi_token_free);
+ tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
start: ;
+ char_width = 1;
ctxmrk = NULL;
tok = cur;
+ (void) ctxmrk;
+
/*!re2c
re2c:indent:top = 2;
re2c:yyfill:parameter = 0;
W = [a-zA-Z0-9_\x80-\xff];
- SP = [ \t];
+ SP = [ \t\f];
EOL = [\r\n];
- NAME = [a-zA-Z_\x80-\xff]W*;
+ NAME = [a-zA-Z_\x80-\xff] W*;
NSNAME = (NAME)? ("\\" NAME)+;
DOLLAR_NAME = '$' W+;
- QUOTED_STRING = "L"? "\"" ([^"])+ "\"";
- QUOTED_CHAR = "L"? "'" ([^']+ "\\'"?)+ "'";
CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
- CPP_ATTRIBUTE = "__attribute__" W* "((";
- CPP_PRAGMA_ONCE = "pragma" W+ "once";
+ CPP_ATTRIBUTE = "__attribute__" SP* "((";
DEC_CONST = [1-9] [0-9]*;
OCT_CONST = "0" [0-7]*;
HEX_CONST = '0x' [0-9a-fA-F]+;
INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
- INT_SUFFIX = 'u'('l' 'l'? )? | 'l'('l'? 'u')?;
- INT_NUMBER = (DEC_CONST | OCT_CONST | HEX_CONST) INT_SUFFIX?;
- FLT_HEX_FRAC = [0-9a-fA-F]*;
- FLT_HEX_SIG = HEX_CONST ("." FLT_HEX_FRAC)?;
- FLT_HEX_EXPO = 'p' [+-]? [0-9]+;
- FLT_HEX_CONST = FLT_HEX_SIG FLT_HEX_EXPO;
+ FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
FLT_DEC_NUM = "0" | DEC_CONST;
- FLT_DEC_FRAC = [0-9]*;
- FLT_DEC_SIG = FLT_DEC_NUM ("." FLT_DEC_FRAC)?;
- FLT_DEC_EXPO = 'e' [+-]? [0-9]+;
- FLT_DEC_CONST = (FLT_DEC_SIG FLT_DEC_EXPO) | (FLT_DEC_NUM "." FLT_DEC_FRAC) | ("." [0-9]+);
+ FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
- FLT_SUFFIX = 'f' | 'l' | ('d' ('f' | 'd' | 'l'));
- FLT_NUMBER = (FLT_DEC_CONST | FLT_HEX_CONST) FLT_SUFFIX?;
[+-]? INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
[+-]? INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
[+-]? FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
[+-]? FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
+ "'" { escaped = false; tok += 1; goto character; }
+ "\"" { escaped = false; tok += 1; goto string; }
+ "u8" / "\"" { char_width = 1; }
+ "u" / ['"] { char_width = 2; }
+ "U" / ['"] { char_width = 4; }
+ "L" / ['"] { char_width = sizeof(wchar_t)/8; }
+
"/*" { goto comment; }
"//" { goto comment_sl; }
+
+ "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
"#" { NEWTOKEN(PSI_T_HASH); goto start; }
"(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
"..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
"?" { NEWTOKEN(PSI_T_IIF); goto start; }
"pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
- "once" { NEWTOKEN(PSI_T_ONCE); goto start; }
+ "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
+ "__inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
+ "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
+ "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
+ "__asm__" { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
+ "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; }
+ "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; }
+ "line" { NEWTOKEN(PSI_T_LINE); goto start; }
+ "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
+ "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; }
+ "union" { NEWTOKEN(PSI_T_UNION); goto start; }
+ "enum" { NEWTOKEN(PSI_T_ENUM); goto start; }
+ "const" { NEWTOKEN(PSI_T_CONST); goto start; }
+ "void" { NEWTOKEN(PSI_T_VOID); goto start; }
+ "bool" { NEWTOKEN(PSI_T_BOOL); goto start; }
+ "char" { NEWTOKEN(PSI_T_CHAR); goto start; }
+ "short" { NEWTOKEN(PSI_T_SHORT); goto start; }
+ "int" { NEWTOKEN(PSI_T_INT); goto start; }
+ "long" { NEWTOKEN(PSI_T_LONG); goto start; }
+ "float" { NEWTOKEN(PSI_T_FLOAT); goto start; }
+ "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; }
+ "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
+ "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; }
'IF' { NEWTOKEN(PSI_T_IF); goto start; }
'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
- 'VOID' { NEWTOKEN(PSI_T_VOID); goto start; }
- 'BOOL' { NEWTOKEN(PSI_T_BOOL); goto start; }
- 'CHAR' { NEWTOKEN(PSI_T_CHAR); goto start; }
- 'SHORT' { NEWTOKEN(PSI_T_SHORT); goto start; }
- 'INT' { NEWTOKEN(PSI_T_INT); goto start; }
- 'LONG' { NEWTOKEN(PSI_T_LONG); goto start; }
- 'FLOAT' { NEWTOKEN(PSI_T_FLOAT); goto start; }
- 'DOUBLE' { NEWTOKEN(PSI_T_DOUBLE); goto start; }
- 'INT8_T' { NEWTOKEN(PSI_T_INT8); goto start; }
- 'UINT8_T' { NEWTOKEN(PSI_T_UINT8); goto start; }
- 'INT16_T' { NEWTOKEN(PSI_T_INT16); goto start; }
- 'UINT16_T' { NEWTOKEN(PSI_T_UINT16); goto start; }
- 'INT32_T' { NEWTOKEN(PSI_T_INT32); goto start; }
- 'UINT32_T' { NEWTOKEN(PSI_T_UINT32); goto start; }
- 'INT64_T' { NEWTOKEN(PSI_T_INT64); goto start; }
- 'UINT64_T' { NEWTOKEN(PSI_T_UINT64); goto start; }
- 'UNSIGNED' { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
- 'SIGNED' { NEWTOKEN(PSI_T_SIGNED); goto start; }
'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
- 'TYPEDEF' { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
- 'STRUCT' { NEWTOKEN(PSI_T_STRUCT); goto start; }
- 'UNION' { NEWTOKEN(PSI_T_UNION); goto start; }
- 'ENUM' { NEWTOKEN(PSI_T_ENUM); goto start; }
- 'CONST' { NEWTOKEN(PSI_T_CONST); goto start; }
'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
'LET' { NEWTOKEN(PSI_T_LET); goto start; }
'SET' { NEWTOKEN(PSI_T_SET); goto start; }
'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
+ 'AS' { NEWTOKEN(PSI_T_AS); goto start; }
'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
NAME { NEWTOKEN(PSI_T_NAME); goto start; }
NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
- QUOTED_STRING { NEWTOKEN(PSI_T_QUOTED_STRING); goto start; }
- QUOTED_CHAR { NEWTOKEN(PSI_T_QUOTED_CHAR); goto start; }
- CPP_HEADER { NEWTOKEN(PSI_T_CPP_HEADER); goto start; }
+ CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
*/
+ character: ;
+ /*!re2c
+
+ EOL { NEWLINE(); goto character; }
+ "\\" { escaped = !escaped; goto character; }
+ "'" {
+ if (escaped) {
+ escaped = false;
+ goto character;
+ }
+ cur -= 1;
+ NEWTOKEN(PSI_T_QUOTED_CHAR);
+ cur += 1;
+ token->flags = char_width;
+ goto start;
+ }
+ * { escaped = false; goto character; }
+
+ */
+
+ string: ;
+ /*!re2c
+
+ EOL { NEWLINE(); goto string; }
+ "\\" { escaped = !escaped; goto string; }
+ "\"" {
+ if (escaped) {
+ escaped = false;
+ goto string;
+ }
+ cur -= 1;
+ NEWTOKEN(PSI_T_QUOTED_STRING);
+ cur += 1;
+ token->flags = char_width;
+ goto start;
+ }
+ * { escaped = false; goto string; }
+
+ */
+
comment: ;
/*!re2c
error: ;
P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
- token->type, token->size, token->text, tok - eol + 1);
+ token->type, token->text->len, token->text->val, tok - eol + 1);
psi_plist_free(tokens);
return NULL;