interned strings++
[m6w6/ext-psi] / src / parser.re
index e57353662c59fef7bef95e4adffeeba710074900..ebd1e1628de46248bf97d3cafe3589a189786d4f 100644 (file)
 #include "php_psi_stdinc.h"
 #include <sys/mman.h>
 #include <assert.h>
+#include <errno.h>
 #include <stdarg.h>
 
+#include <Zend/zend_smart_str.h>
+
 #include "parser.h"
 
 /*!max:re2c*/
@@ -38,7 +41,7 @@
 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
 {
        if (!P) {
-               P = malloc(sizeof(*P));
+               P = pemalloc(sizeof(*P), 1);
        }
        memset(P, 0, sizeof(*P));
 
@@ -46,8 +49,6 @@ struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, uns
 
        P->preproc = psi_cpp_init(P);
 
-       psi_cpp_load_defaults(P->preproc);
-
        return P;
 }
 
@@ -66,7 +67,7 @@ struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *
                return NULL;
        }
 
-       if (!(fb = malloc(sizeof(*fb) + strlen(filename) + 1 + sb.st_size + YYMAXFILL))) {
+       if (!(fb = pemalloc(sizeof(*fb) + sb.st_size + YYMAXFILL, 1))) {
                if (report_errors) {
                        P->error(PSI_DATA(P), NULL, PSI_WARNING,
                                        "Could not allocate %zu bytes for reading '%s': %s",
@@ -96,10 +97,8 @@ struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *
                return NULL;
        }
 
-       memset(fb->buffer + sb.st_size, 0, YYMAXFILL);
        fb->length = sb.st_size;
-       fb->file = &fb->buffer[sb.st_size + YYMAXFILL];
-       memcpy(fb->file, filename, strlen(filename) + 1);
+       fb->file = zend_string_init_interned(filename, strlen(filename), 1);
 
        return fb;
 }
@@ -108,7 +107,7 @@ struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char
 {
        struct psi_parser_input *sb;
 
-       if (!(sb = malloc(sizeof(*sb) + sizeof("<stdin>") + length + YYMAXFILL))) {
+       if (!(sb = pemalloc(sizeof(*sb) + length + YYMAXFILL, 1))) {
                P->error(PSI_DATA(P), NULL, PSI_WARNING,
                                "Could not allocate %zu bytes: %s",
                                length + YYMAXFILL, strerror(errno));
@@ -119,68 +118,11 @@ struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char
        memset(sb->buffer + length, 0, YYMAXFILL);
 
        sb->length = length;
-       sb->file = &sb->buffer[length + YYMAXFILL];
-       memcpy(sb->file, "<stdin>", sizeof("<stdin>"));
+       sb->file = zend_string_init_interned("<stdin>", strlen("<stdin>"), 1);
 
        return sb;
 }
 
-#if 0
-static void psi_parser_register_constants(struct psi_parser *P)
-{
-       zend_string *key;
-       zval *val;
-
-       ZEND_HASH_FOREACH_STR_KEY_VAL(&P->cpp.defs, key, val)
-       {
-               struct psi_impl_def_val *iv;
-               struct psi_const_type *ct;
-               struct psi_const *c;
-               const char *ctn;
-               token_t ctt;
-               impl_val tmp;
-               zend_string *str;
-
-               ZVAL_DEREF(val);
-               switch (Z_TYPE_P(val)) {
-               case IS_TRUE:
-               case IS_FALSE:
-                       ctt = PSI_T_BOOL;
-                       ctn = "bool";
-                       tmp.zend.bval = Z_TYPE_P(val) == IS_TRUE;
-                       break;
-               case IS_LONG:
-                       ctt = PSI_T_INT;
-                       ctn = "int";
-                       tmp.zend.lval = Z_LVAL_P(val);
-                       break;
-               case IS_DOUBLE:
-                       ctt = PSI_T_FLOAT;
-                       ctn = "float";
-                       tmp.dval = Z_DVAL_P(val);
-                       break;
-               default:
-                       ctt = PSI_T_STRING;
-                       ctn = "string";
-                       str = zval_get_string(val);
-                       tmp.zend.str = zend_string_dup(str, 1);
-                       zend_string_release(str);
-                       break;
-               }
-
-               iv = psi_impl_def_val_init(ctt, NULL);
-               iv->ival = tmp;
-               ct = psi_const_type_init(ctt, ctn);
-               c = psi_const_init(ct, key->val, iv);
-               if (!P->consts) {
-                       P->consts = psi_plist_init((psi_plist_dtor) psi_const_free);
-               }
-               P->consts = psi_plist_add(P->consts, &c);
-       }
-       ZEND_HASH_FOREACH_END();
-}
-#endif
-
 struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens)
 {
        if (psi_cpp_process(P->preproc, tokens)) {
@@ -197,6 +139,79 @@ bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *
        return true;
 }
 
+void psi_parser_postprocess(struct psi_parser *P) /* adds a constant to P->consts for each eligible macro found in P->preproc->defs */
+{
+       unsigned flags;
+       zend_string *name;
+       struct psi_validate_scope scope = {0};
+
+       psi_validate_scope_ctor(&scope);
+       scope.defs = &P->preproc->defs; /* the scope points at the preprocessor's macro table */
+
+       flags = P->flags; /* saved here, restored after the loop */
+       P->flags |= PSI_SILENT; /* presumably mutes diagnostics for macros that fail validation; verify */
+
+       /* register a constant for each macro that validates as a number expression or consists of a single quoted string */
+       ZEND_HASH_FOREACH_STR_KEY_PTR(&P->preproc->defs, name, scope.macro) /* scope.macro is assigned each table entry in turn */
+       {
+               if (scope.macro->sig) { /* macros that have a ->sig are skipped: this branch is empty */
+               } else if (scope.macro->exp) {
+                       if (psi_num_exp_validate(PSI_DATA(P), scope.macro->exp, &scope)) { /* expressions that do not validate yield no constant */
+                               struct psi_impl_type *type;
+                               struct psi_impl_def_val *def;
+                               struct psi_const *cnst;
+                               struct psi_num_exp *num;
+                               smart_str ns_name = {0};
+                               zend_string *name_str, *type_str;
+
+                               smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1); /* constant name is the macro name behind a psi namespace prefix */
+                               smart_str_append_ex(&ns_name, name, 1);
+                               name_str = smart_str_extract(&ns_name);
+                               type_str = zend_string_init_interned(ZEND_STRL("<eval number>"), 1);
+
+                               num = psi_num_exp_copy(scope.macro->exp); /* the constant gets a copy, not the macro's own expression */
+                               def = psi_impl_def_val_init(PSI_T_NUMBER, num);
+                               type = psi_impl_type_init(PSI_T_NUMBER, type_str);
+                               cnst = psi_const_init(type, name_str, def);
+                               P->consts = psi_plist_add(P->consts, &cnst); /* NOTE(review): P->consts is not initialized in this function; confirm psi_plist_add() accepts NULL */
+                               zend_string_release(name_str); /* local references dropped; presumably psi_const_init() and psi_impl_type_init() retained their own; verify */
+                               zend_string_release(type_str);
+                       }
+               } else {
+                       if (psi_plist_count(scope.macro->tokens) == 1) { /* neither ->sig nor ->exp: only single-token bodies are considered */
+                               struct psi_token *t;
+
+                               if (psi_plist_get(scope.macro->tokens, 0, &t)) {
+                                       if (t->type == PSI_T_QUOTED_STRING) { /* any other token type yields no constant */
+                                               struct psi_impl_type *type;
+                                               struct psi_impl_def_val *def;
+                                               struct psi_const *cnst;
+                                               smart_str ns_name = {0};
+                                               zend_string *name_str, *type_str;
+
+                                               smart_str_appendl_ex(&ns_name, ZEND_STRL("psi\\"), 1); /* same naming scheme as the number branch */
+                                               smart_str_append_ex(&ns_name, name, 1);
+                                               name_str = smart_str_extract(&ns_name);
+                                               type_str = zend_string_init_interned(ZEND_STRL("string"), 1);
+
+                                               type = psi_impl_type_init(PSI_T_STRING, type_str);
+                                               def = psi_impl_def_val_init(PSI_T_QUOTED_STRING, t->text); /* the token's text is handed over as the default value */
+                                               cnst = psi_const_init(type, name_str, def);
+                                               P->consts = psi_plist_add(P->consts, &cnst); /* NOTE(review): same NULL-list question as in the number branch */
+                                               zend_string_release(name_str);
+                                               zend_string_release(type_str);
+                                       }
+                               }
+                       }
+               }
+       }
+       ZEND_HASH_FOREACH_END();
+
+       P->flags = flags; /* undo the temporary PSI_SILENT */
+
+       psi_validate_scope_dtor(&scope);
+}
+
 bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
 {
        struct psi_plist *scanned, *preproc;
@@ -216,6 +231,8 @@ bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
                return false;
        }
 
+       psi_parser_postprocess(P);
+
        psi_plist_free(preproc);
        return true;
 }
@@ -242,17 +259,19 @@ void psi_parser_free(struct psi_parser **P)
        ++I->lines
 
 #define NEWTOKEN(t) \
-       token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); \
+       if (t == PSI_T_COMMENT || t == PSI_T_WHITESPACE) { /* comment and whitespace tokens are created with empty text */ \
+               token = psi_token_init(t, "", 0, tok - eol + 1, I->lines, I->file); /* column, line and file are still recorded */ \
+       } else { \
+               token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); /* text is the cur - tok bytes starting at tok */ \
+       } \
        tokens = psi_plist_add(tokens, &token); \
        if (P->flags & PSI_DEBUG) { \
                fprintf(stderr, "PSI< "); \
                psi_token_dump(2, token); \
        }
 
-union int_suffix {
-       char s[SIZEOF_UINT32_T];
-       uint32_t i;
-};
+
+
 
 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
 {
@@ -263,6 +282,8 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
        bool escaped;
        token_t char_width;
 
+       PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file->val);
+
        tok = mrk = eol = cur = I->buffer;
        lim = I->buffer + I->length;
        I->lines = 1;
@@ -273,6 +294,8 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
                ctxmrk = NULL;
                tok = cur;
 
+               (void) ctxmrk;
+
                /*!re2c
 
                re2c:indent:top = 2;
@@ -321,7 +344,7 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
                "u8" / "\""             { char_width = 1; }
                "u" / ['"]              { char_width = 2; }
                "U" / ['"]              { char_width = 4; }
-               "L" / ['"]              { char_width = SIZEOF_WCHAR_T/8; }
+               "L" / ['"]              { char_width = sizeof(wchar_t)/8; }
 
                "/*"                    { goto comment; }
                "//"                    { goto comment_sl; }
@@ -364,10 +387,28 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
                "?"                             { NEWTOKEN(PSI_T_IIF); goto start; }
                "pragma"                { NEWTOKEN(PSI_T_PRAGMA); goto start; }
                "pragma" W+ "once"      { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
+               "__inline"              { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
                "__restrict"    { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
                "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
                "__asm__"               { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
+               "volatile"              { NEWTOKEN(PSI_T_VOLATILE); goto start; }
+               "sizeof"                { NEWTOKEN(PSI_T_SIZEOF); goto start; }
                "line"                  { NEWTOKEN(PSI_T_LINE); goto start; }
+               "typedef"               { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
+               "struct"                { NEWTOKEN(PSI_T_STRUCT); goto start; }
+               "union"                 { NEWTOKEN(PSI_T_UNION); goto start; }
+               "enum"                  { NEWTOKEN(PSI_T_ENUM); goto start; }
+               "const"                 { NEWTOKEN(PSI_T_CONST); goto start; }
+               "void"                  { NEWTOKEN(PSI_T_VOID); goto start; }
+               "bool"                  { NEWTOKEN(PSI_T_BOOL); goto start; }
+               "char"                  { NEWTOKEN(PSI_T_CHAR); goto start; }
+               "short"                 { NEWTOKEN(PSI_T_SHORT); goto start; }
+               "int"                   { NEWTOKEN(PSI_T_INT); goto start; }
+               "long"                  { NEWTOKEN(PSI_T_LONG); goto start; }
+               "float"                 { NEWTOKEN(PSI_T_FLOAT); goto start; }
+               "double"                { NEWTOKEN(PSI_T_DOUBLE); goto start; }
+               "unsigned"              { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
+               "signed"                { NEWTOKEN(PSI_T_SIGNED); goto start; }
                'IF'                    { NEWTOKEN(PSI_T_IF); goto start; }
                'IFDEF'                 { NEWTOKEN(PSI_T_IFDEF); goto start; }
                'IFNDEF'                { NEWTOKEN(PSI_T_IFNDEF); goto start; }
@@ -386,41 +427,19 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
                'NULL'                  { NEWTOKEN(PSI_T_NULL); goto start; }
                'MIXED'                 { NEWTOKEN(PSI_T_MIXED); goto start; }
                'CALLABLE'              { NEWTOKEN(PSI_T_CALLABLE); goto start; }
-               'VOID'                  { NEWTOKEN(PSI_T_VOID); goto start; }
-               'BOOL'                  { NEWTOKEN(PSI_T_BOOL); goto start; }
-               'CHAR'                  { NEWTOKEN(PSI_T_CHAR); goto start; }
-               'SHORT'                 { NEWTOKEN(PSI_T_SHORT); goto start; }
-               'INT'                   { NEWTOKEN(PSI_T_INT); goto start; }
-               'LONG'                  { NEWTOKEN(PSI_T_LONG); goto start; }
-               'FLOAT'                 { NEWTOKEN(PSI_T_FLOAT); goto start; }
-               'DOUBLE'                { NEWTOKEN(PSI_T_DOUBLE); goto start; }
-               'INT8_T'                { NEWTOKEN(PSI_T_INT8); goto start; }
-               'UINT8_T'               { NEWTOKEN(PSI_T_UINT8); goto start; }
-               'INT16_T'               { NEWTOKEN(PSI_T_INT16); goto start; }
-               'UINT16_T'              { NEWTOKEN(PSI_T_UINT16); goto start; }
-               'INT32_T'               { NEWTOKEN(PSI_T_INT32); goto start; }
-               'UINT32_T'              { NEWTOKEN(PSI_T_UINT32); goto start; }
-               'INT64_T'               { NEWTOKEN(PSI_T_INT64); goto start; }
-               'UINT64_T'              { NEWTOKEN(PSI_T_UINT64); goto start; }
-               'UNSIGNED'              { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
-               'SIGNED'                { NEWTOKEN(PSI_T_SIGNED); goto start; }
                'STRING'                { NEWTOKEN(PSI_T_STRING); goto start; }
                'ARRAY'                 { NEWTOKEN(PSI_T_ARRAY); goto start; }
                'OBJECT'                { NEWTOKEN(PSI_T_OBJECT); goto start; }
                'CALLBACK'              { NEWTOKEN(PSI_T_CALLBACK); goto start; }
                'STATIC'                { NEWTOKEN(PSI_T_STATIC); goto start; }
                'FUNCTION'              { NEWTOKEN(PSI_T_FUNCTION); goto start; }
-               'TYPEDEF'               { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
-               'STRUCT'                { NEWTOKEN(PSI_T_STRUCT); goto start; }
-               'UNION'                 { NEWTOKEN(PSI_T_UNION); goto start; }
-               'ENUM'                  { NEWTOKEN(PSI_T_ENUM); goto start; }
-               'CONST'                 { NEWTOKEN(PSI_T_CONST); goto start; }
                'LIB'                   { NEWTOKEN(PSI_T_LIB); goto start; }
                'LET'                   { NEWTOKEN(PSI_T_LET); goto start; }
                'SET'                   { NEWTOKEN(PSI_T_SET); goto start; }
                'PRE_ASSERT'    { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
                'POST_ASSERT'   { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
                'RETURN'                { NEWTOKEN(PSI_T_RETURN); goto start; }
+               'AS'                    { NEWTOKEN(PSI_T_AS); goto start; }
                'FREE'                  { NEWTOKEN(PSI_T_FREE); goto start; }
                'TEMP'                  { NEWTOKEN(PSI_T_TEMP); goto start; }
                'STRLEN'                { NEWTOKEN(PSI_T_STRLEN); goto start; }
@@ -522,7 +541,7 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
 error: ;
 
        P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
-                       token->type, token->size, token->text, tok - eol + 1);
+                       token->type, token->text->len, token->text->val, tok - eol + 1);
        psi_plist_free(tokens);
        return NULL;