lift single lib statement restriction
[m6w6/ext-psi] / src / parser.re
index 59c8bdaf141d12136b62afe2b794d884166fb2e9..7892b14d544f15eed503fb45f735fcdbbc7ac716 100644 (file)
@@ -26,6 +26,7 @@
 #include "php_psi_stdinc.h"
 #include <sys/mman.h>
 #include <assert.h>
+#include <errno.h>
 #include <stdarg.h>
 
 #include "parser.h"
@@ -181,10 +182,10 @@ static void psi_parser_register_constants(struct psi_parser *P)
 }
 #endif
 
-struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist *tokens)
+struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens) /* runs psi_cpp_process() on the caller's token list pointer; returns *tokens, or NULL when that call returns false */
 {
-       if (psi_cpp_process(P->preproc, &tokens)) {
-               return tokens;
+       if (psi_cpp_process(P->preproc, tokens)) { /* the caller's pointer is forwarded as-is, no local copy */
+               return *tokens; /* whatever list the caller's pointer refers to after processing */
        }
        return NULL;
 }
@@ -197,6 +198,67 @@ bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *
        return true;
 }
 
+void psi_parser_postprocess(struct psi_parser *P) /* appends a psi_const to P->consts for each eligible macro found in P->preproc->defs */
+{
+       unsigned flags; /* copy of P->flags taken before PSI_SILENT is added; written back before returning */
+       zend_string *name; /* key of the macro currently visited by the hash iteration */
+       struct psi_validate_scope scope = {0};
+
+       psi_validate_scope_ctor(&scope);
+       scope.defs = &P->preproc->defs;
+
+       flags = P->flags;
+       P->flags |= PSI_SILENT; /* set only while the loop below runs; presumably mutes validation diagnostics - confirm */
+
+       /* register const macros: each entry of P->preproc->defs is visited with scope.macro pointing at it */
+       ZEND_HASH_FOREACH_STR_KEY_PTR(&P->preproc->defs, name, scope.macro)
+       {
+               if (scope.macro->sig) { /* macros whose sig is set get no constant (empty branch) */
+               } else if (scope.macro->exp) {
+                       if (psi_num_exp_validate(PSI_DATA(P), scope.macro->exp, &scope)) { /* only expressions that validate become numeric constants */
+                               struct psi_impl_type *type;
+                               struct psi_impl_def_val *def;
+                               struct psi_const *cnst;
+                               struct psi_num_exp *num;
+                               char *name_str = malloc(name->len + sizeof("psi\\")); /* sizeof("psi\\") is 5: the 4-char prefix plus the NUL; NOTE(review): the malloc result is used without a NULL check */
+
+                               strcat(strcpy(name_str, "psi\\"), name->val); /* name_str becomes the prefix followed by the macro name */
+                               num = psi_num_exp_copy(scope.macro->exp); /* the constant gets a copy, not the macro's own expression */
+                               def = psi_impl_def_val_init(PSI_T_NUMBER, num);
+                               type = psi_impl_type_init(PSI_T_NUMBER, "<eval number>");
+                               cnst = psi_const_init(type, name_str, def);
+                               P->consts = psi_plist_add(P->consts, &cnst);
+                               free(name_str); /* NOTE(review): assumes psi_const_init() keeps its own copy of the name - confirm */
+                       }
+               } else {
+                       if (psi_plist_count(scope.macro->tokens) == 1) { /* neither sig nor exp: only a single-token body is considered */
+                               struct psi_token *t;
+
+                               if (psi_plist_get(scope.macro->tokens, 0, &t)) {
+                                       if (t->type == PSI_T_QUOTED_STRING) { /* a lone quoted string becomes a string constant */
+                                               struct psi_impl_type *type;
+                                               struct psi_impl_def_val *def;
+                                               struct psi_const *cnst;
+                                               char *name_str = malloc(name->len + sizeof("psi\\")); /* same sizing as the numeric branch; NOTE(review): result is not checked for NULL */
+
+                                               strcat(strcpy(name_str, "psi\\"), name->val);
+                                               type = psi_impl_type_init(PSI_T_STRING, "string");
+                                               def = psi_impl_def_val_init(PSI_T_QUOTED_STRING, t->text); /* t->text is passed directly; whether it is copied is not visible here */
+                                               cnst = psi_const_init(type, name_str, def);
+                                               P->consts = psi_plist_add(P->consts, &cnst);
+                                               free(name_str); /* NOTE(review): assumes psi_const_init() keeps its own copy of the name - confirm */
+                                       }
+                               }
+                       }
+               }
+       }
+       ZEND_HASH_FOREACH_END();
+
+       P->flags = flags; /* drop PSI_SILENT again by restoring the saved flags */
+
+       psi_validate_scope_dtor(&scope);
+}
+
 bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
 {
        struct psi_plist *scanned, *preproc;
@@ -206,7 +268,7 @@ bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
                return false;
        }
 
-       if (!(preproc = psi_parser_preprocess(P, scanned))) {
+       if (!(preproc = psi_parser_preprocess(P, &scanned))) {
                psi_plist_free(scanned);
                return false;
        }
@@ -216,6 +278,8 @@ bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
                return false;
        }
 
+       psi_parser_postprocess(P);
+
        psi_plist_free(preproc);
        return true;
 }
@@ -250,7 +314,7 @@ void psi_parser_free(struct psi_parser **P)
        }
 
 union int_suffix {
-       char s[SIZEOF_UINT32_T];
+       char s[4]; /* character view sharing storage with the 32-bit member i below */
        uint32_t i;
 };
 
@@ -260,16 +324,23 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
        struct psi_token *token;
        const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
        unsigned parens;
+       bool escaped;
+       token_t char_width;
+
+       PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file);
 
        tok = mrk = eol = cur = I->buffer;
        lim = I->buffer + I->length;
        I->lines = 1;
-       tokens = psi_plist_init((void (*)(void *)) psi_token_free);
+       tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
 
        start: ;
+               char_width = 1;
                ctxmrk = NULL;
                tok = cur;
 
+               (void) ctxmrk;
+
                /*!re2c
 
                re2c:indent:top = 2;
@@ -282,36 +353,23 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
                re2c:yyfill:parameter = 0;
 
                W = [a-zA-Z0-9_\x80-\xff];
-               SP = [ \t];
+               SP = [ \t\f];
                EOL = [\r\n];
-               NAME = [a-zA-Z_\x80-\xff]W*;
+               NAME = [a-zA-Z_\x80-\xff] W*;
                NSNAME = (NAME)? ("\\" NAME)+;
                DOLLAR_NAME = '$' W+;
-               QUOTED_STRING = "L"? "\"" ([^"])+ "\"";
-               QUOTED_CHAR = "L"? "'" ([^']+ "\\'"?)+ "'";
                CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
-               CPP_ATTRIBUTE = "__attribute__" W* "((";
-               CPP_PRAGMA_ONCE = "pragma" W+ "once";
+               CPP_ATTRIBUTE = "__attribute__" SP* "((";
 
                DEC_CONST = [1-9] [0-9]*;
                OCT_CONST = "0" [0-7]*;
                HEX_CONST = '0x' [0-9a-fA-F]+;
                INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
-               INT_SUFFIX = 'u'('l' 'l'? )? | 'l'('l'? 'u')?;
-               INT_NUMBER = (DEC_CONST | OCT_CONST | HEX_CONST) INT_SUFFIX?;
 
-               FLT_HEX_FRAC = [0-9a-fA-F]*;
-               FLT_HEX_SIG = HEX_CONST ("." FLT_HEX_FRAC)?;
-               FLT_HEX_EXPO = 'p' [+-]? [0-9]+;
-               FLT_HEX_CONST = FLT_HEX_SIG FLT_HEX_EXPO;
+               FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
                FLT_DEC_NUM = "0" | DEC_CONST;
-               FLT_DEC_FRAC = [0-9]*;
-               FLT_DEC_SIG = FLT_DEC_NUM ("." FLT_DEC_FRAC)?;
-               FLT_DEC_EXPO = 'e' [+-]? [0-9]+;
-               FLT_DEC_CONST = (FLT_DEC_SIG FLT_DEC_EXPO) | (FLT_DEC_NUM "." FLT_DEC_FRAC) | ("." [0-9]+);
+               FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
                FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
-               FLT_SUFFIX = 'f' | 'l' | ('d' ('f' | 'd' | 'l'));
-               FLT_NUMBER = (FLT_DEC_CONST | FLT_HEX_CONST) FLT_SUFFIX?;
 
                [+-]? INT_CONST                                         { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
                [+-]? INT_CONST / 'u'                           { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
@@ -326,8 +384,17 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
                [+-]? FLT_CONST / 'dd'                  { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
                [+-]? FLT_CONST / 'dl'                  { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
 
+               "'"                             { escaped = false; tok += 1; goto character; }
+               "\""                    { escaped = false; tok += 1; goto string; }
+               "u8" / "\""             { char_width = 1; }
+               "u" / ['"]              { char_width = 2; }
+               "U" / ['"]              { char_width = 4; }
+               "L" / ['"]              { char_width = sizeof(wchar_t); /* width in bytes, like the 1, 2 and 4 of the other prefixes; dividing by 8 gave 0 */ }
+
                "/*"                    { goto comment; }
                "//"                    { goto comment_sl; }
+
+               "##"                    { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
                "#"                             { NEWTOKEN(PSI_T_HASH); goto start; }
                "("                             { NEWTOKEN(PSI_T_LPAREN); goto start; }
                ")"                             { NEWTOKEN(PSI_T_RPAREN); goto start; }
@@ -364,7 +431,29 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
                "..."                   { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
                "?"                             { NEWTOKEN(PSI_T_IIF); goto start; }
                "pragma"                { NEWTOKEN(PSI_T_PRAGMA); goto start; }
-               "once"                  { NEWTOKEN(PSI_T_ONCE); goto start; }
+               "pragma" SP+ "once"     { /* SP+ not W+: the two words are separated by blanks, W only matches word characters */ NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
+               "__inline"              { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
+               "__restrict"    { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
+               "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
+               "__asm__"               { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
+               "volatile"              { NEWTOKEN(PSI_T_VOLATILE); goto start; }
+               "sizeof"                { NEWTOKEN(PSI_T_SIZEOF); goto start; }
+               "line"                  { NEWTOKEN(PSI_T_LINE); goto start; }
+               "typedef"               { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
+               "struct"                { NEWTOKEN(PSI_T_STRUCT); goto start; }
+               "union"                 { NEWTOKEN(PSI_T_UNION); goto start; }
+               "enum"                  { NEWTOKEN(PSI_T_ENUM); goto start; }
+               "const"                 { NEWTOKEN(PSI_T_CONST); goto start; }
+               "void"                  { NEWTOKEN(PSI_T_VOID); goto start; }
+               "bool"                  { NEWTOKEN(PSI_T_BOOL); goto start; }
+               "char"                  { NEWTOKEN(PSI_T_CHAR); goto start; }
+               "short"                 { NEWTOKEN(PSI_T_SHORT); goto start; }
+               "int"                   { NEWTOKEN(PSI_T_INT); goto start; }
+               "long"                  { NEWTOKEN(PSI_T_LONG); goto start; }
+               "float"                 { NEWTOKEN(PSI_T_FLOAT); goto start; }
+               "double"                { NEWTOKEN(PSI_T_DOUBLE); goto start; }
+               "unsigned"              { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
+               "signed"                { NEWTOKEN(PSI_T_SIGNED); goto start; }
                'IF'                    { NEWTOKEN(PSI_T_IF); goto start; }
                'IFDEF'                 { NEWTOKEN(PSI_T_IFDEF); goto start; }
                'IFNDEF'                { NEWTOKEN(PSI_T_IFNDEF); goto start; }
@@ -383,41 +472,19 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
                'NULL'                  { NEWTOKEN(PSI_T_NULL); goto start; }
                'MIXED'                 { NEWTOKEN(PSI_T_MIXED); goto start; }
                'CALLABLE'              { NEWTOKEN(PSI_T_CALLABLE); goto start; }
-               'VOID'                  { NEWTOKEN(PSI_T_VOID); goto start; }
-               'BOOL'                  { NEWTOKEN(PSI_T_BOOL); goto start; }
-               'CHAR'                  { NEWTOKEN(PSI_T_CHAR); goto start; }
-               'SHORT'                 { NEWTOKEN(PSI_T_SHORT); goto start; }
-               'INT'                   { NEWTOKEN(PSI_T_INT); goto start; }
-               'LONG'                  { NEWTOKEN(PSI_T_LONG); goto start; }
-               'FLOAT'                 { NEWTOKEN(PSI_T_FLOAT); goto start; }
-               'DOUBLE'                { NEWTOKEN(PSI_T_DOUBLE); goto start; }
-               'INT8_T'                { NEWTOKEN(PSI_T_INT8); goto start; }
-               'UINT8_T'               { NEWTOKEN(PSI_T_UINT8); goto start; }
-               'INT16_T'               { NEWTOKEN(PSI_T_INT16); goto start; }
-               'UINT16_T'              { NEWTOKEN(PSI_T_UINT16); goto start; }
-               'INT32_T'               { NEWTOKEN(PSI_T_INT32); goto start; }
-               'UINT32_T'              { NEWTOKEN(PSI_T_UINT32); goto start; }
-               'INT64_T'               { NEWTOKEN(PSI_T_INT64); goto start; }
-               'UINT64_T'              { NEWTOKEN(PSI_T_UINT64); goto start; }
-               'UNSIGNED'              { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
-               'SIGNED'                { NEWTOKEN(PSI_T_SIGNED); goto start; }
                'STRING'                { NEWTOKEN(PSI_T_STRING); goto start; }
                'ARRAY'                 { NEWTOKEN(PSI_T_ARRAY); goto start; }
                'OBJECT'                { NEWTOKEN(PSI_T_OBJECT); goto start; }
                'CALLBACK'              { NEWTOKEN(PSI_T_CALLBACK); goto start; }
                'STATIC'                { NEWTOKEN(PSI_T_STATIC); goto start; }
                'FUNCTION'              { NEWTOKEN(PSI_T_FUNCTION); goto start; }
-               'TYPEDEF'               { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
-               'STRUCT'                { NEWTOKEN(PSI_T_STRUCT); goto start; }
-               'UNION'                 { NEWTOKEN(PSI_T_UNION); goto start; }
-               'ENUM'                  { NEWTOKEN(PSI_T_ENUM); goto start; }
-               'CONST'                 { NEWTOKEN(PSI_T_CONST); goto start; }
                'LIB'                   { NEWTOKEN(PSI_T_LIB); goto start; }
                'LET'                   { NEWTOKEN(PSI_T_LET); goto start; }
                'SET'                   { NEWTOKEN(PSI_T_SET); goto start; }
                'PRE_ASSERT'    { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
                'POST_ASSERT'   { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
                'RETURN'                { NEWTOKEN(PSI_T_RETURN); goto start; }
+               'AS'                    { NEWTOKEN(PSI_T_AS); goto start; }
                'FREE'                  { NEWTOKEN(PSI_T_FREE); goto start; }
                'TEMP'                  { NEWTOKEN(PSI_T_TEMP); goto start; }
                'STRLEN'                { NEWTOKEN(PSI_T_STRLEN); goto start; }
@@ -440,9 +507,7 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
                NAME                    { NEWTOKEN(PSI_T_NAME); goto start; }
                NSNAME                  { NEWTOKEN(PSI_T_NSNAME); goto start; }
                DOLLAR_NAME             { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
-               QUOTED_STRING   { NEWTOKEN(PSI_T_QUOTED_STRING); goto start; }
-               QUOTED_CHAR             { NEWTOKEN(PSI_T_QUOTED_CHAR); goto start; }
-               CPP_HEADER              { NEWTOKEN(PSI_T_CPP_HEADER); goto start; }
+               CPP_HEADER              { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
                CPP_ATTRIBUTE   { parens = 2; goto cpp_attribute; }
                EOL                             { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
                SP+                             { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
@@ -451,6 +516,46 @@ struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input
 
                */
 
+       character: ; /* sub-scanner entered after an opening ': consumes input until an unescaped ' and then emits PSI_T_QUOTED_CHAR with flags = char_width; a backslash toggles `escaped`, any other character clears it, EOL only bumps the line count; cur is stepped back by one around NEWTOKEN, presumably so the closing quote stays out of the token text */
+               /*!re2c
+
+               EOL             { NEWLINE(); goto character; }
+               "\\"    { escaped = !escaped;  goto character; }
+               "'"             {
+                       if (escaped) {
+                               escaped = false;
+                               goto character;
+                       }
+                       cur -= 1;
+                       NEWTOKEN(PSI_T_QUOTED_CHAR);
+                       cur += 1;
+                       token->flags = char_width;
+                       goto start;
+               }
+               *               { escaped = false; goto character; }
+
+               */
+
+       string: ; /* sub-scanner entered after an opening double quote: consumes input until an unescaped double quote and then emits PSI_T_QUOTED_STRING with flags = char_width; a backslash toggles `escaped`, any other character clears it, EOL only bumps the line count; cur is stepped back by one around NEWTOKEN, presumably so the closing quote stays out of the token text */
+               /*!re2c
+
+               EOL             { NEWLINE(); goto string; }
+               "\\"    { escaped = !escaped; goto string; }
+               "\""    {
+                       if (escaped) {
+                               escaped = false;
+                               goto string;
+                       }
+                       cur -= 1;
+                       NEWTOKEN(PSI_T_QUOTED_STRING);
+                       cur += 1;
+                       token->flags = char_width;
+                       goto start;
+               }
+               *               { escaped = false; goto string; }
+
+               */
+
        comment: ;
                /*!re2c