cpp: fix relative includes
[m6w6/ext-psi] / src / parser.re
index b56385d49e0f42c5e7a4d4ed32b6485f9eeb6c22..fb4e35bb3933e3fa4847ad04eb0f98d89bf2e12e 100644 (file)
@@ -1,13 +1,39 @@
+/*******************************************************************************
+ Copyright (c) 2016, Michael Wallner <mike@php.net>.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+     * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+     * Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
 #include "php_psi_stdinc.h"
 #include <sys/mman.h>
 #include <assert.h>
+#include <stdarg.h>
 
 #include "parser.h"
 
-void *psi_parser_proc_init(void);
-void psi_parser_proc_free(void **parser_proc);
-void psi_parser_proc_parse(void *parser_proc, token_t r, struct psi_token *token, struct psi_parser *parser);
-void psi_parser_proc_trace(FILE *out, char *prefix);
+/*!max:re2c*/
+#ifndef YYMAXFILL
+# define YYMAXFILL 256
+#endif
 
 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
 {
@@ -18,171 +44,185 @@ struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, uns
 
        psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
 
-       P->col = 1;
-       P->line = 1;
-       P->proc = psi_parser_proc_init();
+       P->preproc = psi_cpp_init(P);
 
-       if (flags & PSI_DEBUG) {
-               psi_parser_proc_trace(stderr, "PSI> ");
-       }
+       psi_cpp_load_defaults(P->preproc);
 
        return P;
 }
 
-bool psi_parser_open_file(struct psi_parser *P, const char *filename)
+/*
+ * Read the complete contents of `filename` into a newly allocated input.
+ *
+ * One allocation holds the struct itself, the file data, YYMAXFILL zero
+ * bytes of padding directly after the data, and a copy of the file name
+ * (the returned ->file points into that same allocation).
+ *
+ * P              parser whose error callback receives the warnings
+ * filename       path of the file to load
+ * report_errors  when false, failures are not reported through P->error
+ *
+ * Returns the input, or NULL if the file could not be stat'ed, opened or
+ * read completely, or if the allocation failed.
+ */
+struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors)
 {
-       FILE *fp = fopen(filename, "r");
-
-       if (!fp) {
-               P->error(PSI_DATA(P), NULL, PSI_WARNING,
-                               "Could not open '%s' for reading: %s",
-                               filename, strerror(errno));
-               return false;
+       struct stat sb;
+       FILE *fp;
+       struct psi_parser_input *fb;
+       size_t data_len, name_len = strlen(filename) + 1;
+       int saved_errno;
+
+       if (stat(filename, &sb)) {
+               if (report_errors) {
+                       P->error(PSI_DATA(P), NULL, PSI_WARNING,
+                                       "Could not stat '%s': %s",
+                                       filename, strerror(errno));
+               }
+               return NULL;
        }
 
-       P->input.type = PSI_PARSE_FILE;
-       P->input.data.file.handle = fp;
-
-#if HAVE_MMAP
-       struct stat sb;
-       int fd = fileno(fp);
+       /* st_size is an off_t; use a size_t so the arithmetic and %zu agree */
+       data_len = (size_t) sb.st_size;
+
+       if (!(fb = malloc(sizeof(*fb) + name_len + data_len + YYMAXFILL))) {
+               if (report_errors) {
+                       P->error(PSI_DATA(P), NULL, PSI_WARNING,
+                                       "Could not allocate %zu bytes for reading '%s': %s",
+                                       data_len + YYMAXFILL, filename, strerror(errno));
+               }
+               return NULL;
+       }
 
-       if (fstat(fd, &sb)) {
-               P->error(PSI_DATA(P), NULL, PSI_WARNING,
-                               "Could not stat '%s': %s",
-                               filename, strerror(errno));
-               return false;
+       if (!(fp = fopen(filename, "r"))) {
+               /* free() may overwrite errno, so keep the fopen() error */
+               saved_errno = errno;
+               free(fb);
+               if (report_errors) {
+                       P->error(PSI_DATA(P), NULL, PSI_WARNING,
+                                       "Could not open '%s' for reading: %s",
+                                       filename, strerror(saved_errno));
+               }
+               return NULL;
        }
 
-       P->input.data.file.buffer = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
-       if (MAP_FAILED == P->input.data.file.buffer) {
-               P->error(PSI_DATA(P), NULL, PSI_WARNING,
-                               "Could not map '%s' for reading: %s",
-                               filename, strerror(errno));
-               return false;
+       if (data_len != fread(fb->buffer, 1, data_len, fp)) {
+               /* free()/fclose() may overwrite errno, so keep the fread() error */
+               saved_errno = errno;
+               free(fb);
+               fclose(fp);
+               if (report_errors) {
+                       P->error(PSI_DATA(P), NULL, PSI_WARNING,
+                                       "Could not read %zu bytes from '%s': %s",
+                                       data_len, filename, strerror(saved_errno));
+               }
+               return NULL;
        }
-       P->input.data.file.length = sb.st_size;
-#else
-       P->input.data.file.buffer = malloc(BSIZE);
-#endif
 
-       P->file.fn = strdup(filename);
+       /* the whole file is in memory now; do not leak the stream */
+       fclose(fp);
+
+       /* zeroed padding after the data, then the file name behind it */
+       memset(fb->buffer + data_len, 0, YYMAXFILL);
+       fb->length = data_len;
+       fb->file = &fb->buffer[data_len + YYMAXFILL];
+       memcpy(fb->file, filename, name_len);
 
-       return true;
+       return fb;
 }
 
-bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
+/*
+ * Copy `length` bytes of `string` into a newly allocated input.
+ *
+ * The allocation holds the struct, the copied bytes, YYMAXFILL zero bytes
+ * of padding and the name "<stdin>", which ->file points at.
+ *
+ * Returns the input, or NULL (after a warning through P->error) when the
+ * allocation fails.
+ */
+struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
 {
-       P->input.type = PSI_PARSE_STRING;
-       P->input.data.string.length = length;
-       if (!(P->input.data.string.buffer = strndup(string, length))) {
-               return false;
+       static const char label[] = "<stdin>";
+       struct psi_parser_input *input;
+
+       input = malloc(sizeof(*input) + sizeof(label) + length + YYMAXFILL);
+       if (!input) {
+               P->error(PSI_DATA(P), NULL, PSI_WARNING,
+                               "Could not allocate %zu bytes: %s",
+                               length + YYMAXFILL, strerror(errno));
+               return NULL;
        }
 
-       P->file.fn = strdup("<input>");
+       /* payload first, directly followed by the zeroed padding */
+       memcpy(input->buffer, string, length);
+       memset(&input->buffer[length], 0, YYMAXFILL);
 
-       return true;
+       /* the label sits behind the padding, inside the same allocation */
+       input->file = input->buffer + length + YYMAXFILL;
+       memcpy(input->file, label, sizeof(label));
+       input->length = length;
+
+       return input;
 }
 
-static ssize_t psi_parser_fill(struct psi_parser *P, size_t n)
+#if 0 /* compiled out: nothing up to the matching #endif is built */
+static void psi_parser_register_constants(struct psi_parser *P) /* appends one psi_const to P->consts per entry of P->cpp.defs; NOTE(review): the enabled code in this file uses P->preproc, confirm P->cpp still exists before re-enabling */
 {
-       PSI_DEBUG_PRINT(P, "PSI< Fill: n=%zu (input.type=%d)\n", n, P->input.type);
-
-       /* init if n==0 */
-       if (!n) {
-               switch (P->input.type) {
-               case PSI_PARSE_FILE:
-                       P->cur = P->tok = P->mrk = P->input.data.file.buffer;
-#if HAVE_MMAP
-                       P->eof = P->input.data.file.buffer + P->input.data.file.length;
-                       P->lim = P->eof;
-#else
-                       P->eof = NULL;
-                       P->lim = P->input.data.file.buffer;
-#endif
+       zend_string *key;
+       zval *val;
+
+       ZEND_HASH_FOREACH_STR_KEY_VAL(&P->cpp.defs, key, val) /* visit every (key, val) pair of P->cpp.defs */
+       {
+               struct psi_impl_def_val *iv;
+               struct psi_const_type *ct;
+               struct psi_const *c;
+               const char *ctn; /* type name handed to psi_const_type_init() */
+               token_t ctt; /* type token handed to psi_impl_def_val_init() and psi_const_type_init() */
+               impl_val tmp; /* value staged here, copied into iv->ival below */
+               zend_string *str;
+
+               ZVAL_DEREF(val);
+               switch (Z_TYPE_P(val)) { /* pick token, name and value from the zval type */
+               case IS_TRUE:
+               case IS_FALSE:
+                       ctt = PSI_T_BOOL;
+                       ctn = "bool";
+                       tmp.zend.bval = Z_TYPE_P(val) == IS_TRUE;
                        break;
-
-               case PSI_PARSE_STRING:
-                       P->cur = P->tok = P->mrk = P->input.data.string.buffer;
-                       P->eof = P->input.data.string.buffer + P->input.data.string.length;
-                       P->lim = P->eof;
+               case IS_LONG:
+                       ctt = PSI_T_INT;
+                       ctn = "int";
+                       tmp.zend.lval = Z_LVAL_P(val);
+                       break;
+               case IS_DOUBLE:
+                       ctt = PSI_T_FLOAT;
+                       ctn = "float";
+                       tmp.dval = Z_DVAL_P(val);
+                       break;
+               default: /* every other zval type is stored as its string conversion */
+                       ctt = PSI_T_STRING;
+                       ctn = "string";
+                       str = zval_get_string(val);
+                       tmp.zend.str = zend_string_dup(str, 1); /* the duplicate is kept; the temporary is released on the next line */
+                       zend_string_release(str);
                        break;
                }
 
-               PSI_DEBUG_PRINT(P, "PSI< Fill: cur=%p lim=%p eof=%p\n", P->cur, P->lim, P->eof);
-       }
-
-       switch (P->input.type) {
-       case PSI_PARSE_STRING:
-               break;
-
-       case PSI_PARSE_FILE:
-#if !HAVE_MMAP
-               if (!P->eof) {
-                       size_t consumed = P->tok - P->buf;
-                       size_t reserved = P->lim - P->tok;
-                       size_t available = BSIZE - reserved;
-                       size_t didread;
-
-                       if (consumed) {
-                               memmove(P->buf, P->tok, reserved);
-                               P->tok -= consumed;
-                               P->cur -= consumed;
-                               P->lim -= consumed;
-                               P->mrk -= consumed;
-                       }
-
-                       didread = fread(P->lim, 1, available, P->fp);
-                       P->lim += didread;
-                       if (didread < available) {
-                               P->eof = P->lim;
-                       }
-                       PSI_DEBUG_PRINT(P, "PSI< Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n",
-                                       consumed, reserved, available, didread);
+               iv = psi_impl_def_val_init(ctt, NULL);
+               iv->ival = tmp;
+               ct = psi_const_type_init(ctt, ctn);
+               c = psi_const_init(ct, key->val, iv);
+               if (!P->consts) { /* first constant: create the list lazily */
+                       P->consts = psi_plist_init((psi_plist_dtor) psi_const_free);
                }
-#endif
-               break;
+               P->consts = psi_plist_add(P->consts, &c);
        }
+       ZEND_HASH_FOREACH_END();
+}
+#endif /* 0 */
 
-       PSI_DEBUG_PRINT(P, "PSI< Fill: avail=%td\n", P->lim - P->cur);
-
-       return P->lim - P->cur;
+/*
+ * Hand the token list to the preprocessor stored in P->preproc.
+ * Yields *tokens when psi_cpp_process() reports success, NULL otherwise.
+ */
+struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist **tokens)
+{
+       return psi_cpp_process(P->preproc, tokens) ? *tokens : NULL;
 }
 
-void psi_parser_parse(struct psi_parser *P, struct psi_token *T)
+/*
+ * Feed a token list to psi_parser_proc_parse().
+ * An empty list counts as success; otherwise the result is true exactly
+ * when psi_parser_proc_parse() returns 0.
+ */
+bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed)
 {
-       if (T) {
-               psi_parser_proc_parse(P->proc, T->type, T, P);
-       } else {
-               psi_parser_proc_parse(P->proc, 0, NULL, P);
+       /* nothing to parse */
+       if (!psi_plist_count(tokens)) {
+               return true;
        }
+
+       return psi_parser_proc_parse(P, tokens, processed) == 0;
 }
 
-void psi_parser_dtor(struct psi_parser *P)
+/*
+ * Run the three stages on input I: scan, preprocess, process.
+ * The token list is released with psi_plist_free() whether or not the
+ * later stages succeed. Returns true only if every stage succeeded.
+ */
+bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
 {
-       psi_parser_proc_free(&P->proc);
-
-       switch (P->input.type) {
-       case PSI_PARSE_FILE:
-               if (P->input.data.file.buffer) {
-#if HAVE_MMAP
-                       munmap(P->input.data.file.buffer, P->input.data.file.length);
-#else
-                       free(P->input.data.file.buffer);
-#endif
-               }
-               if (P->input.data.file.handle) {
-                       fclose(P->input.data.file.handle);
-               }
-               break;
+       struct psi_plist *scanned, *preproc;
+       size_t processed = 0;
+       bool ok;
 
-       case PSI_PARSE_STRING:
-               if (P->input.data.string.buffer) {
-                       free(P->input.data.string.buffer);
-               }
-               break;
+       scanned = psi_parser_scan(P, I);
+       if (!scanned) {
+               return false;
+       }
+
+       preproc = psi_parser_preprocess(P, &scanned);
+       if (!preproc) {
+               psi_plist_free(scanned);
+               return false;
        }
 
+       /* the list is freed on success and on failure alike */
+       ok = psi_parser_process(P, preproc, &processed);
+       psi_plist_free(preproc);
+
+       return ok;
+}
+
+void psi_parser_dtor(struct psi_parser *P)
+{
+       psi_cpp_free(&P->preproc);
        psi_data_dtor(PSI_DATA(P));
 
        memset(P, 0, sizeof(*P));
@@ -197,152 +237,300 @@ void psi_parser_free(struct psi_parser **P)
        }
 }
 
-/*!max:re2c*/
-#if BSIZE < YYMAXFILL
-# error BSIZE must be greater than YYMAXFILL
-#endif
-
-#define RETURN(t) do { \
-       P->num = t; \
-       PSI_DEBUG_PRINT(P, "PSI< TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \
-                               P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \
-                               P->file.fn, P->line, P->col); \
-       return t; \
-} while(1)
+#define NEWLINE() /* begin a new line: remember where it starts and count it; uses the locals eol, cur and I of the expanding function */ \
+       eol = cur; /* NEWTOKEN derives the column as tok - eol + 1 */ \
+       ++I->lines
 
-#define ADDCOLS \
-       P->col += P->cur - P->tok
+#define NEWTOKEN(t) /* create a token of type t from [tok, cur), append it to tokens, dump it when PSI_DEBUG is set; expands to several statements (no do/while wrapper), so use it only inside a braced statement list */ \
+       token = psi_token_init(t, tok, cur - tok, tok - eol + 1, I->lines, I->file); /* presumably (type, text, length, column, line, file) - confirm against psi_token_init */ \
+       tokens = psi_plist_add(tokens, &token); \
+       if (P->flags & PSI_DEBUG) { \
+               fprintf(stderr, "PSI< "); \
+               psi_token_dump(2, token); /* NOTE(review): 2 is presumably the stderr file descriptor - confirm */ \
+       }
 
-#define NEWLINE(label) \
-       P->col = 1; \
-       ++P->line; \
-       goto label
+union int_suffix { /* one storage area, readable as chars or as a single uint32_t; its use is not visible in this part of the diff */
+       char s[SIZEOF_UINT32_T]; /* byte view */
+       uint32_t i; /* integer view of the same bytes */
+};
 
-token_t psi_parser_scan(struct psi_parser *P)
+struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
 {
-       if (!P->cur) {
-               psi_parser_fill(P, 0);
-       }
-       for (;;) {
-               ADDCOLS;
-       nextline:
-               P->tok = P->cur;
+       struct psi_plist *tokens;
+       struct psi_token *token;
+       const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
+       unsigned parens;
+       bool escaped;
+       token_t char_width;
+
+       PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file);
+
+       tok = mrk = eol = cur = I->buffer;
+       lim = I->buffer + I->length;
+       I->lines = 1;
+       tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
+
+       start: ;
+               char_width = 1;
+               ctxmrk = NULL;
+               tok = cur;
+
                /*!re2c
+
                re2c:indent:top = 2;
                re2c:define:YYCTYPE = "unsigned char";
-               re2c:define:YYCURSOR = P->cur;
-               re2c:define:YYLIMIT = P->lim;
-               re2c:define:YYMARKER = P->mrk;
-               re2c:define:YYFILL = "{ if (!psi_parser_fill(P,@@)) RETURN(PSI_T_EOF); }";
+               re2c:define:YYCURSOR = cur;
+               re2c:define:YYLIMIT = lim;
+               re2c:define:YYMARKER = mrk;
+               re2c:define:YYCTXMARKER = ctxmrk;
+               re2c:define:YYFILL = "if (cur >= lim) goto done;";
                re2c:yyfill:parameter = 0;
 
-               B = [^a-zA-Z0-9_];
-               W = [a-zA-Z0-9_];
-               NAME = [a-zA-Z_]W*;
+               W = [a-zA-Z0-9_\x80-\xff];
+               SP = [ \t\f];
+               EOL = [\r\n];
+               NAME = [a-zA-Z_\x80-\xff] W*;
                NSNAME = (NAME)? ("\\" NAME)+;
                DOLLAR_NAME = '$' W+;
-               QUOTED_STRING = "\"" ([^\"])+ "\"";
-               NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;
-
-               "/*" { goto comment; }
-               ("#"|"//") .* "\n" { NEWLINE(nextline); }
-               "(" {RETURN(PSI_T_LPAREN);}
-               ")" {RETURN(PSI_T_RPAREN);}
-               ";" {RETURN(PSI_T_EOS);}
-               "," {RETURN(PSI_T_COMMA);}
-               ":" {RETURN(PSI_T_COLON);}
-               "{" {RETURN(PSI_T_LBRACE);}
-               "}" {RETURN(PSI_T_RBRACE);}
-               "[" {RETURN(PSI_T_LBRACKET);}
-               "]" {RETURN(PSI_T_RBRACKET);}
-               "=" {RETURN(PSI_T_EQUALS);}
-               "*" {RETURN(PSI_T_ASTERISK);}
-               "~" {RETURN(PSI_T_TILDE);}
-               "!" {RETURN(PSI_T_NOT);}
-               "%" {RETURN(PSI_T_MODULO);}
-               "&" {RETURN(PSI_T_AMPERSAND);}
-               "+" {RETURN(PSI_T_PLUS);}
-               "-" {RETURN(PSI_T_MINUS);}
-               "/" {RETURN(PSI_T_SLASH);}
-               "|" {RETURN(PSI_T_PIPE);}
-               "^" {RETURN(PSI_T_CARET);}
-               "<<" {RETURN(PSI_T_LSHIFT);}
-               ">>" {RETURN(PSI_T_RSHIFT);}
-               "..." {RETURN(PSI_T_ELLIPSIS);}
-               [\r\n] { NEWLINE(nextline); }
-               [\t ]+ { continue; }
-               'TRUE' {RETURN(PSI_T_TRUE);}
-               'FALSE' {RETURN(PSI_T_FALSE);}
-               'NULL' {RETURN(PSI_T_NULL);}
-               'MIXED' {RETURN(PSI_T_MIXED);}
-               'CALLABLE' {RETURN(PSI_T_CALLABLE);}
-               'VOID' {RETURN(PSI_T_VOID);}
-               'BOOL' {RETURN(PSI_T_BOOL);}
-               'CHAR' {RETURN(PSI_T_CHAR);}
-               'SHORT' {RETURN(PSI_T_SHORT);}
-               'INT' {RETURN(PSI_T_INT);}
-               'LONG' {RETURN(PSI_T_LONG);}
-               'FLOAT' {RETURN(PSI_T_FLOAT);}
-               'DOUBLE' {RETURN(PSI_T_DOUBLE);}
-               'INT8_T' {RETURN(PSI_T_INT8);}
-               'UINT8_T' {RETURN(PSI_T_UINT8);}
-               'INT16_T' {RETURN(PSI_T_INT16);}
-               'UINT16_T' {RETURN(PSI_T_UINT16);}
-               'INT32_T' {RETURN(PSI_T_INT32);}
-               'UINT32_T' {RETURN(PSI_T_UINT32);}
-               'INT64_T' {RETURN(PSI_T_INT64);}
-               'UINT64_T' {RETURN(PSI_T_UINT64);}
-               'UNSIGNED' {RETURN(PSI_T_UNSIGNED);}
-               'SIGNED' {RETURN(PSI_T_SIGNED);}
-               'STRING' {RETURN(PSI_T_STRING);}
-               'ARRAY' {RETURN(PSI_T_ARRAY);}
-               'OBJECT' {RETURN(PSI_T_OBJECT);}
-               'CALLBACK' {RETURN(PSI_T_CALLBACK);}
-               'STATIC' {RETURN(PSI_T_STATIC);}
-               'FUNCTION' {RETURN(PSI_T_FUNCTION);}
-               'TYPEDEF' {RETURN(PSI_T_TYPEDEF);}
-               'STRUCT' {RETURN(PSI_T_STRUCT);}
-               'UNION' {RETURN(PSI_T_UNION);}
-               'ENUM' {RETURN(PSI_T_ENUM);}
-               'CONST' {RETURN(PSI_T_CONST);}
-               'LIB' {RETURN(PSI_T_LIB);}
-               'LET' {RETURN(PSI_T_LET);}
-               'SET' {RETURN(PSI_T_SET);}
-               'RETURN' {RETURN(PSI_T_RETURN);}
-               'FREE' {RETURN(PSI_T_FREE);}
-               'TEMP' {RETURN(PSI_T_TEMP);}
-               'STRLEN' {RETURN(PSI_T_STRLEN);}
-               'STRVAL' {RETURN(PSI_T_STRVAL);}
-               'PATHVAL' {RETURN(PSI_T_PATHVAL);}
-               'INTVAL' {RETURN(PSI_T_INTVAL);}
-               'FLOATVAL' {RETURN(PSI_T_FLOATVAL);}
-               'BOOLVAL' {RETURN(PSI_T_BOOLVAL);}
-               'ARRVAL' {RETURN(PSI_T_ARRVAL);}
-               'OBJVAL' {RETURN(PSI_T_OBJVAL);}
-               'ZVAL' {RETURN(PSI_T_ZVAL);}
-               'COUNT' {RETURN(PSI_T_COUNT);}
-               'CALLOC' {RETURN(PSI_T_CALLOC);}
-               'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);}
-               'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);}
-               'TO_STRING' {RETURN(PSI_T_TO_STRING);}
-               'TO_INT' {RETURN(PSI_T_TO_INT);}
-               'TO_FLOAT' {RETURN(PSI_T_TO_FLOAT);}
-               'TO_BOOL' {RETURN(PSI_T_TO_BOOL);}
-               NUMBER {RETURN(PSI_T_NUMBER);}
-               NAME {RETURN(PSI_T_NAME);}
-               NSNAME {RETURN(PSI_T_NSNAME);}
-               DOLLAR_NAME {RETURN(PSI_T_DOLLAR_NAME);}
-               QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);}
-               [^] {break;}
+               CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
+               CPP_ATTRIBUTE = "__attribute__" SP* "((";
+
+               DEC_CONST = [1-9] [0-9]*;
+               OCT_CONST = "0" [0-7]*;
+               HEX_CONST = '0x' [0-9a-fA-F]+;
+               INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
+
+               FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
+               FLT_DEC_NUM = "0" | DEC_CONST;
+               FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
+               FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
+
+               [+-]? INT_CONST                                         { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
+               [+-]? INT_CONST / 'u'                           { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
+               [+-]? INT_CONST / 'l'                           { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
+               [+-]? INT_CONST / ('lu' | 'ul')         { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
+               [+-]? INT_CONST / ('llu' | 'ull')       { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
+
+               [+-]? FLT_CONST                                 { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
+               [+-]? FLT_CONST / 'f'                   { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
+               [+-]? FLT_CONST / 'l'                   { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
+               [+-]? FLT_CONST / 'df'                  { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
+               [+-]? FLT_CONST / 'dd'                  { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
+               [+-]? FLT_CONST / 'dl'                  { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
+
+               "'"                             { escaped = false; tok += 1; goto character; }
+               "\""                    { escaped = false; tok += 1; goto string; }
+               "u8" / "\""             { char_width = 1; }
+               "u" / ['"]              { char_width = 2; }
+               "U" / ['"]              { char_width = 4; }
+               "L" / ['"]              { char_width = SIZEOF_WCHAR_T/8; }
+
+               "/*"                    { goto comment; }
+               "//"                    { goto comment_sl; }
+
+               "##"                    { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
+               "#"                             { NEWTOKEN(PSI_T_HASH); goto start; }
+               "("                             { NEWTOKEN(PSI_T_LPAREN); goto start; }
+               ")"                             { NEWTOKEN(PSI_T_RPAREN); goto start; }
+               ";"                             { NEWTOKEN(PSI_T_EOS); goto start; }
+               ","                             { NEWTOKEN(PSI_T_COMMA); goto start; }
+               ":"                             { NEWTOKEN(PSI_T_COLON); goto start; }
+               "{"                             { NEWTOKEN(PSI_T_LBRACE); goto start; }
+               "}"                             { NEWTOKEN(PSI_T_RBRACE); goto start; }
+               "["                             { NEWTOKEN(PSI_T_LBRACKET); goto start; }
+               "]"                             { NEWTOKEN(PSI_T_RBRACKET); goto start; }
+               "!="                    { NEWTOKEN(PSI_T_CMP_NE); goto start; }
+               "=="                    { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
+               "&&"                    { NEWTOKEN(PSI_T_AND); goto start; }
+               "||"                    { NEWTOKEN(PSI_T_OR); goto start; }
+               "="                             { NEWTOKEN(PSI_T_EQUALS); goto start; }
+               "*"                             { NEWTOKEN(PSI_T_ASTERISK); goto start; }
+               "~"                             { NEWTOKEN(PSI_T_TILDE); goto start; }
+               "!"                             { NEWTOKEN(PSI_T_NOT); goto start; }
+               "%"                             { NEWTOKEN(PSI_T_MODULO); goto start; }
+               "&"                             { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
+               "+"                             { NEWTOKEN(PSI_T_PLUS); goto start; }
+               "-"                             { NEWTOKEN(PSI_T_MINUS); goto start; }
+               "/"                             { NEWTOKEN(PSI_T_SLASH); goto start; }
+               "\\"                    { NEWTOKEN(PSI_T_BSLASH); goto start; }
+               "|"                             { NEWTOKEN(PSI_T_PIPE); goto start; }
+               "^"                             { NEWTOKEN(PSI_T_CARET); goto start; }
+               "<<"                    { NEWTOKEN(PSI_T_LSHIFT); goto start; }
+               ">>"                    { NEWTOKEN(PSI_T_RSHIFT); goto start; }
+               "<="                    { NEWTOKEN(PSI_T_CMP_LE); goto start; }
+               ">="                    { NEWTOKEN(PSI_T_CMP_GE); goto start; }
+               "<"                             { NEWTOKEN(PSI_T_LCHEVR); goto start; }
+               ">"                             { NEWTOKEN(PSI_T_RCHEVR); goto start; }
+               "."                             { NEWTOKEN(PSI_T_PERIOD); goto start; }
+               "..."                   { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
+               "?"                             { NEWTOKEN(PSI_T_IIF); goto start; }
+               "pragma"                { NEWTOKEN(PSI_T_PRAGMA); goto start; }
+               "pragma" W+ "once"      { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
+               "__restrict"    { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
+               "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
+               "__asm__"               { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
+               "line"                  { NEWTOKEN(PSI_T_LINE); goto start; }
+               "typedef"               { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
+               "struct"                { NEWTOKEN(PSI_T_STRUCT); goto start; }
+               "union"                 { NEWTOKEN(PSI_T_UNION); goto start; }
+               "enum"                  { NEWTOKEN(PSI_T_ENUM); goto start; }
+               "const"                 { NEWTOKEN(PSI_T_CONST); goto start; }
+               "void"                  { NEWTOKEN(PSI_T_VOID); goto start; }
+               "bool"                  { NEWTOKEN(PSI_T_BOOL); goto start; }
+               "char"                  { NEWTOKEN(PSI_T_CHAR); goto start; }
+               "short"                 { NEWTOKEN(PSI_T_SHORT); goto start; }
+               "int"                   { NEWTOKEN(PSI_T_INT); goto start; }
+               "long"                  { NEWTOKEN(PSI_T_LONG); goto start; }
+               "float"                 { NEWTOKEN(PSI_T_FLOAT); goto start; }
+               "double"                { NEWTOKEN(PSI_T_DOUBLE); goto start; }
+               "int8_t"                { NEWTOKEN(PSI_T_INT8); goto start; }
+               "uint8_t"               { NEWTOKEN(PSI_T_UINT8); goto start; }
+               "int16_t"               { NEWTOKEN(PSI_T_INT16); goto start; }
+               "uint16_t"              { NEWTOKEN(PSI_T_UINT16); goto start; }
+               "int32_t"               { NEWTOKEN(PSI_T_INT32); goto start; }
+               "uint32_t"              { NEWTOKEN(PSI_T_UINT32); goto start; }
+               "int64_t"               { NEWTOKEN(PSI_T_INT64); goto start; }
+               "uint64_t"              { NEWTOKEN(PSI_T_UINT64); goto start; }
+               "unsigned"              { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
+               "signed"                { NEWTOKEN(PSI_T_SIGNED); goto start; }
+               'IF'                    { NEWTOKEN(PSI_T_IF); goto start; }
+               'IFDEF'                 { NEWTOKEN(PSI_T_IFDEF); goto start; }
+               'IFNDEF'                { NEWTOKEN(PSI_T_IFNDEF); goto start; }
+               'ELSE'                  { NEWTOKEN(PSI_T_ELSE); goto start; }
+               'ELIF'                  { NEWTOKEN(PSI_T_ELIF); goto start; }
+               'ENDIF'                 { NEWTOKEN(PSI_T_ENDIF); goto start; }
+               'DEFINE'                { NEWTOKEN(PSI_T_DEFINE); goto start; }
+               'DEFINED'               { NEWTOKEN(PSI_T_DEFINED); goto start; }
+               'UNDEF'                 { NEWTOKEN(PSI_T_UNDEF); goto start; }
+               'WARNING'               { NEWTOKEN(PSI_T_WARNING); goto start; }
+               'ERROR'                 { NEWTOKEN(PSI_T_ERROR); goto start; }
+               'INCLUDE'               { NEWTOKEN(PSI_T_INCLUDE); goto start; }
+               'INCLUDE_NEXT'  { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
+               'TRUE'                  { NEWTOKEN(PSI_T_TRUE); goto start; }
+               'FALSE'                 { NEWTOKEN(PSI_T_FALSE); goto start; }
+               'NULL'                  { NEWTOKEN(PSI_T_NULL); goto start; }
+               'MIXED'                 { NEWTOKEN(PSI_T_MIXED); goto start; }
+               'CALLABLE'              { NEWTOKEN(PSI_T_CALLABLE); goto start; }
+               'STRING'                { NEWTOKEN(PSI_T_STRING); goto start; }
+               'ARRAY'                 { NEWTOKEN(PSI_T_ARRAY); goto start; }
+               'OBJECT'                { NEWTOKEN(PSI_T_OBJECT); goto start; }
+               'CALLBACK'              { NEWTOKEN(PSI_T_CALLBACK); goto start; }
+               'STATIC'                { NEWTOKEN(PSI_T_STATIC); goto start; }
+               'FUNCTION'              { NEWTOKEN(PSI_T_FUNCTION); goto start; }
+               'LIB'                   { NEWTOKEN(PSI_T_LIB); goto start; }
+               'LET'                   { NEWTOKEN(PSI_T_LET); goto start; }
+               'SET'                   { NEWTOKEN(PSI_T_SET); goto start; }
+               'PRE_ASSERT'    { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
+               'POST_ASSERT'   { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
+               'RETURN'                { NEWTOKEN(PSI_T_RETURN); goto start; }
+               'FREE'                  { NEWTOKEN(PSI_T_FREE); goto start; }
+               'TEMP'                  { NEWTOKEN(PSI_T_TEMP); goto start; }
+               'STRLEN'                { NEWTOKEN(PSI_T_STRLEN); goto start; }
+               'STRVAL'                { NEWTOKEN(PSI_T_STRVAL); goto start; }
+               'PATHVAL'               { NEWTOKEN(PSI_T_PATHVAL); goto start; }
+               'INTVAL'                { NEWTOKEN(PSI_T_INTVAL); goto start; }
+               'FLOATVAL'              { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
+               'BOOLVAL'               { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
+               'ARRVAL'                { NEWTOKEN(PSI_T_ARRVAL); goto start; }
+               'OBJVAL'                { NEWTOKEN(PSI_T_OBJVAL); goto start; }
+               'ZVAL'                  { NEWTOKEN(PSI_T_ZVAL); goto start; }
+               'COUNT'                 { NEWTOKEN(PSI_T_COUNT); goto start; }
+               'CALLOC'                { NEWTOKEN(PSI_T_CALLOC); goto start; }
+               'TO_OBJECT'             { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
+               'TO_ARRAY'              { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
+               'TO_STRING'             { NEWTOKEN(PSI_T_TO_STRING); goto start; }
+               'TO_INT'                { NEWTOKEN(PSI_T_TO_INT); goto start; }
+               'TO_FLOAT'              { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
+               'TO_BOOL'               { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
+               NAME                    { NEWTOKEN(PSI_T_NAME); goto start; }
+               NSNAME                  { NEWTOKEN(PSI_T_NSNAME); goto start; }
+               DOLLAR_NAME             { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
+               CPP_HEADER              { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
+               CPP_ATTRIBUTE   { parens = 2; goto cpp_attribute; }
+               EOL                             { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
+               SP+                             { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
+               [^]                             { NEWTOKEN(-2); goto error; }
+               *                               { NEWTOKEN(-1); goto error; }
+
                */
 
-       comment:
-               P->tok = P->cur;
+       character: ;
                /*!re2c
-               "\n" { NEWLINE(comment); }
-               "*" "/" { continue; }
-               [^] { goto comment; }
+
+               EOL             { NEWLINE(); goto character; }
+               "\\"    { escaped = !escaped;  goto character; }
+               "'"             {
+                       if (escaped) {
+                               escaped = false;
+                               goto character;
+                       }
+                       cur -= 1;
+                       NEWTOKEN(PSI_T_QUOTED_CHAR);
+                       cur += 1;
+                       token->flags = char_width;
+                       goto start;
+               }
+               *               { escaped = false; goto character; }
+
                */
-       }
-       return -1;
+
+       string: ;
+               /*!re2c
+
+               EOL             { NEWLINE(); goto string; }
+               "\\"    { escaped = !escaped; goto string; }
+               "\""    {
+                       if (escaped) {
+                               escaped = false;
+                               goto string;
+                       }
+                       cur -= 1;
+                       NEWTOKEN(PSI_T_QUOTED_STRING);
+                       cur += 1;
+                       token->flags = char_width;
+                       goto start;
+               }
+               *               { escaped = false; goto string; }
+
+               */
+
+       comment: ;
+               /*!re2c
+
+               EOL             { NEWLINE(); goto comment; }
+               "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
+                *              { goto comment; }
+
+               */
+
+       comment_sl: ;
+               /*!re2c
+
+               EOL     { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
+               *       { goto comment_sl; }
+
+               */
+
+       cpp_attribute: ;
+
+               /*!re2c
+
+               "("     { ++parens; goto cpp_attribute; }
+               ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
+               EOL     { NEWLINE(); goto cpp_attribute; }
+                *      { goto cpp_attribute; }
+
+               */
+error: ;
+
+       P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
+                       token->type, token->size, token->text, tok - eol + 1);
+       psi_plist_free(tokens);
+       return NULL;
+
+done:
+
+       PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);
+
+       return tokens;
 }