d2edd5ea15cd69de7ddadbe6b6322162b82c3c2e
[m6w6/ext-psi] / src / parser.re
1 #include "php_psi_stdinc.h"
2 #include <assert.h>
3
4 #include "parser.h"
5
6 void *psi_parser_proc_Alloc(void*(unsigned long));
7 void psi_parser_proc_Free(void*, void(*)(void*));
8 void psi_parser_proc_(void *, token_t, struct psi_token *, struct psi_parser *);
9 void psi_parser_proc_Trace(FILE *, const char*);
10
11 struct psi_parser *psi_parser_init(struct psi_parser *P, const char *filename, psi_error_cb error, unsigned flags)
12 {
13 FILE *fp;
14
15 fp = fopen(filename, "r");
16
17 if (!fp) {
18 if (!(flags & PSI_SILENT)) {
19 error(NULL, NULL, PSI_WARNING, "Could not open '%s' for reading: %s",
20 filename, strerror(errno));
21 }
22 return NULL;
23 }
24
25 if (!P) {
26 P = malloc(sizeof(*P));
27 }
28 memset(P, 0, sizeof(*P));
29
30 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
31 P->file.fn = strdup(filename);
32 P->fp = fp;
33 P->col = 1;
34 P->line = 1;
35 P->proc = psi_parser_proc_Alloc(malloc);
36
37 if (flags & PSI_DEBUG) {
38 psi_parser_proc_Trace(stderr, "PSI> ");
39 }
40
41 psi_parser_fill(P, 0);
42
43 return P;
44 }
45
46 ssize_t psi_parser_fill(struct psi_parser *P, size_t n)
47 {
48 if (P->flags & PSI_DEBUG) {
49 fprintf(stderr, "PSI> Fill: n=%zu\n", n);
50 }
51 if (!n) {
52 P->cur = P->tok = P->lim = P->mrk = P->buf;
53 P->eof = NULL;
54 }
55
56 if (!P->eof) {
57 size_t consumed = P->tok - P->buf;
58 size_t reserved = P->lim - P->tok;
59 size_t available = BSIZE - reserved;
60 size_t didread;
61
62 if (consumed) {
63 memmove(P->buf, P->tok, reserved);
64 P->tok -= consumed;
65 P->cur -= consumed;
66 P->lim -= consumed;
67 P->mrk -= consumed;
68 }
69
70 didread = fread(P->lim, 1, available, P->fp);
71 P->lim += didread;
72 if (didread < available) {
73 P->eof = P->lim;
74 }
75
76 if (P->flags & PSI_DEBUG) {
77 fprintf(stderr, "PSI> Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n",
78 consumed, reserved, available, didread);
79 }
80 }
81 if (P->flags & PSI_DEBUG) {
82 fprintf(stderr, "PSI> Fill: avail=%td\n", P->lim - P->cur);
83 }
84 return P->lim - P->cur;
85 }
86
87 void psi_parser_parse(struct psi_parser *P, struct psi_token *T)
88 {
89 if (T) {
90 psi_parser_proc_(P->proc, T->type, T, P);
91 } else {
92 psi_parser_proc_(P->proc, 0, NULL, P);
93 }
94 }
95
96 void psi_parser_dtor(struct psi_parser *P)
97 {
98 psi_parser_proc_Free(P->proc, free);
99
100 if (P->fp) {
101 fclose(P->fp);
102 }
103
104 psi_data_dtor(PSI_DATA(P));
105
106 memset(P, 0, sizeof(*P));
107 }
108
/**
 * Destroy and deallocate a heap-allocated parser and clear the caller's
 * pointer to it.
 *
 * @param P address of the parser pointer; nothing happens if *P is NULL
 */
void psi_parser_free(struct psi_parser **P)
{
	struct psi_parser *parser = *P;

	if (parser == NULL) {
		return;
	}

	psi_parser_dtor(parser);
	free(parser);
	*P = NULL;
}
117
/*!max:re2c*/
/* Capacity used by psi_parser_fill() when computing how much it may read. */
#define BSIZE 256

/*
 * YYMAXFILL is the largest amount the scanner may request in one YYFILL;
 * it is expected to be provided by the re2c "max" directive above.
 */
#if BSIZE < YYMAXFILL
# error BSIZE must be greater than or equal to YYMAXFILL
#endif

/*
 * Record token type t in P->num, print a trace line when PSI_DEBUG is set,
 * and return t from the enclosing function. Expects a parser pointer named
 * P in scope. The argument is evaluated twice, so pass a constant.
 */
#define RETURN(t) do { \
	P->num = (t); \
	if (P->flags & PSI_DEBUG) { \
		fprintf(stderr, "PSI> TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \
				P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \
				P->file.fn, P->line, P->col); \
	} \
	return (t); \
} while (0)

/* Advance the column counter by the length of the lexeme just matched. */
#define ADDCOLS \
	(P->col += P->cur - P->tok)

/*
 * Start a new line (column 1, next line number) and jump to label.
 * Wrapped in do/while so that it behaves as one statement, e.g. as the
 * body of an unbraced if.
 */
#define NEWLINE(label) do { \
	P->col = 1; \
	++P->line; \
	goto label; \
} while (0)
142
143 token_t psi_parser_scan(struct psi_parser *P)
144 {
145 for (;;) {
146 ADDCOLS;
147 nextline:
148 P->tok = P->cur;
149 /*!re2c
150 re2c:indent:top = 2;
151 re2c:define:YYCTYPE = "unsigned char";
152 re2c:define:YYCURSOR = P->cur;
153 re2c:define:YYLIMIT = P->lim;
154 re2c:define:YYMARKER = P->mrk;
155 re2c:define:YYFILL = "{ if (!psi_parser_fill(P,@@)) RETURN(PSI_T_EOF); }";
156 re2c:yyfill:parameter = 0;
157
158 B = [^a-zA-Z0-9_];
159 W = [a-zA-Z0-9_];
160 NAME = [a-zA-Z_]W*;
161 NSNAME = (NAME)? ("\\" NAME)+;
162 DOLLAR_NAME = '$' W+;
163 QUOTED_STRING = "\"" ([^\"])+ "\"";
164 NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;
165
166 "/*" { goto comment; }
167 ("#"|"//") .* "\n" { NEWLINE(nextline); }
168 "(" {RETURN(PSI_T_LPAREN);}
169 ")" {RETURN(PSI_T_RPAREN);}
170 ";" {RETURN(PSI_T_EOS);}
171 "," {RETURN(PSI_T_COMMA);}
172 ":" {RETURN(PSI_T_COLON);}
173 "{" {RETURN(PSI_T_LBRACE);}
174 "}" {RETURN(PSI_T_RBRACE);}
175 "[" {RETURN(PSI_T_LBRACKET);}
176 "]" {RETURN(PSI_T_RBRACKET);}
177 "=" {RETURN(PSI_T_EQUALS);}
178 "*" {RETURN(PSI_T_ASTERISK);}
179 "&" {RETURN(PSI_T_AMPERSAND);}
180 "+" {RETURN(PSI_T_PLUS);}
181 "-" {RETURN(PSI_T_MINUS);}
182 "/" {RETURN(PSI_T_SLASH);}
183 "..." {RETURN(PSI_T_ELLIPSIS);}
184 [\r\n] { NEWLINE(nextline); }
185 [\t ]+ { continue; }
186 'TRUE' {RETURN(PSI_T_TRUE);}
187 'FALSE' {RETURN(PSI_T_FALSE);}
188 'NULL' {RETURN(PSI_T_NULL);}
189 'MIXED' {RETURN(PSI_T_MIXED);}
190 'CALLABLE' {RETURN(PSI_T_CALLABLE);}
191 'VOID' {RETURN(PSI_T_VOID);}
192 'BOOL' {RETURN(PSI_T_BOOL);}
193 'CHAR' {RETURN(PSI_T_CHAR);}
194 'SHORT' {RETURN(PSI_T_SHORT);}
195 'INT' {RETURN(PSI_T_INT);}
196 'LONG' {RETURN(PSI_T_LONG);}
197 'FLOAT' {RETURN(PSI_T_FLOAT);}
198 'DOUBLE' {RETURN(PSI_T_DOUBLE);}
199 'INT8_T' {RETURN(PSI_T_INT8);}
200 'UINT8_T' {RETURN(PSI_T_UINT8);}
201 'INT16_T' {RETURN(PSI_T_INT16);}
202 'UINT16_T' {RETURN(PSI_T_UINT16);}
203 'INT32_T' {RETURN(PSI_T_INT32);}
204 'UINT32_T' {RETURN(PSI_T_UINT32);}
205 'INT64_T' {RETURN(PSI_T_INT64);}
206 'UINT64_T' {RETURN(PSI_T_UINT64);}
207 'UNSIGNED' {RETURN(PSI_T_UNSIGNED);}
208 'SIGNED' {RETURN(PSI_T_SIGNED);}
209 'STRING' {RETURN(PSI_T_STRING);}
210 'ARRAY' {RETURN(PSI_T_ARRAY);}
211 'OBJECT' {RETURN(PSI_T_OBJECT);}
212 'CALLBACK' {RETURN(PSI_T_CALLBACK);}
213 'STATIC' {RETURN(PSI_T_STATIC);}
214 'FUNCTION' {RETURN(PSI_T_FUNCTION);}
215 'TYPEDEF' {RETURN(PSI_T_TYPEDEF);}
216 'STRUCT' {RETURN(PSI_T_STRUCT);}
217 'UNION' {RETURN(PSI_T_UNION);}
218 'ENUM' {RETURN(PSI_T_ENUM);}
219 'CONST' {RETURN(PSI_T_CONST);}
220 'LIB' {RETURN(PSI_T_LIB);}
221 'LET' {RETURN(PSI_T_LET);}
222 'SET' {RETURN(PSI_T_SET);}
223 'RETURN' {RETURN(PSI_T_RETURN);}
224 'FREE' {RETURN(PSI_T_FREE);}
225 'TEMP' {RETURN(PSI_T_TEMP);}
226 'STRLEN' {RETURN(PSI_T_STRLEN);}
227 'STRVAL' {RETURN(PSI_T_STRVAL);}
228 'PATHVAL' {RETURN(PSI_T_PATHVAL);}
229 'INTVAL' {RETURN(PSI_T_INTVAL);}
230 'FLOATVAL' {RETURN(PSI_T_FLOATVAL);}
231 'BOOLVAL' {RETURN(PSI_T_BOOLVAL);}
232 'ARRVAL' {RETURN(PSI_T_ARRVAL);}
233 'OBJVAL' {RETURN(PSI_T_OBJVAL);}
234 'ZVAL' {RETURN(PSI_T_ZVAL);}
235 'COUNT' {RETURN(PSI_T_COUNT);}
236 'CALLOC' {RETURN(PSI_T_CALLOC);}
237 'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);}
238 'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);}
239 'TO_STRING' {RETURN(PSI_T_TO_STRING);}
240 'TO_INT' {RETURN(PSI_T_TO_INT);}
241 'TO_FLOAT' {RETURN(PSI_T_TO_FLOAT);}
242 'TO_BOOL' {RETURN(PSI_T_TO_BOOL);}
243 NUMBER {RETURN(PSI_T_NUMBER);}
244 NAME {RETURN(PSI_T_NAME);}
245 NSNAME {RETURN(PSI_T_NSNAME);}
246 DOLLAR_NAME {RETURN(PSI_T_DOLLAR_NAME);}
247 QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);}
248 [^] {break;}
249 */
250
251 comment:
252 P->tok = P->cur;
253 /*!re2c
254 "\n" { NEWLINE(comment); }
255 "*" "/" { continue; }
256 [^] { goto comment; }
257 */
258 }
259 return -1;
260 }