num_exp: re-parseable dumps
[m6w6/ext-psi] / src / parser.re
1 #include "php_psi_stdinc.h"
2 #include <sys/mman.h>
3 #include <assert.h>
4
5 #include "parser.h"
6
/*
 * Entry points of the token-consuming parser implementation.
 * They are only declared here; their definitions live outside this file
 * (presumably in the generated grammar parser -- TODO confirm).
 */

/* Create the parser state that psi_parser_init() stores in P->proc. */
void *psi_parser_proc_init(void);
/* Release the state created by psi_parser_proc_init(); takes the address of the handle. */
void psi_parser_proc_free(void **parser_proc);
/* Feed one token to the parser; psi_parser_parse() passes (0, NULL) when it has no token. */
void psi_parser_proc_parse(void *parser_proc, token_t r, struct psi_token *token, struct psi_parser *parser);
/* Configure trace output; psi_parser_init() calls it with stderr and "PSI> " when PSI_DEBUG is set. */
void psi_parser_proc_trace(FILE *out, char *prefix);
11
12 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
13 {
14 if (!P) {
15 P = malloc(sizeof(*P));
16 }
17 memset(P, 0, sizeof(*P));
18
19 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
20
21 P->col = 1;
22 P->line = 1;
23 P->proc = psi_parser_proc_init();
24
25 if (flags & PSI_DEBUG) {
26 psi_parser_proc_trace(stderr, "PSI> ");
27 }
28
29 return P;
30 }
31
32 bool psi_parser_open_file(struct psi_parser *P, const char *filename)
33 {
34 FILE *fp = fopen(filename, "r");
35
36 if (!fp) {
37 P->error(PSI_DATA(P), NULL, PSI_WARNING,
38 "Could not open '%s' for reading: %s",
39 filename, strerror(errno));
40 return false;
41 }
42
43 P->input.type = PSI_PARSE_FILE;
44 P->input.data.file.handle = fp;
45
46 #if HAVE_MMAP
47 struct stat sb;
48 int fd = fileno(fp);
49
50 if (fstat(fd, &sb)) {
51 P->error(PSI_DATA(P), NULL, PSI_WARNING,
52 "Could not stat '%s': %s",
53 filename, strerror(errno));
54 return false;
55 }
56
57 P->input.data.file.buffer = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
58 if (MAP_FAILED == P->input.data.file.buffer) {
59 P->error(PSI_DATA(P), NULL, PSI_WARNING,
60 "Could not map '%s' for reading: %s",
61 filename, strerror(errno));
62 return false;
63 }
64 P->input.data.file.length = sb.st_size;
65 #else
66 P->input.data.file.buffer = malloc(BSIZE);
67 #endif
68
69 P->file.fn = strdup(filename);
70
71 return true;
72 }
73
74 bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
75 {
76 P->input.type = PSI_PARSE_STRING;
77 P->input.data.string.length = length;
78 if (!(P->input.data.string.buffer = strndup(string, length))) {
79 return false;
80 }
81
82 P->file.fn = strdup("<input>");
83
84 return true;
85 }
86
87 static ssize_t psi_parser_fill(struct psi_parser *P, size_t n)
88 {
89 PSI_DEBUG_PRINT(P, "PSI< Fill: n=%zu (input.type=%d)\n", n, P->input.type);
90
91 /* init if n==0 */
92 if (!n) {
93 switch (P->input.type) {
94 case PSI_PARSE_FILE:
95 P->cur = P->tok = P->mrk = P->input.data.file.buffer;
96 #if HAVE_MMAP
97 P->eof = P->input.data.file.buffer + P->input.data.file.length;
98 P->lim = P->eof;
99 #else
100 P->eof = NULL;
101 P->lim = P->input.data.file.buffer;
102 #endif
103 break;
104
105 case PSI_PARSE_STRING:
106 P->cur = P->tok = P->mrk = P->input.data.string.buffer;
107 P->eof = P->input.data.string.buffer + P->input.data.string.length;
108 P->lim = P->eof;
109 break;
110 }
111
112 PSI_DEBUG_PRINT(P, "PSI< Fill: cur=%p lim=%p eof=%p\n", P->cur, P->lim, P->eof);
113 }
114
115 switch (P->input.type) {
116 case PSI_PARSE_STRING:
117 break;
118
119 case PSI_PARSE_FILE:
120 #if !HAVE_MMAP
121 if (!P->eof) {
122 size_t consumed = P->tok - P->buf;
123 size_t reserved = P->lim - P->tok;
124 size_t available = BSIZE - reserved;
125 size_t didread;
126
127 if (consumed) {
128 memmove(P->buf, P->tok, reserved);
129 P->tok -= consumed;
130 P->cur -= consumed;
131 P->lim -= consumed;
132 P->mrk -= consumed;
133 }
134
135 didread = fread(P->lim, 1, available, P->fp);
136 P->lim += didread;
137 if (didread < available) {
138 P->eof = P->lim;
139 }
140 PSI_DEBUG_PRINT(P, "PSI< Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n",
141 consumed, reserved, available, didread);
142 }
143 #endif
144 break;
145 }
146
147 PSI_DEBUG_PRINT(P, "PSI< Fill: avail=%td\n", P->lim - P->cur);
148
149 return P->lim - P->cur;
150 }
151
152 void psi_parser_parse(struct psi_parser *P, struct psi_token *T)
153 {
154 if (T) {
155 psi_parser_proc_parse(P->proc, T->type, T, P);
156 } else {
157 psi_parser_proc_parse(P->proc, 0, NULL, P);
158 }
159 }
160
161 void psi_parser_dtor(struct psi_parser *P)
162 {
163 psi_parser_proc_free(&P->proc);
164
165 switch (P->input.type) {
166 case PSI_PARSE_FILE:
167 if (P->input.data.file.buffer) {
168 #if HAVE_MMAP
169 munmap(P->input.data.file.buffer, P->input.data.file.length);
170 #else
171 free(P->input.data.file.buffer);
172 #endif
173 }
174 if (P->input.data.file.handle) {
175 fclose(P->input.data.file.handle);
176 }
177 break;
178
179 case PSI_PARSE_STRING:
180 if (P->input.data.string.buffer) {
181 free(P->input.data.string.buffer);
182 }
183 break;
184 }
185
186 psi_data_dtor(PSI_DATA(P));
187
188 memset(P, 0, sizeof(*P));
189 }
190
/**
 * Destroy and free a heap-allocated parser, then clear the caller's pointer.
 * Does nothing when *P is already NULL.
 *
 * @param P  address of the parser pointer
 */
void psi_parser_free(struct psi_parser **P)
{
	struct psi_parser *parser = *P;

	if (!parser) {
		return;
	}

	psi_parser_dtor(parser);
	free(parser);
	*P = NULL;
}
199
/* The re2c directive below is replaced by a generated YYMAXFILL definition. */
/*!max:re2c*/
#if BSIZE < YYMAXFILL
# error BSIZE must be greater than YYMAXFILL
#endif

/*
 * Scanner helpers. All of them expect a `struct psi_parser *P` in scope.
 */

/* Record token id t in P->num, log it in debug mode and return it from the scanner. */
#define RETURN(t) do { \
	P->num = (t); \
	PSI_DEBUG_PRINT(P, "PSI< TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \
			P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \
			P->file.fn, P->line, P->col); \
	return P->num; \
} while (0)

/* Advance the column by the length of the text between P->tok and P->cur. */
#define ADDCOLS \
	(P->col += P->cur - P->tok)

/* Start a new line (column 1, next line number) and jump to label. */
#define NEWLINE(label) do { \
	P->col = 1; \
	++P->line; \
	goto label; \
} while (0)
220
221 token_t psi_parser_scan(struct psi_parser *P)
222 {
223 if (!P->cur) {
224 psi_parser_fill(P, 0);
225 }
226 for (;;) {
227 ADDCOLS;
228 nextline:
229 P->tok = P->cur;
230 /*!re2c
231 re2c:indent:top = 2;
232 re2c:define:YYCTYPE = "unsigned char";
233 re2c:define:YYCURSOR = P->cur;
234 re2c:define:YYLIMIT = P->lim;
235 re2c:define:YYMARKER = P->mrk;
236 re2c:define:YYFILL = "{ if (!psi_parser_fill(P,@@)) RETURN(PSI_T_EOF); }";
237 re2c:yyfill:parameter = 0;
238
239 B = [^a-zA-Z0-9_];
240 W = [a-zA-Z0-9_];
241 NAME = [a-zA-Z_]W*;
242 NSNAME = (NAME)? ("\\" NAME)+;
243 DOLLAR_NAME = '$' W+;
244 QUOTED_STRING = "\"" ([^\"])+ "\"";
245 NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;
246
247 "/*" { goto comment; }
248 ("#"|"//") .* "\n" { NEWLINE(nextline); }
249 "(" {RETURN(PSI_T_LPAREN);}
250 ")" {RETURN(PSI_T_RPAREN);}
251 ";" {RETURN(PSI_T_EOS);}
252 "," {RETURN(PSI_T_COMMA);}
253 ":" {RETURN(PSI_T_COLON);}
254 "{" {RETURN(PSI_T_LBRACE);}
255 "}" {RETURN(PSI_T_RBRACE);}
256 "[" {RETURN(PSI_T_LBRACKET);}
257 "]" {RETURN(PSI_T_RBRACKET);}
258 "!=" {RETURN(PSI_T_CMP_NE);}
259 "==" {RETURN(PSI_T_CMP_EQ);}
260 "&&" {RETURN(PSI_T_AND);}
261 "||" {RETURN(PSI_T_OR);}
262 "=" {RETURN(PSI_T_EQUALS);}
263 "*" {RETURN(PSI_T_ASTERISK);}
264 "~" {RETURN(PSI_T_TILDE);}
265 "!" {RETURN(PSI_T_NOT);}
266 "%" {RETURN(PSI_T_MODULO);}
267 "&" {RETURN(PSI_T_AMPERSAND);}
268 "+" {RETURN(PSI_T_PLUS);}
269 "-" {RETURN(PSI_T_MINUS);}
270 "/" {RETURN(PSI_T_SLASH);}
271 "|" {RETURN(PSI_T_PIPE);}
272 "^" {RETURN(PSI_T_CARET);}
273 "<<" {RETURN(PSI_T_LSHIFT);}
274 ">>" {RETURN(PSI_T_RSHIFT);}
275 "<=" {RETURN(PSI_T_CMP_LE);}
276 ">=" {RETURN(PSI_T_CMP_GE);}
277 "<" {RETURN(PSI_T_LCHEVR);}
278 ">" {RETURN(PSI_T_RCHEVR);}
279 "..." {RETURN(PSI_T_ELLIPSIS);}
280 [\r\n] { NEWLINE(nextline); }
281 [\t ]+ { continue; }
282 'TRUE' {RETURN(PSI_T_TRUE);}
283 'FALSE' {RETURN(PSI_T_FALSE);}
284 'NULL' {RETURN(PSI_T_NULL);}
285 'MIXED' {RETURN(PSI_T_MIXED);}
286 'CALLABLE' {RETURN(PSI_T_CALLABLE);}
287 'VOID' {RETURN(PSI_T_VOID);}
288 'BOOL' {RETURN(PSI_T_BOOL);}
289 'CHAR' {RETURN(PSI_T_CHAR);}
290 'SHORT' {RETURN(PSI_T_SHORT);}
291 'INT' {RETURN(PSI_T_INT);}
292 'LONG' {RETURN(PSI_T_LONG);}
293 'FLOAT' {RETURN(PSI_T_FLOAT);}
294 'DOUBLE' {RETURN(PSI_T_DOUBLE);}
295 'INT8_T' {RETURN(PSI_T_INT8);}
296 'UINT8_T' {RETURN(PSI_T_UINT8);}
297 'INT16_T' {RETURN(PSI_T_INT16);}
298 'UINT16_T' {RETURN(PSI_T_UINT16);}
299 'INT32_T' {RETURN(PSI_T_INT32);}
300 'UINT32_T' {RETURN(PSI_T_UINT32);}
301 'INT64_T' {RETURN(PSI_T_INT64);}
302 'UINT64_T' {RETURN(PSI_T_UINT64);}
303 'UNSIGNED' {RETURN(PSI_T_UNSIGNED);}
304 'SIGNED' {RETURN(PSI_T_SIGNED);}
305 'STRING' {RETURN(PSI_T_STRING);}
306 'ARRAY' {RETURN(PSI_T_ARRAY);}
307 'OBJECT' {RETURN(PSI_T_OBJECT);}
308 'CALLBACK' {RETURN(PSI_T_CALLBACK);}
309 'STATIC' {RETURN(PSI_T_STATIC);}
310 'FUNCTION' {RETURN(PSI_T_FUNCTION);}
311 'TYPEDEF' {RETURN(PSI_T_TYPEDEF);}
312 'STRUCT' {RETURN(PSI_T_STRUCT);}
313 'UNION' {RETURN(PSI_T_UNION);}
314 'ENUM' {RETURN(PSI_T_ENUM);}
315 'CONST' {RETURN(PSI_T_CONST);}
316 'LIB' {RETURN(PSI_T_LIB);}
317 'LET' {RETURN(PSI_T_LET);}
318 'SET' {RETURN(PSI_T_SET);}
319 'PRE_ASSERT' {RETURN(PSI_T_PRE_ASSERT);}
320 'POST_ASSERT' {RETURN(PSI_T_POST_ASSERT);}
321 'RETURN' {RETURN(PSI_T_RETURN);}
322 'FREE' {RETURN(PSI_T_FREE);}
323 'TEMP' {RETURN(PSI_T_TEMP);}
324 'STRLEN' {RETURN(PSI_T_STRLEN);}
325 'STRVAL' {RETURN(PSI_T_STRVAL);}
326 'PATHVAL' {RETURN(PSI_T_PATHVAL);}
327 'INTVAL' {RETURN(PSI_T_INTVAL);}
328 'FLOATVAL' {RETURN(PSI_T_FLOATVAL);}
329 'BOOLVAL' {RETURN(PSI_T_BOOLVAL);}
330 'ARRVAL' {RETURN(PSI_T_ARRVAL);}
331 'OBJVAL' {RETURN(PSI_T_OBJVAL);}
332 'ZVAL' {RETURN(PSI_T_ZVAL);}
333 'COUNT' {RETURN(PSI_T_COUNT);}
334 'CALLOC' {RETURN(PSI_T_CALLOC);}
335 'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);}
336 'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);}
337 'TO_STRING' {RETURN(PSI_T_TO_STRING);}
338 'TO_INT' {RETURN(PSI_T_TO_INT);}
339 'TO_FLOAT' {RETURN(PSI_T_TO_FLOAT);}
340 'TO_BOOL' {RETURN(PSI_T_TO_BOOL);}
341 NUMBER {RETURN(PSI_T_NUMBER);}
342 NAME {RETURN(PSI_T_NAME);}
343 NSNAME {RETURN(PSI_T_NSNAME);}
344 DOLLAR_NAME {RETURN(PSI_T_DOLLAR_NAME);}
345 QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);}
346 [^] {break;}
347 */
348
349 comment:
350 P->tok = P->cur;
351 /*!re2c
352 "\n" { NEWLINE(comment); }
353 "*" "/" { continue; }
354 [^] { goto comment; }
355 */
356 }
357 return -1;
358 }