fb460803bf37af692f85bdbaa5180bfdfcc573be
[m6w6/ext-psi] / src / parser.re
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27 #include <sys/mman.h>
28 #include <assert.h>
29
30 #include "parser.h"
31
/*
 * Entry points of the parser procedure whose handle is kept in P->proc.
 * They are only declared here; the definitions are not part of this file.
 * NOTE(review): presumably emitted by a parser generator from the grammar
 * source - confirm against the build.
 */
32 void *psi_parser_proc_init(void);
33 void psi_parser_proc_free(void **parser_proc);
34 void psi_parser_proc_parse(void *parser_proc, token_t r, struct psi_token *token, struct psi_parser *parser);
35 void psi_parser_proc_trace(FILE *out, char *prefix);
36
37 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
38 {
39 if (!P) {
40 P = malloc(sizeof(*P));
41 }
42 memset(P, 0, sizeof(*P));
43
44 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
45
46 P->col = 1;
47 P->line = 1;
48 P->proc = psi_parser_proc_init();
49
50 if (flags & PSI_DEBUG) {
51 psi_parser_proc_trace(stderr, "PSI> ");
52 }
53
54 return P;
55 }
56
57 bool psi_parser_open_file(struct psi_parser *P, const char *filename)
58 {
59 FILE *fp = fopen(filename, "r");
60
61 if (!fp) {
62 P->error(PSI_DATA(P), NULL, PSI_WARNING,
63 "Could not open '%s' for reading: %s",
64 filename, strerror(errno));
65 return false;
66 }
67
68 P->input.type = PSI_PARSE_FILE;
69 P->input.data.file.handle = fp;
70
71 #if HAVE_MMAP
72 struct stat sb;
73 int fd = fileno(fp);
74
75 if (fstat(fd, &sb)) {
76 P->error(PSI_DATA(P), NULL, PSI_WARNING,
77 "Could not stat '%s': %s",
78 filename, strerror(errno));
79 return false;
80 }
81
82 P->input.data.file.buffer = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
83 if (MAP_FAILED == P->input.data.file.buffer) {
84 P->error(PSI_DATA(P), NULL, PSI_WARNING,
85 "Could not map '%s' for reading: %s",
86 filename, strerror(errno));
87 return false;
88 }
89 P->input.data.file.length = sb.st_size;
90 #else
91 P->input.data.file.buffer = malloc(BSIZE);
92 #endif
93
94 P->file.fn = strdup(filename);
95
96 return true;
97 }
98
99 bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
100 {
101 P->input.type = PSI_PARSE_STRING;
102 P->input.data.string.length = length;
103 if (!(P->input.data.string.buffer = strndup(string, length))) {
104 return false;
105 }
106
107 P->file.fn = strdup("<input>");
108
109 return true;
110 }
111
112 static ssize_t psi_parser_fill(struct psi_parser *P, size_t n)
113 {
114 PSI_DEBUG_PRINT(P, "PSI< Fill: n=%zu (input.type=%d)\n", n, P->input.type);
115
116 /* init if n==0 */
117 if (!n) {
118 switch (P->input.type) {
119 case PSI_PARSE_FILE:
120 P->cur = P->tok = P->mrk = P->input.data.file.buffer;
121 #if HAVE_MMAP
122 P->eof = P->input.data.file.buffer + P->input.data.file.length;
123 P->lim = P->eof;
124 #else
125 P->eof = NULL;
126 P->lim = P->input.data.file.buffer;
127 #endif
128 break;
129
130 case PSI_PARSE_STRING:
131 P->cur = P->tok = P->mrk = P->input.data.string.buffer;
132 P->eof = P->input.data.string.buffer + P->input.data.string.length;
133 P->lim = P->eof;
134 break;
135 }
136
137 PSI_DEBUG_PRINT(P, "PSI< Fill: cur=%p lim=%p eof=%p\n", P->cur, P->lim, P->eof);
138 }
139
140 switch (P->input.type) {
141 case PSI_PARSE_STRING:
142 break;
143
144 case PSI_PARSE_FILE:
145 #if !HAVE_MMAP
146 if (!P->eof) {
147 size_t consumed = P->tok - P->buf;
148 size_t reserved = P->lim - P->tok;
149 size_t available = BSIZE - reserved;
150 size_t didread;
151
152 if (consumed) {
153 memmove(P->buf, P->tok, reserved);
154 P->tok -= consumed;
155 P->cur -= consumed;
156 P->lim -= consumed;
157 P->mrk -= consumed;
158 }
159
160 didread = fread(P->lim, 1, available, P->fp);
161 P->lim += didread;
162 if (didread < available) {
163 P->eof = P->lim;
164 }
165 PSI_DEBUG_PRINT(P, "PSI< Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n",
166 consumed, reserved, available, didread);
167 }
168 #endif
169 break;
170 }
171
172 PSI_DEBUG_PRINT(P, "PSI< Fill: avail=%td\n", P->lim - P->cur);
173
174 return P->lim - P->cur;
175 }
176
177 void psi_parser_parse(struct psi_parser *P, struct psi_token *T)
178 {
179 if (T) {
180 psi_parser_proc_parse(P->proc, T->type, T, P);
181 } else {
182 psi_parser_proc_parse(P->proc, 0, NULL, P);
183 }
184 }
185
186 void psi_parser_dtor(struct psi_parser *P)
187 {
188 psi_parser_proc_free(&P->proc);
189
190 switch (P->input.type) {
191 case PSI_PARSE_FILE:
192 if (P->input.data.file.buffer) {
193 #if HAVE_MMAP
194 munmap(P->input.data.file.buffer, P->input.data.file.length);
195 #else
196 free(P->input.data.file.buffer);
197 #endif
198 }
199 if (P->input.data.file.handle) {
200 fclose(P->input.data.file.handle);
201 }
202 break;
203
204 case PSI_PARSE_STRING:
205 if (P->input.data.string.buffer) {
206 free(P->input.data.string.buffer);
207 }
208 break;
209 }
210
211 psi_data_dtor(PSI_DATA(P));
212
213 memset(P, 0, sizeof(*P));
214 }
215
/**
 * Destroy and free a heap-allocated parser and reset the caller's pointer.
 *
 * @param P address of the parser pointer; nothing happens if it holds NULL,
 *          otherwise it is set to NULL after the parser was freed
 */
void psi_parser_free(struct psi_parser **P)
{
	struct psi_parser *parser = *P;

	if (!parser) {
		return;
	}

	psi_parser_dtor(parser);
	free(parser);
	*P = NULL;
}
224
/*!max:re2c*/
/* a BSIZE equal to YYMAXFILL passes this check; only smaller is rejected */
#if BSIZE < YYMAXFILL
# error BSIZE must not be less than YYMAXFILL
#endif

/*
 * Helpers for psi_parser_scan(); all of them expect a `struct psi_parser *P`
 * in scope and expand to a single statement.
 */

/* Remember token id `t` in P->num, trace it in debug mode and return it. */
#define RETURN(t) do { \
	P->num = (t); \
	PSI_DEBUG_PRINT(P, "PSI< TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \
			P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \
			P->file.fn, P->line, P->col); \
	return (t); \
} while (0)

/* Advance the column by the length of the token between P->tok and P->cur. */
#define ADDCOLS \
	(P->col += P->cur - P->tok)

/* Start a new line (column 1, next line number) and jump to `label`. */
#define NEWLINE(label) do { \
	P->col = 1; \
	++P->line; \
	goto label; \
} while (0)
245
/*
 * Scan the next token from the parser's input.
 *
 * On the first call (P->cur is still NULL) the scanner pointers are set up
 * with psi_parser_fill(P, 0). Every loop iteration then marks the token
 * start (P->tok) and runs the re2c rules: horizontal whitespace, line
 * comments and block comments are skipped, newlines reset the column and
 * increment the line counter, and anything else that matches is returned
 * through RETURN(), which also stores the token id in P->num. The text of
 * the token is the range between P->tok and P->cur.
 *
 * Keywords are written as single-quoted re2c literals, which re2c matches
 * case-insensitively.
 *
 * Returns the id of the matched token, PSI_T_EOF as soon as the YYFILL hook
 * finds psi_parser_fill() reporting no available input, or -1 if an input
 * byte matches none of the rules.
 */
246 token_t psi_parser_scan(struct psi_parser *P)
247 {
/* first call: let psi_parser_fill() set up cur, tok, mrk, lim and eof */
248 if (!P->cur) {
249 psi_parser_fill(P, 0);
250 }
251 for (;;) {
/* advance the column by the length of the text matched last */
252 ADDCOLS;
/* entered directly after a newline, so the column reset by NEWLINE() is kept */
253 nextline:
254 P->tok = P->cur;
255 /*!re2c
256 re2c:indent:top = 2;
257 re2c:define:YYCTYPE = "unsigned char";
258 re2c:define:YYCURSOR = P->cur;
259 re2c:define:YYLIMIT = P->lim;
260 re2c:define:YYMARKER = P->mrk;
261 re2c:define:YYFILL = "{ if (!psi_parser_fill(P,@@)) RETURN(PSI_T_EOF); }";
262 re2c:yyfill:parameter = 0;
263
264 B = [^a-zA-Z0-9_];
265 W = [a-zA-Z0-9_];
266 NAME = [a-zA-Z_]W*;
267 NSNAME = (NAME)? ("\\" NAME)+;
268 DOLLAR_NAME = '$' W+;
269 QUOTED_STRING = "\"" ([^\"])+ "\"";
270 NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;
271
272 "/*" { goto comment; }
273 ("#"|"//") .* "\n" { NEWLINE(nextline); }
274 "(" {RETURN(PSI_T_LPAREN);}
275 ")" {RETURN(PSI_T_RPAREN);}
276 ";" {RETURN(PSI_T_EOS);}
277 "," {RETURN(PSI_T_COMMA);}
278 ":" {RETURN(PSI_T_COLON);}
279 "{" {RETURN(PSI_T_LBRACE);}
280 "}" {RETURN(PSI_T_RBRACE);}
281 "[" {RETURN(PSI_T_LBRACKET);}
282 "]" {RETURN(PSI_T_RBRACKET);}
283 "!=" {RETURN(PSI_T_CMP_NE);}
284 "==" {RETURN(PSI_T_CMP_EQ);}
285 "&&" {RETURN(PSI_T_AND);}
286 "||" {RETURN(PSI_T_OR);}
287 "=" {RETURN(PSI_T_EQUALS);}
288 "*" {RETURN(PSI_T_ASTERISK);}
289 "~" {RETURN(PSI_T_TILDE);}
290 "!" {RETURN(PSI_T_NOT);}
291 "%" {RETURN(PSI_T_MODULO);}
292 "&" {RETURN(PSI_T_AMPERSAND);}
293 "+" {RETURN(PSI_T_PLUS);}
294 "-" {RETURN(PSI_T_MINUS);}
295 "/" {RETURN(PSI_T_SLASH);}
296 "|" {RETURN(PSI_T_PIPE);}
297 "^" {RETURN(PSI_T_CARET);}
298 "<<" {RETURN(PSI_T_LSHIFT);}
299 ">>" {RETURN(PSI_T_RSHIFT);}
300 "<=" {RETURN(PSI_T_CMP_LE);}
301 ">=" {RETURN(PSI_T_CMP_GE);}
302 "<" {RETURN(PSI_T_LCHEVR);}
303 ">" {RETURN(PSI_T_RCHEVR);}
304 "..." {RETURN(PSI_T_ELLIPSIS);}
305 [\r\n] { NEWLINE(nextline); }
306 [\t ]+ { continue; }
307 'TRUE' {RETURN(PSI_T_TRUE);}
308 'FALSE' {RETURN(PSI_T_FALSE);}
309 'NULL' {RETURN(PSI_T_NULL);}
310 'MIXED' {RETURN(PSI_T_MIXED);}
311 'CALLABLE' {RETURN(PSI_T_CALLABLE);}
312 'VOID' {RETURN(PSI_T_VOID);}
313 'BOOL' {RETURN(PSI_T_BOOL);}
314 'CHAR' {RETURN(PSI_T_CHAR);}
315 'SHORT' {RETURN(PSI_T_SHORT);}
316 'INT' {RETURN(PSI_T_INT);}
317 'LONG' {RETURN(PSI_T_LONG);}
318 'FLOAT' {RETURN(PSI_T_FLOAT);}
319 'DOUBLE' {RETURN(PSI_T_DOUBLE);}
320 'INT8_T' {RETURN(PSI_T_INT8);}
321 'UINT8_T' {RETURN(PSI_T_UINT8);}
322 'INT16_T' {RETURN(PSI_T_INT16);}
323 'UINT16_T' {RETURN(PSI_T_UINT16);}
324 'INT32_T' {RETURN(PSI_T_INT32);}
325 'UINT32_T' {RETURN(PSI_T_UINT32);}
326 'INT64_T' {RETURN(PSI_T_INT64);}
327 'UINT64_T' {RETURN(PSI_T_UINT64);}
328 'UNSIGNED' {RETURN(PSI_T_UNSIGNED);}
329 'SIGNED' {RETURN(PSI_T_SIGNED);}
330 'STRING' {RETURN(PSI_T_STRING);}
331 'ARRAY' {RETURN(PSI_T_ARRAY);}
332 'OBJECT' {RETURN(PSI_T_OBJECT);}
333 'CALLBACK' {RETURN(PSI_T_CALLBACK);}
334 'STATIC' {RETURN(PSI_T_STATIC);}
335 'FUNCTION' {RETURN(PSI_T_FUNCTION);}
336 'TYPEDEF' {RETURN(PSI_T_TYPEDEF);}
337 'STRUCT' {RETURN(PSI_T_STRUCT);}
338 'UNION' {RETURN(PSI_T_UNION);}
339 'ENUM' {RETURN(PSI_T_ENUM);}
340 'CONST' {RETURN(PSI_T_CONST);}
341 'LIB' {RETURN(PSI_T_LIB);}
342 'LET' {RETURN(PSI_T_LET);}
343 'SET' {RETURN(PSI_T_SET);}
344 'PRE_ASSERT' {RETURN(PSI_T_PRE_ASSERT);}
345 'POST_ASSERT' {RETURN(PSI_T_POST_ASSERT);}
346 'RETURN' {RETURN(PSI_T_RETURN);}
347 'FREE' {RETURN(PSI_T_FREE);}
348 'TEMP' {RETURN(PSI_T_TEMP);}
349 'STRLEN' {RETURN(PSI_T_STRLEN);}
350 'STRVAL' {RETURN(PSI_T_STRVAL);}
351 'PATHVAL' {RETURN(PSI_T_PATHVAL);}
352 'INTVAL' {RETURN(PSI_T_INTVAL);}
353 'FLOATVAL' {RETURN(PSI_T_FLOATVAL);}
354 'BOOLVAL' {RETURN(PSI_T_BOOLVAL);}
355 'ARRVAL' {RETURN(PSI_T_ARRVAL);}
356 'OBJVAL' {RETURN(PSI_T_OBJVAL);}
357 'ZVAL' {RETURN(PSI_T_ZVAL);}
358 'COUNT' {RETURN(PSI_T_COUNT);}
359 'CALLOC' {RETURN(PSI_T_CALLOC);}
360 'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);}
361 'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);}
362 'TO_STRING' {RETURN(PSI_T_TO_STRING);}
363 'TO_INT' {RETURN(PSI_T_TO_INT);}
364 'TO_FLOAT' {RETURN(PSI_T_TO_FLOAT);}
365 'TO_BOOL' {RETURN(PSI_T_TO_BOOL);}
366 NUMBER {RETURN(PSI_T_NUMBER);}
367 NAME {RETURN(PSI_T_NAME);}
368 NSNAME {RETURN(PSI_T_NSNAME);}
369 DOLLAR_NAME {RETURN(PSI_T_DOLLAR_NAME);}
370 QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);}
371 [^] {break;}
372 */
373
/* inside a block comment: skip input up to the closing delimiter, still counting lines */
374 comment:
375 P->tok = P->cur;
376 /*!re2c
377 "\n" { NEWLINE(comment); }
378 "*" "/" { continue; }
379 [^] { goto comment; }
380 */
381 }
/* only reached through the break of the catch-all rule: no rule matched */
382 return -1;
383 }