fix awk re
[m6w6/ext-psi] / src / parser.re
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27 #include <sys/mman.h>
28 #include <assert.h>
29 #include <stdarg.h>
30
31 #include "parser.h"
32
/*
 * The max:re2c marker below is a directive for the re2c code generator
 * (per the re2c manual it is where re2c emits its YYMAXFILL definition).
 * If YYMAXFILL is still undefined afterwards, fall back to 256.
 * The input loaders in this file append YYMAXFILL zero bytes after the
 * input data they copy into a psi_parser_input buffer.
 */
33 /*!max:re2c*/
34 #ifndef YYMAXFILL
35 # define YYMAXFILL 256
36 #endif
37
38 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
39 {
40 if (!P) {
41 P = malloc(sizeof(*P));
42 }
43 memset(P, 0, sizeof(*P));
44
45 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
46
47 P->preproc = psi_cpp_init(P);
48
49 psi_cpp_load_defaults(P->preproc);
50
51 return P;
52 }
53
54 struct psi_parser_input *psi_parser_open_file(struct psi_parser *P, const char *filename, bool report_errors)
55 {
56 struct stat sb;
57 FILE *fp;
58 struct psi_parser_input *fb;
59
60 if (stat(filename, &sb)) {
61 if (report_errors) {
62 P->error(PSI_DATA(P), NULL, PSI_WARNING,
63 "Could not stat '%s': %s",
64 filename, strerror(errno));
65 }
66 return NULL;
67 }
68
69 if (!(fb = malloc(sizeof(*fb) + strlen(filename) + 1 + sb.st_size + YYMAXFILL))) {
70 if (report_errors) {
71 P->error(PSI_DATA(P), NULL, PSI_WARNING,
72 "Could not allocate %zu bytes for reading '%s': %s",
73 sb.st_size + YYMAXFILL, filename, strerror(errno));
74 }
75 return NULL;
76 }
77
78 if (!(fp = fopen(filename, "r"))) {
79 free(fb);
80 if (report_errors) {
81 P->error(PSI_DATA(P), NULL, PSI_WARNING,
82 "Could not open '%s' for reading: %s",
83 filename, strerror(errno));
84 }
85 return NULL;
86 }
87
88 if (sb.st_size != fread(fb->buffer, 1, sb.st_size, fp)) {
89 free(fb);
90 fclose(fp);
91 if (report_errors) {
92 P->error(PSI_DATA(P), NULL, PSI_WARNING,
93 "Could not read %zu bytes from '%s': %s",
94 sb.st_size + YYMAXFILL, filename, strerror(errno));
95 }
96 return NULL;
97 }
98
99 memset(fb->buffer + sb.st_size, 0, YYMAXFILL);
100 fb->length = sb.st_size;
101 fb->file = &fb->buffer[sb.st_size + YYMAXFILL];
102 memcpy(fb->file, filename, strlen(filename) + 1);
103
104 return fb;
105 }
106
107 struct psi_parser_input *psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
108 {
109 struct psi_parser_input *sb;
110
111 if (!(sb = malloc(sizeof(*sb) + sizeof("<stdin>") + length + YYMAXFILL))) {
112 P->error(PSI_DATA(P), NULL, PSI_WARNING,
113 "Could not allocate %zu bytes: %s",
114 length + YYMAXFILL, strerror(errno));
115 return NULL;
116 }
117
118 memcpy(sb->buffer, string, length);
119 memset(sb->buffer + length, 0, YYMAXFILL);
120
121 sb->length = length;
122 sb->file = &sb->buffer[length + YYMAXFILL];
123 memcpy(sb->file, "<stdin>", sizeof("<stdin>"));
124
125 return sb;
126 }
127
/*
 * Disabled code: everything between #if 0 and #endif is not compiled.
 *
 * psi_parser_register_constants() iterates the P->cpp.defs hash table and,
 * for every entry, builds a psi_const from the zval's value (bool, int,
 * float, or otherwise its string conversion) and appends it to P->consts,
 * creating that list on first use.
 *
 * NOTE(review): this reads P->cpp.defs, while psi_parser_init() in this file
 * sets up P->preproc; presumably this block predates that layout and would
 * need porting before it could be re-enabled. Confirm before reviving.
 */
128 #if 0
129 static void psi_parser_register_constants(struct psi_parser *P)
130 {
131 zend_string *key;
132 zval *val;
133
134 ZEND_HASH_FOREACH_STR_KEY_VAL(&P->cpp.defs, key, val)
135 {
136 struct psi_impl_def_val *iv;
137 struct psi_const_type *ct;
138 struct psi_const *c;
139 const char *ctn;
140 token_t ctt;
141 impl_val tmp;
142 zend_string *str;
143
144 ZVAL_DEREF(val);
145 switch (Z_TYPE_P(val)) {
146 case IS_TRUE:
147 case IS_FALSE:
148 ctt = PSI_T_BOOL;
149 ctn = "bool";
150 tmp.zend.bval = Z_TYPE_P(val) == IS_TRUE;
151 break;
152 case IS_LONG:
153 ctt = PSI_T_INT;
154 ctn = "int";
155 tmp.zend.lval = Z_LVAL_P(val);
156 break;
157 case IS_DOUBLE:
158 ctt = PSI_T_FLOAT;
159 ctn = "float";
160 tmp.dval = Z_DVAL_P(val);
161 break;
162 default:
163 ctt = PSI_T_STRING;
164 ctn = "string";
165 str = zval_get_string(val);
166 tmp.zend.str = zend_string_dup(str, 1);
167 zend_string_release(str);
168 break;
169 }
170
171 iv = psi_impl_def_val_init(ctt, NULL);
172 iv->ival = tmp;
173 ct = psi_const_type_init(ctt, ctn);
174 c = psi_const_init(ct, key->val, iv);
175 if (!P->consts) {
176 P->consts = psi_plist_init((psi_plist_dtor) psi_const_free);
177 }
178 P->consts = psi_plist_add(P->consts, &c);
179 }
180 ZEND_HASH_FOREACH_END();
181 }
182 #endif
183
184 struct psi_plist *psi_parser_preprocess(struct psi_parser *P, struct psi_plist *tokens)
185 {
186 if (psi_cpp_process(P->preproc, &tokens)) {
187 return tokens;
188 }
189 return NULL;
190 }
191
192 bool psi_parser_process(struct psi_parser *P, struct psi_plist *tokens, size_t *processed)
193 {
194 if (psi_plist_count(tokens)) {
195 int rc;
196
197 if (P->flags & PSI_DEBUG) {
198 psi_parser_proc_debug = 1;
199 }
200 rc = psi_parser_proc_parse(P, tokens, processed);
201 if (P->flags & PSI_DEBUG) {
202 psi_parser_proc_debug = 0;
203 }
204 return rc == 0;
205 }
206 return true;
207 }
208
/**
 * Scan, preprocess and parse one input.
 *
 * @param P parser instance
 * @param I input to parse
 * @return true if all three stages succeeded, false as soon as one fails
 */
bool psi_parser_parse(struct psi_parser *P, struct psi_parser_input *I)
{
	struct psi_plist *raw, *expanded;
	size_t processed = 0;
	bool ok;

	raw = psi_parser_scan(P, I);
	if (!raw) {
		return false;
	}

	expanded = psi_parser_preprocess(P, raw);
	if (!expanded) {
		/* preprocessing failed: release the scanned list */
		psi_plist_free(raw);
		return false;
	}

	/* the preprocessed list is released regardless of the parse result */
	ok = psi_parser_process(P, expanded, &processed);
	psi_plist_free(expanded);

	return ok;
}
231
232 void psi_parser_dtor(struct psi_parser *P)
233 {
234 psi_cpp_free(&P->preproc);
235 psi_data_dtor(PSI_DATA(P));
236
237 memset(P, 0, sizeof(*P));
238 }
239
/**
 * Destroy and free a heap-allocated parser and reset the caller's pointer.
 *
 * Does nothing when *P is already NULL.
 *
 * @param P address of the parser pointer; *P is set to NULL afterwards
 */
void psi_parser_free(struct psi_parser **P)
{
	struct psi_parser *parser = *P;

	if (!parser) {
		return;
	}

	psi_parser_dtor(parser);
	free(parser);
	*P = NULL;
}
248
/*
 * Scanner helper macros. Both rely on the local variables of
 * psi_parser_scan() (P, I, tok, cur, eol, token, tokens) and are wrapped in
 * do { } while (0) so that each expands to exactly one statement.
 */

/* Remember the cursor as the start of the next line and count the line. */
#define NEWLINE() do { \
		eol = cur; \
		++I->lines; \
	} while (0)

/*
 * Create a token of type t spanning tok..cur, append it to the token list
 * and, if PSI_DEBUG is set, dump it to stderr (fd 2).
 */
#define NEWTOKEN(t) do { \
		token = psi_token_init((t), tok, cur - tok, tok - eol + 1, I->lines, I->file); \
		tokens = psi_plist_add(tokens, &token); \
		if (P->flags & PSI_DEBUG) { \
			fprintf(stderr, "PSI< "); \
			psi_token_dump(2, token); \
		} \
	} while (0)
260
261
/**
 * Tokenize the input I into a list of psi_token objects.
 *
 * The scanner is generated by re2c from the rule blocks below. Starting at
 * I->buffer it appends one token per matched rule via NEWTOKEN(); white
 * space, line ends and comments are emitted as tokens as well
 * (PSI_T_WHITESPACE, PSI_T_EOL, PSI_T_COMMENT). I->lines is reset to 1 and
 * incremented by NEWLINE() on every EOL match.
 *
 * NOTE(review): EOL is the single-character class [\r\n], so a CRLF pair
 * looks like it yields two PSI_T_EOL tokens and bumps I->lines twice.
 * Confirm whether that is intended.
 *
 * @param P parser; supplies the flags tested by NEWTOKEN() and the error
 *          callback
 * @param I input to scan; buffer, length and file are read, lines is written
 * @return the token list, or NULL after a syntax error has been reported
 *         through P->error() and the partial list has been freed
 */
262 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
263 {
264 struct psi_plist *tokens;
265 struct psi_token *token;
/*
 * tok: start of the token being matched     cur: scan cursor (YYCURSOR)
 * lim: end of the input data (YYLIMIT)      mrk: re2c marker (YYMARKER)
 * eol: cursor position recorded by the last NEWLINE(); used for columns
 */
266 const char *tok, *cur, *lim, *mrk, *eol;
267
268 tok = mrk = eol = cur = I->buffer;
269 lim = I->buffer + I->length;
270 I->lines = 1;
271 tokens = psi_plist_init((void (*)(void *)) psi_token_free);
272
/* Every rule action of the main block jumps back here for the next token. */
273 start: ;
274 tok = cur;
275
276 /*!re2c
277
278 re2c:indent:top = 2;
279 re2c:define:YYCTYPE = "unsigned char";
280 re2c:define:YYCURSOR = cur;
281 re2c:define:YYLIMIT = lim;
282 re2c:define:YYMARKER = mrk;
283 re2c:define:YYFILL = "if (cur >= lim) goto done;";
284 re2c:yyfill:parameter = 0;
285
286 W = [a-zA-Z0-9_\x80-\xff];
287 SP = [ \t];
288 EOL = [\r\n];
289 NAME = [a-zA-Z_\x80-\xff]W*;
290 NSNAME = (NAME)? ("\\" NAME)+;
291 DOLLAR_NAME = '$' W+;
292 QUOTED_STRING = "L"? "\"" ([^"])+ "\"";
293 QUOTED_CHAR = "L"? "'" ([^']+ "\\'"?)+ "'";
294 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
295
296 DEC_CONST = [1-9] [0-9]*;
297 OCT_CONST = "0" [0-7]*;
298 HEX_CONST = '0x' [0-9a-fA-F]+;
299 INT_SUFFIX = 'u'('l' 'l'? )? | 'l'('l'? 'u')?;
300 INT_NUMBER = (DEC_CONST | OCT_CONST | HEX_CONST) INT_SUFFIX?;
301
302 FLT_HEX_FRAC = [0-9a-fA-F]*;
303 FLT_HEX_SIG = HEX_CONST ("." FLT_HEX_FRAC)?;
304 FLT_HEX_EXPO = 'p' [+-]? [0-9]+;
305 FLT_HEX_CONST = FLT_HEX_SIG FLT_HEX_EXPO;
306 FLT_DEC_NUM = "0" | DEC_CONST;
307 FLT_DEC_FRAC = [0-9]*;
308 FLT_DEC_SIG = FLT_DEC_NUM ("." FLT_DEC_FRAC)?;
309 FLT_DEC_EXPO = 'e' [+-]? [0-9]+;
310 FLT_DEC_CONST = (FLT_DEC_SIG FLT_DEC_EXPO) | (FLT_DEC_NUM? "." FLT_DEC_FRAC);
311 FLT_SUFFIX = 'f' | 'l' | ('d' ('f' | 'd' | 'l'));
312 FLT_NUMBER = (FLT_DEC_CONST | FLT_HEX_CONST) FLT_SUFFIX?;
313
314 NUMBER = [+-]? (INT_NUMBER | FLT_NUMBER);
315
316 "/*" { goto comment; }
317 "//" { goto comment_sl; }
318 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
319 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
320 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
321 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
322 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
323 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
324 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
325 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
326 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
327 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
328 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
329 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
330 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
331 "||" { NEWTOKEN(PSI_T_OR); goto start; }
332 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
333 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
334 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
335 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
336 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
337 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
338 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
339 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
340 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
341 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
342 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
343 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
344 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
345 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
346 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
347 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
348 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
349 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
350 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
351 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
352 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
353 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
354 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
355 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
356 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
357 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
358 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
359 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
360 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
361 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
362 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
363 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
364 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
365 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
366 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
367 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
368 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
369 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
370 'VOID' { NEWTOKEN(PSI_T_VOID); goto start; }
371 'BOOL' { NEWTOKEN(PSI_T_BOOL); goto start; }
372 'CHAR' { NEWTOKEN(PSI_T_CHAR); goto start; }
373 'SHORT' { NEWTOKEN(PSI_T_SHORT); goto start; }
374 'INT' { NEWTOKEN(PSI_T_INT); goto start; }
375 'LONG' { NEWTOKEN(PSI_T_LONG); goto start; }
376 'FLOAT' { NEWTOKEN(PSI_T_FLOAT); goto start; }
377 'DOUBLE' { NEWTOKEN(PSI_T_DOUBLE); goto start; }
378 'INT8_T' { NEWTOKEN(PSI_T_INT8); goto start; }
379 'UINT8_T' { NEWTOKEN(PSI_T_UINT8); goto start; }
380 'INT16_T' { NEWTOKEN(PSI_T_INT16); goto start; }
381 'UINT16_T' { NEWTOKEN(PSI_T_UINT16); goto start; }
382 'INT32_T' { NEWTOKEN(PSI_T_INT32); goto start; }
383 'UINT32_T' { NEWTOKEN(PSI_T_UINT32); goto start; }
384 'INT64_T' { NEWTOKEN(PSI_T_INT64); goto start; }
385 'UINT64_T' { NEWTOKEN(PSI_T_UINT64); goto start; }
386 'UNSIGNED' { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
387 'SIGNED' { NEWTOKEN(PSI_T_SIGNED); goto start; }
388 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
389 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
390 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
391 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
392 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
393 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
394 'TYPEDEF' { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
395 'STRUCT' { NEWTOKEN(PSI_T_STRUCT); goto start; }
396 'UNION' { NEWTOKEN(PSI_T_UNION); goto start; }
397 'ENUM' { NEWTOKEN(PSI_T_ENUM); goto start; }
398 'CONST' { NEWTOKEN(PSI_T_CONST); goto start; }
399 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
400 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
401 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
402 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
403 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
404 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
405 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
406 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
407 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
408 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
409 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
410 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
411 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
412 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
413 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
414 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
415 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
416 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
417 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
418 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
419 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
420 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
421 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
422 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
423 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
424 NUMBER { NEWTOKEN(PSI_T_NUMBER); goto start; }
425 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
426 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
427 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
428 QUOTED_STRING { NEWTOKEN(PSI_T_QUOTED_STRING); goto start; }
429 QUOTED_CHAR { NEWTOKEN(PSI_T_QUOTED_CHAR); goto start; }
430 CPP_HEADER { NEWTOKEN(PSI_T_CPP_HEADER); goto start; }
431 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
432 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
433 [^] { NEWTOKEN(-2); goto error; }
434 * { NEWTOKEN(-1); goto error; }
435
436 */
437
/*
 * Block comment state. tok is not reset here, so the PSI_T_COMMENT token
 * created on the closing delimiter spans the whole comment; line ends
 * inside the comment are still counted via NEWLINE().
 */
438 comment: ;
439 /*!re2c
440
441 EOL { NEWLINE(); goto comment; }
442 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
443 * { goto comment; }
444
445 */
446
/*
 * Single-line comment state: runs until the first EOL character, which is
 * consumed before the PSI_T_COMMENT token is created.
 */
447 comment_sl: ;
448 /*!re2c
449
450 EOL { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
451 * { goto comment_sl; }
452
453 */
/*
 * Reached from the two catch-all rules of the main block; token is the one
 * they just created (type -2 or -1). The error is reported first, then the
 * list built so far is freed.
 */
454 error: ;
455
456 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
457 token->type, token->size, token->text, tok - eol + 1);
458 psi_plist_free(tokens);
459 return NULL;
460
/*
 * Reached through the YYFILL hook once cur >= lim.
 * NOTE(review): presumably this is also where scanning ends when the input
 * stops inside a comment, in which case no PSI_T_COMMENT token is created
 * and no error is reported. Confirm against the generated code.
 */
461 done:
462
463 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);
464
465 return tokens;
466 }