parser: NAME fallback ERROR,WARNING
[m6w6/ext-psi] / src / parser.re
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27 #include <sys/mman.h>
28 #include <assert.h>
29 #include <stdarg.h>
30
31 #include "parser.h"
32
/*
 * re2c emits a definition of YYMAXFILL in place of the directive on the next
 * line; the #ifndef that follows supplies a fallback value of 256 when no such
 * definition is present. The input buffers allocated in this file are padded
 * with YYMAXFILL zero bytes.
 */
/*!max:re2c*/
#ifndef YYMAXFILL
# define YYMAXFILL 256
#endif
/*
 * Scanner configuration and named patterns for the re2c rule blocks.
 * The cursor, limit and marker are mapped onto P->cur, P->lim and P->mrk, and
 * YYFILL is configured to jump to the `done` label once the cursor has reached
 * the limit.
 */
/*!re2c

re2c:indent:top = 2;
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = P->cur;
re2c:define:YYLIMIT = P->lim;
re2c:define:YYMARKER = P->mrk;
re2c:define:YYFILL = "if (P->cur >= P->lim) goto done;";
re2c:yyfill:parameter = 0;

B = [^a-zA-Z0-9_];
W = [a-zA-Z0-9_];
SP = [ \t];
EOL = [\r\n];
NAME = [a-zA-Z_]W*;
NSNAME = (NAME)? ("\\" NAME)+;
DOLLAR_NAME = '$' W+;
QUOTED_STRING = "\"" ([^"])+ "\"";
NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;

*/
58
59 static void free_cpp_def(zval *p)
60 {
61 if (Z_TYPE_P(p) == IS_PTR) {
62 psi_cpp_macro_decl_free((void *) &Z_PTR_P(p));
63 } else if (Z_REFCOUNTED_P(p)) {
64 zval_ptr_dtor(p);
65 }
66 }
67
68 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
69 {
70 if (!P) {
71 P = malloc(sizeof(*P));
72 }
73 memset(P, 0, sizeof(*P));
74
75 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
76
77 P->col = 1;
78 P->line = 1;
79 P->proc = psi_parser_proc_init();
80
81 zend_hash_init(&P->cpp.defs, 0, NULL, free_cpp_def, 1);
82 zval tmp;
83 ZVAL_ARR(&tmp, &P->cpp.defs);
84 add_assoc_string(&tmp, "PHP_OS", PHP_OS);
85
86 if (flags & PSI_DEBUG) {
87 psi_parser_proc_trace(stderr, "PSI> ");
88 }
89
90 return P;
91 }
92
93 void psi_parser_reset(struct psi_parser *P)
94 {
95 P->cur = P->tok = P->mrk = P->input.buffer;
96 P->lim = P->input.buffer + P->input.length;
97 }
98
99 bool psi_parser_open_file(struct psi_parser *P, const char *filename)
100 {
101 struct stat sb;
102 FILE *fp;
103 char *fb;
104
105 if (stat(filename, &sb)) {
106 P->error(PSI_DATA(P), NULL, PSI_WARNING,
107 "Could not stat '%s': %s",
108 filename, strerror(errno));
109 return false;
110 }
111
112 if (!(fb = malloc(sb.st_size + YYMAXFILL))) {
113 P->error(PSI_DATA(P), NULL, PSI_WARNING,
114 "Could not allocate %zu bytes for reading '%s': %s",
115 sb.st_size + YYMAXFILL, filename, strerror(errno));
116 return false;
117 }
118
119 if (!(fp = fopen(filename, "r"))) {
120 free(fb);
121 P->error(PSI_DATA(P), NULL, PSI_WARNING,
122 "Could not open '%s' for reading: %s",
123 filename, strerror(errno));
124 return false;
125 }
126
127 if (sb.st_size != fread(fb, 1, sb.st_size, fp)) {
128 free(fb);
129 fclose(fp);
130 P->error(PSI_DATA(P), NULL, PSI_WARNING,
131 "Could not read %zu bytes from '%s': %s",
132 sb.st_size + YYMAXFILL, filename, strerror(errno));
133 return false;
134 }
135 memset(fb + sb.st_size, 0, YYMAXFILL);
136
137 if (P->input.buffer) {
138 free(P->input.buffer);
139 }
140 P->input.buffer = fb;
141 P->input.length = sb.st_size;
142
143 P->file.fn = strdup(filename);
144
145 psi_parser_reset(P);
146
147 return true;
148 }
149
150 bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
151 {
152 char *sb;
153
154 if (!(sb = malloc(length + YYMAXFILL))) {
155 P->error(PSI_DATA(P), NULL, PSI_WARNING,
156 "Could not allocate %zu bytes: %s",
157 length + YYMAXFILL, strerror(errno));
158 return false;
159 }
160
161 memcpy(sb, string, length);
162 memset(sb + length, 0, YYMAXFILL);
163
164 if (P->input.buffer) {
165 free(P->input.buffer);
166 }
167 P->input.buffer = sb;
168 P->input.length = length;
169
170 P->file.fn = strdup("<input>");
171
172 psi_parser_reset(P);
173
174 return true;
175 }
176
#if 0
/*
 * Compiled out. Walks the string-keyed entries of P->cpp.defs and appends one
 * psi_const per entry to P->consts: booleans, integers and doubles keep their
 * type, every other value is converted to a persistent string copy.
 */
static void psi_parser_register_constants(struct psi_parser *P)
{
	zend_string *name;
	zval *entry;

	ZEND_HASH_FOREACH_STR_KEY_VAL(&P->cpp.defs, name, entry)
	{
		struct psi_impl_def_val *def_val;
		struct psi_const_type *const_type;
		struct psi_const *constant;
		const char *type_name;
		token_t type_tag;
		impl_val value;

		ZVAL_DEREF(entry);
		switch (Z_TYPE_P(entry)) {
		case IS_TRUE:
		case IS_FALSE:
			type_tag = PSI_T_BOOL;
			type_name = "bool";
			value.zend.bval = Z_TYPE_P(entry) == IS_TRUE;
			break;
		case IS_LONG:
			type_tag = PSI_T_INT;
			type_name = "int";
			value.zend.lval = Z_LVAL_P(entry);
			break;
		case IS_DOUBLE:
			type_tag = PSI_T_FLOAT;
			type_name = "float";
			value.dval = Z_DVAL_P(entry);
			break;
		default: {
			/* anything else is stringified and duplicated persistently */
			zend_string *text = zval_get_string(entry);

			type_tag = PSI_T_STRING;
			type_name = "string";
			value.zend.str = zend_string_dup(text, 1);
			zend_string_release(text);
			break;
		}
		}

		def_val = psi_impl_def_val_init(type_tag, NULL);
		def_val->ival = value;
		const_type = psi_const_type_init(type_tag, type_name);
		constant = psi_const_init(const_type, name->val, def_val);

		/* the constant list is created lazily on first use */
		if (!P->consts) {
			P->consts = psi_plist_init((psi_plist_dtor) psi_const_free);
		}
		P->consts = psi_plist_add(P->consts, &constant);
	}
	ZEND_HASH_FOREACH_END();
}
#endif
232
233 void psi_parser_parse(struct psi_parser *P)
234 {
235 size_t i = 0;
236 struct psi_token *T;
237
238 P->cpp.tokens = psi_parser_scan(P);
239
240 psi_cpp_preprocess(P, &P->cpp);
241
242 if (psi_plist_count(P->cpp.tokens)) {
243 while (psi_plist_get(P->cpp.tokens, i++, &T)) {
244 if (P->flags & PSI_DEBUG) {
245 fprintf(stderr, "PSI> ");
246 psi_token_dump(2, T);
247 }
248 psi_parser_proc_parse(P->proc, T->type, T, P);
249 }
250 psi_parser_proc_parse(P->proc, 0, NULL, P);
251 }
252
253 psi_plist_free(P->cpp.tokens);
254 P->cpp.tokens = NULL;
255 }
256
257 void psi_parser_dtor(struct psi_parser *P)
258 {
259 psi_parser_proc_free(&P->proc);
260
261 if (P->input.buffer) {
262 free(P->input.buffer);
263 P->input.buffer = NULL;
264 }
265
266 psi_data_dtor(PSI_DATA(P));
267
268 zend_hash_destroy(&P->cpp.defs);
269
270 memset(P, 0, sizeof(*P));
271 }
272
/*
 * Destroy and free a heap-allocated parser and clear the caller's pointer.
 * Does nothing when *P is already NULL.
 */
void psi_parser_free(struct psi_parser **P)
{
	struct psi_parser *parser = *P;

	if (!parser) {
		return;
	}

	psi_parser_dtor(parser);
	free(parser);
	*P = NULL;
}
281
/*
 * Scanner bookkeeping helpers. Both expect a `struct psi_parser *P` in scope
 * and are wrapped in do { } while (0) so that each expands to exactly one
 * statement, even under an unbraced if/else.
 */

/* Start a new input line: reset the column to 1 and advance the line count. */
#define NEWLINE() do { \
	P->col = 1; \
	++P->line; \
} while (0)

/*
 * Record a token of type t: store the type in P->num, allocate the token from
 * the parser state, append it to the local `tokens` list and advance the
 * column by the length of the matched text (P->cur - P->tok). With PSI_DEBUG
 * set the token is dumped to stderr prefixed with "PSI< ". Expects the locals
 * `token` and `tokens` in scope; `token` is left NULL afterwards.
 */
#define NEWTOKEN(t) do { \
	P->num = (t); \
	token = psi_token_alloc(P); \
	tokens = psi_plist_add(tokens, &token); \
	P->col += P->cur - P->tok; \
	if (P->flags & PSI_DEBUG) { \
		fprintf(stderr, "PSI< "); \
		psi_token_dump(2, token); \
	} \
	token = NULL; \
} while (0)
296
297
298 struct psi_plist *psi_parser_scan(struct psi_parser *P)
299 {
300 struct psi_plist *tokens;
301 struct psi_token *token;
302
303 if (!P->cur) {
304 return NULL;
305 }
306
307 tokens = psi_plist_init(NULL);
308
309 start: ;
310 P->tok = P->cur;
311
312 /*!re2c
313
314 "/*" { goto comment; }
315 "//" { goto comment_sl; }
316 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
317 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
318 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
319 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
320 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
321 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
322 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
323 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
324 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
325 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
326 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
327 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
328 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
329 "||" { NEWTOKEN(PSI_T_OR); goto start; }
330 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
331 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
332 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
333 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
334 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
335 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
336 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
337 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
338 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
339 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
340 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
341 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
342 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
343 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
344 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
345 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
346 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
347 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
348 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
349 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
350 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
351 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
352 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
353 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
354 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
355 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
356 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
357 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
358 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
359 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
360 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
361 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
362 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
363 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
364 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
365 'VOID' { NEWTOKEN(PSI_T_VOID); goto start; }
366 'BOOL' { NEWTOKEN(PSI_T_BOOL); goto start; }
367 'CHAR' { NEWTOKEN(PSI_T_CHAR); goto start; }
368 'SHORT' { NEWTOKEN(PSI_T_SHORT); goto start; }
369 'INT' { NEWTOKEN(PSI_T_INT); goto start; }
370 'LONG' { NEWTOKEN(PSI_T_LONG); goto start; }
371 'FLOAT' { NEWTOKEN(PSI_T_FLOAT); goto start; }
372 'DOUBLE' { NEWTOKEN(PSI_T_DOUBLE); goto start; }
373 'INT8_T' { NEWTOKEN(PSI_T_INT8); goto start; }
374 'UINT8_T' { NEWTOKEN(PSI_T_UINT8); goto start; }
375 'INT16_T' { NEWTOKEN(PSI_T_INT16); goto start; }
376 'UINT16_T' { NEWTOKEN(PSI_T_UINT16); goto start; }
377 'INT32_T' { NEWTOKEN(PSI_T_INT32); goto start; }
378 'UINT32_T' { NEWTOKEN(PSI_T_UINT32); goto start; }
379 'INT64_T' { NEWTOKEN(PSI_T_INT64); goto start; }
380 'UINT64_T' { NEWTOKEN(PSI_T_UINT64); goto start; }
381 'UNSIGNED' { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
382 'SIGNED' { NEWTOKEN(PSI_T_SIGNED); goto start; }
383 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
384 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
385 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
386 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
387 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
388 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
389 'TYPEDEF' { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
390 'STRUCT' { NEWTOKEN(PSI_T_STRUCT); goto start; }
391 'UNION' { NEWTOKEN(PSI_T_UNION); goto start; }
392 'ENUM' { NEWTOKEN(PSI_T_ENUM); goto start; }
393 'CONST' { NEWTOKEN(PSI_T_CONST); goto start; }
394 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
395 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
396 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
397 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
398 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
399 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
400 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
401 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
402 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
403 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
404 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
405 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
406 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
407 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
408 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
409 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
410 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
411 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
412 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
413 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
414 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
415 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
416 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
417 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
418 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
419 NUMBER { NEWTOKEN(PSI_T_NUMBER); goto start; }
420 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
421 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
422 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
423 QUOTED_STRING { NEWTOKEN(PSI_T_QUOTED_STRING); goto start; }
424 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
425 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
426 * { goto error; }
427
428 */
429
430 comment: ;
431 /*!re2c
432
433 EOL { NEWLINE(); goto comment; }
434 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
435 * { goto comment; }
436
437 */
438
439 comment_sl: ;
440 /*!re2c
441
442 EOL { NEWTOKEN(PSI_T_COMMENT); NEWLINE(); goto start; }
443 * { goto comment_sl; }
444
445 */
446 error:
447 psi_plist_free(tokens);
448 return NULL;
449 done:
450 return tokens;
451 }