043243fd81e12c91052f792a8570c441e288a993
[m6w6/ext-psi] / src / parser_scan.re
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include "parser.h"
33 #include "plist.h"
34
/*!max:re2c*/
#ifndef YYMAXFILL
# define YYMAXFILL 256
#endif

/**
 * Report the scanner's YYMAXFILL value.
 *
 * YYMAXFILL comes from the re2c max directive above; if that did not define
 * it, the fallback of 256 is used. psi_parser_scan() places its scan limit
 * this many bytes past the end of the input.
 *
 * @return YYMAXFILL as a size_t
 */
size_t psi_parser_maxfill(void)
{
	const size_t maxfill = YYMAXFILL;

	return maxfill;
}
43
/*
 * Record a line break: remember the current cursor as the start of the new
 * line (eol is what token columns are computed against) and bump the line
 * counter of the input.
 *
 * Expects `cur`, `eol` and `I` to be in scope at the expansion site.
 * Wrapped in do/while(0) so that both statements stay together when the macro
 * is used as the body of an unbraced if/else or loop.
 */
#define NEWLINE() do { \
	eol = cur; \
	++I->lines; \
} while (0)
47
/*
 * Create a token of type `t` for the span [tok, cur), append it to `tokens`
 * and dump it to the debug output.
 *
 * Comment and whitespace tokens are created with an empty text; every other
 * token carries the scanned text. The column passed to psi_token_init() is
 * the 1-based offset of `tok` from the last recorded line start (`eol`).
 *
 * Expects `token`, `tokens`, `tok`, `cur`, `eol`, `I` and `P` to be in scope
 * at the expansion site. `t` is parenthesized so that operator expressions
 * are compared as a whole; note that it is still evaluated more than once,
 * so it must not have side effects.
 */
#define NEWTOKEN(t) do { \
	if ((t) == PSI_T_COMMENT || (t) == PSI_T_WHITESPACE) { \
		token = psi_token_init((t), "", 0, tok - eol + 1, I->lines, I->file); \
	} else { \
		token = psi_token_init((t), tok, cur - tok, tok - eol + 1, I->lines, I->file); \
	} \
	tokens = psi_plist_add(tokens, &token); \
	PSI_DEBUG_PRINT(P, "PSI: scanned < "); \
	PSI_DEBUG_DUMP(P, psi_token_dump, token); \
} while(0)
58
/*
 * Leave the scanner through the `done` label once the cursor has reached the
 * end of the real input. `lim` lies YYMAXFILL bytes past that point, so the
 * end of input is lim - YYMAXFILL.
 *
 * Expects `cur`, `lim` and a `done` label to be in scope at the expansion
 * site. Wrapped in do/while(0) so that a following `else` cannot bind to the
 * macro's internal `if`.
 */
#define CHECKEOF() do { \
	if (cur >= lim - YYMAXFILL) { \
		goto done; \
	} \
} while (0)
60
61 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
62 {
63 struct psi_plist *tokens;
64 struct psi_token *token;
65 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
66 unsigned parens;
67 bool escaped;
68 token_t char_width;
69
70 PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file->val);
71
72 tok = mrk = eol = cur = I->buffer;
73 lim = I->buffer + I->length + YYMAXFILL;
74 I->lines = 1;
75 tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
76
77 start: ;
78 char_width = 1;
79 ctxmrk = NULL;
80 tok = cur;
81
82 (void) ctxmrk;
83
84 /*!re2c
85
86 re2c:indent:top = 2;
87 re2c:define:YYCTYPE = "unsigned char";
88 re2c:define:YYCURSOR = cur;
89 re2c:define:YYLIMIT = lim;
90 re2c:define:YYMARKER = mrk;
91 re2c:define:YYCTXMARKER = ctxmrk;
92 re2c:define:YYFILL = "CHECKEOF();";
93 re2c:yyfill:parameter = 0;
94
95 W = [a-zA-Z0-9_\x80-\xff];
96 SP = [ \t\f];
97 EOL = [\r\n];
98 NAME = [a-zA-Z_\x80-\xff] W*;
99 NSNAME = (NAME)? ("\\" NAME)+;
100 DOLLAR_NAME = '$' W+;
101 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
102 CPP_ATTRIBUTE = "__attribute__" SP* "((";
103
104 DEC_CONST = [1-9] [0-9]*;
105 OCT_CONST = "0" [0-7]*;
106 HEX_CONST = '0x' [0-9a-fA-F]+;
107 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
108
109 FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
110 FLT_DEC_NUM = "0" | DEC_CONST;
111 FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
112 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
113
114 INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
115 INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
116 INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
117 INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
118 INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
119
120 FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
121 FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
122 FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
123 FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
124 FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
125 FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
126
127 "'" { escaped = false; tok += 1; goto character; }
128 "\"" { escaped = false; tok += 1; goto string; }
129 "u8" / "\"" { char_width = 1; }
130 "u" / ['"] { char_width = 2; }
131 "U" / ['"] { char_width = 4; }
132 "L" / ['"] { char_width = sizeof(wchar_t)/8; }
133
134 "/*" { goto comment; }
135 "//" { goto comment_sl; }
136
137 "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
138 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
139 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
140 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
141 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
142 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
143 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
144 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
145 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
146 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
147 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
148 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
149 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
150 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
151 "||" { NEWTOKEN(PSI_T_OR); goto start; }
152 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
153 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
154 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
155 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
156 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
157 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
158 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
159 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
160 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
161 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
162 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
163 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
164 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
165 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
166 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
167 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
168 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
169 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
170 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
171 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
172 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
173 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
174 "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
175 "__"? "inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
176 "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
177 "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
178 "__asm" ("__")? { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
179 "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; }
180 "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; }
181 "line" { NEWTOKEN(PSI_T_LINE); goto start; }
182 "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
183 "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; }
184 "union" { NEWTOKEN(PSI_T_UNION); goto start; }
185 "enum" { NEWTOKEN(PSI_T_ENUM); goto start; }
186 "const" { NEWTOKEN(PSI_T_CONST); goto start; }
187 "void" { NEWTOKEN(PSI_T_VOID); goto start; }
188 "bool" { NEWTOKEN(PSI_T_BOOL); goto start; }
189 "char" { NEWTOKEN(PSI_T_CHAR); goto start; }
190 "short" { NEWTOKEN(PSI_T_SHORT); goto start; }
191 "int" { NEWTOKEN(PSI_T_INT); goto start; }
192 "long" { NEWTOKEN(PSI_T_LONG); goto start; }
193 "float" { NEWTOKEN(PSI_T_FLOAT); goto start; }
194 "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; }
195 "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
196 "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; }
197 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
198 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
199 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
200 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
201 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
202 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
203 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
204 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
205 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
206 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
207 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
208 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
209 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
210 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
211 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
212 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
213 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
214 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
215 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
216 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
217 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
218 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
219 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
220 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
221 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
222 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
223 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
224 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
225 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
226 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
227 'AS' { NEWTOKEN(PSI_T_AS); goto start; }
228 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
229 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
230 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
231 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
232 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
233 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
234 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
235 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
236 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
237 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
238 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
239 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
240 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
241 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
242 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
243 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
244 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
245 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
246 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
247 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
248 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
249 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
250 CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
251 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
252 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
253 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
254 [^] { CHECKEOF(); NEWTOKEN(-2); goto error; }
255 * { CHECKEOF(); NEWTOKEN(-1); goto error; }
256
257 */
258
259 character: ;
260 /*!re2c
261
262 EOL { NEWLINE(); goto character; }
263 "\\" { escaped = !escaped; goto character; }
264 "'" {
265 if (escaped) {
266 escaped = false;
267 goto character;
268 }
269 cur -= 1;
270 NEWTOKEN(PSI_T_QUOTED_CHAR);
271 cur += 1;
272 token->flags = char_width;
273 goto start;
274 }
275 * { escaped = false; goto character; }
276
277 */
278
279 string: ;
280 /*!re2c
281
282 EOL { NEWLINE(); goto string; }
283 "\\" { escaped = !escaped; goto string; }
284 "\"" {
285 if (escaped) {
286 escaped = false;
287 goto string;
288 }
289 cur -= 1;
290 NEWTOKEN(PSI_T_QUOTED_STRING);
291 cur += 1;
292 token->flags = char_width;
293 goto start;
294 }
295 * { escaped = false; goto string; }
296
297 */
298
299 comment: ;
300 /*!re2c
301
302 EOL { NEWLINE(); goto comment; }
303 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
304 * { goto comment; }
305
306 */
307
308 comment_sl: ;
309 /*!re2c
310
311 EOL { NEWTOKEN(PSI_T_COMMENT); tok = cur - 1; NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
312 * { goto comment_sl; }
313
314 */
315
316 cpp_attribute: ;
317
318 /*!re2c
319
320 "(" { ++parens; goto cpp_attribute; }
321 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
322 EOL { NEWLINE(); goto cpp_attribute; }
323 * { goto cpp_attribute; }
324
325 */
326 error: ;
327
328 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
329 token->type, token->text->len, token->text->val, tok - eol + 1);
330 psi_plist_free(tokens);
331 return NULL;
332
333 done: ;
334
335 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);
336
337 return tokens;
338 }