f280b86ba03f4491d75cdc5b00ff816a8d7cd2b4
[m6w6/ext-psi] / src / parser_scan.re
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include "parser.h"
33 #include "plist.h"
34 #include "debug.h"
35
/*!max:re2c*/
#if !defined(YYMAXFILL)
/* Fallback for the case that the directive above did not define YYMAXFILL. */
# define YYMAXFILL 256
#endif

/**
 * Report the YYMAXFILL value this scanner was compiled with.
 *
 * psi_parser_scan() places its scan limit YYMAXFILL bytes behind the end of
 * the input, so presumably callers use this value to size the padding of the
 * input buffer -- confirm against the code that fills psi_parser_input.
 *
 * @return YYMAXFILL as a size_t
 */
size_t psi_parser_maxfill(void)
{
	const size_t maxfill = YYMAXFILL;

	return maxfill;
}
44
/*
 * Record a line break at the current cursor position.
 *
 * Stores `cur` in `eol` (the reference point NEWTOKEN uses to compute token
 * columns) and increments the line counter of the input `I`.
 * Expects `eol`, `cur` and `I` to be in scope at the point of expansion.
 *
 * The body is wrapped in do/while(0) so that the macro always expands to
 * exactly one statement and cannot be split by an unbraced if/else.
 */
#define NEWLINE() do { \
	eol = cur; \
	++I->lines; \
} while (0)
48
/*
 * Create a token of type `t` and append it to the `tokens` list.
 *
 * Comment and whitespace tokens are created with empty text; every other
 * token carries the scanned bytes [tok, cur).  The token's column is
 * `tok - eol + 1`, its line and file are taken from the input `I`.
 * The new token is left in `token` so that the caller can adjust it (e.g.
 * set flags) after the macro, and it is dumped through the PSI debug macros.
 *
 * Expects `token`, `tokens`, `tok`, `cur`, `eol`, `I` and `P` to be in scope
 * at the point of expansion.
 *
 * `t` is parenthesized at every use so that compound argument expressions
 * (e.g. a conditional expression) are compared and passed on as a whole.
 * Note that `t` may be evaluated more than once; pass side-effect free
 * expressions only.
 */
#define NEWTOKEN(t) do { \
	if ((t) == PSI_T_COMMENT || (t) == PSI_T_WHITESPACE) { \
		token = psi_token_init((t), "", 0, tok - eol + 1, I->lines, I->file); \
	} else { \
		token = psi_token_init((t), tok, cur - tok, tok - eol + 1, I->lines, I->file); \
	} \
	tokens = psi_plist_add(tokens, &token); \
	PSI_DEBUG_LOCK(P, \
		PSI_DEBUG_PRINT(P, "PSI: scanned < "); \
		PSI_DEBUG_DUMP(P, psi_token_dump, token); \
	); \
} while(0)
61
/*
 * Leave the scanner through the `done` label once the cursor has come within
 * YYMAXFILL bytes of the scan limit `lim`.
 * Expects `cur`, `lim` and a `done` label to be in scope at the point of
 * expansion.
 *
 * Wrapped in do/while(0) so that the macro is a single statement and cannot
 * capture an `else` that follows it.
 */
#define CHECKEOF() do { \
	if (cur >= lim - YYMAXFILL) { \
		goto done; \
	} \
} while (0)
63
64 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
65 {
66 struct psi_plist *tokens;
67 struct psi_token *token;
68 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
69 unsigned parens;
70 bool escaped;
71 token_t char_width;
72
73 PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file->val);
74
75 tok = mrk = eol = cur = I->buffer;
76 lim = I->buffer + I->length + YYMAXFILL;
77 I->lines = 1;
78 tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
79
80 start: ;
81 char_width = 1;
82 ctxmrk = NULL;
83 tok = cur;
84
85 (void) ctxmrk;
86
87 /*!re2c
88
89 re2c:indent:top = 2;
90 re2c:define:YYCTYPE = "unsigned char";
91 re2c:define:YYCURSOR = cur;
92 re2c:define:YYLIMIT = lim;
93 re2c:define:YYMARKER = mrk;
94 re2c:define:YYCTXMARKER = ctxmrk;
95 re2c:define:YYFILL = "CHECKEOF();";
96 re2c:yyfill:parameter = 0;
97
98 W = [a-zA-Z0-9_\x80-\xff];
99 SP = [ \t\f];
100 EOL = [\r\n];
101 NAME = [a-zA-Z_\x80-\xff] W*;
102 NSNAME = (NAME)? ("\\" NAME)+;
103 DOLLAR_NAME = '$' W+;
104 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
105 CPP_ATTRIBUTE = "__attribute__" SP* "((";
106
107 DEC_CONST = [1-9] [0-9]*;
108 OCT_CONST = "0" [0-7]*;
109 HEX_CONST = '0x' [0-9a-fA-F]+;
110 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
111
112 FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
113 FLT_DEC_NUM = "0" | DEC_CONST;
114 FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
115 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
116
117 INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
118 INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
119 INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
120 INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
121 INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
122
123 FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
124 FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
125 FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
126 FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
127 FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
128 FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
129
130 "'" { escaped = false; tok += 1; goto character; }
131 "\"" { escaped = false; tok += 1; goto string; }
132 "u8" / "\"" { char_width = 1; }
133 "u" / ['"] { char_width = 2; }
134 "U" / ['"] { char_width = 4; }
135 "L" / ['"] { char_width = sizeof(wchar_t)/8; }
136
137 "/*" { goto comment; }
138 "//" { goto comment_sl; }
139
140 "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
141 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
142 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
143 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
144 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
145 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
146 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
147 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
148 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
149 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
150 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
151 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
152 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
153 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
154 "||" { NEWTOKEN(PSI_T_OR); goto start; }
155 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
156 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
157 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
158 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
159 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
160 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
161 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
162 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
163 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
164 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
165 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
166 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
167 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
168 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
169 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
170 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
171 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
172 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
173 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
174 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
175 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
176 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
177 "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
178 "__"? "inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
179 "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
180 "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
181 "__asm" ("__")? { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
182 "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; }
183 "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; }
184 "line" { NEWTOKEN(PSI_T_LINE); goto start; }
185 "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
186 "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; }
187 "union" { NEWTOKEN(PSI_T_UNION); goto start; }
188 "enum" { NEWTOKEN(PSI_T_ENUM); goto start; }
189 "const" { NEWTOKEN(PSI_T_CONST); goto start; }
190 "void" { NEWTOKEN(PSI_T_VOID); goto start; }
191 "bool" { NEWTOKEN(PSI_T_BOOL); goto start; }
192 "char" { NEWTOKEN(PSI_T_CHAR); goto start; }
193 "short" { NEWTOKEN(PSI_T_SHORT); goto start; }
194 "int" { NEWTOKEN(PSI_T_INT); goto start; }
195 "long" { NEWTOKEN(PSI_T_LONG); goto start; }
196 "float" { NEWTOKEN(PSI_T_FLOAT); goto start; }
197 "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; }
198 "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
199 "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; }
200 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
201 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
202 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
203 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
204 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
205 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
206 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
207 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
208 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
209 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
210 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
211 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
212 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
213 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
214 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
215 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
216 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
217 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
218 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
219 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
220 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
221 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
222 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
223 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
224 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
225 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
226 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
227 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
228 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
229 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
230 'AS' { NEWTOKEN(PSI_T_AS); goto start; }
231 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
232 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
233 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
234 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
235 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
236 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
237 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
238 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
239 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
240 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
241 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
242 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
243 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
244 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
245 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
246 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
247 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
248 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
249 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
250 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
251 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
252 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
253 CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
254 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
255 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
256 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
257 [^] { CHECKEOF(); NEWTOKEN(-2); goto error; }
258 * { CHECKEOF(); NEWTOKEN(-1); goto error; }
259
260 */
261
262 character: ;
263 /*!re2c
264
265 EOL { NEWLINE(); goto character; }
266 "\\" { escaped = !escaped; goto character; }
267 "'" {
268 if (escaped) {
269 escaped = false;
270 goto character;
271 }
272 cur -= 1;
273 NEWTOKEN(PSI_T_QUOTED_CHAR);
274 cur += 1;
275 token->flags = char_width;
276 goto start;
277 }
278 * { escaped = false; goto character; }
279
280 */
281
282 string: ;
283 /*!re2c
284
285 EOL { NEWLINE(); goto string; }
286 "\\" { escaped = !escaped; goto string; }
287 "\"" {
288 if (escaped) {
289 escaped = false;
290 goto string;
291 }
292 cur -= 1;
293 NEWTOKEN(PSI_T_QUOTED_STRING);
294 cur += 1;
295 token->flags = char_width;
296 goto start;
297 }
298 * { escaped = false; goto string; }
299
300 */
301
302 comment: ;
303 /*!re2c
304
305 EOL { NEWLINE(); goto comment; }
306 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
307 * { goto comment; }
308
309 */
310
311 comment_sl: ;
312 /*!re2c
313
314 EOL { NEWTOKEN(PSI_T_COMMENT); tok = cur - 1; NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
315 * { goto comment_sl; }
316
317 */
318
319 cpp_attribute: ;
320
321 /*!re2c
322
323 "(" { ++parens; goto cpp_attribute; }
324 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
325 EOL { NEWLINE(); goto cpp_attribute; }
326 * { goto cpp_attribute; }
327
328 */
329 error: ;
330
331 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
332 token->type, token->text->len, token->text->val, tok - eol + 1);
333 psi_plist_free(tokens);
334 return NULL;
335
336 done: ;
337
338 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);
339
340 return tokens;
341 }