administrativa
[m6w6/ext-psi] / src / parser_scan.re
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include <php_psi_stdinc.h>
27
28 #include "parser.h"
29 #include "plist.h"
30
/*!max:re2c*/
#if !defined(YYMAXFILL)
/* Fallback for the case where the directive above did not define YYMAXFILL. */
# define YYMAXFILL 256
#endif

/*
 * Return YYMAXFILL, the amount by which psi_parser_scan() places its scan
 * limit beyond the end of the input data.
 */
size_t psi_parser_maxfill(void)
{
	return (size_t) YYMAXFILL;
}
39
/*
 * Account for a consumed end-of-line character: remember the current cursor
 * as the start of the new line (eol is the base for the column computation in
 * NEWTOKEN) and bump the line counter of the input.
 *
 * Wrapped in do/while(0) so that the macro expands to exactly one statement
 * and stays correct after an unbraced if/else.
 */
#define NEWLINE() do { \
	eol = cur; \
	++I->lines; \
} while (0)
43
/*
 * Create a token of type t and append it to the token list.
 *
 * Comment and whitespace tokens are created with empty text; every other
 * token carries the scanned span from tok up to (not including) cur.
 * The column is the offset of tok from the last recorded line start plus one,
 * the line is the current line counter of the input.
 * The new token is left in `token`, so the caller may adjust it afterwards
 * (e.g. set its flags), and it is passed to the debug print/dump macros.
 *
 * The argument is parenthesized, so that an expression can be passed safely.
 */
#define NEWTOKEN(t) do { \
	if ((t) == PSI_T_COMMENT || (t) == PSI_T_WHITESPACE) { \
		token = psi_token_init((t), "", 0, tok - eol + 1, I->lines, I->file); \
	} else { \
		token = psi_token_init((t), tok, cur - tok, tok - eol + 1, I->lines, I->file); \
	} \
	tokens = psi_plist_add(tokens, &token); \
	PSI_DEBUG_PRINT(P, "PSI: scanned < "); \
	PSI_DEBUG_DUMP(P, psi_token_dump, token); \
} while(0)
54
/*
 * Leave the scanner through the `done` label as soon as the cursor has
 * reached the position YYMAXFILL bytes before the scan limit.
 *
 * Wrapped in do/while(0) so that an `else` following the macro at the call
 * site cannot bind to the macro's own `if`.
 */
#define CHECKEOF() do { \
	if (cur >= lim - YYMAXFILL) { \
		goto done; \
	} \
} while (0)
56
57 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
58 {
59 struct psi_plist *tokens;
60 struct psi_token *token;
61 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
62 unsigned parens;
63 bool escaped;
64 token_t char_width;
65
66 PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file->val);
67
68 tok = mrk = eol = cur = I->buffer;
69 lim = I->buffer + I->length + YYMAXFILL;
70 I->lines = 1;
71 tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
72
73 start: ;
74 char_width = 1;
75 ctxmrk = NULL;
76 tok = cur;
77
78 (void) ctxmrk;
79
80 /*!re2c
81
82 re2c:indent:top = 2;
83 re2c:define:YYCTYPE = "unsigned char";
84 re2c:define:YYCURSOR = cur;
85 re2c:define:YYLIMIT = lim;
86 re2c:define:YYMARKER = mrk;
87 re2c:define:YYCTXMARKER = ctxmrk;
88 re2c:define:YYFILL = "CHECKEOF();";
89 re2c:yyfill:parameter = 0;
90
91 W = [a-zA-Z0-9_\x80-\xff];
92 SP = [ \t\f];
93 EOL = [\r\n];
94 NAME = [a-zA-Z_\x80-\xff] W*;
95 NSNAME = (NAME)? ("\\" NAME)+;
96 DOLLAR_NAME = '$' W+;
97 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
98 CPP_ATTRIBUTE = "__attribute__" SP* "((";
99
100 DEC_CONST = [1-9] [0-9]*;
101 OCT_CONST = "0" [0-7]*;
102 HEX_CONST = '0x' [0-9a-fA-F]+;
103 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
104
105 FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
106 FLT_DEC_NUM = "0" | DEC_CONST;
107 FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
108 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
109
110 INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
111 INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
112 INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
113 INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
114 INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
115
116 FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
117 FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
118 FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
119 FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
120 FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
121 FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
122
123 "'" { escaped = false; tok += 1; goto character; }
124 "\"" { escaped = false; tok += 1; goto string; }
125 "u8" / "\"" { char_width = 1; }
126 "u" / ['"] { char_width = 2; }
127 "U" / ['"] { char_width = 4; }
128 "L" / ['"] { char_width = sizeof(wchar_t)/8; }
129
130 "/*" { goto comment; }
131 "//" { goto comment_sl; }
132
133 "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
134 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
135 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
136 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
137 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
138 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
139 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
140 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
141 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
142 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
143 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
144 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
145 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
146 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
147 "||" { NEWTOKEN(PSI_T_OR); goto start; }
148 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
149 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
150 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
151 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
152 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
153 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
154 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
155 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
156 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
157 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
158 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
159 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
160 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
161 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
162 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
163 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
164 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
165 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
166 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
167 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
168 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
169 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
170 "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
171 "__"? "inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
172 "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
173 "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
174 "__asm" ("__")? { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
175 "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; }
176 "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; }
177 "line" { NEWTOKEN(PSI_T_LINE); goto start; }
178 "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
179 "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; }
180 "union" { NEWTOKEN(PSI_T_UNION); goto start; }
181 "enum" { NEWTOKEN(PSI_T_ENUM); goto start; }
182 "const" { NEWTOKEN(PSI_T_CONST); goto start; }
183 "void" { NEWTOKEN(PSI_T_VOID); goto start; }
184 "bool" { NEWTOKEN(PSI_T_BOOL); goto start; }
185 "char" { NEWTOKEN(PSI_T_CHAR); goto start; }
186 "short" { NEWTOKEN(PSI_T_SHORT); goto start; }
187 "int" { NEWTOKEN(PSI_T_INT); goto start; }
188 "long" { NEWTOKEN(PSI_T_LONG); goto start; }
189 "float" { NEWTOKEN(PSI_T_FLOAT); goto start; }
190 "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; }
191 "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
192 "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; }
193 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
194 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
195 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
196 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
197 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
198 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
199 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
200 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
201 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
202 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
203 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
204 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
205 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
206 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
207 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
208 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
209 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
210 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
211 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
212 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
213 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
214 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
215 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
216 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
217 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
218 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
219 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
220 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
221 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
222 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
223 'AS' { NEWTOKEN(PSI_T_AS); goto start; }
224 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
225 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
226 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
227 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
228 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
229 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
230 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
231 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
232 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
233 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
234 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
235 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
236 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
237 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
238 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
239 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
240 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
241 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
242 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
243 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
244 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
245 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
246 CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
247 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
248 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
249 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
250 [^] { CHECKEOF(); NEWTOKEN(-2); goto error; }
251 * { CHECKEOF(); NEWTOKEN(-1); goto error; }
252
253 */
254
255 character: ;
256 /*!re2c
257
258 EOL { NEWLINE(); goto character; }
259 "\\" { escaped = !escaped; goto character; }
260 "'" {
261 if (escaped) {
262 escaped = false;
263 goto character;
264 }
265 cur -= 1;
266 NEWTOKEN(PSI_T_QUOTED_CHAR);
267 cur += 1;
268 token->flags = char_width;
269 goto start;
270 }
271 * { escaped = false; goto character; }
272
273 */
274
275 string: ;
276 /*!re2c
277
278 EOL { NEWLINE(); goto string; }
279 "\\" { escaped = !escaped; goto string; }
280 "\"" {
281 if (escaped) {
282 escaped = false;
283 goto string;
284 }
285 cur -= 1;
286 NEWTOKEN(PSI_T_QUOTED_STRING);
287 cur += 1;
288 token->flags = char_width;
289 goto start;
290 }
291 * { escaped = false; goto string; }
292
293 */
294
295 comment: ;
296 /*!re2c
297
298 EOL { NEWLINE(); goto comment; }
299 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
300 * { goto comment; }
301
302 */
303
304 comment_sl: ;
305 /*!re2c
306
307 EOL { NEWTOKEN(PSI_T_COMMENT); tok = cur - 1; NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
308 * { goto comment_sl; }
309
310 */
311
312 cpp_attribute: ;
313
314 /*!re2c
315
316 "(" { ++parens; goto cpp_attribute; }
317 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
318 EOL { NEWLINE(); goto cpp_attribute; }
319 * { goto cpp_attribute; }
320
321 */
322 error: ;
323
324 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
325 token->type, token->text->len, token->text->val, tok - eol + 1);
326 psi_plist_free(tokens);
327 return NULL;
328
329 done: ;
330
331 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);
332
333 return tokens;
334 }