fix char_width decl
[m6w6/ext-psi] / src / parser_scan.re
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include "parser.h"
33 #include "plist.h"
34 #include "debug.h"
35
/*
 * The re2c "max" directive on the next line is expected to be expanded by
 * re2c into a definition of YYMAXFILL (the largest lookahead the generated
 * scanner asks for). The guarded fallback keeps the macro defined when that
 * expansion did not provide one.
 */
/*!max:re2c*/
#ifndef YYMAXFILL
# define YYMAXFILL 256
#endif
40
41 size_t psi_parser_maxfill(void) {
42 return YYMAXFILL;
43 }
44
/*
 * Record a line break: remember the current cursor as the end of the previous
 * line (columns are computed relative to `eol`) and bump the line counter.
 *
 * Wrapped in do/while(0) so that both statements stay together when the macro
 * is used as the body of an unbraced `if`/`else`. Use it as `NEWLINE();`.
 */
#define NEWLINE() do { \
	eol = cur; \
	++lines; \
} while (0)
48
/*
 * Create a token of type `t`, append it to `tokens` and dump it to the debug
 * output.
 *
 * Comment and whitespace tokens are created with empty text; every other
 * token gets the text between `tok` and `cur`. The column is the 1-based
 * offset of `tok` from `eol`, the line is `lines`.
 *
 * Relies on the locals of psi_parser_scan(): P, I, tok, cur, eol, lines,
 * token and tokens. `t` is expanded more than once, so pass an expression
 * without side effects.
 */
#define NEWTOKEN(t) do { \
	if ((t) == PSI_T_COMMENT || (t) == PSI_T_WHITESPACE) { \
		token = psi_token_init((t), "", 0, tok - eol + 1, lines, I->file); \
	} else { \
		token = psi_token_init((t), tok, cur - tok, tok - eol + 1, lines, I->file); \
	} \
	tokens = psi_plist_add(tokens, &token); \
	PSI_DEBUG_LOCK(P, \
			PSI_DEBUG_PRINT(P, "PSI: scanned < "); \
			PSI_DEBUG_DUMP(P, psi_token_dump, token); \
	); \
} while(0)
61
/*
 * Leave the scanner through the `done` label once the cursor has reached the
 * last YYMAXFILL bytes before `lim`.
 *
 * Wrapped in do/while(0) so that an `else` following `CHECKEOF();` cannot
 * attach to the macro's own `if`.
 */
#define CHECKEOF() do { \
	if (cur >= lim - YYMAXFILL) { \
		goto done; \
	} \
} while (0)
63
64 struct psi_plist *psi_parser_scan(struct psi_parser *P, struct psi_parser_input *I)
65 {
66 struct psi_plist *tokens;
67 struct psi_token *token;
68 const char *tok, *cur, *lim, *mrk, *eol, *ctxmrk;
69 unsigned char_width, parens, lines = 1;
70 bool escaped;
71
72 PSI_DEBUG_PRINT(P, "PSI: scanning %s\n", I->file->val);
73
74 tok = mrk = eol = cur = I->buffer;
75 lim = I->buffer + I->length + YYMAXFILL;
76 tokens = psi_plist_init((psi_plist_dtor) psi_token_free);
77
78 start: ;
79 char_width = 1;
80 ctxmrk = NULL;
81 tok = cur;
82
83 (void) ctxmrk;
84
85 /*!re2c
86
87 re2c:indent:top = 2;
88 re2c:define:YYCTYPE = "unsigned char";
89 re2c:define:YYCURSOR = cur;
90 re2c:define:YYLIMIT = lim;
91 re2c:define:YYMARKER = mrk;
92 re2c:define:YYCTXMARKER = ctxmrk;
93 re2c:define:YYFILL = "CHECKEOF();";
94 re2c:yyfill:parameter = 0;
95
96 W = [a-zA-Z0-9_\x80-\xff];
97 SP = [ \t\f];
98 EOL = [\r\n];
99 NAME = [a-zA-Z_\x80-\xff] W*;
100 NSNAME = (NAME)? ("\\" NAME)+;
101 DOLLAR_NAME = '$' W+;
102 CPP_HEADER = "<" [-._/a-zA-Z0-9]+ ">";
103 CPP_ATTRIBUTE = "__attribute__" SP* "((";
104
105 DEC_CONST = [1-9] [0-9]*;
106 OCT_CONST = "0" [0-7]*;
107 HEX_CONST = '0x' [0-9a-fA-F]+;
108 INT_CONST = (DEC_CONST | OCT_CONST | HEX_CONST);
109
110 FLT_HEX_CONST = HEX_CONST ("." [0-9a-fA-F]*)? 'p' [+-]? [0-9]+;
111 FLT_DEC_NUM = "0" | DEC_CONST;
112 FLT_DEC_CONST = (FLT_DEC_NUM ("." [0-9]*)? 'e' [+-]? [0-9]+) | (FLT_DEC_NUM "." [0-9]*) | ("." [0-9]+);
113 FLT_CONST = (FLT_DEC_CONST | FLT_HEX_CONST);
114
115 INT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT; goto start; }
116 INT_CONST / 'u' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_U; cur += 1; goto start; }
117 INT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_L; cur += 1; goto start; }
118 INT_CONST / ('lu' | 'ul') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_UL; cur += 2; goto start; }
119 INT_CONST / ('llu' | 'ull') { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_INT | PSI_NUMBER_ULL; cur += 3; goto start; }
120
121 FLT_CONST { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT; goto start; }
122 FLT_CONST / 'f' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_F; cur += 1; goto start; }
123 FLT_CONST / 'l' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_L; cur += 1; goto start; }
124 FLT_CONST / 'df' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DF; cur += 2; goto start; }
125 FLT_CONST / 'dd' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DD; cur += 2; goto start; }
126 FLT_CONST / 'dl' { NEWTOKEN(PSI_T_NUMBER); token->flags = PSI_NUMBER_FLT | PSI_NUMBER_DL; cur += 2; goto start; }
127
128 "'" { escaped = false; tok += 1; goto character; }
129 "\"" { escaped = false; tok += 1; goto string; }
130 "u8" / "\"" { char_width = 1; }
131 "u" / ['"] { char_width = 2; }
132 "U" / ['"] { char_width = 4; }
133 "L" / ['"] { char_width = sizeof(wchar_t); }
134
135 "/*" { goto comment; }
136 "//" { goto comment_sl; }
137
138 "##" { NEWTOKEN(PSI_T_CPP_PASTE); goto start; }
139 "#" { NEWTOKEN(PSI_T_HASH); goto start; }
140 "(" { NEWTOKEN(PSI_T_LPAREN); goto start; }
141 ")" { NEWTOKEN(PSI_T_RPAREN); goto start; }
142 ";" { NEWTOKEN(PSI_T_EOS); goto start; }
143 "," { NEWTOKEN(PSI_T_COMMA); goto start; }
144 ":" { NEWTOKEN(PSI_T_COLON); goto start; }
145 "{" { NEWTOKEN(PSI_T_LBRACE); goto start; }
146 "}" { NEWTOKEN(PSI_T_RBRACE); goto start; }
147 "[" { NEWTOKEN(PSI_T_LBRACKET); goto start; }
148 "]" { NEWTOKEN(PSI_T_RBRACKET); goto start; }
149 "!=" { NEWTOKEN(PSI_T_CMP_NE); goto start; }
150 "==" { NEWTOKEN(PSI_T_CMP_EQ); goto start; }
151 "&&" { NEWTOKEN(PSI_T_AND); goto start; }
152 "||" { NEWTOKEN(PSI_T_OR); goto start; }
153 "=" { NEWTOKEN(PSI_T_EQUALS); goto start; }
154 "*" { NEWTOKEN(PSI_T_ASTERISK); goto start; }
155 "~" { NEWTOKEN(PSI_T_TILDE); goto start; }
156 "!" { NEWTOKEN(PSI_T_NOT); goto start; }
157 "%" { NEWTOKEN(PSI_T_MODULO); goto start; }
158 "&" { NEWTOKEN(PSI_T_AMPERSAND); goto start; }
159 "+" { NEWTOKEN(PSI_T_PLUS); goto start; }
160 "-" { NEWTOKEN(PSI_T_MINUS); goto start; }
161 "/" { NEWTOKEN(PSI_T_SLASH); goto start; }
162 "\\" { NEWTOKEN(PSI_T_BSLASH); goto start; }
163 "|" { NEWTOKEN(PSI_T_PIPE); goto start; }
164 "^" { NEWTOKEN(PSI_T_CARET); goto start; }
165 "<<" { NEWTOKEN(PSI_T_LSHIFT); goto start; }
166 ">>" { NEWTOKEN(PSI_T_RSHIFT); goto start; }
167 "<=" { NEWTOKEN(PSI_T_CMP_LE); goto start; }
168 ">=" { NEWTOKEN(PSI_T_CMP_GE); goto start; }
169 "<" { NEWTOKEN(PSI_T_LCHEVR); goto start; }
170 ">" { NEWTOKEN(PSI_T_RCHEVR); goto start; }
171 "." { NEWTOKEN(PSI_T_PERIOD); goto start; }
172 "..." { NEWTOKEN(PSI_T_ELLIPSIS); goto start; }
173 "?" { NEWTOKEN(PSI_T_IIF); goto start; }
174 "pragma" { NEWTOKEN(PSI_T_PRAGMA); goto start; }
175 "pragma" W+ "once" { NEWTOKEN(PSI_T_PRAGMA_ONCE); goto start; }
176 "__"? "inline" { NEWTOKEN(PSI_T_CPP_INLINE); goto start; }
177 "__restrict" { NEWTOKEN(PSI_T_CPP_RESTRICT); goto start; }
178 "__extension__" { NEWTOKEN(PSI_T_CPP_EXTENSION); goto start; }
179 "__asm" ("__")? { NEWTOKEN(PSI_T_CPP_ASM); goto start; }
180 "volatile" { NEWTOKEN(PSI_T_VOLATILE); goto start; }
181 "sizeof" { NEWTOKEN(PSI_T_SIZEOF); goto start; }
182 "line" { NEWTOKEN(PSI_T_LINE); goto start; }
183 "typedef" { NEWTOKEN(PSI_T_TYPEDEF); goto start; }
184 "struct" { NEWTOKEN(PSI_T_STRUCT); goto start; }
185 "union" { NEWTOKEN(PSI_T_UNION); goto start; }
186 "enum" { NEWTOKEN(PSI_T_ENUM); goto start; }
187 "const" { NEWTOKEN(PSI_T_CONST); goto start; }
188 "void" { NEWTOKEN(PSI_T_VOID); goto start; }
189 "bool" { NEWTOKEN(PSI_T_BOOL); goto start; }
190 "char" { NEWTOKEN(PSI_T_CHAR); goto start; }
191 "short" { NEWTOKEN(PSI_T_SHORT); goto start; }
192 "int" { NEWTOKEN(PSI_T_INT); goto start; }
193 "long" { NEWTOKEN(PSI_T_LONG); goto start; }
194 "float" { NEWTOKEN(PSI_T_FLOAT); goto start; }
195 "double" { NEWTOKEN(PSI_T_DOUBLE); goto start; }
196 "unsigned" { NEWTOKEN(PSI_T_UNSIGNED); goto start; }
197 "signed" { NEWTOKEN(PSI_T_SIGNED); goto start; }
198 'IF' { NEWTOKEN(PSI_T_IF); goto start; }
199 'IFDEF' { NEWTOKEN(PSI_T_IFDEF); goto start; }
200 'IFNDEF' { NEWTOKEN(PSI_T_IFNDEF); goto start; }
201 'ELSE' { NEWTOKEN(PSI_T_ELSE); goto start; }
202 'ELIF' { NEWTOKEN(PSI_T_ELIF); goto start; }
203 'ENDIF' { NEWTOKEN(PSI_T_ENDIF); goto start; }
204 'DEFINE' { NEWTOKEN(PSI_T_DEFINE); goto start; }
205 'DEFINED' { NEWTOKEN(PSI_T_DEFINED); goto start; }
206 'UNDEF' { NEWTOKEN(PSI_T_UNDEF); goto start; }
207 'WARNING' { NEWTOKEN(PSI_T_WARNING); goto start; }
208 'ERROR' { NEWTOKEN(PSI_T_ERROR); goto start; }
209 'INCLUDE' { NEWTOKEN(PSI_T_INCLUDE); goto start; }
210 'INCLUDE_NEXT' { NEWTOKEN(PSI_T_INCLUDE_NEXT); goto start; }
211 'TRUE' { NEWTOKEN(PSI_T_TRUE); goto start; }
212 'FALSE' { NEWTOKEN(PSI_T_FALSE); goto start; }
213 'NULL' { NEWTOKEN(PSI_T_NULL); goto start; }
214 'MIXED' { NEWTOKEN(PSI_T_MIXED); goto start; }
215 'CALLABLE' { NEWTOKEN(PSI_T_CALLABLE); goto start; }
216 'STRING' { NEWTOKEN(PSI_T_STRING); goto start; }
217 'ARRAY' { NEWTOKEN(PSI_T_ARRAY); goto start; }
218 'OBJECT' { NEWTOKEN(PSI_T_OBJECT); goto start; }
219 'CALLBACK' { NEWTOKEN(PSI_T_CALLBACK); goto start; }
220 'STATIC' { NEWTOKEN(PSI_T_STATIC); goto start; }
221 'FUNCTION' { NEWTOKEN(PSI_T_FUNCTION); goto start; }
222 'LIB' { NEWTOKEN(PSI_T_LIB); goto start; }
223 'LET' { NEWTOKEN(PSI_T_LET); goto start; }
224 'SET' { NEWTOKEN(PSI_T_SET); goto start; }
225 'PRE_ASSERT' { NEWTOKEN(PSI_T_PRE_ASSERT); goto start; }
226 'POST_ASSERT' { NEWTOKEN(PSI_T_POST_ASSERT); goto start; }
227 'RETURN' { NEWTOKEN(PSI_T_RETURN); goto start; }
228 'AS' { NEWTOKEN(PSI_T_AS); goto start; }
229 'FREE' { NEWTOKEN(PSI_T_FREE); goto start; }
230 'TEMP' { NEWTOKEN(PSI_T_TEMP); goto start; }
231 'STRLEN' { NEWTOKEN(PSI_T_STRLEN); goto start; }
232 'STRVAL' { NEWTOKEN(PSI_T_STRVAL); goto start; }
233 'PATHVAL' { NEWTOKEN(PSI_T_PATHVAL); goto start; }
234 'INTVAL' { NEWTOKEN(PSI_T_INTVAL); goto start; }
235 'FLOATVAL' { NEWTOKEN(PSI_T_FLOATVAL); goto start; }
236 'BOOLVAL' { NEWTOKEN(PSI_T_BOOLVAL); goto start; }
237 'ARRVAL' { NEWTOKEN(PSI_T_ARRVAL); goto start; }
238 'OBJVAL' { NEWTOKEN(PSI_T_OBJVAL); goto start; }
239 'ZVAL' { NEWTOKEN(PSI_T_ZVAL); goto start; }
240 'COUNT' { NEWTOKEN(PSI_T_COUNT); goto start; }
241 'CALLOC' { NEWTOKEN(PSI_T_CALLOC); goto start; }
242 'TO_OBJECT' { NEWTOKEN(PSI_T_TO_OBJECT); goto start; }
243 'TO_ARRAY' { NEWTOKEN(PSI_T_TO_ARRAY); goto start; }
244 'TO_STRING' { NEWTOKEN(PSI_T_TO_STRING); goto start; }
245 'TO_INT' { NEWTOKEN(PSI_T_TO_INT); goto start; }
246 'TO_FLOAT' { NEWTOKEN(PSI_T_TO_FLOAT); goto start; }
247 'TO_BOOL' { NEWTOKEN(PSI_T_TO_BOOL); goto start; }
248 NAME { NEWTOKEN(PSI_T_NAME); goto start; }
249 NSNAME { NEWTOKEN(PSI_T_NSNAME); goto start; }
250 DOLLAR_NAME { NEWTOKEN(PSI_T_DOLLAR_NAME); goto start; }
251 CPP_HEADER { tok += 1; cur -= 1; NEWTOKEN(PSI_T_CPP_HEADER); cur += 1; goto start; }
252 CPP_ATTRIBUTE { parens = 2; goto cpp_attribute; }
253 EOL { NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
254 SP+ { NEWTOKEN(PSI_T_WHITESPACE); goto start; }
255 [^] { CHECKEOF(); NEWTOKEN(-2); goto error; }
256 * { CHECKEOF(); NEWTOKEN(-1); goto error; }
257
258 */
259
260 character: ;
261 /*!re2c
262
263 EOL { NEWLINE(); goto character; }
264 "\\" { escaped = !escaped; goto character; }
265 "'" {
266 if (escaped) {
267 escaped = false;
268 goto character;
269 }
270 cur -= 1;
271 NEWTOKEN(PSI_T_QUOTED_CHAR);
272 cur += 1;
273 token->flags = char_width;
274 goto start;
275 }
276 * { escaped = false; goto character; }
277
278 */
279
280 string: ;
281 /*!re2c
282
283 EOL { NEWLINE(); goto string; }
284 "\\" { escaped = !escaped; goto string; }
285 "\"" {
286 if (escaped) {
287 escaped = false;
288 goto string;
289 }
290 cur -= 1;
291 NEWTOKEN(PSI_T_QUOTED_STRING);
292 cur += 1;
293 token->flags = char_width;
294 goto start;
295 }
296 * { escaped = false; goto string; }
297
298 */
299
300 comment: ;
301 /*!re2c
302
303 EOL { NEWLINE(); goto comment; }
304 "*" "/" { NEWTOKEN(PSI_T_COMMENT); goto start; }
305 * { goto comment; }
306
307 */
308
309 comment_sl: ;
310 /*!re2c
311
312 EOL { NEWTOKEN(PSI_T_COMMENT); tok = cur - 1; NEWTOKEN(PSI_T_EOL); NEWLINE(); goto start; }
313 * { goto comment_sl; }
314
315 */
316
317 cpp_attribute: ;
318
319 /*!re2c
320
321 "(" { ++parens; goto cpp_attribute; }
322 ")" { if (parens == 1) { NEWTOKEN(PSI_T_CPP_ATTRIBUTE); goto start; } else { --parens; goto cpp_attribute; } }
323 EOL { NEWLINE(); goto cpp_attribute; }
324 * { goto cpp_attribute; }
325
326 */
327 error: ;
328
329 P->error(PSI_DATA(P), token, PSI_WARNING, "PSI syntax error: unexpected input (%d) '%.*s' at col %tu",
330 token->type, token->text->len, token->text->val, tok - eol + 1);
331 psi_plist_free(tokens);
332 return NULL;
333
334 done: ;
335
336 PSI_DEBUG_PRINT(P, "PSI: EOF cur=%p lim=%p\n", cur, lim);
337
338 return tokens;
339 }