1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
26 #include "php_psi_stdinc.h"
/*
 * Forward declarations of the psi_parser_proc_* routines called further
 * down in this file (init, free, parse one token, enable tracing). Their
 * definitions are not part of this view -- presumably they live in the
 * generated grammar parser; confirm against the build.
 */
32 void *psi_parser_proc_init(void);
33 void psi_parser_proc_free(void **parser_proc);
34 void psi_parser_proc_parse(void *parser_proc, token_t r, struct psi_token *token, struct psi_parser *parser);
35 void psi_parser_proc_trace(FILE *out, char *prefix);
/*
 * Set up a parser instance.
 *
 * Visible steps: P is assigned from malloc(sizeof(*P)), the structure is
 * zero-filled, psi_data_ctor_with_dtors() is run on PSI_DATA(P) with the
 * caller's error callback and flags, and P->proc receives the handle
 * returned by psi_parser_proc_init(). When PSI_DEBUG is set in flags,
 * psi_parser_proc_trace() is pointed at stderr with the prefix "PSI> ".
 *
 * NOTE(review): the lines between the statements below are not visible in
 * this view, so the condition (if any) guarding the malloc() and the
 * function's return statement cannot be confirmed from here.
 * NOTE(review): no NULL check of the malloc() result is visible before the
 * memset() -- confirm against the full file.
 */
37 struct psi_parser *psi_parser_init(struct psi_parser *P, psi_error_cb error, unsigned flags)
40 P = malloc(sizeof(*P));
/* start from an all-zero parser state */
42 memset(P, 0, sizeof(*P));
44 psi_data_ctor_with_dtors(PSI_DATA(P), error, flags);
48 P->proc = psi_parser_proc_init();
/* debug builds/flags: have the grammar parser trace to stderr */
50 if (flags & PSI_DEBUG) {
51 psi_parser_proc_trace(stderr, "PSI> ");
/*
 * Attach a file as the parser's input.
 *
 * Opens filename for reading, records the stream as
 * P->input.data.file.handle, marks the input as PSI_PARSE_FILE and stores
 * a strdup()'d copy of the file name in P->file.fn. Failures to open, stat
 * or map the file are reported through P->error() as PSI_WARNING together
 * with strerror(errno).
 *
 * Two different assignments to P->input.data.file.buffer are visible: an
 * mmap() of sb.st_size bytes and a malloc(BSIZE). Presumably these are
 * alternative build paths selected by a preprocessor conditional that is
 * not visible in this view -- TODO confirm.
 *
 * NOTE(review): the return statements and any cleanup on the error paths
 * are not visible here; confirm that the stream opened by fopen() is
 * released (or left for psi_parser_dtor()) when stat/mmap fails.
 * NOTE(review): neither the malloc(BSIZE) nor the strdup() result is
 * visibly checked for NULL.
 */
57 bool psi_parser_open_file(struct psi_parser *P, const char *filename)
59 FILE *fp = fopen(filename, "r");
/* report an fopen() failure through the parser's error callback */
62 P->error(PSI_DATA(P), NULL, PSI_WARNING,
63 "Could not open '%s' for reading: %s",
64 filename, strerror(errno));
68 P->input.type = PSI_PARSE_FILE;
69 P->input.data.file.handle = fp;
/* report a failure to stat the file (sb and fd are declared in lines not visible here) */
76 P->error(PSI_DATA(P), NULL, PSI_WARNING,
77 "Could not stat '%s': %s",
78 filename, strerror(errno));
/* map the whole file read-only; the mapping length is remembered below */
82 P->input.data.file.buffer = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
83 if (MAP_FAILED == P->input.data.file.buffer) {
84 P->error(PSI_DATA(P), NULL, PSI_WARNING,
85 "Could not map '%s' for reading: %s",
86 filename, strerror(errno));
89 P->input.data.file.length = sb.st_size;
/* buffered variant: a BSIZE-byte heap buffer instead of a mapping */
91 P->input.data.file.buffer = malloc(BSIZE);
/* keep a private copy of the file name */
94 P->file.fn = strdup(filename);
/*
 * Attach an in-memory string as the parser's input.
 *
 * Marks the input as PSI_PARSE_STRING, records length, and stores a
 * strndup()'d copy of at most length bytes of string as the input buffer.
 * The file name used for this input is the literal "<input>".
 *
 * NOTE(review): what happens when strndup() fails, and the return
 * statements, are in lines not visible in this view.
 */
99 bool psi_parser_open_string(struct psi_parser *P, const char *string, size_t length)
101 P->input.type = PSI_PARSE_STRING;
102 P->input.data.string.length = length;
/* the parser works on its own copy of the caller's bytes */
103 if (!(P->input.data.string.buffer = strndup(string, length))) {
107 P->file.fn = strdup("<input>");
/*
 * Prepare or refill the scanner's input window.
 *
 * Returns P->lim - P->cur, i.e. the distance from the scan cursor to the
 * current limit of the buffer.
 *
 * The first switch sets cur/tok/mrk to the start of the file or string
 * buffer and sets eof (and, for files, lim) from the buffer address and
 * recorded length. The second switch contains the refill logic: the
 * not-yet-consumed bytes (tok..lim) are moved to the front of the buffer
 * and fread() is asked for up to `available` further bytes.
 *
 * NOTE(review): the conditions that select between the initialisation and
 * refill parts (and between the two P->eof/P->lim assignments for files)
 * are in lines not visible in this view.
 * NOTE(review): the refill branch refers to P->buf and P->fp, whereas the
 * rest of this file uses P->input.data.file.buffer and
 * P->input.data.file.handle -- confirm these members exist and refer to
 * the same objects.
 */
112 static ssize_t psi_parser_fill(struct psi_parser *P, size_t n)
114 PSI_DEBUG_PRINT(P, "PSI< Fill: n=%zu (input.type=%d)\n", n, P->input.type);
118 switch (P->input.type) {
/* file input: reset all scan pointers to the start of the file buffer */
120 P->cur = P->tok = P->mrk = P->input.data.file.buffer;
122 P->eof = P->input.data.file.buffer + P->input.data.file.length;
126 P->lim = P->input.data.file.buffer;
130 case PSI_PARSE_STRING:
/* string input: the whole buffer is available, eof is buffer + length */
131 P->cur = P->tok = P->mrk = P->input.data.string.buffer;
132 P->eof = P->input.data.string.buffer + P->input.data.string.length;
137 PSI_DEBUG_PRINT(P, "PSI< Fill: cur=%p lim=%p eof=%p\n", P->cur, P->lim, P->eof);
140 switch (P->input.type) {
141 case PSI_PARSE_STRING:
/* consumed: bytes before the current token; reserved: bytes still needed */
147 size_t consumed = P->tok - P->buf;
148 size_t reserved = P->lim - P->tok;
149 size_t available = BSIZE - reserved;
/* slide the unconsumed tail to the front of the buffer (regions may overlap) */
153 memmove(P->buf, P->tok, reserved);
160 didread = fread(P->lim, 1, available, P->fp);
/* a short read: presumably where end of input is recorded -- body not visible */
162 if (didread < available) {
165 PSI_DEBUG_PRINT(P, "PSI< Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n",
166 consumed, reserved, available, didread);
172 PSI_DEBUG_PRINT(P, "PSI< Fill: avail=%td\n", P->lim - P->cur);
174 return P->lim - P->cur;
/*
 * Hand a token to the grammar parser.
 *
 * Two calls to psi_parser_proc_parse() are visible: one passing T->type
 * together with T, and one passing token type 0 with a NULL token.
 * Presumably the second form signals end of input when no token is given;
 * the condition choosing between them is not visible in this view -- TODO
 * confirm.
 */
177 void psi_parser_parse(struct psi_parser *P, struct psi_token *T)
180 psi_parser_proc_parse(P->proc, T->type, T, P);
182 psi_parser_proc_parse(P->proc, 0, NULL, P);
/*
 * Release everything owned by the parser and reset it.
 *
 * Frees the grammar parser handle, releases the input buffer according to
 * P->input.type (file buffer and stream handle, or the duplicated string),
 * runs psi_data_dtor() on PSI_DATA(P) and finally zero-fills *P. The
 * structure itself is not freed by the visible statements.
 *
 * NOTE(review): both munmap() and free() are applied to
 * P->input.data.file.buffer below; presumably only one of them is compiled
 * in, matching how the buffer was obtained (mapping vs. heap) -- the
 * selecting preprocessor lines are not visible in this view.
 */
186 void psi_parser_dtor(struct psi_parser *P)
188 psi_parser_proc_free(&P->proc);
190 switch (P->input.type) {
192 if (P->input.data.file.buffer) {
/* mapped buffer: unmap using the length recorded when the file was opened */
194 munmap(P->input.data.file.buffer, P->input.data.file.length);
/* heap buffer */
196 free(P->input.data.file.buffer);
199 if (P->input.data.file.handle) {
200 fclose(P->input.data.file.handle);
204 case PSI_PARSE_STRING:
205 if (P->input.data.string.buffer) {
206 free(P->input.data.string.buffer);
211 psi_data_dtor(PSI_DATA(P));
/* leave no stale pointers behind in the caller's structure */
213 memset(P, 0, sizeof(*P));
/*
 * Free a parser through a pointer to the caller's pointer.
 * NOTE(review): only the signature is visible in this view; presumably it
 * runs psi_parser_dtor(), frees *P and clears the caller's pointer --
 * confirm against the full file.
 */
216 void psi_parser_free(struct psi_parser **P)
/*
 * Compile-time guard: refuse to build when the read buffer (BSIZE) is
 * smaller than YYMAXFILL.
 * NOTE(review): the message says "greater than", but the condition also
 * accepts BSIZE == YYMAXFILL; confirm which of the two is intended.
 */
226 #if BSIZE < YYMAXFILL
227 # error BSIZE must be greater than YYMAXFILL
/*
 * RETURN(t): scanner helper used by the token rules. Among the visible
 * lines it emits a debug trace with the token number (P->num), the matched
 * text (P->tok up to P->cur), whether the token is PSI_T_EOF, and the
 * current file name, line and column.
 * NOTE(review): the remaining lines of the macro are not visible in this
 * view; presumably it stores t in P->num and returns t -- confirm.
 * NOTE(review): the final line here (advancing P->col by the length of the
 * matched text) does not end in a continuation backslash and is presumably
 * the body of a separate macro whose #define line is not visible.
 */
230 #define RETURN(t) do { \
232 PSI_DEBUG_PRINT(P, "PSI< TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \
233 P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \
234 P->file.fn, P->line, P->col); \
239 P->col += P->cur - P->tok
/*
 * NEWLINE(label): scanner helper invoked by the newline and line-comment
 * rules with a label name (nextline or comment).
 * NOTE(review): the macro body is not visible in this view; presumably it
 * updates the line/column counters and jumps to the given label -- confirm.
 */
241 #define NEWLINE(label) \
/*
 * Scan the next token from the parser's input (re2c-generated lexer).
 *
 * Visible behaviour:
 *  - psi_parser_fill(P, 0) is called to set up the input window (the
 *    condition guarding that call is not visible in this view).
 *  - re2c is configured to scan unsigned chars using P->cur as cursor,
 *    P->lim as limit and P->mrk as backtracking marker. When re2c needs
 *    more input it calls psi_parser_fill(P, <needed>) and the scanner
 *    returns PSI_T_EOF if that yields 0.
 *  - Punctuation and operators map one-to-one to PSI_T_* tokens.
 *  - Keywords are written in single quotes, which re2c treats as
 *    case-insensitive string literals.
 *  - NUMBER, NAME, NSNAME, DOLLAR_NAME and QUOTED_STRING produce the
 *    corresponding PSI_T_* tokens; the definitions of NAME and W are not
 *    visible in this view.
 *  - A block-comment opener jumps to the comment state; lines starting
 *    with "#" or "//" are consumed through the newline via
 *    NEWLINE(nextline).
 *  - In the comment state (the last three rules) a newline goes through
 *    NEWLINE(comment), the closing "*" "/" sequence leaves the comment via
 *    continue, and any other character stays in the comment state.
 *
 * NOTE(review): QUOTED_STRING requires at least one character between the
 * quotes and has no escape handling, so an empty string literal is not
 * matched by that rule -- confirm this is intended.
 * NOTE(review): the loop/label structure (nextline, comment), the rules for
 * whitespace and end of input, and the end of the function are in lines not
 * visible in this view.
 */
246 token_t psi_parser_scan(struct psi_parser *P)
249 psi_parser_fill(P, 0);
257 re2c:define:YYCTYPE = "unsigned char";
258 re2c:define:YYCURSOR = P->cur;
259 re2c:define:YYLIMIT = P->lim;
260 re2c:define:YYMARKER = P->mrk;
261 re2c:define:YYFILL = "{ if (!psi_parser_fill(P,@@)) RETURN(PSI_T_EOF); }";
262 re2c:yyfill:parameter = 0;
267 NSNAME = (NAME)? ("\\" NAME)+;
268 DOLLAR_NAME = '$' W+;
269 QUOTED_STRING = "\"" ([^\"])+ "\"";
270 NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;
272 "/*" { goto comment; }
273 ("#"|"//") .* "\n" { NEWLINE(nextline); }
274 "(" {RETURN(PSI_T_LPAREN);}
275 ")" {RETURN(PSI_T_RPAREN);}
276 ";" {RETURN(PSI_T_EOS);}
277 "," {RETURN(PSI_T_COMMA);}
278 ":" {RETURN(PSI_T_COLON);}
279 "{" {RETURN(PSI_T_LBRACE);}
280 "}" {RETURN(PSI_T_RBRACE);}
281 "[" {RETURN(PSI_T_LBRACKET);}
282 "]" {RETURN(PSI_T_RBRACKET);}
283 "!=" {RETURN(PSI_T_CMP_NE);}
284 "==" {RETURN(PSI_T_CMP_EQ);}
285 "&&" {RETURN(PSI_T_AND);}
286 "||" {RETURN(PSI_T_OR);}
287 "=" {RETURN(PSI_T_EQUALS);}
288 "*" {RETURN(PSI_T_ASTERISK);}
289 "~" {RETURN(PSI_T_TILDE);}
290 "!" {RETURN(PSI_T_NOT);}
291 "%" {RETURN(PSI_T_MODULO);}
292 "&" {RETURN(PSI_T_AMPERSAND);}
293 "+" {RETURN(PSI_T_PLUS);}
294 "-" {RETURN(PSI_T_MINUS);}
295 "/" {RETURN(PSI_T_SLASH);}
296 "|" {RETURN(PSI_T_PIPE);}
297 "^" {RETURN(PSI_T_CARET);}
298 "<<" {RETURN(PSI_T_LSHIFT);}
299 ">>" {RETURN(PSI_T_RSHIFT);}
300 "<=" {RETURN(PSI_T_CMP_LE);}
301 ">=" {RETURN(PSI_T_CMP_GE);}
302 "<" {RETURN(PSI_T_LCHEVR);}
303 ">" {RETURN(PSI_T_RCHEVR);}
304 "..." {RETURN(PSI_T_ELLIPSIS);}
305 [\r\n] { NEWLINE(nextline); }
307 'TRUE' {RETURN(PSI_T_TRUE);}
308 'FALSE' {RETURN(PSI_T_FALSE);}
309 'NULL' {RETURN(PSI_T_NULL);}
310 'MIXED' {RETURN(PSI_T_MIXED);}
311 'CALLABLE' {RETURN(PSI_T_CALLABLE);}
312 'VOID' {RETURN(PSI_T_VOID);}
313 'BOOL' {RETURN(PSI_T_BOOL);}
314 'CHAR' {RETURN(PSI_T_CHAR);}
315 'SHORT' {RETURN(PSI_T_SHORT);}
316 'INT' {RETURN(PSI_T_INT);}
317 'LONG' {RETURN(PSI_T_LONG);}
318 'FLOAT' {RETURN(PSI_T_FLOAT);}
319 'DOUBLE' {RETURN(PSI_T_DOUBLE);}
320 'INT8_T' {RETURN(PSI_T_INT8);}
321 'UINT8_T' {RETURN(PSI_T_UINT8);}
322 'INT16_T' {RETURN(PSI_T_INT16);}
323 'UINT16_T' {RETURN(PSI_T_UINT16);}
324 'INT32_T' {RETURN(PSI_T_INT32);}
325 'UINT32_T' {RETURN(PSI_T_UINT32);}
326 'INT64_T' {RETURN(PSI_T_INT64);}
327 'UINT64_T' {RETURN(PSI_T_UINT64);}
328 'UNSIGNED' {RETURN(PSI_T_UNSIGNED);}
329 'SIGNED' {RETURN(PSI_T_SIGNED);}
330 'STRING' {RETURN(PSI_T_STRING);}
331 'ARRAY' {RETURN(PSI_T_ARRAY);}
332 'OBJECT' {RETURN(PSI_T_OBJECT);}
333 'CALLBACK' {RETURN(PSI_T_CALLBACK);}
334 'STATIC' {RETURN(PSI_T_STATIC);}
335 'FUNCTION' {RETURN(PSI_T_FUNCTION);}
336 'TYPEDEF' {RETURN(PSI_T_TYPEDEF);}
337 'STRUCT' {RETURN(PSI_T_STRUCT);}
338 'UNION' {RETURN(PSI_T_UNION);}
339 'ENUM' {RETURN(PSI_T_ENUM);}
340 'CONST' {RETURN(PSI_T_CONST);}
341 'LIB' {RETURN(PSI_T_LIB);}
342 'LET' {RETURN(PSI_T_LET);}
343 'SET' {RETURN(PSI_T_SET);}
344 'PRE_ASSERT' {RETURN(PSI_T_PRE_ASSERT);}
345 'POST_ASSERT' {RETURN(PSI_T_POST_ASSERT);}
346 'RETURN' {RETURN(PSI_T_RETURN);}
347 'FREE' {RETURN(PSI_T_FREE);}
348 'TEMP' {RETURN(PSI_T_TEMP);}
349 'STRLEN' {RETURN(PSI_T_STRLEN);}
350 'STRVAL' {RETURN(PSI_T_STRVAL);}
351 'PATHVAL' {RETURN(PSI_T_PATHVAL);}
352 'INTVAL' {RETURN(PSI_T_INTVAL);}
353 'FLOATVAL' {RETURN(PSI_T_FLOATVAL);}
354 'BOOLVAL' {RETURN(PSI_T_BOOLVAL);}
355 'ARRVAL' {RETURN(PSI_T_ARRVAL);}
356 'OBJVAL' {RETURN(PSI_T_OBJVAL);}
357 'ZVAL' {RETURN(PSI_T_ZVAL);}
358 'COUNT' {RETURN(PSI_T_COUNT);}
359 'CALLOC' {RETURN(PSI_T_CALLOC);}
360 'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);}
361 'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);}
362 'TO_STRING' {RETURN(PSI_T_TO_STRING);}
363 'TO_INT' {RETURN(PSI_T_TO_INT);}
364 'TO_FLOAT' {RETURN(PSI_T_TO_FLOAT);}
365 'TO_BOOL' {RETURN(PSI_T_TO_BOOL);}
366 NUMBER {RETURN(PSI_T_NUMBER);}
367 NAME {RETURN(PSI_T_NAME);}
368 NSNAME {RETURN(PSI_T_NSNAME);}
369 DOLLAR_NAME {RETURN(PSI_T_DOLLAR_NAME);}
370 QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);}
377 "\n" { NEWLINE(comment); }
378 "*" "/" { continue; }
379 [^] { goto comment; }