validation and marshaling of structs/unions
[m6w6/ext-psi] / src / parser.re
1 #ifdef HAVE_CONFIG_H
2 # include "config.h"
3 #else
4 # include "php_config.h"
5 #endif
6
7 #include <stddef.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <errno.h>
11 #include <string.h>
12
13 #include "parser_proc.h"
14
15 #include "parser.h"
16
17 void *psi_parser_proc_Alloc(void*(unsigned long));
18 void psi_parser_proc_Free(void*, void(*)(void*));
19 void psi_parser_proc_(void *, token_t, struct psi_token *, struct psi_parser *);
20 void psi_parser_proc_Trace(FILE *, const char*);
21
22 struct psi_parser *psi_parser_init(struct psi_parser *P, const char *filename, psi_error_cb error, unsigned flags)
23 {
24 FILE *fp;
25
26 fp = fopen(filename, "r");
27
28 if (!fp) {
29 if (!(flags & PSI_PARSER_SILENT)) {
30 error(NULL, NULL, PSI_WARNING, "Could not open '%s' for reading: %s",
31 filename, strerror(errno));
32 }
33 return NULL;
34 }
35
36 if (!P) {
37 P = malloc(sizeof(*P));
38 }
39 memset(P, 0, sizeof(*P));
40
41 P->psi.file.fn = strdup(filename);
42 P->fp = fp;
43 P->col = 1;
44 P->line = 1;
45 P->error = error;
46 P->flags = flags;
47 P->proc = psi_parser_proc_Alloc(malloc);
48
49 if (flags & PSI_PARSER_DEBUG) {
50 psi_parser_proc_Trace(stderr, "PSI> ");
51 }
52
53 psi_parser_fill(P, 0);
54
55 return P;
56 }
57
58 ssize_t psi_parser_fill(struct psi_parser *P, size_t n)
59 {
60 if (P->flags & PSI_PARSER_DEBUG) {
61 fprintf(stderr, "PSI> Fill: n=%zu\n", n);
62 }
63 if (!n) {
64 P->cur = P->tok = P->lim = P->mrk = P->buf;
65 P->eof = NULL;
66 }
67
68 if (!P->eof) {
69 size_t consumed = P->tok - P->buf;
70 size_t reserved = P->lim - P->tok;
71 size_t available = BSIZE - reserved;
72 size_t didread;
73
74 if (consumed) {
75 memmove(P->buf, P->tok, reserved);
76 P->tok -= consumed;
77 P->cur -= consumed;
78 P->lim -= consumed;
79 P->mrk -= consumed;
80 }
81
82 didread = fread(P->lim, 1, available, P->fp);
83 P->lim += didread;
84 if (didread < available) {
85 P->eof = P->lim;
86 }
87
88 if (P->flags & PSI_PARSER_DEBUG) {
89 fprintf(stderr, "PSI> Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n",
90 consumed, reserved, available, didread);
91 }
92 }
93 if (P->flags & PSI_PARSER_DEBUG) {
94 fprintf(stderr, "PSI> Fill: avail=%zd\n", P->lim - P->cur);
95 }
96 return P->lim - P->cur;
97 }
98
99 void psi_parser_parse(struct psi_parser *P, struct psi_token *T)
100 {
101 if (T) {
102 psi_parser_proc_(P->proc, T->type, T, P);
103 } else {
104 psi_parser_proc_(P->proc, 0, NULL, P);
105 }
106 }
107
108 void psi_parser_dtor(struct psi_parser *P)
109 {
110 psi_parser_proc_Free(P->proc, free);
111
112 if (P->fp) {
113 fclose(P->fp);
114 }
115
116 psi_data_dtor(PSI_DATA(P));
117
118 memset(P, 0, sizeof(*P));
119 }
120
/*
 * Destroy and free the parser that *P points to, then reset *P to NULL.
 * Does nothing when *P is already NULL.
 */
void psi_parser_free(struct psi_parser **P)
{
	struct psi_parser *parser = *P;

	if (!parser) {
		return;
	}

	psi_parser_dtor(parser);
	free(parser);
	*P = NULL;
}
129
/*
 * The re2c directive below is replaced by the generator; the check further
 * down relies on it providing YYMAXFILL. BSIZE is the scan buffer size used
 * by psi_parser_fill() and must be able to hold YYMAXFILL bytes.
 */
/*!max:re2c*/
#define BSIZE 256

#if BSIZE < YYMAXFILL
# error BSIZE must not be less than YYMAXFILL
#endif
136
/*
 * Scanner helper macros; all of them expect a `struct psi_parser *P` in scope.
 *
 * RETURN(t)      record token id t in P->num, optionally trace the matched
 *                text and position to stderr, and return t from the scanner.
 * ADDCOLS        advance the column counter by the length of the text
 *                between P->tok and P->cur.
 * NEWLINE(label) start a new line (column 1, next line number) and jump to
 *                label.
 */
#define RETURN(t) do { \
	P->num = (t); \
	if (P->flags & PSI_PARSER_DEBUG) { \
		fprintf(stderr, "PSI> TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \
				P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \
				P->psi.file.fn, P->line, P->col); \
	} \
	return (t); \
} while (0)

#define ADDCOLS \
	(P->col += P->cur - P->tok)

#define NEWLINE(label) do { \
	P->col = 1; \
	++P->line; \
	goto label; \
} while (0)
154
155 token_t psi_parser_scan(struct psi_parser *P)
156 {
157 for (;;) {
158 ADDCOLS;
159 nextline:
160 P->tok = P->cur;
161 /*!re2c
162 re2c:indent:top = 2;
163 re2c:define:YYCTYPE = "unsigned char";
164 re2c:define:YYCURSOR = P->cur;
165 re2c:define:YYLIMIT = P->lim;
166 re2c:define:YYMARKER = P->mrk;
167 re2c:define:YYFILL = "{ if (!psi_parser_fill(P,@@)) RETURN(PSI_T_EOF); }";
168 re2c:yyfill:parameter = 0;
169
170 B = [^a-zA-Z0-9_];
171 W = [a-zA-Z0-9_];
172 NAME = [a-zA-Z_]W*;
173 NSNAME = (NAME)? ("\\" NAME)+;
174 DOLLAR_NAME = '$' NAME;
175 QUOTED_STRING = "\"" ([^\"])+ "\"";
176 NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;
177
178 "/*" { goto comment; }
179 ("#"|"//") .* "\n" { NEWLINE(nextline); }
180 "(" {RETURN(PSI_T_LPAREN);}
181 ")" {RETURN(PSI_T_RPAREN);}
182 ";" {RETURN(PSI_T_EOS);}
183 "," {RETURN(PSI_T_COMMA);}
184 ":" {RETURN(PSI_T_COLON);}
185 "{" {RETURN(PSI_T_LBRACE);}
186 "}" {RETURN(PSI_T_RBRACE);}
187 "[" {RETURN(PSI_T_LBRACKET);}
188 "]" {RETURN(PSI_T_RBRACKET);}
189 "=" {RETURN(PSI_T_EQUALS);}
190 "*" {RETURN(PSI_T_ASTERISK);}
191 "&" {RETURN(PSI_T_AMPERSAND);}
192 "+" {RETURN(PSI_T_PLUS);}
193 "-" {RETURN(PSI_T_MINUS);}
194 "/" {RETURN(PSI_T_SLASH);}
195 "..." {RETURN(PSI_T_ELLIPSIS);}
196 [\r\n] { NEWLINE(nextline); }
197 [\t ]+ { continue; }
198 'TRUE' {RETURN(PSI_T_TRUE);}
199 'FALSE' {RETURN(PSI_T_FALSE);}
200 'NULL' {RETURN(PSI_T_NULL);}
201 'MIXED' {RETURN(PSI_T_MIXED);}
202 'CALLABLE' {RETURN(PSI_T_CALLABLE);}
203 'VOID' {RETURN(PSI_T_VOID);}
204 'BOOL' {RETURN(PSI_T_BOOL);}
205 'CHAR' {RETURN(PSI_T_CHAR);}
206 'SHORT' {RETURN(PSI_T_SHORT);}
207 'INT' {RETURN(PSI_T_INT);}
208 'LONG' {RETURN(PSI_T_LONG);}
209 'FLOAT' {RETURN(PSI_T_FLOAT);}
210 'DOUBLE' {RETURN(PSI_T_DOUBLE);}
211 'INT8_T' {RETURN(PSI_T_INT8);}
212 'UINT8_T' {RETURN(PSI_T_UINT8);}
213 'INT16_T' {RETURN(PSI_T_INT16);}
214 'UINT16_T' {RETURN(PSI_T_UINT16);}
215 'INT32_T' {RETURN(PSI_T_INT32);}
216 'UINT32_T' {RETURN(PSI_T_UINT32);}
217 'INT64_T' {RETURN(PSI_T_INT64);}
218 'UINT64_T' {RETURN(PSI_T_UINT64);}
219 'UNSIGNED' {RETURN(PSI_T_UNSIGNED);}
220 'SIGNED' {RETURN(PSI_T_SIGNED);}
221 'STRING' {RETURN(PSI_T_STRING);}
222 'ARRAY' {RETURN(PSI_T_ARRAY);}
223 'OBJECT' {RETURN(PSI_T_OBJECT);}
224 'CALLBACK' {RETURN(PSI_T_CALLBACK);}
225 'FUNCTION' {RETURN(PSI_T_FUNCTION);}
226 'TYPEDEF' {RETURN(PSI_T_TYPEDEF);}
227 'STRUCT' {RETURN(PSI_T_STRUCT);}
228 'UNION' {RETURN(PSI_T_UNION);}
229 'ENUM' {RETURN(PSI_T_ENUM);}
230 'CONST' {RETURN(PSI_T_CONST);}
231 'LIB' {RETURN(PSI_T_LIB);}
232 'LET' {RETURN(PSI_T_LET);}
233 'SET' {RETURN(PSI_T_SET);}
234 'RETURN' {RETURN(PSI_T_RETURN);}
235 'FREE' {RETURN(PSI_T_FREE);}
236 'TEMP' {RETURN(PSI_T_TEMP);}
237 'STRLEN' {RETURN(PSI_T_STRLEN);}
238 'STRVAL' {RETURN(PSI_T_STRVAL);}
239 'PATHVAL' {RETURN(PSI_T_PATHVAL);}
240 'INTVAL' {RETURN(PSI_T_INTVAL);}
241 'FLOATVAL' {RETURN(PSI_T_FLOATVAL);}
242 'BOOLVAL' {RETURN(PSI_T_BOOLVAL);}
243 'ARRVAL' {RETURN(PSI_T_ARRVAL);}
244 'OBJVAL' {RETURN(PSI_T_OBJVAL);}
245 'ZVAL' {RETURN(PSI_T_ZVAL);}
246 'CALLOC' {RETURN(PSI_T_CALLOC);}
247 'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);}
248 'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);}
249 'TO_STRING' {RETURN(PSI_T_TO_STRING);}
250 'TO_INT' {RETURN(PSI_T_TO_INT);}
251 'TO_FLOAT' {RETURN(PSI_T_TO_FLOAT);}
252 'TO_BOOL' {RETURN(PSI_T_TO_BOOL);}
253 NUMBER {RETURN(PSI_T_NUMBER);}
254 NAME {RETURN(PSI_T_NAME);}
255 NSNAME {RETURN(PSI_T_NSNAME);}
256 DOLLAR_NAME {RETURN(PSI_T_DOLLAR_NAME);}
257 QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);}
258 [^] {break;}
259 */
260
261 comment:
262 P->tok = P->cur;
263 /*!re2c
264 "\n" { NEWLINE(comment); }
265 "*" "/" { continue; }
266 [^] { goto comment; }
267 */
268 }
269 return -1;
270 }