psi_dump() & psi_validate()
[m6w6/ext-psi] / src / parser.re
1 #include <stddef.h>
2 #include <stdio.h>
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6
7 #include "parser.h"
8 #include "parser_proc.h"
9
10 void *PSI_ParserProcAlloc(void*(unsigned long));
11 void PSI_ParserProcFree(void*, void(*)(void*));
12 void PSI_ParserProc(void *, token_t, PSI_Token *, PSI_Parser *);
13 void PSI_ParserProcTrace(FILE *, const char*);
14
15 PSI_Parser *PSI_ParserInit(PSI_Parser *P, const char *filename, psi_error_cb error, unsigned flags)
16 {
17 FILE *fp;
18
19 fp = fopen(filename, "r");
20
21 if (!fp) {
22 error(NULL, PSI_WARNING, "Could not open '%s' for reading: %s",
23 filename, strerror(errno));
24 return NULL;
25 }
26
27 if (!P) {
28 P = malloc(sizeof(*P));
29 }
30 memset(P, 0, sizeof(*P));
31
32 P->psi.file.fn = strdup(filename);
33 P->fp = fp;
34 P->col = 1;
35 P->line = 1;
36 P->error = error;
37 P->flags = flags;
38
39 P->proc = PSI_ParserProcAlloc(malloc);
40 if (flags & PSI_PARSER_DEBUG) {
41 PSI_ParserProcTrace(stderr, "PSI> ");
42 }
43
44 PSI_ParserFill(P, 0);
45
46 return P;
47 }
48
49 size_t PSI_ParserFill(PSI_Parser *P, size_t n)
50 {
51 if (P->flags & PSI_PARSER_DEBUG) {
52 fprintf(stderr, "PSI> Fill: n=%zu\n", n);
53 }
54 if (!n) {
55 P->cur = P->tok = P->lim = P->mrk = P->buf;
56 P->eof = NULL;
57 }
58
59 if (!P->eof) {
60 size_t consumed = P->tok - P->buf;
61 size_t reserved = P->lim - P->tok;
62 size_t available = BSIZE - reserved;
63 size_t didread;
64
65 if (consumed) {
66 memmove(P->buf, P->tok, reserved);
67 P->tok -= consumed;
68 P->cur -= consumed;
69 P->lim -= consumed;
70 P->mrk -= consumed;
71 }
72
73 didread = fread(P->lim, 1, available, P->fp);
74 P->lim += didread;
75 if (didread < available) {
76 P->eof = P->lim;
77 }
78
79 if (P->flags & PSI_PARSER_DEBUG) {
80 fprintf(stderr, "PSI> Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n",
81 consumed, reserved, available, didread);
82 }
83 }
84 if (P->flags & PSI_PARSER_DEBUG) {
85 fprintf(stderr, "PSI> Fill: avail=%zu\n", P->lim - P->cur);
86 }
87 return P->lim - P->cur;
88 }
89
90 void PSI_ParserParse(PSI_Parser *P, PSI_Token *T)
91 {
92 if (T) {
93 PSI_ParserProc(P->proc, T->type, T, P);
94 } else {
95 PSI_ParserProc(P->proc, 0, NULL, P);
96 }
97 }
98
99 void PSI_ParserDtor(PSI_Parser *P)
100 {
101 PSI_ParserProcFree(P->proc, free);
102
103 if (P->fp) {
104 fclose(P->fp);
105 }
106
107 PSI_DataDtor((PSI_Data *) P);
108
109 memset(P, 0, sizeof(*P));
110 }
111
112 void PSI_ParserFree(PSI_Parser **P)
113 {
114 if (*P) {
115 PSI_ParserDtor(*P);
116 free(*P);
117 *P = NULL;
118 }
119 }
120
/* re2c replaces the following directive with the definition of YYMAXFILL. */
/*!max:re2c*/
/* Size in bytes of the scan buffer that PSI_ParserFill() refills. */
#define BSIZE 256

/* The buffer has to hold at least YYMAXFILL bytes; equal sizes are accepted. */
#if BSIZE < YYMAXFILL
# error BSIZE must not be smaller than YYMAXFILL
#endif
127
/*
 * Map a token id to its name as a string literal, e.g. for debug output.
 * Evaluates to "<UNKNOWN>" for ids that are not listed.
 *
 * The whole expansion is parenthesized so it can be used inside larger
 * expressions. Note that `n` is evaluated repeatedly; pass an expression
 * without side effects.
 */
#define PSI_T(n) ( \
	(n) == PSI_T_NAME ? "NAME" : \
	(n) == PSI_T_PLUS ? "PLUS" : \
	(n) == PSI_T_MINUS ? "MINUS" : \
	(n) == PSI_T_SLASH ? "SLASH" : \
	(n) == PSI_T_ASTERISK ? "ASTERISK" : \
	(n) == PSI_T_TEMP ? "TEMP" : \
	(n) == PSI_T_FREE ? "FREE" : \
	(n) == PSI_T_SET ? "SET" : \
	(n) == PSI_T_LET ? "LET" : \
	(n) == PSI_T_RETURN ? "RETURN" : \
	(n) == PSI_T_LIB ? "LIB" : \
	(n) == PSI_T_CHAR ? "CHAR" : \
	(n) == PSI_T_SHORT ? "SHORT" : \
	(n) == PSI_T_INT ? "INT" : \
	(n) == PSI_T_LONG ? "LONG" : \
	(n) == PSI_T_SIGNED ? "SIGNED" : \
	(n) == PSI_T_UNSIGNED ? "UNSIGNED" : \
	(n) == PSI_T_EOF ? "EOF" : \
	(n) == PSI_T_QUOTED_STRING ? "QUOTED_STRING" : \
	(n) == PSI_T_EOS ? "EOS" : \
	(n) == PSI_T_STRUCT ? "STRUCT" : \
	(n) == PSI_T_LBRACE ? "LBRACE" : \
	(n) == PSI_T_RBRACE ? "RBRACE" : \
	(n) == PSI_T_COLON ? "COLON" : \
	(n) == PSI_T_LPAREN ? "LPAREN" : \
	(n) == PSI_T_NUMBER ? "NUMBER" : \
	(n) == PSI_T_RPAREN ? "RPAREN" : \
	(n) == PSI_T_BOOL ? "BOOL" : \
	(n) == PSI_T_FLOAT ? "FLOAT" : \
	(n) == PSI_T_STRING ? "STRING" : \
	(n) == PSI_T_CONST ? "CONST" : \
	(n) == PSI_T_NSNAME ? "NSNAME" : \
	(n) == PSI_T_EQUALS ? "EQUALS" : \
	(n) == PSI_T_TYPEDEF ? "TYPEDEF" : \
	(n) == PSI_T_VOID ? "VOID" : \
	(n) == PSI_T_LBRACKET ? "LBRACKET" : \
	(n) == PSI_T_RBRACKET ? "RBRACKET" : \
	(n) == PSI_T_COMMA ? "COMMA" : \
	(n) == PSI_T_ELLIPSIS ? "ELLIPSIS" : \
	(n) == PSI_T_DOUBLE ? "DOUBLE" : \
	(n) == PSI_T_INT8 ? "INT8" : \
	(n) == PSI_T_UINT8 ? "UINT8" : \
	(n) == PSI_T_INT16 ? "INT16" : \
	(n) == PSI_T_UINT16 ? "UINT16" : \
	(n) == PSI_T_INT32 ? "INT32" : \
	(n) == PSI_T_UINT32 ? "UINT32" : \
	(n) == PSI_T_INT64 ? "INT64" : \
	(n) == PSI_T_UINT64 ? "UINT64" : \
	(n) == PSI_T_FUNCTION ? "FUNCTION" : \
	(n) == PSI_T_NULL ? "NULL" : \
	(n) == PSI_T_TRUE ? "TRUE" : \
	(n) == PSI_T_FALSE ? "FALSE" : \
	(n) == PSI_T_DOLLAR ? "DOLLAR" : \
	(n) == PSI_T_CALLOC ? "CALLOC" : \
	(n) == PSI_T_OBJVAL ? "OBJVAL" : \
	(n) == PSI_T_ARRVAL ? "ARRVAL" : \
	(n) == PSI_T_PATHVAL ? "PATHVAL" : \
	(n) == PSI_T_STRLEN ? "STRLEN" : \
	(n) == PSI_T_STRVAL ? "STRVAL" : \
	(n) == PSI_T_FLOATVAL ? "FLOATVAL" : \
	(n) == PSI_T_INTVAL ? "INTVAL" : \
	(n) == PSI_T_BOOLVAL ? "BOOLVAL" : \
	(n) == PSI_T_TO_OBJECT ? "TO_OBJECT" : \
	(n) == PSI_T_TO_ARRAY ? "TO_ARRAY" : \
	(n) == PSI_T_TO_STRING ? "TO_STRING" : \
	(n) == PSI_T_TO_INT ? "TO_INT" : \
	(n) == PSI_T_TO_FLOAT ? "TO_FLOAT" : \
	(n) == PSI_T_TO_BOOL ? "TO_BOOL" : \
	(n) == PSI_T_MIXED ? "MIXED" : \
	(n) == PSI_T_ARRAY ? "ARRAY" : \
	(n) == PSI_T_OBJECT ? "OBJECT" : \
	(n) == PSI_T_AMPERSAND ? "AMPERSAND" : \
	"<UNKNOWN>")
198
/*
 * Helper macros for PSI_ParserScan(). All of them expect a parser pointer
 * named `P` in scope; NEWLINE additionally expects a label `nextline`.
 */

/*
 * Store token id `t` in P->num, trace the matched text and position to
 * stderr when PSI_PARSER_DEBUG is set, and return `t` from the enclosing
 * function. `t` is evaluated twice.
 */
#define RETURN(t) do { \
	P->num = (t); \
	if (P->flags & PSI_PARSER_DEBUG) { \
		fprintf(stderr, "PSI> TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \
				P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \
				P->psi.file.fn, P->line, P->col); \
	} \
	return (t); \
} while (0)

/* Advance the column counter by the length of the text matched last. */
#define ADDCOLS \
	(P->col += P->cur - P->tok)

/* Start a new line: reset the column, bump the line counter, and rescan. */
#define NEWLINE do { \
	P->col = 1; \
	++P->line; \
	goto nextline; \
} while (0)
216
217 token_t PSI_ParserScan(PSI_Parser *P)
218 {
219 for (;;) {
220 ADDCOLS;
221 nextline:
222 P->tok = P->cur;
223 /*!re2c
224 re2c:indent:top = 2;
225 re2c:define:YYCTYPE = "unsigned char";
226 re2c:define:YYCURSOR = P->cur;
227 re2c:define:YYLIMIT = P->lim;
228 re2c:define:YYMARKER = P->mrk;
229 re2c:define:YYFILL = "{ if (!PSI_ParserFill(P,@@)) RETURN(PSI_T_EOF); }";
230 re2c:yyfill:parameter = 0;
231
232 B = [^a-zA-Z0-9_];
233 W = [a-zA-Z0-9_];
234 NAME = [a-zA-Z_]W*;
235 NSNAME = (NAME)? ("\\" NAME)+;
236 QUOTED_STRING = "\"" ([^\"])+ "\"";
237 NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;
238
239 ("#"|"//") .* "\n" { NEWLINE; }
240 "(" {RETURN(PSI_T_LPAREN);}
241 ")" {RETURN(PSI_T_RPAREN);}
242 ";" {RETURN(PSI_T_EOS);}
243 "," {RETURN(PSI_T_COMMA);}
244 ":" {RETURN(PSI_T_COLON);}
245 "{" {RETURN(PSI_T_LBRACE);}
246 "}" {RETURN(PSI_T_RBRACE);}
247 "[" {RETURN(PSI_T_LBRACKET);}
248 "]" {RETURN(PSI_T_RBRACKET);}
249 "=" {RETURN(PSI_T_EQUALS);}
250 "$" {RETURN(PSI_T_DOLLAR);}
251 "*" {RETURN(PSI_T_ASTERISK);}
252 "&" {RETURN(PSI_T_AMPERSAND);}
253 "+" {RETURN(PSI_T_PLUS);}
254 "-" {RETURN(PSI_T_MINUS);}
255 "/" {RETURN(PSI_T_SLASH);}
256 "..." {RETURN(PSI_T_ELLIPSIS);}
257 [\r\n] { NEWLINE; }
258 [\t ]+ { continue; }
259 'TRUE' {RETURN(PSI_T_TRUE);}
260 'FALSE' {RETURN(PSI_T_FALSE);}
261 'NULL' {RETURN(PSI_T_NULL);}
262 'MIXED' {RETURN(PSI_T_MIXED);}
263 'VOID' {RETURN(PSI_T_VOID);}
264 'BOOL' {RETURN(PSI_T_BOOL);}
265 'CHAR' {RETURN(PSI_T_CHAR);}
266 'SHORT' {RETURN(PSI_T_SHORT);}
267 'INT' {RETURN(PSI_T_INT);}
268 'LONG' {RETURN(PSI_T_LONG);}
269 'FLOAT' {RETURN(PSI_T_FLOAT);}
270 'DOUBLE' {RETURN(PSI_T_DOUBLE);}
271 'INT8_T' {RETURN(PSI_T_INT8);}
272 'UINT8_T' {RETURN(PSI_T_UINT8);}
273 'INT16_T' {RETURN(PSI_T_INT16);}
274 'UINT16_T' {RETURN(PSI_T_UINT16);}
275 'INT32_T' {RETURN(PSI_T_INT32);}
276 'UINT32_T' {RETURN(PSI_T_UINT32);}
277 'INT64_T' {RETURN(PSI_T_INT64);}
278 'UINT64_T' {RETURN(PSI_T_UINT64);}
279 'UNSIGNED' {RETURN(PSI_T_UNSIGNED);}
280 'SIGNED' {RETURN(PSI_T_SIGNED);}
281 'STRING' {RETURN(PSI_T_STRING);}
282 'ARRAY' {RETURN(PSI_T_ARRAY);}
283 'OBJECT' {RETURN(PSI_T_OBJECT);}
284 'FUNCTION' {RETURN(PSI_T_FUNCTION);}
285 'TYPEDEF' {RETURN(PSI_T_TYPEDEF);}
286 'STRUCT' {RETURN(PSI_T_STRUCT);}
287 'CONST' {RETURN(PSI_T_CONST);}
288 'LIB' {RETURN(PSI_T_LIB);}
289 'LET' {RETURN(PSI_T_LET);}
290 'SET' {RETURN(PSI_T_SET);}
291 'RETURN' {RETURN(PSI_T_RETURN);}
292 'FREE' {RETURN(PSI_T_FREE);}
293 'TEMP' {RETURN(PSI_T_TEMP);}
294 'STRLEN' {RETURN(PSI_T_STRLEN);}
295 'STRVAL' {RETURN(PSI_T_STRVAL);}
296 'PATHVAL' {RETURN(PSI_T_PATHVAL);}
297 'INTVAL' {RETURN(PSI_T_INTVAL);}
298 'FLOATVAL' {RETURN(PSI_T_FLOATVAL);}
299 'BOOLVAL' {RETURN(PSI_T_BOOLVAL);}
300 'ARRVAL' {RETURN(PSI_T_ARRVAL);}
301 'OBJVAL' {RETURN(PSI_T_OBJVAL);}
302 'CALLOC' {RETURN(PSI_T_CALLOC);}
303 'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);}
304 'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);}
305 'TO_STRING' {RETURN(PSI_T_TO_STRING);}
306 'TO_INT' {RETURN(PSI_T_TO_INT);}
307 'TO_FLOAT' {RETURN(PSI_T_TO_FLOAT);}
308 'TO_BOOL' {RETURN(PSI_T_TO_BOOL);}
309 NUMBER {RETURN(PSI_T_NUMBER);}
310 NAME {RETURN(PSI_T_NAME);}
311 NSNAME {RETURN(PSI_T_NSNAME);}
312 QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);}
313 [^] {break;}
314 */
315 }
316 return -1;
317 }