type parser fixes
[m6w6/ext-psi] / src / parser.re
1 #include <stddef.h>
2 #include <stdio.h>
3 #include <assert.h>
4
5 #include "parser.h"
6 #include "parser_proc.h"
7
8 void *PSI_ParserProcAlloc(void*(unsigned long));
9 void PSI_ParserProcFree(void*, void(*)(void*));
10 void PSI_ParserProc(void *, token_t, PSI_Token *, PSI_Parser *);
11 void PSI_ParserProcTrace(FILE *, const char*);
12
13 PSI_Parser *PSI_ParserInit(PSI_Parser *P, const char *filename, psi_error_cb error, unsigned flags)
14 {
15 FILE *fp;
16
17 if (!P) {
18 P = malloc(sizeof(*P));
19 }
20 memset(P, 0, sizeof(*P));
21
22 fp = fopen(filename, "r");
23
24 if (!fp) {
25 perror(filename);
26 return NULL;
27 }
28
29 if (!P) {
30 P = malloc(sizeof(*P));
31 }
32 memset(P, 0, sizeof(*P));
33
34 P->psi.file.fn = strdup(filename);
35 P->fp = fp;
36 P->col = 1;
37 P->line = 1;
38 P->error = error;
39 P->flags = flags;
40
41 P->proc = PSI_ParserProcAlloc(malloc);
42 if (flags & PSI_PARSER_DEBUG) {
43 PSI_ParserProcTrace(stderr, "PSI> ");
44 }
45
46 PSI_ParserFill(P, 0);
47
48 return P;
49 }
50
51 size_t PSI_ParserFill(PSI_Parser *P, size_t n)
52 {
53 if (P->flags & PSI_PARSER_DEBUG) {
54 fprintf(stderr, "PSI> Fill: n=%zu\n", n);
55 }
56 if (!n) {
57 P->cur = P->tok = P->lim = P->mrk = P->buf;
58 P->eof = NULL;
59 }
60
61 if (!P->eof) {
62 size_t consumed = P->tok - P->buf;
63 size_t reserved = P->lim - P->tok;
64 size_t available = BSIZE - reserved;
65 size_t didread;
66
67 if (consumed) {
68 memmove(P->buf, P->tok, reserved);
69 P->tok -= consumed;
70 P->cur -= consumed;
71 P->lim -= consumed;
72 P->mrk -= consumed;
73 }
74
75 didread = fread(P->lim, 1, available, P->fp);
76 P->lim += didread;
77 if (didread < available) {
78 P->eof = P->lim;
79 }
80
81 if (P->flags & PSI_PARSER_DEBUG) {
82 fprintf(stderr, "PSI> Fill: consumed=%zu reserved=%zu available=%zu didread=%zu\n",
83 consumed, reserved, available, didread);
84 }
85 }
86 if (P->flags & PSI_PARSER_DEBUG) {
87 fprintf(stderr, "PSI> Fill: avail=%zu\n", P->lim - P->cur);
88 }
89 return P->lim - P->cur;
90 }
91
92 void PSI_ParserParse(PSI_Parser *P, PSI_Token *T)
93 {
94 if (T) {
95 PSI_ParserProc(P->proc, T->type, T, P);
96 } else {
97 PSI_ParserProc(P->proc, 0, NULL, P);
98 }
99 }
100
101 void PSI_ParserDtor(PSI_Parser *P)
102 {
103 PSI_ParserProcFree(P->proc, free);
104
105 if (P->fp) {
106 fclose(P->fp);
107 }
108
109 PSI_DataDtor((PSI_Data *) P);
110
111 memset(P, 0, sizeof(*P));
112 }
113
114 void PSI_ParserFree(PSI_Parser **P)
115 {
116 if (*P) {
117 PSI_ParserDtor(*P);
118 free(*P);
119 *P = NULL;
120 }
121 }
122
/* The re2c directive below is where YYMAXFILL comes from (see re2c manual). */
/*!max:re2c*/
/* Size of the scan buffer, in bytes. */
#define BSIZE 256

/* The check rejects only BSIZE < YYMAXFILL, i.e. equality is accepted. */
#if BSIZE < YYMAXFILL
# error BSIZE must be at least YYMAXFILL
#endif
129
/*
 * Map a token number to its printable name; yields "<UNKNOWN>" for any value
 * that is not listed. The argument is evaluated once per comparison, so it
 * must not have side effects.
 */
#define PSI_T(n) ( \
	(n) == PSI_T_NAME ? "NAME" : \
	(n) == PSI_T_PLUS ? "PLUS" : \
	(n) == PSI_T_MINUS ? "MINUS" : \
	(n) == PSI_T_SLASH ? "SLASH" : \
	(n) == PSI_T_ASTERISK ? "ASTERISK" : \
	(n) == PSI_T_TEMP ? "TEMP" : \
	(n) == PSI_T_FREE ? "FREE" : \
	(n) == PSI_T_SET ? "SET" : \
	(n) == PSI_T_LET ? "LET" : \
	(n) == PSI_T_RETURN ? "RETURN" : \
	(n) == PSI_T_LIB ? "LIB" : \
	(n) == PSI_T_CHAR ? "CHAR" : \
	(n) == PSI_T_SHORT ? "SHORT" : \
	(n) == PSI_T_INT ? "INT" : \
	(n) == PSI_T_LONG ? "LONG" : \
	(n) == PSI_T_UNSIGNED ? "UNSIGNED" : \
	(n) == PSI_T_SIGNED ? "SIGNED" : \
	(n) == PSI_T_EOF ? "EOF" : \
	(n) == PSI_T_QUOTED_STRING ? "QUOTED_STRING" : \
	(n) == PSI_T_EOS ? "EOS" : \
	(n) == PSI_T_STRUCT ? "STRUCT" : \
	(n) == PSI_T_LBRACE ? "LBRACE" : \
	(n) == PSI_T_RBRACE ? "RBRACE" : \
	(n) == PSI_T_COLON ? "COLON" : \
	(n) == PSI_T_LPAREN ? "LPAREN" : \
	(n) == PSI_T_NUMBER ? "NUMBER" : \
	(n) == PSI_T_RPAREN ? "RPAREN" : \
	(n) == PSI_T_BOOL ? "BOOL" : \
	(n) == PSI_T_FLOAT ? "FLOAT" : \
	(n) == PSI_T_STRING ? "STRING" : \
	(n) == PSI_T_CONST ? "CONST" : \
	(n) == PSI_T_NSNAME ? "NSNAME" : \
	(n) == PSI_T_EQUALS ? "EQUALS" : \
	(n) == PSI_T_TYPEDEF ? "TYPEDEF" : \
	(n) == PSI_T_VOID ? "VOID" : \
	(n) == PSI_T_LBRACKET ? "LBRACKET" : \
	(n) == PSI_T_RBRACKET ? "RBRACKET" : \
	(n) == PSI_T_COMMA ? "COMMA" : \
	(n) == PSI_T_ELLIPSIS ? "ELLIPSIS" : \
	(n) == PSI_T_DOUBLE ? "DOUBLE" : \
	(n) == PSI_T_INT8 ? "INT8" : \
	(n) == PSI_T_UINT8 ? "UINT8" : \
	(n) == PSI_T_INT16 ? "INT16" : \
	(n) == PSI_T_UINT16 ? "UINT16" : \
	(n) == PSI_T_INT32 ? "INT32" : \
	(n) == PSI_T_UINT32 ? "UINT32" : \
	(n) == PSI_T_INT64 ? "INT64" : \
	(n) == PSI_T_UINT64 ? "UINT64" : \
	(n) == PSI_T_FUNCTION ? "FUNCTION" : \
	(n) == PSI_T_NULL ? "NULL" : \
	(n) == PSI_T_TRUE ? "TRUE" : \
	(n) == PSI_T_FALSE ? "FALSE" : \
	(n) == PSI_T_DOLLAR ? "DOLLAR" : \
	(n) == PSI_T_CALLOC ? "CALLOC" : \
	(n) == PSI_T_OBJVAL ? "OBJVAL" : \
	(n) == PSI_T_ARRVAL ? "ARRVAL" : \
	(n) == PSI_T_PATHVAL ? "PATHVAL" : \
	(n) == PSI_T_STRLEN ? "STRLEN" : \
	(n) == PSI_T_STRVAL ? "STRVAL" : \
	(n) == PSI_T_FLOATVAL ? "FLOATVAL" : \
	(n) == PSI_T_INTVAL ? "INTVAL" : \
	(n) == PSI_T_BOOLVAL ? "BOOLVAL" : \
	(n) == PSI_T_TO_OBJECT ? "TO_OBJECT" : \
	(n) == PSI_T_TO_ARRAY ? "TO_ARRAY" : \
	(n) == PSI_T_TO_STRING ? "TO_STRING" : \
	(n) == PSI_T_TO_INT ? "TO_INT" : \
	(n) == PSI_T_TO_FLOAT ? "TO_FLOAT" : \
	(n) == PSI_T_TO_BOOL ? "TO_BOOL" : \
	(n) == PSI_T_MIXED ? "MIXED" : \
	(n) == PSI_T_ARRAY ? "ARRAY" : \
	(n) == PSI_T_OBJECT ? "OBJECT" : \
	(n) == PSI_T_AMPERSAND ? "AMPERSAND" : \
	"<UNKNOWN>" \
)
200
/*
 * Record token `t` in P->num, print a trace line to stderr when
 * PSI_PARSER_DEBUG is set, and return `t` from the enclosing function.
 * Expects a PSI_Parser pointer named P in scope. `t` is expanded twice, so
 * pass a constant or another side-effect-free expression.
 */
#define RETURN(t) do { \
	P->num = (t); \
	if (P->flags & PSI_PARSER_DEBUG) { \
		fprintf(stderr, "PSI> TOKEN: %d %.*s (EOF=%d %s:%u:%u)\n", \
				P->num, (int) (P->cur-P->tok), P->tok, P->num == PSI_T_EOF, \
				P->psi.file.fn, P->line, P->col); \
	} \
	return (t); \
} while (0)
210
/* Advance the column counter by the length of the text just scanned. */
#define ADDCOLS \
	(P->col += P->cur - P->tok)

/*
 * Start a new line: reset the column, bump the line counter and jump to the
 * `nextline` label of the enclosing function (which skips ADDCOLS there).
 * Wrapped in do/while (0) so it behaves as a single statement.
 */
#define NEWLINE do { \
	P->col = 1; \
	++P->line; \
	goto nextline; \
} while (0)
218
219 token_t PSI_ParserScan(PSI_Parser *P)
220 {
221 for (;;) {
222 ADDCOLS;
223 nextline:
224 P->tok = P->cur;
225 /*!re2c
226 re2c:indent:top = 2;
227 re2c:define:YYCTYPE = "unsigned char";
228 re2c:define:YYCURSOR = P->cur;
229 re2c:define:YYLIMIT = P->lim;
230 re2c:define:YYMARKER = P->mrk;
231 re2c:define:YYFILL = "{ if (!PSI_ParserFill(P,@@)) RETURN(PSI_T_EOF); }";
232 re2c:yyfill:parameter = 0;
233
234 B = [^a-zA-Z0-9_];
235 W = [a-zA-Z0-9_];
236 NAME = [a-zA-Z_]W*;
237 NSNAME = (NAME)? ("\\" NAME)+;
238 QUOTED_STRING = "\"" ([^\"])+ "\"";
239 NUMBER = [+-]? [0-9]* "."? [0-9]+ ([eE] [+-]? [0-9]+)?;
240
241 ("#"|"//") .* "\n" { NEWLINE; }
242 "(" {RETURN(PSI_T_LPAREN);}
243 ")" {RETURN(PSI_T_RPAREN);}
244 ";" {RETURN(PSI_T_EOS);}
245 "," {RETURN(PSI_T_COMMA);}
246 ":" {RETURN(PSI_T_COLON);}
247 "{" {RETURN(PSI_T_LBRACE);}
248 "}" {RETURN(PSI_T_RBRACE);}
249 "[" {RETURN(PSI_T_LBRACKET);}
250 "]" {RETURN(PSI_T_RBRACKET);}
251 "=" {RETURN(PSI_T_EQUALS);}
252 "$" {RETURN(PSI_T_DOLLAR);}
253 "*" {RETURN(PSI_T_ASTERISK);}
254 "&" {RETURN(PSI_T_AMPERSAND);}
255 "+" {RETURN(PSI_T_PLUS);}
256 "-" {RETURN(PSI_T_MINUS);}
257 "/" {RETURN(PSI_T_SLASH);}
258 "..." {RETURN(PSI_T_ELLIPSIS);}
259 [\r\n] { NEWLINE; }
260 [\t ]+ { continue; }
261 'TRUE' {RETURN(PSI_T_TRUE);}
262 'FALSE' {RETURN(PSI_T_FALSE);}
263 'NULL' {RETURN(PSI_T_NULL);}
264 'MIXED' {RETURN(PSI_T_MIXED);}
265 'VOID' {RETURN(PSI_T_VOID);}
266 'BOOL' {RETURN(PSI_T_BOOL);}
267 'CHAR' {RETURN(PSI_T_CHAR);}
268 'SHORT' {RETURN(PSI_T_SHORT);}
269 'INT' {RETURN(PSI_T_INT);}
270 'LONG' {RETURN(PSI_T_LONG);}
271 'FLOAT' {RETURN(PSI_T_FLOAT);}
272 'DOUBLE' {RETURN(PSI_T_DOUBLE);}
273 'INT8_T' {RETURN(PSI_T_INT8);}
274 'UINT8_T' {RETURN(PSI_T_UINT8);}
275 'INT16_T' {RETURN(PSI_T_INT16);}
276 'UINT16_T' {RETURN(PSI_T_UINT16);}
277 'INT32_T' {RETURN(PSI_T_INT32);}
278 'UINT32_T' {RETURN(PSI_T_UINT32);}
279 'INT64_T' {RETURN(PSI_T_INT64);}
280 'UINT64_T' {RETURN(PSI_T_UINT64);}
281 'UNSIGNED' {RETURN(PSI_T_UNSIGNED);}
282 'SIGNED' {RETURN(PSI_T_SIGNED);}
283 'STRING' {RETURN(PSI_T_STRING);}
284 'ARRAY' {RETURN(PSI_T_ARRAY);}
285 'OBJECT' {RETURN(PSI_T_OBJECT);}
286 'FUNCTION' {RETURN(PSI_T_FUNCTION);}
287 'TYPEDEF' {RETURN(PSI_T_TYPEDEF);}
288 'STRUCT' {RETURN(PSI_T_STRUCT);}
289 'CONST' {RETURN(PSI_T_CONST);}
290 'LIB' {RETURN(PSI_T_LIB);}
291 'LET' {RETURN(PSI_T_LET);}
292 'SET' {RETURN(PSI_T_SET);}
293 'RETURN' {RETURN(PSI_T_RETURN);}
294 'FREE' {RETURN(PSI_T_FREE);}
295 'TEMP' {RETURN(PSI_T_TEMP);}
296 'STRLEN' {RETURN(PSI_T_STRLEN);}
297 'STRVAL' {RETURN(PSI_T_STRVAL);}
298 'PATHVAL' {RETURN(PSI_T_PATHVAL);}
299 'INTVAL' {RETURN(PSI_T_INTVAL);}
300 'FLOATVAL' {RETURN(PSI_T_FLOATVAL);}
301 'BOOLVAL' {RETURN(PSI_T_BOOLVAL);}
302 'ARRVAL' {RETURN(PSI_T_ARRVAL);}
303 'OBJVAL' {RETURN(PSI_T_OBJVAL);}
304 'CALLOC' {RETURN(PSI_T_CALLOC);}
305 'TO_OBJECT' {RETURN(PSI_T_TO_OBJECT);}
306 'TO_ARRAY' {RETURN(PSI_T_TO_ARRAY);}
307 'TO_STRING' {RETURN(PSI_T_TO_STRING);}
308 'TO_INT' {RETURN(PSI_T_TO_INT);}
309 'TO_FLOAT' {RETURN(PSI_T_TO_FLOAT);}
310 'TO_BOOL' {RETURN(PSI_T_TO_BOOL);}
311 NUMBER {RETURN(PSI_T_NUMBER);}
312 NAME {RETURN(PSI_T_NAME);}
313 NSNAME {RETURN(PSI_T_NSNAME);}
314 QUOTED_STRING {RETURN(PSI_T_QUOTED_STRING);}
315 [^] {break;}
316 */
317 }
318 return -1;
319 }