cpp: token stringification and pasting
[m6w6/ext-psi] / src / token.c
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <ctype.h>
29
30 #include "token.h"
31 #include "parser.h"
32
33 size_t psi_token_alloc_size(size_t token_len, size_t fname_len) {
34 return sizeof(struct psi_token) + token_len + fname_len + 2;
35 }
36
37 struct psi_token *psi_token_alloc(struct psi_parser *P) {
38 struct psi_token *T;
39 size_t token_len, fname_len;
40 token_t token_typ;
41
42 if (P->cur < P->tok) {
43 return NULL;
44 }
45
46 token_typ = P->num;
47 token_len = P->cur - P->tok;
48 fname_len = strlen(P->file.fn);
49
50 T = calloc(1, psi_token_alloc_size(token_len, fname_len));
51 T->type = token_typ;
52 T->size = token_len;
53 T->text = &T->buf[0];
54 T->file = &T->buf[token_len + 1];
55 T->line = P->line;
56 T->col = P->col;
57
58 memcpy(T->text, P->tok, token_len);
59 memcpy(T->file, P->file.fn, fname_len);
60
61 return T;
62 }
63
/*
 * Release the token *token_ptr refers to and reset *token_ptr to NULL.
 * Does nothing when *token_ptr is already NULL.
 */
void psi_token_free(struct psi_token **token_ptr) {
	struct psi_token *doomed = *token_ptr;

	if (!doomed) {
		return;
	}

	/* clear the caller's handle before handing the memory back */
	*token_ptr = NULL;
	free(doomed);
}
72
73 struct psi_token *psi_token_copy(struct psi_token *src) {
74 size_t strct_len = psi_token_alloc_size(src->size, strlen(src->file));
75 struct psi_token *ptr = malloc(strct_len);
76
77 memcpy(ptr, src, strct_len);
78
79 ptr->text = &ptr->buf[0];
80 ptr->file = &ptr->buf[ptr->size + 1];
81
82 return ptr;
83 }
84
/*
 * Replace the token stored in *tok with the result of psi_token_copy().
 * The token previously stored in *tok is not freed here.
 */
void psi_token_copy_ctor(struct psi_token **tok) {
	struct psi_token *duplicate = psi_token_copy(*tok);

	*tok = duplicate;
}
88
89 struct psi_token *psi_token_cat(const char *sep, unsigned argc, ...) {
90 va_list argv;
91 unsigned i;
92 size_t sep_len = sep ? strlen(sep) : 0;
93 struct psi_token *T = NULL;
94
95 va_start(argv, argc);
96 for (i = 0; i < argc; ++i) {
97 struct psi_token *arg = va_arg(argv, struct psi_token *);
98
99 if (T) {
100 size_t token_len = T->size, fname_len = strlen(T->file);
101 struct psi_token *tmp = realloc(T, psi_token_alloc_size(T->size += arg->size + sep_len, fname_len));
102
103 if (tmp) {
104 T = tmp;
105 } else {
106 free(T);
107 va_end(argv);
108 return NULL;
109 }
110
111 T->text = &T->buf[0];
112 T->file = &T->buf[T->size + 1];
113 memmove(&T->buf[T->size + 1], &T->buf[token_len + sep_len], fname_len + 1);
114 memcpy(&T->buf[token_len], sep, sep_len);
115 memcpy(&T->buf[token_len + sep_len], arg->text, arg->size + 1);
116 } else {
117 T = psi_token_copy(arg);
118 T->type = PSI_T_NAME;
119 }
120 }
121 va_end(argv);
122
123 return T;
124 }
125
126 struct psi_token *psi_token_prepend(const char *sep, struct psi_token *T, unsigned argc, ...) {
127 va_list argv;
128 unsigned i;
129 size_t sep_len = sep ? strlen(sep) : 0;
130
131 va_start(argv, argc);
132 for (i = 0; i < argc; ++i) {
133 char *str = va_arg(argv, char *);
134 size_t str_len = strlen(str), token_len = T->size, fname_len = strlen(T->file);
135
136 T = realloc(T, psi_token_alloc_size(T->size += str_len + sep_len, fname_len));
137 T->text = &T->buf[0];
138 T->file = &T->buf[T->size + 1];
139 memmove(&T->buf[str_len + sep_len], &T->buf[0], T->size + 1 + fname_len + 1);
140 memcpy(&T->buf[0], str, str_len);
141 memcpy(&T->buf[str_len], sep, sep_len);
142 T->buf[T->size] = '\0';
143 }
144 va_end(argv);
145
146 return T;
147 }
148 struct psi_token *psi_token_append(const char *sep, struct psi_token *T, unsigned argc, ...) {
149 va_list argv;
150 unsigned i;
151 size_t sep_len = sep ? strlen(sep) : 0;
152
153 va_start(argv, argc);
154 for (i = 0; i < argc; ++i) {
155 char *str = va_arg(argv, char *);
156 size_t str_len = strlen(str), token_len = T->size, fname_len = strlen(T->file);
157
158 T = realloc(T, psi_token_alloc_size(T->size += str_len + sep_len, fname_len));
159 T->text = &T->buf[0];
160 T->file = &T->buf[T->size + 1];
161 memmove(&T->buf[T->size + 1], &T->buf[token_len + sep_len], fname_len + 1);
162 memcpy(&T->buf[token_len], sep, sep_len);
163 memcpy(&T->buf[token_len + sep_len], str, str_len + 1);
164 }
165 va_end(argv);
166
167 return T;
168 }
169
170 char *php_strtr(char *str, size_t len, char *str_from, char *str_to, size_t trlen);
171 struct psi_token *psi_token_translit(struct psi_token *T, char *from, char *to) {
172 php_strtr(T->text, T->size, from, to, MIN(strlen(from), strlen(to)));
173 return T;
174 }
175
/*
 * Hash a NULL terminated list of C strings as if they were one string.
 *
 * The hash starts at 5381 and is updated as hash = hash * 33 + byte for
 * every byte of every string (djb2), bytes being taken as unsigned values.
 *
 * digest_buf  if not NULL, receives the hash as lower case hexadecimal
 *             string; a 64 bit value needs up to 16 digits plus the NUL,
 *             so the buffer must hold at least 17 bytes
 * ...         char * strings to hash, ended by a (char *) NULL
 *
 * Returns the hash value.
 */
static inline uint64_t psi_hash(char *digest_buf, ...)
{
	uint64_t hash = 5381;
	char *str;
	va_list argv;

	va_start(argv, digest_buf);
	/* fetch the type callers really pass; reinterpret the bytes afterwards */
	while ((str = va_arg(argv, char *))) {
		const unsigned char *ptr = (const unsigned char *) str;

		while (*ptr) {
			hash = ((hash << 5) + hash) + *ptr++;
		}
	}
	va_end(argv);

	if (digest_buf) {
		sprintf(digest_buf, "%" PRIx64, hash);
	}

	return hash;
}
197
198 uint64_t psi_token_hash(struct psi_token *t, char *digest_buf) {
199 char loc_buf[48];
200
201 sprintf(loc_buf, "%u%u", t->line, t->col);
202 return psi_hash(digest_buf, t->file, loc_buf, (char *) NULL);
203 }
204
205 void psi_token_dump(int fd, struct psi_token *t)
206 {
207 size_t i;
208
209 dprintf(fd, "TOKEN %p (%d) \"", t, t->type);
210 for (i = 0; i < MIN(t->size, 16); ++i) {
211 switch (t->text[i]) {
212 case '\0':
213 dprintf(fd, "\\0");
214 break;
215 case '\a':
216 dprintf(fd, "\\a");
217 break;
218 case '\b':
219 dprintf(fd, "\\b");
220 break;
221 case '\f':
222 dprintf(fd, "\\f");
223 break;
224 case '\n':
225 dprintf(fd, "\\n");
226 break;
227 case '\r':
228 dprintf(fd, "\\r");
229 break;
230 case '\t':
231 dprintf(fd, "\\t");
232 break;
233 case '\v':
234 dprintf(fd, "\\v");
235 break;
236 case '"':
237 dprintf(fd, "\\\"");
238 break;
239 default:
240 if (isprint(t->text[i])) {
241 dprintf(fd, "%c", t->text[i]);
242 } else {
243 dprintf(fd, "\\%03hho", t->text[i]);
244 }
245 break;
246 }
247 }
248 dprintf(fd, "\" at col %u in %s on line %u\n", t->col, t->file, t->line);
249 }