ffi: fix buffer overrun when padding struct storage
[m6w6/ext-psi] / src / token.c
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <ctype.h>
29
30 #include "token.h"
31 #include "parser.h"
32
33 size_t psi_token_alloc_size(size_t token_len, size_t fname_len) {
34 return sizeof(struct psi_token) + token_len + fname_len + 2;
35 }
36
37 struct psi_token *psi_token_init(token_t token_typ, const char *token_txt,
38 size_t token_len, unsigned col, unsigned line, const char *file)
39 {
40 struct psi_token *T;
41 size_t file_len = strlen(file);
42
43 T = calloc(1, psi_token_alloc_size(token_len, file_len));
44 T->type = token_typ;
45 T->size = token_len;
46 T->text = &T->buf[0];
47 T->file = &T->buf[token_len + 1];
48 T->line = line;
49 T->col = col;
50
51 memcpy(T->text, token_txt, token_len);
52 memcpy(T->file, file, file_len);
53
54 return T;
55 }
56
/*
 * Release the token referenced by *token_ptr and reset *token_ptr to NULL.
 * Does nothing when *token_ptr is already NULL.
 */
void psi_token_free(struct psi_token **token_ptr) {
	struct psi_token *doomed = *token_ptr;

	if (!doomed) {
		return;
	}

	/* clear the caller's pointer before the memory goes away */
	*token_ptr = NULL;
	free(doomed);
}
65
66 struct psi_token *psi_token_copy(struct psi_token *src) {
67 size_t strct_len = psi_token_alloc_size(src->size, strlen(src->file));
68 struct psi_token *ptr = malloc(strct_len);
69
70 memcpy(ptr, src, strct_len);
71
72 ptr->text = &ptr->buf[0];
73 ptr->file = &ptr->buf[ptr->size + 1];
74
75 return ptr;
76 }
77
/*
 * Replace the token pointer stored in *tok with a pointer to a fresh copy
 * made by psi_token_copy(). The token previously referenced is not freed.
 */
void psi_token_copy_ctor(struct psi_token **tok) {
	struct psi_token *duplicate = psi_token_copy(*tok);

	*tok = duplicate;
}
81
82 struct psi_token *psi_token_cat(const char *sep, unsigned argc, ...) {
83 va_list argv;
84 unsigned i;
85 size_t sep_len = sep ? strlen(sep) : 0;
86 struct psi_token *T = NULL;
87
88 va_start(argv, argc);
89 for (i = 0; i < argc; ++i) {
90 struct psi_token *arg = va_arg(argv, struct psi_token *);
91
92 if (T) {
93 size_t token_len = T->size, fname_len = strlen(T->file);
94 struct psi_token *tmp = realloc(T, psi_token_alloc_size(T->size += arg->size + sep_len, fname_len));
95
96 if (tmp) {
97 T = tmp;
98 } else {
99 free(T);
100 va_end(argv);
101 return NULL;
102 }
103
104 T->text = &T->buf[0];
105 T->file = &T->buf[T->size + 1];
106 memmove(&T->buf[T->size + 1], &T->buf[token_len + 1], fname_len + 1);
107 memcpy(&T->buf[token_len], sep, sep_len);
108 memcpy(&T->buf[token_len + sep_len], arg->text, arg->size + 1);
109 } else {
110 T = psi_token_copy(arg);
111 T->type = PSI_T_NAME;
112 }
113 }
114 va_end(argv);
115
116 return T;
117 }
118
119 struct psi_token *psi_token_prepend(const char *sep, struct psi_token *T, unsigned argc, ...) {
120 va_list argv;
121 unsigned i;
122 size_t sep_len = sep ? strlen(sep) : 0;
123
124 va_start(argv, argc);
125 for (i = 0; i < argc; ++i) {
126 char *str = va_arg(argv, char *);
127 size_t str_len = strlen(str), token_len = T->size, fname_len = strlen(T->file);
128
129 T = realloc(T, psi_token_alloc_size(T->size += str_len + sep_len, fname_len));
130 T->text = &T->buf[0];
131 T->file = &T->buf[T->size + 1];
132 memmove(&T->buf[str_len + sep_len], &T->buf[0], token_len + 1 + fname_len + 1);
133 memcpy(&T->buf[0], str, str_len);
134 memcpy(&T->buf[str_len], sep, sep_len);
135 T->buf[T->size] = '\0';
136 }
137 va_end(argv);
138
139 return T;
140 }
141 struct psi_token *psi_token_append(const char *sep, struct psi_token *T, unsigned argc, ...) {
142 va_list argv;
143 unsigned i;
144 size_t sep_len = sep ? strlen(sep) : 0;
145
146 va_start(argv, argc);
147 for (i = 0; i < argc; ++i) {
148 char *str = va_arg(argv, char *);
149 size_t str_len = strlen(str), token_len = T->size, fname_len = strlen(T->file);
150
151 T = realloc(T, psi_token_alloc_size(T->size += str_len + sep_len, fname_len));
152 T->text = &T->buf[0];
153 T->file = &T->buf[T->size + 1];
154 memmove(&T->buf[T->size + 1], &T->buf[token_len + 1], fname_len + 1);
155 memcpy(&T->buf[token_len], sep, sep_len);
156 memcpy(&T->buf[token_len + sep_len], str, str_len + 1);
157 }
158 va_end(argv);
159
160 return T;
161 }
162
163 char *php_strtr(char *str, size_t len, char *str_from, char *str_to, size_t trlen);
164 struct psi_token *psi_token_translit(struct psi_token *T, char *from, char *to) {
165 php_strtr(T->text, T->size, from, to, MIN(strlen(from), strlen(to)));
166 return T;
167 }
168
/*
 * Hash a NULL-terminated list of NUL-terminated strings as if they were
 * one concatenated string: start at 5381 and, for every byte, multiply the
 * running value by 33 and add the byte (taken as unsigned).
 *
 * digest_buf: if not NULL, receives the hash as lowercase hexadecimal text;
 *             the caller must provide room for up to 16 digits plus NUL
 * ...:        string pointers, ended by a NULL pointer
 *
 * Returns the 64-bit hash value.
 */
static inline uint64_t psi_hash(char *digest_buf, ...)
{
	uint64_t acc = 5381;
	const uint8_t *cursor;
	va_list strings;

	va_start(strings, digest_buf);
	for (cursor = va_arg(strings, const uint8_t *); cursor != NULL;
			cursor = va_arg(strings, const uint8_t *)) {
		for (; *cursor != 0; ++cursor) {
			/* acc * 33 equals (acc << 5) + acc in 64-bit unsigned arithmetic */
			acc = acc * 33 + *cursor;
		}
	}
	va_end(strings);

	if (digest_buf != NULL) {
		sprintf(digest_buf, "%" PRIx64, acc);
	}

	return acc;
}
190
191 uint64_t psi_token_hash(struct psi_token *t, char *digest_buf) {
192 char loc_buf[48];
193
194 sprintf(loc_buf, "%u%u", t->line, t->col);
195 return psi_hash(digest_buf, t->file, loc_buf, (char *) NULL);
196 }
197
198 void psi_token_dump(int fd, struct psi_token *t)
199 {
200 size_t i;
201
202 dprintf(fd, "TOKEN %p (%d) ", t, t->type);
203 if (t->type == PSI_T_EOF) {
204 dprintf(fd, "EOF");
205 } else {
206 dprintf(fd, "\"");
207 for (i = 0; i < t->size; ++i) {
208 switch (t->text[i]) {
209 case '\0':
210 dprintf(fd, "\\0");
211 break;
212 case '\a':
213 dprintf(fd, "\\a");
214 break;
215 case '\b':
216 dprintf(fd, "\\b");
217 break;
218 case '\f':
219 dprintf(fd, "\\f");
220 break;
221 case '\n':
222 dprintf(fd, "\\n");
223 break;
224 case '\r':
225 dprintf(fd, "\\r");
226 break;
227 case '\t':
228 dprintf(fd, "\\t");
229 break;
230 case '\v':
231 dprintf(fd, "\\v");
232 break;
233 case '"':
234 dprintf(fd, "\\\"");
235 break;
236 default:
237 if (isprint(t->text[i])) {
238 dprintf(fd, "%c", t->text[i]);
239 } else {
240 dprintf(fd, "\\x%02hhX", t->text[i]);
241 }
242 break;
243 }
244 }
245 dprintf(fd, "\"");
246 }
247 dprintf(fd, " at col %u in %s on line %u\n", t->col, t->file, t->line);
248 }