new test
[m6w6/ext-psi] / src / token.c
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include <ctype.h>
33
34 #include <Zend/zend_smart_str.h>
35
36 #include "token.h"
37 #include "parser.h"
38
/* Token allocation tracing stays disabled unless the build defines PSI_DEBUG_TOKEN_ALLOC itself. */
#if !defined(PSI_DEBUG_TOKEN_ALLOC)
# define PSI_DEBUG_TOKEN_ALLOC 0
#endif
42
43 struct psi_token *psi_token_init(token_t token_typ, const char *token_txt,
44 size_t token_len, unsigned col, unsigned line, zend_string *file)
45 {
46 struct psi_token *T;
47
48 T = pecalloc(1, sizeof(*T), 1);
49 T->type = token_typ;
50 T->col = col;
51 T->line = line;
52 T->file = zend_string_copy(file);
53 T->text = psi_string_init_interned(token_txt, token_len, 1);
54 #if PSI_DEBUG_TOKEN_ALLOC
55 fprintf(stderr, "PSI: token_init %p\t", T);
56 psi_token_dump(NULL, T);
57 #endif
58 return T;
59 }
60
61 void psi_token_free(struct psi_token **token_ptr) {
62 if (*token_ptr) {
63 struct psi_token *token = *token_ptr;
64 #if PSI_DEBUG_TOKEN_ALLOC
65 fprintf(stderr, "PSI: token_free %p\t", token);
66 psi_token_dump(NULL, token);
67 #endif
68 *token_ptr = NULL;
69 zend_string_release(token->text);
70 zend_string_release(token->file);
71 free(token);
72 }
73 }
74
75 struct psi_token *psi_token_copy(struct psi_token *src) {
76 struct psi_token *ptr = pemalloc(sizeof(*ptr), 1);
77
78 *ptr = *src;
79 #if PSI_DEBUG_TOKEN_ALLOC
80 fprintf(stderr, "PSI: token_copy %p\t", ptr);
81 psi_token_dump(NULL, src);
82 #endif
83 ptr->text = zend_string_copy(ptr->text);
84 ptr->file = zend_string_copy(ptr->file);
85
86 return ptr;
87 }
88
/*
 * Replace the token stored in `*tok` with a duplicate made by
 * psi_token_copy(). The token previously stored there is not freed here.
 */
void psi_token_copy_ctor(struct psi_token **tok) {
	struct psi_token *original = *tok;

	*tok = psi_token_copy(original);
}
92
93 /* concatenate `argc` number of tokens separated by `sep` into a newly allocated token */
94 struct psi_token *psi_token_cat(const char *sep, unsigned argc, ...) {
95 va_list argv;
96 unsigned i;
97 size_t sep_len = sep ? strlen(sep) : 0;
98 struct psi_token *T = pemalloc(sizeof(*T), 1);
99 smart_str text = {0};
100
101 va_start(argv, argc);
102
103 *T = *(struct psi_token *) va_arg(argv, struct psi_token *);
104 T->type = PSI_T_NAME;
105 T->file = zend_string_copy(T->file);
106
107 smart_str_append_ex(&text, T->text, 1);
108
109 for (i = 1; i < argc; ++i) {
110 struct psi_token *arg = va_arg(argv, struct psi_token *);
111
112 smart_str_appendl_ex(&text, sep, sep_len, 1);
113 smart_str_append_ex(&text, arg->text, 1);
114 }
115 va_end(argv);
116
117 T->text = smart_str_extract(&text);
118
119 #if PSI_DEBUG_TOKEN_ALLOC
120 fprintf(stderr, "PSI: token_cat %p\n", T);
121 #endif
122 return T;
123 }
124
125 /* append `argc` number of C strings separated by `sep` to token `T` */
126 struct psi_token *psi_token_append(const char *sep, struct psi_token *T, unsigned argc, ...) {
127 va_list argv;
128 unsigned i;
129 size_t sep_len = sep ? strlen(sep) : 0;
130 smart_str text = {0};
131
132 smart_str_append_ex(&text, T->text, 1);
133
134 va_start(argv, argc);
135 for (i = 0; i < argc; ++i) {
136 char *str = va_arg(argv, char *);
137 size_t str_len = strlen(str);
138
139 if (sep_len && text.a) {
140 smart_str_appendl_ex(&text, sep, sep_len, 1);
141 }
142
143 smart_str_appendl_ex(&text, str, str_len, 1);
144 }
145 va_end(argv);
146
147 zend_string_release(T->text);
148 T->text = smart_str_extract(&text);
149
150 return T;
151 }
152
153 char *php_strtr(char *str, size_t len, char *str_from, char *str_to, size_t trlen);
154 struct psi_token *psi_token_translit(struct psi_token *T, char *from, char *to) {
155 zend_string *tmp = zend_string_init(T->text->val, T->text->len, 1);
156
157 zend_string_release(T->text);
158 T->text = tmp;
159
160 php_strtr(T->text->val, T->text->len, from, to, MIN(strlen(from), strlen(to)));
161 zend_string_forget_hash_val(T->text);
162
163 return T;
164 }
165
/*
 * Hash a NULL-terminated list of C strings.
 *
 * The hash starts at 5381 and every byte of every string is folded in as
 * `hash * 33 + byte`, in argument order; the terminating NUL bytes are not
 * part of the hash. The variadic argument list must end with a NULL pointer.
 *
 * When `digest_buf` is not NULL, the hash is additionally written to it as
 * lowercase hexadecimal text, so it has to provide room for up to 16 digits
 * plus the terminating NUL.
 *
 * Returns the hash value.
 */
static inline uint64_t psi_hash(char *digest_buf, ...)
{
	uint64_t acc = 5381;
	const uint8_t *bytes;
	va_list args;

	va_start(args, digest_buf);
	for (bytes = va_arg(args, const uint8_t *); bytes; bytes = va_arg(args, const uint8_t *)) {
		for (; *bytes; ++bytes) {
			acc = acc * 33 + *bytes;
		}
	}
	va_end(args);

	if (digest_buf) {
		sprintf(digest_buf, "%" PRIx64, acc);
	}

	return acc;
}
187
188 uint64_t psi_token_hash(struct psi_token *t, char *digest_buf) {
189 char loc_buf[48];
190
191 sprintf(loc_buf, "%u%u", t->line, t->col);
192 return psi_hash(digest_buf, t->file->val, loc_buf, (char *) NULL);
193 }
194
195 void psi_token_dump(struct psi_dump *dump, struct psi_token *t)
196 {
197 size_t i;
198
199 if (!t) {
200 PSI_DUMP(dump, "TOKEN deleted\n");
201 return;
202 }
203
204 PSI_DUMP(dump, "TOKEN %p (%u) ", t, t->type);
205 if (t->type == PSI_T_EOF) {
206 PSI_DUMP(dump, "EOF");
207 } else {
208 PSI_DUMP(dump, "\"");
209 for (i = 0; i < t->text->len; ++i) {
210 switch (t->text->val[i]) {
211 case '\0':
212 PSI_DUMP(dump, "\\0");
213 break;
214 case '\a':
215 PSI_DUMP(dump, "\\a");
216 break;
217 case '\b':
218 PSI_DUMP(dump, "\\b");
219 break;
220 case '\f':
221 PSI_DUMP(dump, "\\f");
222 break;
223 case '\n':
224 PSI_DUMP(dump, "\\n");
225 break;
226 case '\r':
227 PSI_DUMP(dump, "\\r");
228 break;
229 case '\t':
230 PSI_DUMP(dump, "\\t");
231 break;
232 case '\v':
233 PSI_DUMP(dump, "\\v");
234 break;
235 case '"':
236 PSI_DUMP(dump, "\\\"");
237 break;
238 default:
239 if (isprint(t->text->val[i])) {
240 PSI_DUMP(dump, "%c", t->text->val[i]);
241 } else {
242 PSI_DUMP(dump, "\\x%02hhX", t->text->val[i]);
243 }
244 break;
245 }
246 }
247 PSI_DUMP(dump, "\"");
248 }
249 PSI_DUMP(dump, " at col %u in %s on line %u\n", t->col, t->file->val, t->line);
250 }