f3beb2cf39f6a6b72676c214ec7b12e53ed80450
[m6w6/ext-psi] / src / token.c
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include <ctype.h>
33
34 #include <Zend/zend_smart_str.h>
35
36 #include "token.h"
37 #include "parser.h"
38
39 #ifndef PSI_DEBUG_TOKEN_ALLOC
40 # define PSI_DEBUG_TOKEN_ALLOC 0
41 #endif
42
43 struct psi_token *psi_token_init(token_t token_typ, const char *token_txt,
44 size_t token_len, unsigned col, unsigned line, zend_string *file)
45 {
46 struct psi_token *T;
47
48 T = pecalloc(1, sizeof(*T), 1);
49 T->type = token_typ;
50 T->col = col;
51 T->line = line;
52 T->file = zend_string_copy(file);
53 T->text = psi_string_init_interned(token_txt, token_len, 1);
54 #if PSI_DEBUG_TOKEN_ALLOC
55 fprintf(stderr, "PSI: token_init %p\t", T);
56 psi_token_dump(NULL, T);
57 #endif
58 return T;
59 }
60
61 void psi_token_free(struct psi_token **token_ptr) {
62 if (*token_ptr) {
63 struct psi_token *token = *token_ptr;
64 #if PSI_DEBUG_TOKEN_ALLOC
65 fprintf(stderr, "PSI: token_free %p\t", token);
66 psi_token_dump(NULL, token);
67 #endif
68 *token_ptr = NULL;
69 zend_string_release(token->text);
70 zend_string_release(token->file);
71 free(token);
72 }
73 }
74
75 struct psi_token *psi_token_copy(struct psi_token *src) {
76 struct psi_token *ptr = pemalloc(sizeof(*ptr), 1);
77
78 *ptr = *src;
79 #if PSI_DEBUG_TOKEN_ALLOC
80 fprintf(stderr, "PSI: token_copy %p\t", ptr);
81 psi_token_dump(NULL, src);
82 #endif
83 ptr->text = zend_string_copy(ptr->text);
84 ptr->file = zend_string_copy(ptr->file);
85
86 return ptr;
87 }
88
/*
 * In-place copy constructor: replace the token pointer stored in `*tok`
 * with a duplicate created by psi_token_copy().
 */
void psi_token_copy_ctor(struct psi_token **tok) {
	struct psi_token *original = *tok;

	*tok = psi_token_copy(original);
}
92
93 /* concatenate `argc` number of tokens separated by `sep` into a newly allocated token */
94 struct psi_token *psi_token_cat(const char *sep, unsigned argc, ...) {
95 va_list argv;
96 unsigned i;
97 size_t sep_len = sep ? strlen(sep) : 0;
98 struct psi_token *T = pemalloc(sizeof(*T), 1);
99 smart_str text = {0};
100
101 va_start(argv, argc);
102
103 *T = *(struct psi_token *) va_arg(argv, struct psi_token *);
104 T->type = PSI_T_NAME;
105 T->file = zend_string_copy(T->file);
106
107 smart_str_append_ex(&text, T->text, 1);
108
109 for (i = 1; i < argc; ++i) {
110 struct psi_token *arg = va_arg(argv, struct psi_token *);
111
112 smart_str_appendl_ex(&text, sep, sep_len, 1);
113 smart_str_append_ex(&text, arg->text, 1);
114 }
115 va_end(argv);
116
117 T->text = smart_str_extract(&text);
118
119 #if PSI_DEBUG_TOKEN_ALLOC
120 fprintf(stderr, "PSI: token_cat %p\n", T);
121 #endif
122 return T;
123 }
124
125 /* append `argc` number of C strings separated by `sep` to token `T` */
126 struct psi_token *psi_token_append(const char *sep, struct psi_token *T, unsigned argc, ...) {
127 va_list argv;
128 unsigned i;
129 size_t sep_len = sep ? strlen(sep) : 0;
130 smart_str text = {0};
131
132 smart_str_append_ex(&text, T->text, 1);
133
134 va_start(argv, argc);
135 for (i = 0; i < argc; ++i) {
136 char *str = va_arg(argv, char *);
137 size_t str_len = strlen(str);
138
139 if (sep_len && text.a) {
140 smart_str_appendl_ex(&text, sep, sep_len, 1);
141 }
142
143 smart_str_appendl_ex(&text, str, str_len, 1);
144 }
145 va_end(argv);
146
147 zend_string_release(T->text);
148 T->text = smart_str_extract(&text);
149
150 return T;
151 }
152
153 char *php_strtr(char *str, size_t len, char *str_from, char *str_to, size_t trlen);
154 struct psi_token *psi_token_translit(struct psi_token *T, char *from, char *to) {
155 zend_string *tmp = zend_string_init(T->text->val, T->text->len, 1);
156
157 zend_string_release(T->text);
158 T->text = tmp;
159
160 php_strtr(T->text->val, T->text->len, from, to, MIN(strlen(from), strlen(to)));
161 zend_string_forget_hash_val(T->text);
162
163 return T;
164 }
165
/*
 * Hash a NULL-terminated list of C strings.
 *
 * The hash starts at 5381 and, for every byte of every string in order, is
 * multiplied by 33 and increased by the byte value (the scheme commonly known
 * as djb2). The variadic list must end with a NULL pointer.
 *
 * digest_buf  when not NULL, receives the hash as lower-case hexadecimal text;
 *             a 64 bit value needs at most 16 digits plus the terminating NUL
 *
 * Returns the hash value.
 */
static inline uint64_t psi_hash(char *digest_buf, ...)
{
	uint64_t acc = 5381;
	const uint8_t *cursor;
	va_list strings;

	va_start(strings, digest_buf);
	for (cursor = va_arg(strings, const uint8_t *); cursor != NULL;
			cursor = va_arg(strings, const uint8_t *)) {
		for (; *cursor != 0; ++cursor) {
			acc = acc * 33 + *cursor;
		}
	}
	va_end(strings);

	if (digest_buf != NULL) {
		sprintf(digest_buf, "%" PRIx64, acc);
	}

	return acc;
}
187
188 uint64_t psi_token_hash(struct psi_token *t, char *digest_buf) {
189 char loc_buf[48];
190
191 sprintf(loc_buf, "%u%u", t->line, t->col);
192 return psi_hash(digest_buf, t->file->val, loc_buf, (char *) NULL);
193 }
194
195 void psi_token_dump(struct psi_dump *dump, struct psi_token *t)
196 {
197 size_t i;
198
199 PSI_DUMP(dump, "TOKEN %p (%u) ", t, t->type);
200 if (t->type == PSI_T_EOF) {
201 PSI_DUMP(dump, "EOF");
202 } else {
203 PSI_DUMP(dump, "\"");
204 for (i = 0; i < t->text->len; ++i) {
205 switch (t->text->val[i]) {
206 case '\0':
207 PSI_DUMP(dump, "\\0");
208 break;
209 case '\a':
210 PSI_DUMP(dump, "\\a");
211 break;
212 case '\b':
213 PSI_DUMP(dump, "\\b");
214 break;
215 case '\f':
216 PSI_DUMP(dump, "\\f");
217 break;
218 case '\n':
219 PSI_DUMP(dump, "\\n");
220 break;
221 case '\r':
222 PSI_DUMP(dump, "\\r");
223 break;
224 case '\t':
225 PSI_DUMP(dump, "\\t");
226 break;
227 case '\v':
228 PSI_DUMP(dump, "\\v");
229 break;
230 case '"':
231 PSI_DUMP(dump, "\\\"");
232 break;
233 default:
234 if (isprint(t->text->val[i])) {
235 PSI_DUMP(dump, "%c", t->text->val[i]);
236 } else {
237 PSI_DUMP(dump, "\\x%02hhX", t->text->val[i]);
238 }
239 break;
240 }
241 }
242 PSI_DUMP(dump, "\"");
243 }
244 PSI_DUMP(dump, " at col %u in %s on line %u\n", t->col, t->file->val, t->line);
245 }