flush
[m6w6/ext-psi] / src / token.c
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #else
29 # include "php_config.h"
30 #endif
31
32 #include <ctype.h>
33
34 #include <Zend/zend_smart_str.h>
35
36 #include "token.h"
37 #include "parser.h"
38
39 #ifndef PSI_DEBUG_TOKEN_ALLOC
40 # define PSI_DEBUG_TOKEN_ALLOC 0
41 #endif
42
43 struct psi_token *psi_token_init(token_t token_typ, const char *token_txt,
44 size_t token_len, unsigned col, unsigned line, zend_string *file)
45 {
46 struct psi_token *T;
47
48 T = pecalloc(1, sizeof(*T), 1);
49 T->type = token_typ;
50 T->col = col;
51 T->line = line;
52 T->file = zend_string_copy(file);
53 T->text = psi_string_init_interned(token_txt, token_len, 1);
54 #if PSI_DEBUG_TOKEN_ALLOC
55 PSI_DEBUG_PRINT(cpp->parser, "PSI: token_init %p\n", T);
56 #endif
57 return T;
58 }
59
60 void psi_token_free(struct psi_token **token_ptr) {
61 if (*token_ptr) {
62 struct psi_token *token = *token_ptr;
63 #if PSI_DEBUG_TOKEN_ALLOC
64 PSI_DEBUG_PRINT(cpp->parser, "PSI: token_free %p\n", token);
65 #endif
66 *token_ptr = NULL;
67 zend_string_release(token->text);
68 zend_string_release(token->file);
69 free(token);
70 }
71 }
72
73 struct psi_token *psi_token_copy(struct psi_token *src) {
74 struct psi_token *ptr = pemalloc(sizeof(*ptr), 1);
75
76 *ptr = *src;
77 #if PSI_DEBUG_TOKEN_ALLOC
78 PSI_DEBUG_PRINT(cpp->parser, "PSI: token_copy %p <= %p\n", ptr, src);
79 #endif
80 ptr->text = zend_string_copy(ptr->text);
81 ptr->file = zend_string_copy(ptr->file);
82
83 return ptr;
84 }
85
/*
 * In-place copy constructor: replace the token pointer stored at `tok`
 * with the result of psi_token_copy() on the token it pointed to.
 */
void psi_token_copy_ctor(struct psi_token **tok) {
	struct psi_token *dup = psi_token_copy(*tok);

	*tok = dup;
}
89
90 /* concatenate `argc` number of tokens separated by `sep` into a newly allocated token */
91 struct psi_token *psi_token_cat(const char *sep, unsigned argc, ...) {
92 va_list argv;
93 unsigned i;
94 size_t sep_len = sep ? strlen(sep) : 0;
95 struct psi_token *T = pemalloc(sizeof(*T), 1);
96 smart_str text = {0};
97
98 va_start(argv, argc);
99
100 *T = *(struct psi_token *) va_arg(argv, struct psi_token *);
101 T->type = PSI_T_NAME;
102 T->file = zend_string_copy(T->file);
103
104 smart_str_append_ex(&text, T->text, 1);
105
106 for (i = 1; i < argc; ++i) {
107 struct psi_token *arg = va_arg(argv, struct psi_token *);
108
109 smart_str_appendl_ex(&text, sep, sep_len, 1);
110 smart_str_append_ex(&text, arg->text, 1);
111 }
112 va_end(argv);
113
114 T->text = smart_str_extract(&text);
115
116 #if PSI_DEBUG_TOKEN_ALLOC
117 PSI_DEBUG_PRINT(cpp->parser, "PSI: token_cat %p\n", T);
118 #endif
119 return T;
120 }
121
122 /* append `argc` number of C strings separated by `sep` to token `T` */
123 struct psi_token *psi_token_append(const char *sep, struct psi_token *T, unsigned argc, ...) {
124 va_list argv;
125 unsigned i;
126 size_t sep_len = sep ? strlen(sep) : 0;
127 smart_str text = {0};
128
129 smart_str_append_ex(&text, T->text, 1);
130
131 va_start(argv, argc);
132 for (i = 0; i < argc; ++i) {
133 char *str = va_arg(argv, char *);
134 size_t str_len = strlen(str);
135
136 if (sep_len && text.a) {
137 smart_str_appendl_ex(&text, sep, sep_len, 1);
138 }
139
140 smart_str_appendl_ex(&text, str, str_len, 1);
141 }
142 va_end(argv);
143
144 zend_string_release(T->text);
145 T->text = smart_str_extract(&text);
146
147 return T;
148 }
149
150 char *php_strtr(char *str, size_t len, char *str_from, char *str_to, size_t trlen);
151 struct psi_token *psi_token_translit(struct psi_token *T, char *from, char *to) {
152 zend_string *tmp = zend_string_init(T->text->val, T->text->len, 1);
153
154 zend_string_release(T->text);
155 T->text = tmp;
156
157 php_strtr(T->text->val, T->text->len, from, to, MIN(strlen(from), strlen(to)));
158 zend_string_forget_hash_val(T->text);
159
160 return T;
161 }
162
/*
 * Hash a NULL-terminated list of NUL-terminated byte strings.
 *
 * Starting from 5381, every byte of every string (in argument order) is
 * folded in as `hash = hash * 33 + byte`, with 64-bit unsigned wrap-around.
 * The variadic list MUST end with a null pointer, which stops the iteration.
 *
 * If `digest_buf` is non-NULL, the hash is additionally written to it as
 * lowercase hexadecimal text; the caller must supply a buffer large enough
 * for up to 16 hex digits plus the terminating NUL.
 *
 * Returns the computed hash.
 */
static inline uint64_t psi_hash(char *digest_buf, ...)
{
	uint64_t acc = 5381;
	va_list parts;

	va_start(parts, digest_buf);
	for (;;) {
		const uint8_t *cur = va_arg(parts, const uint8_t *);

		if (!cur) {
			break;
		}
		/* hash * 33 is the same as (hash << 5) + hash */
		for (; *cur; ++cur) {
			acc = acc * 33 + *cur;
		}
	}
	va_end(parts);

	if (digest_buf) {
		sprintf(digest_buf, "%" PRIx64, acc);
	}

	return acc;
}
184
185 uint64_t psi_token_hash(struct psi_token *t, char *digest_buf) {
186 char loc_buf[48];
187
188 sprintf(loc_buf, "%u%u", t->line, t->col);
189 return psi_hash(digest_buf, t->file->val, loc_buf, (char *) NULL);
190 }
191
192 void psi_token_dump(struct psi_dump *dump, struct psi_token *t)
193 {
194 size_t i;
195
196 PSI_DUMP(dump, "TOKEN %p (%u) ", t, t->type);
197 if (t->type == PSI_T_EOF) {
198 PSI_DUMP(dump, "EOF");
199 } else {
200 PSI_DUMP(dump, "\"");
201 for (i = 0; i < t->text->len; ++i) {
202 switch (t->text->val[i]) {
203 case '\0':
204 PSI_DUMP(dump, "\\0");
205 break;
206 case '\a':
207 PSI_DUMP(dump, "\\a");
208 break;
209 case '\b':
210 PSI_DUMP(dump, "\\b");
211 break;
212 case '\f':
213 PSI_DUMP(dump, "\\f");
214 break;
215 case '\n':
216 PSI_DUMP(dump, "\\n");
217 break;
218 case '\r':
219 PSI_DUMP(dump, "\\r");
220 break;
221 case '\t':
222 PSI_DUMP(dump, "\\t");
223 break;
224 case '\v':
225 PSI_DUMP(dump, "\\v");
226 break;
227 case '"':
228 PSI_DUMP(dump, "\\\"");
229 break;
230 default:
231 if (isprint(t->text->val[i])) {
232 PSI_DUMP(dump, "%c", t->text->val[i]);
233 } else {
234 PSI_DUMP(dump, "\\x%02hhX", t->text->val[i]);
235 }
236 break;
237 }
238 }
239 PSI_DUMP(dump, "\"");
240 }
241 PSI_DUMP(dump, " at col %u in %s on line %u\n", t->col, t->file->val, t->line);
242 }