832b040a08665db86a534f600e90693a1a979b71
[m6w6/ext-psi] / src / token.c
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <ctype.h>
29
30 #include <Zend/zend_smart_str.h>
31
32 #include "token.h"
33 #include "parser.h"
34
35 #ifndef PSI_DEBUG_TOKEN_ALLOC
36 # define PSI_DEBUG_TOKEN_ALLOC 0
37 #endif
38
39 struct psi_token *psi_token_init(token_t token_typ, const char *token_txt,
40 size_t token_len, unsigned col, unsigned line, zend_string *file)
41 {
42 struct psi_token *T;
43
44 T = pecalloc(1, sizeof(*T), 1);
45 T->type = token_typ;
46 T->col = col;
47 T->line = line;
48 T->file = zend_string_copy(file);
49 T->text = psi_string_init_interned(token_txt, token_len, 1);
50 #if PSI_DEBUG_TOKEN_ALLOC
51 PSI_DEBUG_PRINT(cpp->parser, "PSI: token_init %p\n", T);
52 #endif
53 return T;
54 }
55
56 void psi_token_free(struct psi_token **token_ptr) {
57 if (*token_ptr) {
58 struct psi_token *token = *token_ptr;
59 #if PSI_DEBUG_TOKEN_ALLOC
60 PSI_DEBUG_PRINT(cpp->parser, "PSI: token_free %p\n", token);
61 #endif
62 *token_ptr = NULL;
63 zend_string_release(token->text);
64 zend_string_release(token->file);
65 free(token);
66 }
67 }
68
69 struct psi_token *psi_token_copy(struct psi_token *src) {
70 struct psi_token *ptr = pemalloc(sizeof(*ptr), 1);
71
72 *ptr = *src;
73 #if PSI_DEBUG_TOKEN_ALLOC
74 PSI_DEBUG_PRINT(cpp->parser, "PSI: token_copy %p <= %p\n", ptr, src);
75 #endif
76 ptr->text = zend_string_copy(ptr->text);
77 ptr->file = zend_string_copy(ptr->file);
78
79 return ptr;
80 }
81
/*
 * In-place copy constructor: replace the token pointer stored at `tok` with a
 * pointer to a fresh copy made by psi_token_copy(). The token previously
 * referenced is not released here.
 */
void psi_token_copy_ctor(struct psi_token **tok) {
	struct psi_token *original = *tok;

	*tok = psi_token_copy(original);
}
85
86 /* concatenate `argc` number of tokens separated by `sep` into a newly allocated token */
87 struct psi_token *psi_token_cat(const char *sep, unsigned argc, ...) {
88 va_list argv;
89 unsigned i;
90 size_t sep_len = sep ? strlen(sep) : 0;
91 struct psi_token *T = pemalloc(sizeof(*T), 1);
92 smart_str text = {0};
93
94 va_start(argv, argc);
95
96 *T = *(struct psi_token *) va_arg(argv, struct psi_token *);
97 T->type = PSI_T_NAME;
98 T->file = zend_string_copy(T->file);
99
100 smart_str_append_ex(&text, T->text, 1);
101
102 for (i = 1; i < argc; ++i) {
103 struct psi_token *arg = va_arg(argv, struct psi_token *);
104
105 smart_str_appendl_ex(&text, sep, sep_len, 1);
106 smart_str_append_ex(&text, arg->text, 1);
107 }
108 va_end(argv);
109
110 T->text = smart_str_extract(&text);
111
112 #if PSI_DEBUG_TOKEN_ALLOC
113 PSI_DEBUG_PRINT(cpp->parser, "PSI: token_cat %p\n", T);
114 #endif
115 return T;
116 }
117
118 /* append `argc` number of C strings separated by `sep` to token `T` */
119 struct psi_token *psi_token_append(const char *sep, struct psi_token *T, unsigned argc, ...) {
120 va_list argv;
121 unsigned i;
122 size_t sep_len = sep ? strlen(sep) : 0;
123 smart_str text = {0};
124
125 smart_str_append_ex(&text, T->text, 1);
126
127 va_start(argv, argc);
128 for (i = 0; i < argc; ++i) {
129 char *str = va_arg(argv, char *);
130 size_t str_len = strlen(str);
131
132 if (sep_len && text.a) {
133 smart_str_appendl_ex(&text, sep, sep_len, 1);
134 }
135
136 smart_str_appendl_ex(&text, str, str_len, 1);
137 }
138 va_end(argv);
139
140 zend_string_release(T->text);
141 T->text = smart_str_extract(&text);
142
143 return T;
144 }
145
146 char *php_strtr(char *str, size_t len, char *str_from, char *str_to, size_t trlen);
147 struct psi_token *psi_token_translit(struct psi_token *T, char *from, char *to) {
148 zend_string *tmp = zend_string_init(T->text->val, T->text->len, 1);
149
150 zend_string_release(T->text);
151 T->text = tmp;
152
153 php_strtr(T->text->val, T->text->len, from, to, MIN(strlen(from), strlen(to)));
154 zend_string_forget_hash_val(T->text);
155
156 return T;
157 }
158
/*
 * Hash a NULL-terminated list of C strings.
 *
 * Starting from 5381, every byte of every string is folded in as
 * `hash = hash * 33 + byte`; the strings are hashed as if concatenated.
 * The variadic arguments are string pointers and the list must end with a
 * NULL pointer.
 *
 * If `digest_buf` is not NULL the hash is additionally written to it as
 * lowercase hexadecimal without padding. No size is passed, so the buffer
 * must hold at least 17 bytes (16 digits plus the terminator).
 *
 * Returns the 64 bit hash value.
 */
static inline uint64_t psi_hash(char *digest_buf, ...)
{
	uint64_t acc = 5381;
	const uint8_t *chunk;
	va_list ap;

	va_start(ap, digest_buf);
	for (chunk = va_arg(ap, const uint8_t *); chunk != NULL;
			chunk = va_arg(ap, const uint8_t *)) {
		/* walk the string up to, not including, its terminator */
		for (; *chunk != 0; ++chunk) {
			acc = acc * 33 + *chunk;
		}
	}
	va_end(ap);

	if (digest_buf != NULL) {
		sprintf(digest_buf, "%" PRIx64, acc);
	}

	return acc;
}
180
181 uint64_t psi_token_hash(struct psi_token *t, char *digest_buf) {
182 char loc_buf[48];
183
184 sprintf(loc_buf, "%u%u", t->line, t->col);
185 return psi_hash(digest_buf, t->file->val, loc_buf, (char *) NULL);
186 }
187
188 void psi_token_dump(struct psi_dump *dump, struct psi_token *t)
189 {
190 size_t i;
191
192 PSI_DUMP(dump, "TOKEN %p (%u) ", t, t->type);
193 if (t->type == PSI_T_EOF) {
194 PSI_DUMP(dump, "EOF");
195 } else {
196 PSI_DUMP(dump, "\"");
197 for (i = 0; i < t->text->len; ++i) {
198 switch (t->text->val[i]) {
199 case '\0':
200 PSI_DUMP(dump, "\\0");
201 break;
202 case '\a':
203 PSI_DUMP(dump, "\\a");
204 break;
205 case '\b':
206 PSI_DUMP(dump, "\\b");
207 break;
208 case '\f':
209 PSI_DUMP(dump, "\\f");
210 break;
211 case '\n':
212 PSI_DUMP(dump, "\\n");
213 break;
214 case '\r':
215 PSI_DUMP(dump, "\\r");
216 break;
217 case '\t':
218 PSI_DUMP(dump, "\\t");
219 break;
220 case '\v':
221 PSI_DUMP(dump, "\\v");
222 break;
223 case '"':
224 PSI_DUMP(dump, "\\\"");
225 break;
226 default:
227 if (isprint(t->text->val[i])) {
228 PSI_DUMP(dump, "%c", t->text->val[i]);
229 } else {
230 PSI_DUMP(dump, "\\x%02hhX", t->text->val[i]);
231 }
232 break;
233 }
234 }
235 PSI_DUMP(dump, "\"");
236 }
237 PSI_DUMP(dump, " at col %u in %s on line %u\n", t->col, t->file->val, t->line);
238 }