95eaf7a6f765db9793198c5226be8233f66364af
[m6w6/ext-psi] / src / token.c
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <ctype.h>
29
30 #include <Zend/zend_smart_str.h>
31
32 #include "token.h"
33 #include "parser.h"
34
35 size_t psi_token_alloc_size(size_t token_len, size_t fname_len) {
36 return sizeof(struct psi_token) + token_len + fname_len + 2;
37 }
38
39 struct psi_token *psi_token_init(token_t token_typ, const char *token_txt,
40 size_t token_len, unsigned col, unsigned line, zend_string *file)
41 {
42 struct psi_token *T;
43
44 T = calloc(1, sizeof(*T));
45 T->type = token_typ;
46 T->col = col;
47 T->line = line;
48 T->file = zend_string_copy(file);
49 T->text = zend_string_init(token_txt, token_len, 1);
50
51 return T;
52 }
53
54 void psi_token_free(struct psi_token **token_ptr) {
55 if (*token_ptr) {
56 struct psi_token *token = *token_ptr;
57
58 *token_ptr = NULL;
59 zend_string_release(token->text);
60 zend_string_release(token->file);
61 free(token);
62 }
63 }
64
65 struct psi_token *psi_token_copy(struct psi_token *src) {
66 struct psi_token *ptr = malloc(sizeof(*ptr));
67
68 *ptr = *src;
69
70 ptr->text = zend_string_copy(ptr->text);
71 ptr->file = zend_string_copy(ptr->file);
72
73 return ptr;
74 }
75
/*
 * In-place copy constructor: replace the token pointer stored in `*tok`
 * with the result of psi_token_copy() on it.
 */
void psi_token_copy_ctor(struct psi_token **tok) {
	struct psi_token *duplicate = psi_token_copy(*tok);

	*tok = duplicate;
}
79
80 /* concatenate `argc` number of tokens separated by `sep` into a newly allocated token */
81 struct psi_token *psi_token_cat(const char *sep, unsigned argc, ...) {
82 va_list argv;
83 unsigned i;
84 size_t sep_len = sep ? strlen(sep) : 0;
85 struct psi_token *T = malloc(sizeof(*T));
86 smart_str text = {0};
87
88 va_start(argv, argc);
89
90 *T = *(struct psi_token *) va_arg(argv, struct psi_token *);
91 T->type = PSI_T_NAME;
92 T->file = zend_string_copy(T->file);
93
94 for (i = 0; i < argc; ++i) {
95 struct psi_token *arg = va_arg(argv, struct psi_token *);
96
97 if (sep_len && text.a) {
98 smart_str_appendl_ex(&text, sep, sep_len, 1);
99 }
100
101 smart_str_append_ex(&text, arg->text, 1);
102 }
103 va_end(argv);
104
105 T->text = smart_str_extract(&text);
106
107 return T;
108 }
109
110 /* append `argc` number of C strings separated by `sep` to token `T` */
111 struct psi_token *psi_token_append(const char *sep, struct psi_token *T, unsigned argc, ...) {
112 va_list argv;
113 unsigned i;
114 size_t sep_len = sep ? strlen(sep) : 0;
115 smart_str text = {0};
116
117 smart_str_append_ex(&text, T->text, 1);
118
119 va_start(argv, argc);
120 for (i = 0; i < argc; ++i) {
121 char *str = va_arg(argv, char *);
122 size_t str_len = strlen(str);
123
124 if (sep_len && text.a) {
125 smart_str_appendl_ex(&text, sep, sep_len, 1);
126 }
127
128 smart_str_appendl_ex(&text, str, str_len, 1);
129 }
130 va_end(argv);
131
132 zend_string_release(T->text);
133 T->text = smart_str_extract(&text);
134
135 return T;
136 }
137
138 char *php_strtr(char *str, size_t len, char *str_from, char *str_to, size_t trlen);
139 struct psi_token *psi_token_translit(struct psi_token *T, char *from, char *to) {
140 zend_string *tmp = zend_string_init(T->text->val, T->text->len, 1);
141
142 zend_string_release(T->text);
143 T->text = tmp;
144
145 php_strtr(T->text->val, T->text->len, from, to, MIN(strlen(from), strlen(to)));
146 zend_string_forget_hash_val(T->text);
147
148 return T;
149 }
150
/*
 * Hash a NULL-terminated list of C strings.
 *
 * Starting from 5381, every byte of every string updates the hash as
 * `hash * 33 + byte`; the strings are processed in argument order as if
 * they were concatenated.
 *
 * digest_buf  if not NULL, receives the hash formatted as lowercase
 *             hexadecimal (at most 16 digits plus the terminating NUL)
 * ...         `const uint8_t *` strings, terminated by a NULL pointer
 *
 * Returns the hash value.
 */
static inline uint64_t psi_hash(char *digest_buf, ...)
{
	uint64_t acc = 5381;
	const uint8_t *str;
	va_list args;

	va_start(args, digest_buf);
	for (str = va_arg(args, const uint8_t *); str; str = va_arg(args, const uint8_t *)) {
		for (; *str; ++str) {
			acc = acc * 33 + *str;
		}
	}
	va_end(args);

	if (digest_buf) {
		sprintf(digest_buf, "%" PRIx64, acc);
	}

	return acc;
}
172
173 uint64_t psi_token_hash(struct psi_token *t, char *digest_buf) {
174 char loc_buf[48];
175
176 sprintf(digest_buf, "%u%u", t->line, t->col);
177 return psi_hash(digest_buf, t->file->val, loc_buf, (char *) NULL);
178 }
179
180 void psi_token_dump(int fd, struct psi_token *t)
181 {
182 size_t i;
183
184 dprintf(fd, "TOKEN %p (%u) ", t, t->type);
185 if (t->type == PSI_T_EOF) {
186 dprintf(fd, "EOF");
187 } else {
188 dprintf(fd, "\"");
189 for (i = 0; i < t->text->len; ++i) {
190 switch (t->text->val[i]) {
191 case '\0':
192 dprintf(fd, "\\0");
193 break;
194 case '\a':
195 dprintf(fd, "\\a");
196 break;
197 case '\b':
198 dprintf(fd, "\\b");
199 break;
200 case '\f':
201 dprintf(fd, "\\f");
202 break;
203 case '\n':
204 dprintf(fd, "\\n");
205 break;
206 case '\r':
207 dprintf(fd, "\\r");
208 break;
209 case '\t':
210 dprintf(fd, "\\t");
211 break;
212 case '\v':
213 dprintf(fd, "\\v");
214 break;
215 case '"':
216 dprintf(fd, "\\\"");
217 break;
218 default:
219 if (isprint(t->text->val[i])) {
220 dprintf(fd, "%c", t->text->val[i]);
221 } else {
222 dprintf(fd, "\\x%02hhX", t->text->val[i]);
223 }
224 break;
225 }
226 }
227 dprintf(fd, "\"");
228 }
229 dprintf(fd, " at col %u in %s on line %u\n", t->col, t->file->val, t->line);
230 }