fix token concatenation
[m6w6/ext-psi] / src / token.c
1 /*******************************************************************************
2 Copyright (c) 2016, Michael Wallner <mike@php.net>.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *******************************************************************************/
25
26 #include "php_psi_stdinc.h"
27
28 #include <ctype.h>
29
30 #include <Zend/zend_smart_str.h>
31
32 #include "token.h"
33 #include "parser.h"
34
35 size_t psi_token_alloc_size(size_t token_len, size_t fname_len) {
36 return sizeof(struct psi_token) + token_len + fname_len + 2;
37 }
38
39 struct psi_token *psi_token_init(token_t token_typ, const char *token_txt,
40 size_t token_len, unsigned col, unsigned line, zend_string *file)
41 {
42 struct psi_token *T;
43
44 T = calloc(1, sizeof(*T));
45 T->type = token_typ;
46 T->col = col;
47 T->line = line;
48 T->file = zend_string_copy(file);
49 T->text = zend_string_init(token_txt, token_len, 1);
50
51 return T;
52 }
53
54 void psi_token_free(struct psi_token **token_ptr) {
55 if (*token_ptr) {
56 struct psi_token *token = *token_ptr;
57
58 *token_ptr = NULL;
59 zend_string_release(token->text);
60 zend_string_release(token->file);
61 free(token);
62 }
63 }
64
65 struct psi_token *psi_token_copy(struct psi_token *src) {
66 struct psi_token *ptr = malloc(sizeof(*ptr));
67
68 *ptr = *src;
69
70 ptr->text = zend_string_copy(ptr->text);
71 ptr->file = zend_string_copy(ptr->file);
72
73 return ptr;
74 }
75
/*
 * In-place copy constructor: replace the token pointer stored in `*tok` with
 * a pointer to a copy made by psi_token_copy(). The original token is left
 * untouched.
 */
void psi_token_copy_ctor(struct psi_token **tok) {
	struct psi_token *original = *tok;

	*tok = psi_token_copy(original);
}
79
80 /* concatenate `argc` number of tokens separated by `sep` into a newly allocated token */
81 struct psi_token *psi_token_cat(const char *sep, unsigned argc, ...) {
82 va_list argv;
83 unsigned i;
84 size_t sep_len = sep ? strlen(sep) : 0;
85 struct psi_token *T = malloc(sizeof(*T));
86 smart_str text = {0};
87
88 va_start(argv, argc);
89
90 *T = *(struct psi_token *) va_arg(argv, struct psi_token *);
91 T->type = PSI_T_NAME;
92 T->file = zend_string_copy(T->file);
93
94 smart_str_append_ex(&text, T->text, 1);
95
96 for (i = 1; i < argc; ++i) {
97 struct psi_token *arg = va_arg(argv, struct psi_token *);
98
99 smart_str_appendl_ex(&text, sep, sep_len, 1);
100 smart_str_append_ex(&text, arg->text, 1);
101 }
102 va_end(argv);
103
104 T->text = smart_str_extract(&text);
105
106 return T;
107 }
108
109 /* append `argc` number of C strings separated by `sep` to token `T` */
110 struct psi_token *psi_token_append(const char *sep, struct psi_token *T, unsigned argc, ...) {
111 va_list argv;
112 unsigned i;
113 size_t sep_len = sep ? strlen(sep) : 0;
114 smart_str text = {0};
115
116 smart_str_append_ex(&text, T->text, 1);
117
118 va_start(argv, argc);
119 for (i = 0; i < argc; ++i) {
120 char *str = va_arg(argv, char *);
121 size_t str_len = strlen(str);
122
123 if (sep_len && text.a) {
124 smart_str_appendl_ex(&text, sep, sep_len, 1);
125 }
126
127 smart_str_appendl_ex(&text, str, str_len, 1);
128 }
129 va_end(argv);
130
131 zend_string_release(T->text);
132 T->text = smart_str_extract(&text);
133
134 return T;
135 }
136
137 char *php_strtr(char *str, size_t len, char *str_from, char *str_to, size_t trlen);
138 struct psi_token *psi_token_translit(struct psi_token *T, char *from, char *to) {
139 zend_string *tmp = zend_string_init(T->text->val, T->text->len, 1);
140
141 zend_string_release(T->text);
142 T->text = tmp;
143
144 php_strtr(T->text->val, T->text->len, from, to, MIN(strlen(from), strlen(to)));
145 zend_string_forget_hash_val(T->text);
146
147 return T;
148 }
149
/*
 * Hash a NULL-terminated list of NUL-terminated strings.
 *
 * The accumulator starts at 5381 and is updated per byte as
 * `acc = acc * 33 + byte`, running across all strings in argument order, so
 * the result only depends on the concatenated bytes.
 *
 * digest_buf: if non-NULL, receives the hash formatted as lowercase
 *             hexadecimal (PRIx64); the caller must provide enough room
 * ...:        strings to hash, terminated by a NULL pointer
 *
 * Returns the 64 bit hash value.
 */
static inline uint64_t psi_hash(char *digest_buf, ...)
{
	uint64_t acc = 5381;
	const uint8_t *str;
	va_list args;

	va_start(args, digest_buf);
	for (str = va_arg(args, const uint8_t *); str; str = va_arg(args, const uint8_t *)) {
		const uint8_t *p;

		for (p = str; *p; ++p) {
			acc = acc * 33 + *p;
		}
	}
	va_end(args);

	if (digest_buf) {
		sprintf(digest_buf, "%" PRIx64, acc);
	}

	return acc;
}
171
172 uint64_t psi_token_hash(struct psi_token *t, char *digest_buf) {
173 char loc_buf[48];
174
175 sprintf(loc_buf, "%u%u", t->line, t->col);
176 return psi_hash(digest_buf, t->file->val, loc_buf, (char *) NULL);
177 }
178
179 void psi_token_dump(int fd, struct psi_token *t)
180 {
181 size_t i;
182
183 dprintf(fd, "TOKEN %p (%u) ", t, t->type);
184 if (t->type == PSI_T_EOF) {
185 dprintf(fd, "EOF");
186 } else {
187 dprintf(fd, "\"");
188 for (i = 0; i < t->text->len; ++i) {
189 switch (t->text->val[i]) {
190 case '\0':
191 dprintf(fd, "\\0");
192 break;
193 case '\a':
194 dprintf(fd, "\\a");
195 break;
196 case '\b':
197 dprintf(fd, "\\b");
198 break;
199 case '\f':
200 dprintf(fd, "\\f");
201 break;
202 case '\n':
203 dprintf(fd, "\\n");
204 break;
205 case '\r':
206 dprintf(fd, "\\r");
207 break;
208 case '\t':
209 dprintf(fd, "\\t");
210 break;
211 case '\v':
212 dprintf(fd, "\\v");
213 break;
214 case '"':
215 dprintf(fd, "\\\"");
216 break;
217 default:
218 if (isprint(t->text->val[i])) {
219 dprintf(fd, "%c", t->text->val[i]);
220 } else {
221 dprintf(fd, "\\x%02hhX", t->text->val[i]);
222 }
223 break;
224 }
225 }
226 dprintf(fd, "\"");
227 }
228 dprintf(fd, " at col %u in %s on line %u\n", t->col, t->file->val, t->line);
229 }