da02ff5f765237b76ba79d477be637c588691f7c
[m6w6/ext-http] / php_http_header_parser.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 #include "php_http_api.h"
14
15 #ifndef DBG_PARSER
16 # define DBG_PARSER 0
17 #endif
18
19 typedef struct php_http_header_parser_state_spec {
20 php_http_header_parser_state_t state;
21 unsigned need_data:1;
22 } php_http_header_parser_state_spec_t;
23
24 static const php_http_header_parser_state_spec_t php_http_header_parser_states[] = {
25 {PHP_HTTP_HEADER_PARSER_STATE_START, 1},
26 {PHP_HTTP_HEADER_PARSER_STATE_KEY, 1},
27 {PHP_HTTP_HEADER_PARSER_STATE_VALUE, 1},
28 {PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX, 0},
29 {PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE, 0},
30 {PHP_HTTP_HEADER_PARSER_STATE_DONE, 0}
31 };
32
33 php_http_header_parser_t *php_http_header_parser_init(php_http_header_parser_t *parser TSRMLS_DC)
34 {
35 if (!parser) {
36 parser = emalloc(sizeof(*parser));
37 }
38 memset(parser, 0, sizeof(*parser));
39
40 TSRMLS_SET_CTX(parser->ts);
41
42 return parser;
43 }
44
45 php_http_header_parser_state_t php_http_header_parser_state_push(php_http_header_parser_t *parser, unsigned argc, ...)
46 {
47 va_list va_args;
48 unsigned i;
49 php_http_header_parser_state_t state = 0;
50
51 /* short circuit */
52 ZEND_PTR_STACK_RESIZE_IF_NEEDED((&parser->stack), argc);
53
54 va_start(va_args, argc);
55 for (i = 0; i < argc; ++i) {
56 state = va_arg(va_args, php_http_header_parser_state_t);
57 zend_ptr_stack_push(&parser->stack, (void *) state);
58 }
59 va_end(va_args);
60
61 return state;
62 }
63
64 php_http_header_parser_state_t php_http_header_parser_state_is(php_http_header_parser_t *parser)
65 {
66 if (parser->stack.top) {
67 return (php_http_header_parser_state_t) parser->stack.elements[parser->stack.top - 1];
68 }
69
70 return PHP_HTTP_HEADER_PARSER_STATE_START;
71 }
72
73 php_http_header_parser_state_t php_http_header_parser_state_pop(php_http_header_parser_t *parser)
74 {
75 if (parser->stack.top) {
76 return (php_http_header_parser_state_t) zend_ptr_stack_pop(&parser->stack);
77 }
78
79 return PHP_HTTP_HEADER_PARSER_STATE_START;
80 }
81
82 void php_http_header_parser_dtor(php_http_header_parser_t *parser)
83 {
84 zend_ptr_stack_destroy(&parser->stack);
85 php_http_info_dtor(&parser->info);
86 PTR_FREE(parser->_key.str);
87 PTR_FREE(parser->_val.str);
88 }
89
90 void php_http_header_parser_free(php_http_header_parser_t **parser)
91 {
92 if (*parser) {
93 php_http_header_parser_dtor(*parser);
94 efree(*parser);
95 *parser = NULL;
96 }
97 }
98
99 STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg)
100 {
101 TSRMLS_FETCH_FROM_CTX(parser->ts);
102
103 while (buffer->used || !php_http_header_parser_states[php_http_header_parser_state_is(parser)].need_data) {
104 #if DBG_PARSER
105 const char *state[] = {"START", "KEY", "VALUE", "VALUE_EX", "HEADER_DONE", "DONE"};
106 fprintf(stderr, "#HP: %s (avail:%zu, num:%d cleanup:%u)\n", php_http_header_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_header_parser_state_is(parser)], buffer->used, headers?zend_hash_num_elements(headers):0, flags);
107 _dpf(0, buffer->data, buffer->used);
108 #endif
109 switch (php_http_header_parser_state_pop(parser)) {
110 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
111 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
112
113 case PHP_HTTP_HEADER_PARSER_STATE_START: {
114 char *ptr = buffer->data;
115
116 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
117 ++ptr;
118 }
119
120 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
121 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
122 break;
123 }
124
125 case PHP_HTTP_HEADER_PARSER_STATE_KEY: {
126 const char *colon, *eol_str = NULL;
127 int eol_len = 0;
128
129 if (buffer->data == (eol_str = php_http_locate_bin_eol(buffer->data, buffer->used, &eol_len))) {
130 /* end of headers */
131 php_http_buffer_cut(buffer, 0, eol_len);
132 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_DONE);
133 } else if (php_http_info_parse(&parser->info, php_http_buffer_fix(buffer)->data TSRMLS_CC)) {
134 /* new message starting with request/response line */
135 if (callback_func) {
136 callback_func(callback_arg, &headers, &parser->info TSRMLS_CC);
137 }
138 php_http_info_dtor(&parser->info);
139 php_http_buffer_cut(buffer, 0, eol_str + eol_len - buffer->data);
140 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
141 } else if ((colon = memchr(buffer->data, ':', buffer->used)) && (!eol_str || eol_str > colon)) {
142 /* header: string */
143 parser->_key.str = estrndup(buffer->data, parser->_key.len = colon - buffer->data);
144 while (PHP_HTTP_IS_CTYPE(space, *++colon) && *colon != '\n' && *colon != '\r');
145 php_http_buffer_cut(buffer, 0, colon - buffer->data);
146 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
147 } else if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) {
148 /* neither reqeust/response line nor header: string */
149 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
150 } else {
151 /* keep feeding */
152 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
153 }
154 break;
155 }
156
157 case PHP_HTTP_HEADER_PARSER_STATE_VALUE: {
158 const char *eol_str;
159 int eol_len;
160
161 #define SET_ADD_VAL(slen, eol_len) \
162 do { \
163 const char *ptr = buffer->data; \
164 size_t len = slen; \
165 \
166 while (len > 0 && PHP_HTTP_IS_CTYPE(space, *ptr)) { \
167 ++ptr; \
168 --len; \
169 } \
170 while (len > 0 && PHP_HTTP_IS_CTYPE(space, ptr[len - 1])) { \
171 --len; \
172 } \
173 \
174 if (len > 0) { \
175 if (parser->_val.str) { \
176 parser->_val.str = erealloc(parser->_val.str, parser->_val.len + len + 2); \
177 parser->_val.str[parser->_val.len++] = ' '; \
178 memcpy(&parser->_val.str[parser->_val.len], ptr, len); \
179 parser->_val.len += len; \
180 parser->_val.str[parser->_val.len] = '\0'; \
181 } else { \
182 parser->_val.len = len; \
183 parser->_val.str = estrndup(ptr, len); \
184 } \
185 } \
186 php_http_buffer_cut(buffer, 0, slen + eol_len); \
187 } while (0)
188
189 if ((eol_str = php_http_locate_bin_eol(buffer->data, buffer->used, &eol_len))) {
190 SET_ADD_VAL(eol_str - buffer->data, eol_len);
191 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
192 } else if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) {
193 if (buffer->used) {
194 SET_ADD_VAL(buffer->used, 0);
195 }
196 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
197 } else {
198 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
199 }
200 break;
201 }
202
203 case PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX:
204 if (buffer->used && (*buffer->data == ' ' || *buffer->data == '\t')) {
205 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
206 } else if (buffer->used || (flags & PHP_HTTP_HEADER_PARSER_CLEANUP)) {
207 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
208 } else {
209 /* keep feeding */
210 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
211 }
212 break;
213
214 case PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE:
215 if (parser->_key.str && parser->_val.str) {
216 zval array, **exist;
217
218 if (!headers && callback_func) {
219 callback_func(callback_arg, &headers, NULL TSRMLS_CC);
220 }
221
222 INIT_PZVAL_ARRAY(&array, headers);
223 php_http_pretty_key(parser->_key.str, parser->_key.len, 1, 1);
224 if (SUCCESS == zend_symtable_find(headers, parser->_key.str, parser->_key.len + 1, (void *) &exist)) {
225 convert_to_array(*exist);
226 add_next_index_stringl(*exist, parser->_val.str, parser->_val.len, 0);
227 } else {
228 add_assoc_stringl_ex(&array, parser->_key.str, parser->_key.len + 1, parser->_val.str, parser->_val.len, 0);
229 }
230 parser->_val.str = NULL;
231 }
232
233 PTR_SET(parser->_key.str, NULL);
234 PTR_SET(parser->_val.str, NULL);
235
236 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
237 break;
238
239 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
240 return PHP_HTTP_HEADER_PARSER_STATE_DONE;
241 }
242 }
243
244 return php_http_header_parser_state_is(parser);
245 }
246
247
248 zend_class_entry *php_http_header_parser_class_entry;
249 static zend_object_handlers php_http_header_parser_object_handlers;
250
251 zend_object_value php_http_header_parser_object_new(zend_class_entry *ce TSRMLS_DC)
252 {
253 return php_http_header_parser_object_new_ex(ce, NULL, NULL TSRMLS_CC);
254 }
255
256 zend_object_value php_http_header_parser_object_new_ex(zend_class_entry *ce, php_http_header_parser_t *parser, php_http_header_parser_object_t **ptr TSRMLS_DC)
257 {
258 php_http_header_parser_object_t *o;
259
260 o = ecalloc(1, sizeof(php_http_header_parser_object_t));
261 zend_object_std_init((zend_object *) o, ce TSRMLS_CC);
262 object_properties_init((zend_object *) o, ce);
263
264 if (ptr) {
265 *ptr = o;
266 }
267
268 if (parser) {
269 o->parser = parser;
270 } else {
271 o->parser = php_http_header_parser_init(NULL TSRMLS_CC);
272 }
273 o->buffer = php_http_buffer_new();
274
275 o->zv.handle = zend_objects_store_put((zend_object *) o, NULL, php_http_header_parser_object_free, NULL TSRMLS_CC);
276 o->zv.handlers = &php_http_header_parser_object_handlers;
277
278 return o->zv;
279 }
280
281 void php_http_header_parser_object_free(void *object TSRMLS_DC)
282 {
283 php_http_header_parser_object_t *o = (php_http_header_parser_object_t *) object;
284
285 if (o->parser) {
286 php_http_header_parser_free(&o->parser);
287 }
288 if (o->buffer) {
289 php_http_buffer_free(&o->buffer);
290 }
291 zend_object_std_dtor((zend_object *) o TSRMLS_CC);
292 efree(o);
293 }
294
295 ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_getState, 0, 0, 0)
296 ZEND_END_ARG_INFO();
297 static PHP_METHOD(HttpHeaderParser, getState)
298 {
299 php_http_header_parser_object_t *parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC);
300
301 zend_parse_parameters_none();
302 /* always return the real state */
303 RETVAL_LONG(php_http_header_parser_state_is(parser_obj->parser));
304 }
305
306 ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_parse, 0, 0, 3)
307 ZEND_ARG_INFO(0, data)
308 ZEND_ARG_INFO(0, flags)
309 ZEND_ARG_ARRAY_INFO(1, headers, 1)
310 ZEND_END_ARG_INFO();
311 static PHP_METHOD(HttpHeaderParser, parse)
312 {
313 php_http_header_parser_object_t *parser_obj;
314 zval *zmsg;
315 char *data_str;
316 int data_len;
317 long flags;
318
319 php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "slz", &data_str, &data_len, &flags, &zmsg), invalid_arg, return);
320
321 if (Z_TYPE_P(zmsg) != IS_ARRAY) {
322 zval_dtor(zmsg);
323 array_init(zmsg);
324 }
325 parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC);
326 php_http_buffer_append(parser_obj->buffer, data_str, data_len);
327 RETVAL_LONG(php_http_header_parser_parse(parser_obj->parser, parser_obj->buffer, flags, Z_ARRVAL_P(zmsg), NULL, NULL));
328 }
329
330
331 static zend_function_entry php_http_header_parser_methods[] = {
332 PHP_ME(HttpHeaderParser, getState, ai_HttpHeaderParser_getState, ZEND_ACC_PUBLIC)
333 PHP_ME(HttpHeaderParser, parse, ai_HttpHeaderParser_parse, ZEND_ACC_PUBLIC)
334 {NULL, NULL, NULL}
335 };
336
337 PHP_MINIT_FUNCTION(http_header_parser)
338 {
339 zend_class_entry ce;
340
341 INIT_NS_CLASS_ENTRY(ce, "http\\Header", "Parser", php_http_header_parser_methods);
342 php_http_header_parser_class_entry = zend_register_internal_class(&ce TSRMLS_CC);
343 memcpy(&php_http_header_parser_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers));
344 php_http_header_parser_class_entry->create_object = php_http_header_parser_object_new;
345 php_http_header_parser_object_handlers.clone_obj = NULL;
346
347 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("CLEANUP"), PHP_HTTP_HEADER_PARSER_CLEANUP TSRMLS_CC);
348
349 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_FAILURE"), PHP_HTTP_HEADER_PARSER_STATE_FAILURE TSRMLS_CC);
350 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_START"), PHP_HTTP_HEADER_PARSER_STATE_START TSRMLS_CC);
351 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_KEY"), PHP_HTTP_HEADER_PARSER_STATE_KEY TSRMLS_CC);
352 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE"), PHP_HTTP_HEADER_PARSER_STATE_VALUE TSRMLS_CC);
353 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE_EX"), PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX TSRMLS_CC);
354 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_HEADER_DONE"), PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE TSRMLS_CC);
355 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_DONE"), PHP_HTTP_HEADER_PARSER_STATE_DONE TSRMLS_CC);
356
357 return SUCCESS;
358 }
359
360 /*
361 * Local variables:
362 * tab-width: 4
363 * c-basic-offset: 4
364 * End:
365 * vim600: noet sw=4 ts=4 fdm=marker
366 * vim<600: noet sw=4 ts=4
367 */
368