Merge branch 'master' into phpng
[m6w6/ext-http] / php_http_header_parser.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 #include "php_http_api.h"
14
15 #ifndef DBG_PARSER
16 # define DBG_PARSER 0
17 #endif
18
19 typedef struct php_http_header_parser_state_spec {
20 php_http_header_parser_state_t state;
21 unsigned need_data:1;
22 } php_http_header_parser_state_spec_t;
23
24 static const php_http_header_parser_state_spec_t php_http_header_parser_states[] = {
25 {PHP_HTTP_HEADER_PARSER_STATE_START, 1},
26 {PHP_HTTP_HEADER_PARSER_STATE_KEY, 1},
27 {PHP_HTTP_HEADER_PARSER_STATE_VALUE, 1},
28 {PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX, 0},
29 {PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE, 0},
30 {PHP_HTTP_HEADER_PARSER_STATE_DONE, 0}
31 };
32
33 php_http_header_parser_t *php_http_header_parser_init(php_http_header_parser_t *parser)
34 {
35 if (!parser) {
36 parser = emalloc(sizeof(*parser));
37 }
38 memset(parser, 0, sizeof(*parser));
39
40 return parser;
41 }
42
43 php_http_header_parser_state_t php_http_header_parser_state_push(php_http_header_parser_t *parser, unsigned argc, ...)
44 {
45 va_list va_args;
46 unsigned i;
47 php_http_header_parser_state_t state = 0;
48
49 /* short circuit */
50 ZEND_PTR_STACK_RESIZE_IF_NEEDED((&parser->stack), argc);
51
52 va_start(va_args, argc);
53 for (i = 0; i < argc; ++i) {
54 state = va_arg(va_args, php_http_header_parser_state_t);
55 zend_ptr_stack_push(&parser->stack, (void *) state);
56 }
57 va_end(va_args);
58
59 return state;
60 }
61
62 php_http_header_parser_state_t php_http_header_parser_state_is(php_http_header_parser_t *parser)
63 {
64 if (parser->stack.top) {
65 return (php_http_header_parser_state_t) parser->stack.elements[parser->stack.top - 1];
66 }
67
68 return PHP_HTTP_HEADER_PARSER_STATE_START;
69 }
70
71 php_http_header_parser_state_t php_http_header_parser_state_pop(php_http_header_parser_t *parser)
72 {
73 if (parser->stack.top) {
74 return (php_http_header_parser_state_t) zend_ptr_stack_pop(&parser->stack);
75 }
76
77 return PHP_HTTP_HEADER_PARSER_STATE_START;
78 }
79
80 void php_http_header_parser_dtor(php_http_header_parser_t *parser)
81 {
82 zend_ptr_stack_destroy(&parser->stack);
83 php_http_info_dtor(&parser->info);
84 PTR_FREE(parser->_key.str);
85 PTR_FREE(parser->_val.str);
86 }
87
88 void php_http_header_parser_free(php_http_header_parser_t **parser)
89 {
90 if (*parser) {
91 php_http_header_parser_dtor(*parser);
92 efree(*parser);
93 *parser = NULL;
94 }
95 }
96
97 /* NOTE: 'str' has to be null terminated */
98 static void php_http_header_parser_error(size_t valid_len, char *str, size_t len, const char *eol_str )
99 {
100 zend_string *escaped_str = zend_string_init(str, len, 0);
101
102 escaped_str = php_addcslashes(escaped_str, 1, ZEND_STRL("\x0..\x1F\x7F..\xFF"));
103
104 if (valid_len != len && (!eol_str || (str+valid_len) != eol_str)) {
105 php_error_docref(NULL, E_WARNING, "Failed to parse headers: unexpected character '\\%03o' at pos %zu of '%s'", str[valid_len], valid_len, escaped_str->val);
106 } else if (eol_str) {
107 php_error_docref(NULL, E_WARNING, "Failed to parse headers: unexpected end of line at pos %zu of '%s'", eol_str - str, escaped_str->val);
108 } else {
109 php_error_docref(NULL, E_WARNING, "Failed to parse headers: unexpected end of input at pos %zu of '%s'", len, escaped_str->val);
110 }
111
112 efree(escaped_str);
113 }
114
115 php_http_header_parser_state_t php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg)
116 {
117 while (buffer->used || !php_http_header_parser_states[php_http_header_parser_state_is(parser)].need_data) {
118 #if DBG_PARSER
119 const char *state[] = {"START", "KEY", "VALUE", "VALUE_EX", "HEADER_DONE", "DONE"};
120 fprintf(stderr, "#HP: %s (avail:%zu, num:%d cleanup:%u)\n", php_http_header_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_header_parser_state_is(parser)], buffer->used, headers?zend_hash_num_elements(headers):0, flags);
121 _dpf(0, buffer->data, buffer->used);
122 #endif
123 switch (php_http_header_parser_state_pop(parser)) {
124 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
125 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse headers");
126 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
127
128 case PHP_HTTP_HEADER_PARSER_STATE_START: {
129 char *ptr = buffer->data;
130
131 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
132 ++ptr;
133 }
134
135 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
136 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
137 break;
138 }
139
140 case PHP_HTTP_HEADER_PARSER_STATE_KEY: {
141 const char *colon, *eol_str = NULL;
142 int eol_len = 0;
143
144 if (buffer->data == (eol_str = php_http_locate_bin_eol(buffer->data, buffer->used, &eol_len))) {
145 /* end of headers */
146 php_http_buffer_cut(buffer, 0, eol_len);
147 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_DONE);
148 } else if (php_http_info_parse(&parser->info, php_http_buffer_fix(buffer)->data)) {
149 /* new message starting with request/response line */
150 if (callback_func) {
151 callback_func(callback_arg, &headers, &parser->info);
152 }
153 php_http_info_dtor(&parser->info);
154 php_http_buffer_cut(buffer, 0, eol_str + eol_len - buffer->data);
155 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
156 } else if ((colon = memchr(buffer->data, ':', buffer->used)) && (!eol_str || eol_str > colon)) {
157 /* header: string */
158 size_t valid_len;
159
160 parser->_key.len = colon - buffer->data;
161 parser->_key.str = estrndup(buffer->data, parser->_key.len);
162
163 valid_len = strspn(parser->_key.str, PHP_HTTP_HEADER_NAME_CHARS);
164 if (valid_len != parser->_key.len) {
165 php_http_header_parser_error(valid_len, parser->_key.str, parser->_key.len, eol_str TSRMLS_CC);
166 PTR_SET(parser->_key.str, NULL);
167 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
168 }
169 while (PHP_HTTP_IS_CTYPE(space, *++colon) && *colon != '\n' && *colon != '\r');
170 php_http_buffer_cut(buffer, 0, colon - buffer->data);
171 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
172 } else if (eol_str || (flags & PHP_HTTP_HEADER_PARSER_CLEANUP)) {
173 /* neither reqeust/response line nor 'header:' string, or injected new line or NUL etc. */
174 php_http_buffer_fix(buffer);
175 php_http_header_parser_error(strspn(buffer->data, PHP_HTTP_HEADER_NAME_CHARS), buffer->data, buffer->used, eol_str TSRMLS_CC);
176 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
177 } else {
178 /* keep feeding */
179 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
180 }
181 break;
182 }
183
184 case PHP_HTTP_HEADER_PARSER_STATE_VALUE: {
185 const char *eol_str;
186 int eol_len;
187
188 #define SET_ADD_VAL(slen, eol_len) \
189 do { \
190 const char *ptr = buffer->data; \
191 size_t len = slen; \
192 \
193 while (len > 0 && PHP_HTTP_IS_CTYPE(space, *ptr)) { \
194 ++ptr; \
195 --len; \
196 } \
197 while (len > 0 && PHP_HTTP_IS_CTYPE(space, ptr[len - 1])) { \
198 --len; \
199 } \
200 \
201 if (len > 0) { \
202 if (parser->_val.str) { \
203 parser->_val.str = erealloc(parser->_val.str, parser->_val.len + len + 2); \
204 parser->_val.str[parser->_val.len++] = ' '; \
205 memcpy(&parser->_val.str[parser->_val.len], ptr, len); \
206 parser->_val.len += len; \
207 parser->_val.str[parser->_val.len] = '\0'; \
208 } else { \
209 parser->_val.len = len; \
210 parser->_val.str = estrndup(ptr, len); \
211 } \
212 } \
213 php_http_buffer_cut(buffer, 0, slen + eol_len); \
214 } while (0)
215
216 if ((eol_str = php_http_locate_bin_eol(buffer->data, buffer->used, &eol_len))) {
217 SET_ADD_VAL(eol_str - buffer->data, eol_len);
218 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
219 } else if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) {
220 if (buffer->used) {
221 SET_ADD_VAL(buffer->used, 0);
222 }
223 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
224 } else {
225 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
226 }
227 break;
228 }
229
230 case PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX:
231 if (buffer->used && (*buffer->data == ' ' || *buffer->data == '\t')) {
232 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
233 } else if (buffer->used || (flags & PHP_HTTP_HEADER_PARSER_CLEANUP)) {
234 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
235 } else {
236 /* keep feeding */
237 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
238 }
239 break;
240
241 case PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE:
242 if (parser->_key.str && parser->_val.str) {
243 zval tmp, *exist;
244 size_t valid_len = strlen(parser->_val.str);
245
246 /* check for truncation */
247 if (valid_len != parser->_val.len) {
248 php_http_header_parser_error(valid_len, parser->_val.str, parser->_val.len, NULL TSRMLS_CC);
249
250 PTR_SET(parser->_key.str, NULL);
251 PTR_SET(parser->_val.str, NULL);
252
253 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
254 }
255
256 if (!headers && callback_func) {
257 callback_func(callback_arg, &headers, NULL);
258 }
259
260 php_http_pretty_key(parser->_key.str, parser->_key.len, 1, 1);
261 if ((exist = zend_symtable_str_find(headers, parser->_key.str, parser->_key.len))) {
262 convert_to_array(exist);
263 add_next_index_str(exist, php_http_cs2zs(parser->_val.str, parser->_val.len));
264 } else {
265 ZVAL_STR(&tmp, php_http_cs2zs(parser->_val.str, parser->_val.len));
266 zend_symtable_str_update(headers, parser->_key.str, parser->_key.len, &tmp);
267 }
268 parser->_val.str = NULL;
269 }
270
271 PTR_SET(parser->_key.str, NULL);
272 PTR_SET(parser->_val.str, NULL);
273
274 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
275 break;
276
277 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
278 return PHP_HTTP_HEADER_PARSER_STATE_DONE;
279 }
280 }
281
282 return php_http_header_parser_state_is(parser);
283 }
284
285 php_http_header_parser_state_t php_http_header_parser_parse_stream(php_http_header_parser_t *parser, php_http_buffer_t *buf, php_stream *s, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg)
286 {
287 php_http_header_parser_state_t state = PHP_HTTP_HEADER_PARSER_STATE_START;
288 TSRMLS_FETCH_FROM_CTX(parser->ts);
289
290 if (!buf->data) {
291 php_http_buffer_resize_ex(buf, 0x1000, 1, 0);
292 }
293 while (1) {
294 size_t justread = 0;
295 #if DBG_PARSER
296 const char *states[] = {"START", "KEY", "VALUE", "VALUE_EX", "HEADER_DONE", "DONE"};
297 fprintf(stderr, "#SHP: %s (f:%u)\n", states[state], flags);
298 #endif
299 /* resize if needed */
300 if (buf->free < 0x1000) {
301 php_http_buffer_resize_ex(buf, 0x1000, 1, 0);
302 }
303 switch (state) {
304 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
305 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
306 return state;
307
308 default:
309 /* read line */
310 php_stream_get_line(s, buf->data + buf->used, buf->free, &justread);
311 /* if we fail reading a whole line, try a single char */
312 if (!justread) {
313 int c = php_stream_getc(s);
314
315 if (c != EOF) {
316 char s[1] = {c};
317 justread = php_http_buffer_append(buf, s, 1);
318 }
319 }
320 php_http_buffer_account(buf, justread);
321 }
322
323 if (justread) {
324 state = php_http_header_parser_parse(parser, buf, flags, headers, callback_func, callback_arg);
325 } else if (php_stream_eof(s)) {
326 return php_http_header_parser_parse(parser, buf, flags | PHP_HTTP_HEADER_PARSER_CLEANUP, headers, callback_func, callback_arg);
327 } else {
328 return state;
329 }
330 }
331
332 return PHP_HTTP_HEADER_PARSER_STATE_DONE;
333 }
334
335 zend_class_entry *php_http_header_parser_class_entry;
336 static zend_object_handlers php_http_header_parser_object_handlers;
337
338 zend_object *php_http_header_parser_object_new(zend_class_entry *ce)
339 {
340 return &php_http_header_parser_object_new_ex(ce, NULL)->zo;
341 }
342
343 php_http_header_parser_object_t *php_http_header_parser_object_new_ex(zend_class_entry *ce, php_http_header_parser_t *parser)
344 {
345 php_http_header_parser_object_t *o;
346
347 o = ecalloc(1, sizeof(php_http_header_parser_object_t) + zend_object_properties_size(ce));
348 zend_object_std_init(&o->zo, ce);
349 object_properties_init(&o->zo, ce);
350
351 if (parser) {
352 o->parser = parser;
353 } else {
354 o->parser = php_http_header_parser_init(NULL);
355 }
356 o->buffer = php_http_buffer_new();
357
358 o->zo.handlers = &php_http_header_parser_object_handlers;
359
360 return o;
361 }
362
363 void php_http_header_parser_object_free(zend_object *object)
364 {
365 php_http_header_parser_object_t *o = PHP_HTTP_OBJ(object, NULL);
366
367 if (o->parser) {
368 php_http_header_parser_free(&o->parser);
369 }
370 if (o->buffer) {
371 php_http_buffer_free(&o->buffer);
372 }
373 zend_object_std_dtor(object);
374 }
375
376 ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_getState, 0, 0, 0)
377 ZEND_END_ARG_INFO();
378 static PHP_METHOD(HttpHeaderParser, getState)
379 {
380 php_http_header_parser_object_t *parser_obj = PHP_HTTP_OBJ(NULL, getThis());
381
382 zend_parse_parameters_none();
383 /* always return the real state */
384 RETVAL_LONG(php_http_header_parser_state_is(parser_obj->parser));
385 }
386
387 ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_parse, 0, 0, 3)
388 ZEND_ARG_INFO(0, data)
389 ZEND_ARG_INFO(0, flags)
390 ZEND_ARG_ARRAY_INFO(1, headers, 1)
391 ZEND_END_ARG_INFO();
392 static PHP_METHOD(HttpHeaderParser, parse)
393 {
394 php_http_header_parser_object_t *parser_obj;
395 zval *zmsg;
396 char *data_str;
397 size_t data_len;
398 zend_long flags;
399
400 php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS(), "slz", &data_str, &data_len, &flags, &zmsg), invalid_arg, return);
401
402 ZVAL_DEREF(zmsg);
403 if (Z_TYPE_P(zmsg) != IS_ARRAY) {
404 zval_dtor(zmsg);
405 array_init(zmsg);
406 }
407 parser_obj = PHP_HTTP_OBJ(NULL, getThis());
408 php_http_buffer_append(parser_obj->buffer, data_str, data_len);
409 RETVAL_LONG(php_http_header_parser_parse(parser_obj->parser, parser_obj->buffer, flags, Z_ARRVAL_P(zmsg), NULL, NULL));
410 }
411
412 ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_stream, 0, 0, 3)
413 ZEND_ARG_INFO(0, stream)
414 ZEND_ARG_INFO(0, flags)
415 ZEND_ARG_ARRAY_INFO(1, headers, 1)
416 ZEND_END_ARG_INFO();
417 static PHP_METHOD(HttpHeaderParser, stream)
418 {
419 php_http_header_parser_object_t *parser_obj;
420 zend_error_handling zeh;
421 zval *zmsg, *zstream;
422 php_stream *s;
423 zend_long flags;
424
425 php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS(), "rlz", &zstream, &flags, &zmsg), invalid_arg, return);
426
427 zend_replace_error_handling(EH_THROW, php_http_exception_unexpected_val_class_entry, &zeh);
428 php_stream_from_zval(s, zstream);
429 zend_restore_error_handling(&zeh);
430
431 ZVAL_DEREF(zmsg);
432 if (Z_TYPE_P(zmsg) != IS_ARRAY) {
433 zval_dtor(zmsg);
434 array_init(zmsg);
435 }
436 parser_obj = PHP_HTTP_OBJ(NULL, getThis());
437 RETVAL_LONG(php_http_header_parser_parse_stream(parser_obj->parser, parser_obj->buffer, s, flags, Z_ARRVAL_P(zmsg), NULL, NULL));
438 }
439
440 static zend_function_entry php_http_header_parser_methods[] = {
441 PHP_ME(HttpHeaderParser, getState, ai_HttpHeaderParser_getState, ZEND_ACC_PUBLIC)
442 PHP_ME(HttpHeaderParser, parse, ai_HttpHeaderParser_parse, ZEND_ACC_PUBLIC)
443 PHP_ME(HttpHeaderParser, stream, ai_HttpHeaderParser_stream, ZEND_ACC_PUBLIC)
444 {NULL, NULL, NULL}
445 };
446
447 PHP_MINIT_FUNCTION(http_header_parser)
448 {
449 zend_class_entry ce;
450
451 INIT_NS_CLASS_ENTRY(ce, "http\\Header", "Parser", php_http_header_parser_methods);
452 php_http_header_parser_class_entry = zend_register_internal_class(&ce);
453 memcpy(&php_http_header_parser_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers));
454 php_http_header_parser_class_entry->create_object = php_http_header_parser_object_new;
455 php_http_header_parser_object_handlers.offset = XtOffsetOf(php_http_header_parser_object_t, zo);
456 php_http_header_parser_object_handlers.clone_obj = NULL;
457 php_http_header_parser_object_handlers.free_obj = php_http_header_parser_object_free;
458
459 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("CLEANUP"), PHP_HTTP_HEADER_PARSER_CLEANUP);
460
461 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_FAILURE"), PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
462 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_START"), PHP_HTTP_HEADER_PARSER_STATE_START);
463 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_KEY"), PHP_HTTP_HEADER_PARSER_STATE_KEY);
464 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE"), PHP_HTTP_HEADER_PARSER_STATE_VALUE);
465 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE_EX"), PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
466 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_HEADER_DONE"), PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
467 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_DONE"), PHP_HTTP_HEADER_PARSER_STATE_DONE);
468
469 return SUCCESS;
470 }
471
472 /*
473 * Local variables:
474 * tab-width: 4
475 * c-basic-offset: 4
476 * End:
477 * vim600: noet sw=4 ts=4 fdm=marker
478 * vim<600: noet sw=4 ts=4
479 */
480