attempt to implement some personal standards
[m6w6/ext-http] / src / php_http_header_parser.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 #include "php_http_api.h"
14
/* Parser tracing to stderr stays disabled unless the build defines DBG_PARSER itself. */
#if !defined(DBG_PARSER)
#	define DBG_PARSER 0
#endif
18
19 typedef struct php_http_header_parser_state_spec {
20 php_http_header_parser_state_t state;
21 unsigned need_data:1;
22 } php_http_header_parser_state_spec_t;
23
24 static const php_http_header_parser_state_spec_t php_http_header_parser_states[] = {
25 {PHP_HTTP_HEADER_PARSER_STATE_START, 1},
26 {PHP_HTTP_HEADER_PARSER_STATE_KEY, 1},
27 {PHP_HTTP_HEADER_PARSER_STATE_VALUE, 1},
28 {PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX, 0},
29 {PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE, 0},
30 {PHP_HTTP_HEADER_PARSER_STATE_DONE, 0}
31 };
32
33 php_http_header_parser_t *php_http_header_parser_init(php_http_header_parser_t *parser TSRMLS_DC)
34 {
35 if (!parser) {
36 parser = emalloc(sizeof(*parser));
37 }
38 memset(parser, 0, sizeof(*parser));
39
40 TSRMLS_SET_CTX(parser->ts);
41
42 return parser;
43 }
44
45 php_http_header_parser_state_t php_http_header_parser_state_push(php_http_header_parser_t *parser, unsigned argc, ...)
46 {
47 va_list va_args;
48 unsigned i;
49 php_http_header_parser_state_t state = 0;
50
51 /* short circuit */
52 ZEND_PTR_STACK_RESIZE_IF_NEEDED((&parser->stack), argc);
53
54 va_start(va_args, argc);
55 for (i = 0; i < argc; ++i) {
56 state = va_arg(va_args, php_http_header_parser_state_t);
57 zend_ptr_stack_push(&parser->stack, (void *) state);
58 }
59 va_end(va_args);
60
61 return state;
62 }
63
64 php_http_header_parser_state_t php_http_header_parser_state_is(php_http_header_parser_t *parser)
65 {
66 if (parser->stack.top) {
67 return (php_http_header_parser_state_t) parser->stack.elements[parser->stack.top - 1];
68 }
69
70 return PHP_HTTP_HEADER_PARSER_STATE_START;
71 }
72
73 php_http_header_parser_state_t php_http_header_parser_state_pop(php_http_header_parser_t *parser)
74 {
75 if (parser->stack.top) {
76 return (php_http_header_parser_state_t) zend_ptr_stack_pop(&parser->stack);
77 }
78
79 return PHP_HTTP_HEADER_PARSER_STATE_START;
80 }
81
82 void php_http_header_parser_dtor(php_http_header_parser_t *parser)
83 {
84 zend_ptr_stack_destroy(&parser->stack);
85 php_http_info_dtor(&parser->info);
86 PTR_FREE(parser->_key.str);
87 PTR_FREE(parser->_val.str);
88 }
89
90 void php_http_header_parser_free(php_http_header_parser_t **parser)
91 {
92 if (*parser) {
93 php_http_header_parser_dtor(*parser);
94 efree(*parser);
95 *parser = NULL;
96 }
97 }
98
99 /* NOTE: 'str' has to be null terminated */
100 static void php_http_header_parser_error(size_t valid_len, char *str, size_t len, const char *eol_str TSRMLS_DC)
101 {
102 int escaped_len;
103 char *escaped_str;
104
105 escaped_str = php_addcslashes(str, len, &escaped_len, 0, ZEND_STRL("\x0..\x1F\x7F..\xFF") TSRMLS_CC);
106
107 if (valid_len != len && (!eol_str || (str+valid_len) != eol_str)) {
108 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse headers: unexpected character '\\%03o' at pos %zu of '%.*s'", str[valid_len], valid_len, escaped_len, escaped_str);
109 } else if (eol_str) {
110 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse headers: unexpected end of line at pos %zu of '%.*s'", eol_str - str, escaped_len, escaped_str);
111 } else {
112 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse headers: unexpected end of input at pos %zu of '%.*s'", len, escaped_len, escaped_str);
113 }
114
115 efree(escaped_str);
116 }
117
118 php_http_header_parser_state_t php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg)
119 {
120 TSRMLS_FETCH_FROM_CTX(parser->ts);
121
122 while (buffer->used || !php_http_header_parser_states[php_http_header_parser_state_is(parser)].need_data) {
123 #if DBG_PARSER
124 const char *state[] = {"START", "KEY", "VALUE", "VALUE_EX", "HEADER_DONE", "DONE"};
125 fprintf(stderr, "#HP: %s (avail:%zu, num:%d cleanup:%u)\n", php_http_header_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_header_parser_state_is(parser)], buffer->used, headers?zend_hash_num_elements(headers):0, flags);
126 _dpf(0, buffer->data, buffer->used);
127 #endif
128 switch (php_http_header_parser_state_pop(parser)) {
129 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
130 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse headers");
131 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
132
133 case PHP_HTTP_HEADER_PARSER_STATE_START: {
134 char *ptr = buffer->data;
135
136 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
137 ++ptr;
138 }
139
140 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
141 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
142 break;
143 }
144
145 case PHP_HTTP_HEADER_PARSER_STATE_KEY: {
146 const char *colon, *eol_str = NULL;
147 int eol_len = 0;
148
149 /* fix buffer here, so eol_str pointer doesn't become obsolete afterwards */
150 php_http_buffer_fix(buffer);
151
152 if (buffer->data == (eol_str = php_http_locate_bin_eol(buffer->data, buffer->used, &eol_len))) {
153 /* end of headers */
154 php_http_buffer_cut(buffer, 0, eol_len);
155 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_DONE);
156 } else if (php_http_info_parse(&parser->info, buffer->data TSRMLS_CC)) {
157 /* new message starting with request/response line */
158 if (callback_func) {
159 callback_func(callback_arg, &headers, &parser->info TSRMLS_CC);
160 }
161 php_http_info_dtor(&parser->info);
162 php_http_buffer_cut(buffer, 0, eol_str + eol_len - buffer->data);
163 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
164 } else if ((colon = memchr(buffer->data, ':', buffer->used)) && (!eol_str || eol_str > colon)) {
165 /* header: string */
166 size_t valid_len;
167
168 parser->_key.len = colon - buffer->data;
169 parser->_key.str = estrndup(buffer->data, parser->_key.len);
170
171 valid_len = strspn(parser->_key.str, PHP_HTTP_HEADER_NAME_CHARS);
172 if (valid_len != parser->_key.len) {
173 php_http_header_parser_error(valid_len, parser->_key.str, parser->_key.len, eol_str TSRMLS_CC);
174 PTR_SET(parser->_key.str, NULL);
175 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
176 }
177 while (PHP_HTTP_IS_CTYPE(space, *++colon) && *colon != '\n' && *colon != '\r');
178 php_http_buffer_cut(buffer, 0, colon - buffer->data);
179 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
180 } else if (eol_str || (flags & PHP_HTTP_HEADER_PARSER_CLEANUP)) {
181 /* neither reqeust/response line nor 'header:' string, or injected new line or NUL etc. */
182 php_http_header_parser_error(strspn(buffer->data, PHP_HTTP_HEADER_NAME_CHARS), buffer->data, buffer->used, eol_str TSRMLS_CC);
183 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
184 } else {
185 /* keep feeding */
186 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
187 }
188 break;
189 }
190
191 case PHP_HTTP_HEADER_PARSER_STATE_VALUE: {
192 const char *eol_str;
193 int eol_len;
194
195 #define SET_ADD_VAL(slen, eol_len) \
196 do { \
197 const char *ptr = buffer->data; \
198 size_t len = slen; \
199 \
200 while (len > 0 && PHP_HTTP_IS_CTYPE(space, *ptr)) { \
201 ++ptr; \
202 --len; \
203 } \
204 while (len > 0 && PHP_HTTP_IS_CTYPE(space, ptr[len - 1])) { \
205 --len; \
206 } \
207 \
208 if (len > 0) { \
209 if (parser->_val.str) { \
210 parser->_val.str = erealloc(parser->_val.str, parser->_val.len + len + 2); \
211 parser->_val.str[parser->_val.len++] = ' '; \
212 memcpy(&parser->_val.str[parser->_val.len], ptr, len); \
213 parser->_val.len += len; \
214 parser->_val.str[parser->_val.len] = '\0'; \
215 } else { \
216 parser->_val.len = len; \
217 parser->_val.str = estrndup(ptr, len); \
218 } \
219 } \
220 php_http_buffer_cut(buffer, 0, slen + eol_len); \
221 } while (0)
222
223 if ((eol_str = php_http_locate_bin_eol(buffer->data, buffer->used, &eol_len))) {
224 SET_ADD_VAL(eol_str - buffer->data, eol_len);
225 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
226 } else if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) {
227 if (buffer->used) {
228 SET_ADD_VAL(buffer->used, 0);
229 }
230 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
231 } else {
232 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
233 }
234 break;
235 }
236
237 case PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX:
238 if (buffer->used && (*buffer->data == ' ' || *buffer->data == '\t')) {
239 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
240 } else if (buffer->used || (flags & PHP_HTTP_HEADER_PARSER_CLEANUP)) {
241 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
242 } else {
243 /* keep feeding */
244 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
245 }
246 break;
247
248 case PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE:
249 if (parser->_key.str && parser->_val.str) {
250 zval array, **exist;
251 size_t valid_len = strlen(parser->_val.str);
252
253 /* check for truncation */
254 if (valid_len != parser->_val.len) {
255 php_http_header_parser_error(valid_len, parser->_val.str, parser->_val.len, NULL TSRMLS_CC);
256
257 PTR_SET(parser->_key.str, NULL);
258 PTR_SET(parser->_val.str, NULL);
259
260 return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
261 }
262
263 if (!headers && callback_func) {
264 callback_func(callback_arg, &headers, NULL TSRMLS_CC);
265 }
266
267 INIT_PZVAL_ARRAY(&array, headers);
268 php_http_pretty_key(parser->_key.str, parser->_key.len, 1, 1);
269 if (SUCCESS == zend_symtable_find(headers, parser->_key.str, parser->_key.len + 1, (void *) &exist)) {
270 convert_to_array(*exist);
271 add_next_index_stringl(*exist, parser->_val.str, parser->_val.len, 0);
272 } else {
273 add_assoc_stringl_ex(&array, parser->_key.str, parser->_key.len + 1, parser->_val.str, parser->_val.len, 0);
274 }
275 parser->_val.str = NULL;
276 }
277
278 PTR_SET(parser->_key.str, NULL);
279 PTR_SET(parser->_val.str, NULL);
280
281 php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
282 break;
283
284 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
285 return PHP_HTTP_HEADER_PARSER_STATE_DONE;
286 }
287 }
288
289 return php_http_header_parser_state_is(parser);
290 }
291
292 php_http_header_parser_state_t php_http_header_parser_parse_stream(php_http_header_parser_t *parser, php_http_buffer_t *buf, php_stream *s, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg)
293 {
294 php_http_header_parser_state_t state = PHP_HTTP_HEADER_PARSER_STATE_START;
295 TSRMLS_FETCH_FROM_CTX(parser->ts);
296
297 if (!buf->data) {
298 php_http_buffer_resize_ex(buf, 0x1000, 1, 0);
299 }
300 while (1) {
301 size_t justread = 0;
302 #if DBG_PARSER
303 const char *states[] = {"START", "KEY", "VALUE", "VALUE_EX", "HEADER_DONE", "DONE"};
304 fprintf(stderr, "#SHP: %s (f:%u)\n", states[state], flags);
305 #endif
306 /* resize if needed */
307 if (buf->free < 0x1000) {
308 php_http_buffer_resize_ex(buf, 0x1000, 1, 0);
309 }
310 switch (state) {
311 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
312 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
313 return state;
314
315 default:
316 /* read line */
317 php_stream_get_line(s, buf->data + buf->used, buf->free, &justread);
318 /* if we fail reading a whole line, try a single char */
319 if (!justread) {
320 int c = php_stream_getc(s);
321
322 if (c != EOF) {
323 char s[1] = {c};
324 justread = php_http_buffer_append(buf, s, 1);
325 }
326 }
327 php_http_buffer_account(buf, justread);
328 }
329
330 if (justread) {
331 state = php_http_header_parser_parse(parser, buf, flags, headers, callback_func, callback_arg);
332 } else if (php_stream_eof(s)) {
333 return php_http_header_parser_parse(parser, buf, flags | PHP_HTTP_HEADER_PARSER_CLEANUP, headers, callback_func, callback_arg);
334 } else {
335 return state;
336 }
337 }
338
339 return PHP_HTTP_HEADER_PARSER_STATE_DONE;
340 }
341
342 zend_class_entry *php_http_header_parser_class_entry;
343 static zend_object_handlers php_http_header_parser_object_handlers;
344
345 zend_object_value php_http_header_parser_object_new(zend_class_entry *ce TSRMLS_DC)
346 {
347 return php_http_header_parser_object_new_ex(ce, NULL, NULL TSRMLS_CC);
348 }
349
350 zend_object_value php_http_header_parser_object_new_ex(zend_class_entry *ce, php_http_header_parser_t *parser, php_http_header_parser_object_t **ptr TSRMLS_DC)
351 {
352 php_http_header_parser_object_t *o;
353
354 o = ecalloc(1, sizeof(php_http_header_parser_object_t));
355 zend_object_std_init((zend_object *) o, ce TSRMLS_CC);
356 object_properties_init((zend_object *) o, ce);
357
358 if (ptr) {
359 *ptr = o;
360 }
361
362 if (parser) {
363 o->parser = parser;
364 } else {
365 o->parser = php_http_header_parser_init(NULL TSRMLS_CC);
366 }
367 o->buffer = php_http_buffer_new();
368
369 o->zv.handle = zend_objects_store_put((zend_object *) o, NULL, php_http_header_parser_object_free, NULL TSRMLS_CC);
370 o->zv.handlers = &php_http_header_parser_object_handlers;
371
372 return o->zv;
373 }
374
375 void php_http_header_parser_object_free(void *object TSRMLS_DC)
376 {
377 php_http_header_parser_object_t *o = (php_http_header_parser_object_t *) object;
378
379 if (o->parser) {
380 php_http_header_parser_free(&o->parser);
381 }
382 if (o->buffer) {
383 php_http_buffer_free(&o->buffer);
384 }
385 zend_object_std_dtor((zend_object *) o TSRMLS_CC);
386 efree(o);
387 }
388
389 ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_getState, 0, 0, 0)
390 ZEND_END_ARG_INFO();
391 static PHP_METHOD(HttpHeaderParser, getState)
392 {
393 php_http_header_parser_object_t *parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC);
394
395 zend_parse_parameters_none();
396 /* always return the real state */
397 RETVAL_LONG(php_http_header_parser_state_is(parser_obj->parser));
398 }
399
400 ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_parse, 0, 0, 3)
401 ZEND_ARG_INFO(0, data)
402 ZEND_ARG_INFO(0, flags)
403 ZEND_ARG_ARRAY_INFO(1, headers, 1)
404 ZEND_END_ARG_INFO();
405 static PHP_METHOD(HttpHeaderParser, parse)
406 {
407 php_http_header_parser_object_t *parser_obj;
408 zval *zmsg;
409 char *data_str;
410 int data_len;
411 long flags;
412
413 php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "slz", &data_str, &data_len, &flags, &zmsg), invalid_arg, return);
414
415 if (Z_TYPE_P(zmsg) != IS_ARRAY) {
416 zval_dtor(zmsg);
417 array_init(zmsg);
418 }
419 parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC);
420 php_http_buffer_append(parser_obj->buffer, data_str, data_len);
421 RETVAL_LONG(php_http_header_parser_parse(parser_obj->parser, parser_obj->buffer, flags, Z_ARRVAL_P(zmsg), NULL, NULL));
422 }
423
424 ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_stream, 0, 0, 3)
425 ZEND_ARG_INFO(0, stream)
426 ZEND_ARG_INFO(0, flags)
427 ZEND_ARG_ARRAY_INFO(1, headers, 1)
428 ZEND_END_ARG_INFO();
429 static PHP_METHOD(HttpHeaderParser, stream)
430 {
431 php_http_header_parser_object_t *parser_obj;
432 zend_error_handling zeh;
433 zval *zmsg, *zstream;
434 php_stream *s;
435 long flags;
436
437 php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rlz", &zstream, &flags, &zmsg), invalid_arg, return);
438
439 zend_replace_error_handling(EH_THROW, php_http_exception_unexpected_val_class_entry, &zeh TSRMLS_CC);
440 php_stream_from_zval(s, &zstream);
441 zend_restore_error_handling(&zeh TSRMLS_CC);
442
443 if (Z_TYPE_P(zmsg) != IS_ARRAY) {
444 zval_dtor(zmsg);
445 array_init(zmsg);
446 }
447 parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC);
448 RETVAL_LONG(php_http_header_parser_parse_stream(parser_obj->parser, parser_obj->buffer, s, flags, Z_ARRVAL_P(zmsg), NULL, NULL));
449 }
450
451 static zend_function_entry php_http_header_parser_methods[] = {
452 PHP_ME(HttpHeaderParser, getState, ai_HttpHeaderParser_getState, ZEND_ACC_PUBLIC)
453 PHP_ME(HttpHeaderParser, parse, ai_HttpHeaderParser_parse, ZEND_ACC_PUBLIC)
454 PHP_ME(HttpHeaderParser, stream, ai_HttpHeaderParser_stream, ZEND_ACC_PUBLIC)
455 {NULL, NULL, NULL}
456 };
457
458 PHP_MINIT_FUNCTION(http_header_parser)
459 {
460 zend_class_entry ce;
461
462 INIT_NS_CLASS_ENTRY(ce, "http\\Header", "Parser", php_http_header_parser_methods);
463 php_http_header_parser_class_entry = zend_register_internal_class(&ce TSRMLS_CC);
464 memcpy(&php_http_header_parser_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers));
465 php_http_header_parser_class_entry->create_object = php_http_header_parser_object_new;
466 php_http_header_parser_object_handlers.clone_obj = NULL;
467
468 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("CLEANUP"), PHP_HTTP_HEADER_PARSER_CLEANUP TSRMLS_CC);
469
470 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_FAILURE"), PHP_HTTP_HEADER_PARSER_STATE_FAILURE TSRMLS_CC);
471 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_START"), PHP_HTTP_HEADER_PARSER_STATE_START TSRMLS_CC);
472 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_KEY"), PHP_HTTP_HEADER_PARSER_STATE_KEY TSRMLS_CC);
473 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE"), PHP_HTTP_HEADER_PARSER_STATE_VALUE TSRMLS_CC);
474 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE_EX"), PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX TSRMLS_CC);
475 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_HEADER_DONE"), PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE TSRMLS_CC);
476 zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_DONE"), PHP_HTTP_HEADER_PARSER_STATE_DONE TSRMLS_CC);
477
478 return SUCCESS;
479 }
480
481 /*
482 * Local variables:
483 * tab-width: 4
484 * c-basic-offset: 4
485 * End:
486 * vim600: noet sw=4 ts=4 fdm=marker
487 * vim<600: noet sw=4 ts=4
488 */
489