X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=blobdiff_plain;f=php_http_header_parser.c;h=f4dcf95e9e63828d9b43db7accd9024e28d8cf82;hp=da02ff5f765237b76ba79d477be637c588691f7c;hb=e438aa9ce944f8c8f175554d9aa66d152a47f780;hpb=629c7b270b047582160c87b7c688c2b942a75d60 diff --git a/php_http_header_parser.c b/php_http_header_parser.c index da02ff5..f4dcf95 100644 --- a/php_http_header_parser.c +++ b/php_http_header_parser.c @@ -30,15 +30,13 @@ static const php_http_header_parser_state_spec_t php_http_header_parser_states[] {PHP_HTTP_HEADER_PARSER_STATE_DONE, 0} }; -php_http_header_parser_t *php_http_header_parser_init(php_http_header_parser_t *parser TSRMLS_DC) +php_http_header_parser_t *php_http_header_parser_init(php_http_header_parser_t *parser) { if (!parser) { parser = emalloc(sizeof(*parser)); } memset(parser, 0, sizeof(*parser)); - TSRMLS_SET_CTX(parser->ts); - return parser; } @@ -96,10 +94,26 @@ void php_http_header_parser_free(php_http_header_parser_t **parser) } } -STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg) +/* NOTE: 'str' has to be null terminated */ +static void php_http_header_parser_error(size_t valid_len, char *str, size_t len, const char *eol_str ) { - TSRMLS_FETCH_FROM_CTX(parser->ts); + zend_string *escaped_str = zend_string_init(str, len, 0); + + escaped_str = php_addcslashes(escaped_str, 1, ZEND_STRL("\x0..\x1F\x7F..\xFF")); + + if (valid_len != len && (!eol_str || (str+valid_len) != eol_str)) { + php_error_docref(NULL, E_WARNING, "Failed to parse headers: unexpected character '\\%03o' at pos %zu of '%s'", str[valid_len], valid_len, escaped_str->val); + } else if (eol_str) { + php_error_docref(NULL, E_WARNING, "Failed to parse headers: unexpected end of line at pos %zu of '%s'", eol_str - str, escaped_str->val); + } else { + php_error_docref(NULL, E_WARNING, "Failed to parse headers: unexpected end of input at pos %zu of '%s'", len, escaped_str->val); + } + efree(escaped_str); +} + +php_http_header_parser_state_t php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg) +{ while (buffer->used || !php_http_header_parser_states[php_http_header_parser_state_is(parser)].need_data) { #if DBG_PARSER const char *state[] = {"START", "KEY", "VALUE", "VALUE_EX", "HEADER_DONE", "DONE"}; @@ -108,6 +122,7 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b #endif switch (php_http_header_parser_state_pop(parser)) { case PHP_HTTP_HEADER_PARSER_STATE_FAILURE: + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse headers"); return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE); case PHP_HTTP_HEADER_PARSER_STATE_START: { @@ -130,22 +145,34 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b /* end of headers */ php_http_buffer_cut(buffer, 0, eol_len); php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_DONE); - } else if (php_http_info_parse(&parser->info, php_http_buffer_fix(buffer)->data TSRMLS_CC)) { + } else if (php_http_info_parse(&parser->info, php_http_buffer_fix(buffer)->data)) { /* new message starting with request/response line */ if (callback_func) { - callback_func(callback_arg, &headers, &parser->info TSRMLS_CC); + callback_func(callback_arg, &headers, &parser->info); } php_http_info_dtor(&parser->info); php_http_buffer_cut(buffer, 0, eol_str + eol_len - buffer->data); php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE); } else if ((colon = memchr(buffer->data, ':', buffer->used)) && (!eol_str || eol_str > colon)) { /* header: string */ - parser->_key.str = estrndup(buffer->data, parser->_key.len = colon - buffer->data); + size_t valid_len; + + parser->_key.len = colon - buffer->data; + parser->_key.str = estrndup(buffer->data, parser->_key.len); + + valid_len = strspn(parser->_key.str, PHP_HTTP_HEADER_NAME_CHARS); + if (valid_len != parser->_key.len) { + php_http_header_parser_error(valid_len, parser->_key.str, parser->_key.len, eol_str TSRMLS_CC); + PTR_SET(parser->_key.str, NULL); + return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE); + } while (PHP_HTTP_IS_CTYPE(space, *++colon) && *colon != '\n' && *colon != '\r'); php_http_buffer_cut(buffer, 0, colon - buffer->data); php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE); - } else if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) { - /* neither reqeust/response line nor header: string */ + } else if (eol_str || (flags & PHP_HTTP_HEADER_PARSER_CLEANUP)) { + /* neither reqeust/response line nor 'header:' string, or injected new line or NUL etc. */ + php_http_buffer_fix(buffer); + php_http_header_parser_error(strspn(buffer->data, PHP_HTTP_HEADER_NAME_CHARS), buffer->data, buffer->used, eol_str TSRMLS_CC); return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE); } else { /* keep feeding */ @@ -213,19 +240,30 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b case PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE: if (parser->_key.str && parser->_val.str) { - zval array, **exist; + zval tmp, *exist; + size_t valid_len = strlen(parser->_val.str); + + /* check for truncation */ + if (valid_len != parser->_val.len) { + php_http_header_parser_error(valid_len, parser->_val.str, parser->_val.len, NULL TSRMLS_CC); + + PTR_SET(parser->_key.str, NULL); + PTR_SET(parser->_val.str, NULL); + + return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE); + } if (!headers && callback_func) { - callback_func(callback_arg, &headers, NULL TSRMLS_CC); + callback_func(callback_arg, &headers, NULL); } - INIT_PZVAL_ARRAY(&array, headers); php_http_pretty_key(parser->_key.str, parser->_key.len, 1, 1); - if (SUCCESS == zend_symtable_find(headers, parser->_key.str, parser->_key.len + 1, (void *) &exist)) { - convert_to_array(*exist); - add_next_index_stringl(*exist, parser->_val.str, parser->_val.len, 0); + if ((exist = zend_symtable_str_find(headers, parser->_key.str, parser->_key.len))) { + convert_to_array(exist); + add_next_index_str(exist, php_http_cs2zs(parser->_val.str, parser->_val.len)); } else { - add_assoc_stringl_ex(&array, parser->_key.str, parser->_key.len + 1, parser->_val.str, parser->_val.len, 0); + ZVAL_STR(&tmp, php_http_cs2zs(parser->_val.str, parser->_val.len)); + zend_symtable_str_update(headers, parser->_key.str, parser->_key.len, &tmp); } parser->_val.str = NULL; } @@ -244,43 +282,87 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b return php_http_header_parser_state_is(parser); } +php_http_header_parser_state_t php_http_header_parser_parse_stream(php_http_header_parser_t *parser, php_http_buffer_t *buf, php_stream *s, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg) +{ + php_http_header_parser_state_t state = PHP_HTTP_HEADER_PARSER_STATE_START; + TSRMLS_FETCH_FROM_CTX(parser->ts); + + if (!buf->data) { + php_http_buffer_resize_ex(buf, 0x1000, 1, 0); + } + while (1) { + size_t justread = 0; +#if DBG_PARSER + const char *states[] = {"START", "KEY", "VALUE", "VALUE_EX", "HEADER_DONE", "DONE"}; + fprintf(stderr, "#SHP: %s (f:%u)\n", states[state], flags); +#endif + /* resize if needed */ + if (buf->free < 0x1000) { + php_http_buffer_resize_ex(buf, 0x1000, 1, 0); + } + switch (state) { + case PHP_HTTP_HEADER_PARSER_STATE_FAILURE: + case PHP_HTTP_HEADER_PARSER_STATE_DONE: + return state; + + default: + /* read line */ + php_stream_get_line(s, buf->data + buf->used, buf->free, &justread); + /* if we fail reading a whole line, try a single char */ + if (!justread) { + int c = php_stream_getc(s); + + if (c != EOF) { + char s[1] = {c}; + justread = php_http_buffer_append(buf, s, 1); + } + } + php_http_buffer_account(buf, justread); + } + + if (justread) { + state = php_http_header_parser_parse(parser, buf, flags, headers, callback_func, callback_arg); + } else if (php_stream_eof(s)) { + return php_http_header_parser_parse(parser, buf, flags | PHP_HTTP_HEADER_PARSER_CLEANUP, headers, callback_func, callback_arg); + } else { + return state; + } + } + + return PHP_HTTP_HEADER_PARSER_STATE_DONE; +} zend_class_entry *php_http_header_parser_class_entry; static zend_object_handlers php_http_header_parser_object_handlers; -zend_object_value php_http_header_parser_object_new(zend_class_entry *ce TSRMLS_DC) +zend_object *php_http_header_parser_object_new(zend_class_entry *ce) { - return php_http_header_parser_object_new_ex(ce, NULL, NULL TSRMLS_CC); + return &php_http_header_parser_object_new_ex(ce, NULL)->zo; } -zend_object_value php_http_header_parser_object_new_ex(zend_class_entry *ce, php_http_header_parser_t *parser, php_http_header_parser_object_t **ptr TSRMLS_DC) +php_http_header_parser_object_t *php_http_header_parser_object_new_ex(zend_class_entry *ce, php_http_header_parser_t *parser) { php_http_header_parser_object_t *o; - o = ecalloc(1, sizeof(php_http_header_parser_object_t)); - zend_object_std_init((zend_object *) o, ce TSRMLS_CC); - object_properties_init((zend_object *) o, ce); - - if (ptr) { - *ptr = o; - } + o = ecalloc(1, sizeof(php_http_header_parser_object_t) + zend_object_properties_size(ce)); + zend_object_std_init(&o->zo, ce); + object_properties_init(&o->zo, ce); if (parser) { o->parser = parser; } else { - o->parser = php_http_header_parser_init(NULL TSRMLS_CC); + o->parser = php_http_header_parser_init(NULL); } o->buffer = php_http_buffer_new(); - o->zv.handle = zend_objects_store_put((zend_object *) o, NULL, php_http_header_parser_object_free, NULL TSRMLS_CC); - o->zv.handlers = &php_http_header_parser_object_handlers; + o->zo.handlers = &php_http_header_parser_object_handlers; - return o->zv; + return o; } -void php_http_header_parser_object_free(void *object TSRMLS_DC) +void php_http_header_parser_object_free(zend_object *object) { - php_http_header_parser_object_t *o = (php_http_header_parser_object_t *) object; + php_http_header_parser_object_t *o = PHP_HTTP_OBJ(object, NULL); if (o->parser) { php_http_header_parser_free(&o->parser); @@ -288,15 +370,14 @@ void php_http_header_parser_object_free(void *object TSRMLS_DC) if (o->buffer) { php_http_buffer_free(&o->buffer); } - zend_object_std_dtor((zend_object *) o TSRMLS_CC); - efree(o); + zend_object_std_dtor(object); } ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_getState, 0, 0, 0) ZEND_END_ARG_INFO(); static PHP_METHOD(HttpHeaderParser, getState) { - php_http_header_parser_object_t *parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC); + php_http_header_parser_object_t *parser_obj = PHP_HTTP_OBJ(NULL, getThis()); zend_parse_parameters_none(); /* always return the real state */ @@ -313,24 +394,53 @@ static PHP_METHOD(HttpHeaderParser, parse) php_http_header_parser_object_t *parser_obj; zval *zmsg; char *data_str; - int data_len; - long flags; + size_t data_len; + zend_long flags; - php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "slz", &data_str, &data_len, &flags, &zmsg), invalid_arg, return); + php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS(), "slz", &data_str, &data_len, &flags, &zmsg), invalid_arg, return); + ZVAL_DEREF(zmsg); if (Z_TYPE_P(zmsg) != IS_ARRAY) { zval_dtor(zmsg); array_init(zmsg); } - parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC); + parser_obj = PHP_HTTP_OBJ(NULL, getThis()); php_http_buffer_append(parser_obj->buffer, data_str, data_len); RETVAL_LONG(php_http_header_parser_parse(parser_obj->parser, parser_obj->buffer, flags, Z_ARRVAL_P(zmsg), NULL, NULL)); } +ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_stream, 0, 0, 3) + ZEND_ARG_INFO(0, stream) + ZEND_ARG_INFO(0, flags) + ZEND_ARG_ARRAY_INFO(1, headers, 1) +ZEND_END_ARG_INFO(); +static PHP_METHOD(HttpHeaderParser, stream) +{ + php_http_header_parser_object_t *parser_obj; + zend_error_handling zeh; + zval *zmsg, *zstream; + php_stream *s; + zend_long flags; + + php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS(), "rlz", &zstream, &flags, &zmsg), invalid_arg, return); + + zend_replace_error_handling(EH_THROW, php_http_exception_unexpected_val_class_entry, &zeh); + php_stream_from_zval(s, zstream); + zend_restore_error_handling(&zeh); + + ZVAL_DEREF(zmsg); + if (Z_TYPE_P(zmsg) != IS_ARRAY) { + zval_dtor(zmsg); + array_init(zmsg); + } + parser_obj = PHP_HTTP_OBJ(NULL, getThis()); + RETVAL_LONG(php_http_header_parser_parse_stream(parser_obj->parser, parser_obj->buffer, s, flags, Z_ARRVAL_P(zmsg), NULL, NULL)); +} static zend_function_entry php_http_header_parser_methods[] = { PHP_ME(HttpHeaderParser, getState, ai_HttpHeaderParser_getState, ZEND_ACC_PUBLIC) PHP_ME(HttpHeaderParser, parse, ai_HttpHeaderParser_parse, ZEND_ACC_PUBLIC) + PHP_ME(HttpHeaderParser, stream, ai_HttpHeaderParser_stream, ZEND_ACC_PUBLIC) {NULL, NULL, NULL} }; @@ -339,20 +449,22 @@ PHP_MINIT_FUNCTION(http_header_parser) zend_class_entry ce; INIT_NS_CLASS_ENTRY(ce, "http\\Header", "Parser", php_http_header_parser_methods); - php_http_header_parser_class_entry = zend_register_internal_class(&ce TSRMLS_CC); + php_http_header_parser_class_entry = zend_register_internal_class(&ce); memcpy(&php_http_header_parser_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); php_http_header_parser_class_entry->create_object = php_http_header_parser_object_new; + php_http_header_parser_object_handlers.offset = XtOffsetOf(php_http_header_parser_object_t, zo); php_http_header_parser_object_handlers.clone_obj = NULL; + php_http_header_parser_object_handlers.free_obj = php_http_header_parser_object_free; - zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("CLEANUP"), PHP_HTTP_HEADER_PARSER_CLEANUP TSRMLS_CC); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("CLEANUP"), PHP_HTTP_HEADER_PARSER_CLEANUP); - zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_FAILURE"), PHP_HTTP_HEADER_PARSER_STATE_FAILURE TSRMLS_CC); - zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_START"), PHP_HTTP_HEADER_PARSER_STATE_START TSRMLS_CC); - zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_KEY"), PHP_HTTP_HEADER_PARSER_STATE_KEY TSRMLS_CC); - zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE"), PHP_HTTP_HEADER_PARSER_STATE_VALUE TSRMLS_CC); - zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE_EX"), PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX TSRMLS_CC); - zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_HEADER_DONE"), PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE TSRMLS_CC); - zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_DONE"), PHP_HTTP_HEADER_PARSER_STATE_DONE TSRMLS_CC); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_FAILURE"), PHP_HTTP_HEADER_PARSER_STATE_FAILURE); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_START"), PHP_HTTP_HEADER_PARSER_STATE_START); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_KEY"), PHP_HTTP_HEADER_PARSER_STATE_KEY); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE"), PHP_HTTP_HEADER_PARSER_STATE_VALUE); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE_EX"), PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_HEADER_DONE"), PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_DONE"), PHP_HTTP_HEADER_PARSER_STATE_DONE); return SUCCESS; }