From: Michael Wallner Date: Sun, 15 Feb 2015 13:54:12 +0000 (+0100) Subject: expose header parser X-Git-Tag: RELEASE_2_3_0_RC1~24 X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=commitdiff_plain;h=629c7b270b047582160c87b7c688c2b942a75d60 expose header parser --- diff --git a/php_http.c b/php_http.c index f7a0b86..17d9925 100644 --- a/php_http.c +++ b/php_http.c @@ -141,6 +141,7 @@ PHP_MINIT_FUNCTION(http) || SUCCESS != PHP_MINIT_CALL(http_encoding) || SUCCESS != PHP_MINIT_CALL(http_filter) || SUCCESS != PHP_MINIT_CALL(http_header) + || SUCCESS != PHP_MINIT_CALL(http_header_parser) || SUCCESS != PHP_MINIT_CALL(http_message) || SUCCESS != PHP_MINIT_CALL(http_message_parser) || SUCCESS != PHP_MINIT_CALL(http_message_body) diff --git a/php_http_header_parser.c b/php_http_header_parser.c index df0837d..da02ff5 100644 --- a/php_http_header_parser.c +++ b/php_http_header_parser.c @@ -12,6 +12,10 @@ #include "php_http_api.h" +#ifndef DBG_PARSER +# define DBG_PARSER 0 +#endif + typedef struct php_http_header_parser_state_spec { php_http_header_parser_state_t state; unsigned need_data:1; @@ -21,7 +25,7 @@ static const php_http_header_parser_state_spec_t php_http_header_parser_states[] {PHP_HTTP_HEADER_PARSER_STATE_START, 1}, {PHP_HTTP_HEADER_PARSER_STATE_KEY, 1}, {PHP_HTTP_HEADER_PARSER_STATE_VALUE, 1}, - {PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX, 1}, + {PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX, 0}, {PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE, 0}, {PHP_HTTP_HEADER_PARSER_STATE_DONE, 0} }; @@ -97,9 +101,9 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b TSRMLS_FETCH_FROM_CTX(parser->ts); while (buffer->used || !php_http_header_parser_states[php_http_header_parser_state_is(parser)].need_data) { -#if 0 - const char *state[] = {"START", "KEY", "VALUE", "HEADER_DONE", "DONE"}; - fprintf(stderr, "#HP: %s (avail:%zu, num:%d)\n", php_http_header_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_header_parser_state_is(parser)], buffer->used, headers?zend_hash_num_elements(headers):0); +#if DBG_PARSER + const char *state[] = {"START", "KEY", "VALUE", "VALUE_EX", "HEADER_DONE", "DONE"}; + fprintf(stderr, "#HP: %s (avail:%zu, num:%d cleanup:%u)\n", php_http_header_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_header_parser_state_is(parser)], buffer->used, headers?zend_hash_num_elements(headers):0, flags); _dpf(0, buffer->data, buffer->used); #endif switch (php_http_header_parser_state_pop(parser)) { @@ -140,9 +144,12 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b while (PHP_HTTP_IS_CTYPE(space, *++colon) && *colon != '\n' && *colon != '\r'); php_http_buffer_cut(buffer, 0, colon - buffer->data); php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE); - } else { + } else if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) { /* neither reqeust/response line nor header: string */ return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE); + } else { + /* keep feeding */ + return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY); } break; } @@ -181,34 +188,26 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b if ((eol_str = php_http_locate_bin_eol(buffer->data, buffer->used, &eol_len))) { SET_ADD_VAL(eol_str - buffer->data, eol_len); - - if (buffer->used) { - if (*buffer->data != '\t' && *buffer->data != ' ') { - php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE); - break; - } else { - php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE); - break; - } - } - } - - if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) { + php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX); + } else if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) { if (buffer->used) { SET_ADD_VAL(buffer->used, 0); } php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE); } else { - return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX); + return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE); } break; } case PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX: - if (*buffer->data == ' ' || *buffer->data == '\t') { + if (buffer->used && (*buffer->data == ' ' || *buffer->data == '\t')) { php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE); - } else { + } else if (buffer->used || (flags & PHP_HTTP_HEADER_PARSER_CLEANUP)) { php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE); + } else { + /* keep feeding */ + return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX); } break; @@ -245,6 +244,119 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b return php_http_header_parser_state_is(parser); } + +zend_class_entry *php_http_header_parser_class_entry; +static zend_object_handlers php_http_header_parser_object_handlers; + +zend_object_value php_http_header_parser_object_new(zend_class_entry *ce TSRMLS_DC) +{ + return php_http_header_parser_object_new_ex(ce, NULL, NULL TSRMLS_CC); +} + +zend_object_value php_http_header_parser_object_new_ex(zend_class_entry *ce, php_http_header_parser_t *parser, php_http_header_parser_object_t **ptr TSRMLS_DC) +{ + php_http_header_parser_object_t *o; + + o = ecalloc(1, sizeof(php_http_header_parser_object_t)); + zend_object_std_init((zend_object *) o, ce TSRMLS_CC); + object_properties_init((zend_object *) o, ce); + + if (ptr) { + *ptr = o; + } + + if (parser) { + o->parser = parser; + } else { + o->parser = php_http_header_parser_init(NULL TSRMLS_CC); + } + o->buffer = php_http_buffer_new(); + + o->zv.handle = zend_objects_store_put((zend_object *) o, NULL, php_http_header_parser_object_free, NULL TSRMLS_CC); + o->zv.handlers = &php_http_header_parser_object_handlers; + + return o->zv; +} + +void php_http_header_parser_object_free(void *object TSRMLS_DC) +{ + php_http_header_parser_object_t *o = (php_http_header_parser_object_t *) object; + + if (o->parser) { + php_http_header_parser_free(&o->parser); + } + if (o->buffer) { + php_http_buffer_free(&o->buffer); + } + zend_object_std_dtor((zend_object *) o TSRMLS_CC); + efree(o); +} + +ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_getState, 0, 0, 0) +ZEND_END_ARG_INFO(); +static PHP_METHOD(HttpHeaderParser, getState) +{ + php_http_header_parser_object_t *parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC); + + zend_parse_parameters_none(); + /* always return the real state */ + RETVAL_LONG(php_http_header_parser_state_is(parser_obj->parser)); +} + +ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_parse, 0, 0, 3) + ZEND_ARG_INFO(0, data) + ZEND_ARG_INFO(0, flags) + ZEND_ARG_ARRAY_INFO(1, headers, 1) +ZEND_END_ARG_INFO(); +static PHP_METHOD(HttpHeaderParser, parse) +{ + php_http_header_parser_object_t *parser_obj; + zval *zmsg; + char *data_str; + int data_len; + long flags; + + php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "slz", &data_str, &data_len, &flags, &zmsg), invalid_arg, return); + + if (Z_TYPE_P(zmsg) != IS_ARRAY) { + zval_dtor(zmsg); + array_init(zmsg); + } + parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC); + php_http_buffer_append(parser_obj->buffer, data_str, data_len); + RETVAL_LONG(php_http_header_parser_parse(parser_obj->parser, parser_obj->buffer, flags, Z_ARRVAL_P(zmsg), NULL, NULL)); +} + + +static zend_function_entry php_http_header_parser_methods[] = { + PHP_ME(HttpHeaderParser, getState, ai_HttpHeaderParser_getState, ZEND_ACC_PUBLIC) + PHP_ME(HttpHeaderParser, parse, ai_HttpHeaderParser_parse, ZEND_ACC_PUBLIC) + {NULL, NULL, NULL} +}; + +PHP_MINIT_FUNCTION(http_header_parser) +{ + zend_class_entry ce; + + INIT_NS_CLASS_ENTRY(ce, "http\\Header", "Parser", php_http_header_parser_methods); + php_http_header_parser_class_entry = zend_register_internal_class(&ce TSRMLS_CC); + memcpy(&php_http_header_parser_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); + php_http_header_parser_class_entry->create_object = php_http_header_parser_object_new; + php_http_header_parser_object_handlers.clone_obj = NULL; + + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("CLEANUP"), PHP_HTTP_HEADER_PARSER_CLEANUP TSRMLS_CC); + + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_FAILURE"), PHP_HTTP_HEADER_PARSER_STATE_FAILURE TSRMLS_CC); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_START"), PHP_HTTP_HEADER_PARSER_STATE_START TSRMLS_CC); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_KEY"), PHP_HTTP_HEADER_PARSER_STATE_KEY TSRMLS_CC); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE"), PHP_HTTP_HEADER_PARSER_STATE_VALUE TSRMLS_CC); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_VALUE_EX"), PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX TSRMLS_CC); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_HEADER_DONE"), PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE TSRMLS_CC); + zend_declare_class_constant_long(php_http_header_parser_class_entry, ZEND_STRL("STATE_DONE"), PHP_HTTP_HEADER_PARSER_STATE_DONE TSRMLS_CC); + + return SUCCESS; +} + /* * Local variables: * tab-width: 4 diff --git a/php_http_header_parser.h b/php_http_header_parser.h index 4c60f6e..c4c83a9 100644 --- a/php_http_header_parser.h +++ b/php_http_header_parser.h @@ -51,6 +51,21 @@ PHP_HTTP_API void php_http_header_parser_dtor(php_http_header_parser_t *parser); PHP_HTTP_API void php_http_header_parser_free(php_http_header_parser_t **parser); PHP_HTTP_API php_http_header_parser_state_t php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg); +typedef struct php_http_header_parser_object { + zend_object zo; + zend_object_value zv; + php_http_buffer_t *buffer; + php_http_header_parser_t *parser; +} php_http_header_parser_object_t; + +PHP_HTTP_API zend_class_entry *php_http_header_parser_class_entry; + +PHP_MINIT_FUNCTION(http_header_parser); + +zend_object_value php_http_header_parser_object_new(zend_class_entry *ce TSRMLS_DC); +zend_object_value php_http_header_parser_object_new_ex(zend_class_entry *ce, php_http_header_parser_t *parser, php_http_header_parser_object_t **ptr TSRMLS_DC); +void php_http_header_parser_object_free(void *object TSRMLS_DC); + #endif /* PHP_HTTP_HEADER_PARSER_H */ /* diff --git a/php_http_message_body.c b/php_http_message_body.c index 84e84e4..6129caf 100644 --- a/php_http_message_body.c +++ b/php_http_message_body.c @@ -482,9 +482,11 @@ static size_t splitbody(void *opaque, char *buf, size_t len TSRMLS_DC) } if (!first_boundary) { + int st; /* this is not the first boundary, read rest of this message */ php_http_buffer_append(&arg->buf, buf, real_boundary - buf); - php_http_message_parser_parse(arg->parser, &arg->buf, 0, &arg->parser->message); + st=php_http_message_parser_parse(arg->parser, &arg->buf, 0, &arg->parser->message); + //fprintf(stderr, "1 st=%d\n",st); } /* move after the boundary */ @@ -524,9 +526,11 @@ static size_t splitbody(void *opaque, char *buf, size_t len TSRMLS_DC) /* let there be room for the next boundary */ if (len > arg->boundary_len) { + int st; consumed += len - arg->boundary_len; php_http_buffer_append(&arg->buf, buf, len - arg->boundary_len); - php_http_message_parser_parse(arg->parser, &arg->buf, 0, &arg->parser->message); + st=php_http_message_parser_parse(arg->parser, &arg->buf, 0, &arg->parser->message); + //fprintf(stderr, "2 st=%d\n", st); } arg->consumed += consumed; diff --git a/php_http_message_parser.c b/php_http_message_parser.c index ce2a515..3ecad86 100644 --- a/php_http_message_parser.c +++ b/php_http_message_parser.c @@ -23,7 +23,7 @@ typedef struct php_http_message_parser_state_spec { static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = { {PHP_HTTP_MESSAGE_PARSER_STATE_START, 1}, - {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 1}, + {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, 1}, @@ -181,6 +181,8 @@ php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_me if (justread) { state = php_http_message_parser_parse(parser, buf, flags, message); + } else if (php_stream_eof(s)) { + return php_http_message_parser_parse(parser, buf, flags | PHP_HTTP_MESSAGE_PARSER_CLEANUP, message); } else { return state; } @@ -242,9 +244,10 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p break; default: - php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER); - if (buffer->used) { - return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER; + if (buffer->used || !(flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP)) { + return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER); + } else { + php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE); } } break; diff --git a/tests/headerparser001.phpt b/tests/headerparser001.phpt new file mode 100644 index 0000000..0a1eb37 --- /dev/null +++ b/tests/headerparser001.phpt @@ -0,0 +1,61 @@ +--TEST-- +header parser +--SKIPIF-- + +--FILE-- +"FAILURE",0=>"START","KEY","VALUE","VALUE_EX","HEADER_DONE","DONE"]; +$parser = new http\Header\Parser; +do { + $state = $parser->parse($part = array_shift($headers), + $headers ? 0 : http\Header\Parser::CLEANUP, + $result); + printf("%2\$-32s | %1\$s\n", $states[$state], addcslashes($part, "\r\n\t\0")); +} while ($headers && $state !== http\Header\Parser::STATE_FAILURE); + +var_dump($result); + +?> +===DONE=== +--EXPECT-- +Test +One: | VALUE +header\n | VALUE_EX +Two: header\n\tlines\n | VALUE_EX +Three | KEY +: header\n lines\n here\n | VALUE_EX +More: than one header\n | VALUE_EX +More: | VALUE +than: | VALUE +you: | VALUE +expect\n | VALUE_EX +\n | DONE +array(4) { + ["One"]=> + string(6) "header" + ["Two"]=> + string(12) "header lines" + ["Three"]=> + string(17) "header lines here" + ["More"]=> + array(2) { + [0]=> + string(15) "than one header" + [1]=> + string(17) "than: you: expect" + } +} +===DONE===