X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=blobdiff_plain;f=php_http_message_parser.c;h=92c8e4073aad391bd5f25bb9c2be32605167af88;hp=2438fc13e31aaa427a88d0e21403e565c7568d2c;hb=468e8d748d365811af4ce890fd8fc4c1f88cc08a;hpb=16ecea28f2bbcf20d69dedc6611a959923720656 diff --git a/php_http_message_parser.c b/php_http_message_parser.c index 2438fc1..92c8e40 100644 --- a/php_http_message_parser.c +++ b/php_http_message_parser.c @@ -23,19 +23,20 @@ typedef struct php_http_message_parser_state_spec { static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = { {PHP_HTTP_MESSAGE_PARSER_STATE_START, 1}, - {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 1}, + {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, 1}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, 1}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, 1}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, 0}, + {PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_DONE, 0} }; #if DBG_PARSER const char *php_http_message_parser_state_name(php_http_message_parser_state_t state) { - const char *states[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "DONE"}; + const char *states[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "UPDATE_CL", "DONE"}; if (state < 0 || state > (sizeof(states)/sizeof(char*))-1) { return "FAILURE"; @@ -58,19 +59,21 @@ php_http_message_parser_t *php_http_message_parser_init(php_http_message_parser_ php_http_message_parser_state_t php_http_message_parser_state_push(php_http_message_parser_t *parser, unsigned argc, ...) { - php_http_message_parser_state_t state; + php_http_message_parser_state_t state = PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE; va_list va_args; unsigned i; - /* short circuit */ - ZEND_PTR_STACK_RESIZE_IF_NEEDED((&parser->stack), argc); + if (argc > 0) { + /* short circuit */ + ZEND_PTR_STACK_RESIZE_IF_NEEDED((&parser->stack), argc); - va_start(va_args, argc); - for (i = 0; i < argc; ++i) { - state = va_arg(va_args, php_http_message_parser_state_t); - zend_ptr_stack_push(&parser->stack, (void *) state); + va_start(va_args, argc); + for (i = 0; i < argc; ++i) { + state = va_arg(va_args, php_http_message_parser_state_t); + zend_ptr_stack_push(&parser->stack, (void *) state); + } + va_end(va_args); } - va_end(va_args); return state; } @@ -120,18 +123,30 @@ php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_me if (!buf->data) { php_http_buffer_resize_ex(buf, 0x1000, 1, 0); } - - while (!php_stream_eof(s)) { + while (1) { size_t justread = 0; #if DBG_PARSER fprintf(stderr, "#SP: %s (f:%u)\n", php_http_message_parser_state_name(state), flags); #endif + /* resize if needed */ + if (buf->free < 0x1000) { + php_http_buffer_resize_ex(buf, 0x1000, 1, 0); + } switch (state) { case PHP_HTTP_MESSAGE_PARSER_STATE_START: case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER: case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE: /* read line */ php_stream_get_line(s, buf->data + buf->used, buf->free, &justread); + /* if we fail reading a whole line, try a single char */ + if (!justread) { + int c = php_stream_getc(s); + + if (c != EOF) { + char s[1] = {c}; + justread = php_http_buffer_append(buf, s, 1); + } + } php_http_buffer_account(buf, justread); break; @@ -166,6 +181,7 @@ php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_me case PHP_HTTP_MESSAGE_PARSER_STATE_BODY: case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE: + case PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL: /* should not occur */ abort(); break; @@ -177,7 +193,9 @@ php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_me if (justread) { state = php_http_message_parser_parse(parser, buf, flags, message); - } else { + } else if (php_stream_eof(s)) { + return php_http_message_parser_parse(parser, buf, flags | PHP_HTTP_MESSAGE_PARSER_CLEANUP, message); + } else { return state; } } @@ -237,9 +255,10 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p break; default: - php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER); - if (buffer->used) { - return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER; + if (buffer->used || !(flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP)) { + return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER); + } else { + php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE); } } break; @@ -247,26 +266,39 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE: { - zval h, *h_loc = NULL, *h_con = NULL, *h_cl, *h_cr, *h_te, *h_ce; + zval h, *h_ptr, *h_loc = NULL, *h_con = NULL, *h_ce; + zend_bool chunked = 0; + zend_long content_length = -1; + zend_string *content_range = NULL; + + /* Content-Range has higher precedence than Content-Length, + * and content-length denotes the original length of the entity, + * so let's *NOT* remove CR/CL, because that would fundamentally + * change the meaning of the whole message + */ + if ((h_ptr = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding")))) { + zend_string *zs = zval_get_string(h_ptr); + + chunked = zend_string_equals_literal(zs, "chunked"); + zend_string_release(zs); - if ((h_te = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding")))) { - Z_TRY_ADDREF_P(h_te); - zend_hash_str_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", lenof("X-Original-Transfer-Encoding"), h_te); + Z_TRY_ADDREF_P(h_ptr); + zend_hash_str_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", lenof("X-Original-Transfer-Encoding"), h_ptr); zend_hash_str_del(&(*message)->hdrs, "Transfer-Encoding", lenof("Transfer-Encoding")); - } - if ((h_cl = php_http_message_header(*message, ZEND_STRL("Content-Length")))) { - Z_TRY_ADDREF_P(h_cl); - zend_hash_str_update(&(*message)->hdrs, "X-Original-Content-Length", lenof("X-Original-Content-Length"), h_cl); - } - if ((h_cr = php_http_message_header(*message, ZEND_STRL("Content-Range")))) { - Z_TRY_ADDREF_P(h_cr); - zend_hash_str_update(&(*message)->hdrs, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), h_cr); - zend_hash_str_del(&(*message)->hdrs, "Content-Range", lenof("Content-Range")); + + /* reset */ + ZVAL_LONG(&h, 0); + zend_hash_str_update(&(*message)->hdrs, "Content-Length", lenof("Content-Length"), &h); + } else if ((h_ptr = php_http_message_header(*message, ZEND_STRL("Content-Length")))) { + content_length = zval_get_long(h_ptr); + Z_TRY_ADDREF_P(h_ptr); + zend_hash_str_update(&(*message)->hdrs, "X-Original-Content-Length", lenof("X-Original-Content-Length"), h_ptr); } - /* default */ - ZVAL_LONG(&h, 0); - zend_hash_str_update(&(*message)->hdrs, "Content-Length", lenof("Content-Length"), &h); + if ((content_range = php_http_message_header_string(*message, ZEND_STRL("Content-Range")))) { + ZVAL_STR_COPY(&h, content_range); + zend_hash_str_update(&(*message)->hdrs, "Content-Range", lenof("Content-Range"), &h); + } /* so, if curl sees a 3xx code, a Location header and a Connection:close header * it decides not to read the response body. @@ -277,24 +309,22 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p && (h_loc = php_http_message_header(*message, ZEND_STRL("Location"))) && (h_con = php_http_message_header(*message, ZEND_STRL("Connection"))) ) { - if (php_http_match(Z_STRVAL_P(h_con), "close", PHP_HTTP_MATCH_WORD)) { + zend_string *con = zval_get_string(h_con); + + if (php_http_match(con->val, "close", PHP_HTTP_MATCH_WORD)) { + zend_string_release(con); php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE); - zval_ptr_dtor(h_loc); - zval_ptr_dtor(h_con); break; } - } - if (h_loc) { - zval_ptr_dtor(h_loc); - } - if (h_con) { - zval_ptr_dtor(h_con); + zend_string_release(con); } if ((h_ce = php_http_message_header(*message, ZEND_STRL("Content-Encoding")))) { - if (php_http_match(Z_STRVAL_P(h_ce), "gzip", PHP_HTTP_MATCH_WORD) - || php_http_match(Z_STRVAL_P(h_ce), "x-gzip", PHP_HTTP_MATCH_WORD) - || php_http_match(Z_STRVAL_P(h_ce), "deflate", PHP_HTTP_MATCH_WORD) + zend_string *ce = zval_get_string(h_ce); + + if (php_http_match(ce->val, "gzip", PHP_HTTP_MATCH_WORD) + || php_http_match(ce->val, "x-gzip", PHP_HTTP_MATCH_WORD) + || php_http_match(ce->val, "deflate", PHP_HTTP_MATCH_WORD) ) { if (parser->inflate) { php_http_encoding_stream_reset(&parser->inflate); @@ -305,47 +335,29 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p zend_hash_str_update(&(*message)->hdrs, "X-Original-Content-Encoding", lenof("X-Original-Content-Encoding"), h_ce); zend_hash_str_del(&(*message)->hdrs, "Content-Encoding", lenof("Content-Encoding")); } + zend_string_release(ce); } if ((flags & PHP_HTTP_MESSAGE_PARSER_DUMB_BODIES)) { php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB); } else { - if (h_te) { - if (strstr(Z_STRVAL_P(h_te), "chunked")) { - parser->dechunk = php_http_encoding_stream_init(parser->dechunk, php_http_encoding_stream_get_dechunk_ops(), 0); - php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED); - break; - } - } - - if (h_cl) { - char *stop; - - if (Z_TYPE_P(h_cl) == IS_STRING) { - parser->body_length = strtoul(Z_STRVAL_P(h_cl), &stop, 10); - - if (stop != Z_STRVAL_P(h_cl)) { - php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); - break; - } - } else if (Z_TYPE_P(h_cl) == IS_LONG) { - parser->body_length = Z_LVAL_P(h_cl); - php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); - break; - } + if (chunked) { + parser->dechunk = php_http_encoding_stream_init(parser->dechunk, php_http_encoding_stream_get_dechunk_ops(), 0); + php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED); + break; } - if (h_cr) { + if (content_range) { ulong total = 0, start = 0, end = 0; - if (!strncasecmp(Z_STRVAL_P(h_cr), "bytes", lenof("bytes")) - && ( Z_STRVAL_P(h_cr)[lenof("bytes")] == ':' - || Z_STRVAL_P(h_cr)[lenof("bytes")] == ' ' - || Z_STRVAL_P(h_cr)[lenof("bytes")] == '=' + if (!strncasecmp(content_range->val, "bytes", lenof("bytes")) + && ( content_range->val[lenof("bytes")] == ':' + || content_range->val[lenof("bytes")] == ' ' + || content_range->val[lenof("bytes")] == '=' ) ) { char *total_at = NULL, *end_at = NULL; - char *start_at = Z_STRVAL_P(h_cr) + sizeof("bytes"); + char *start_at = content_range->val + sizeof("bytes"); start = strtoul(start_at, &end_at, 10); if (end_at) { @@ -354,15 +366,23 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p total = strtoul(total_at + 1, NULL, 10); } - if (end >= start && (!total || end < total)) { + if (end >= start && (!total || end <= total)) { parser->body_length = end + 1 - start; php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); + zend_string_release(content_range); break; } } } + + zend_string_release(content_range); } + if (content_length >= 0) { + parser->body_length = content_length; + php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); + break; + } if ((*message)->type == PHP_HTTP_REQUEST) { php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE); @@ -376,8 +396,6 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p case PHP_HTTP_MESSAGE_PARSER_STATE_BODY: { if (len) { - zval zcl; - if (parser->inflate) { char *dec_str = NULL; size_t dec_len; @@ -394,10 +412,6 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p } php_stream_write(php_http_message_body_stream((*message)->body), str, len); - - /* keep track */ - ZVAL_LONG(&zcl, php_http_message_body_size((*message)->body)); - zend_hash_str_update(&(*message)->hdrs, "Content-Length", lenof("Content-Length"), &zcl); } if (cut) { @@ -470,7 +484,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p { php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE); - if (parser->dechunk) { + if (parser->dechunk && parser->dechunk->ctx) { char *dec_str = NULL; size_t dec_len; @@ -483,14 +497,24 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p str = dec_str; len = dec_len; cut = 0; - php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY); + php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL, PHP_HTTP_MESSAGE_PARSER_STATE_BODY); } } break; } - case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: { + case PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL: + { + zval zcl; + + ZVAL_LONG(&zcl, php_http_message_body_size((*message)->body)); + zend_hash_str_update(&(*message)->hdrs, "Content-Length", lenof("Content-Length"), &zcl); + break; + } + + case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: + { char *ptr = buffer->data; while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) { @@ -522,7 +546,7 @@ php_http_message_parser_object_t *php_http_message_parser_object_new_ex(zend_cla { php_http_message_parser_object_t *o; - o = ecalloc(1, sizeof(php_http_message_parser_object_t) + (ce->default_properties_count - 1) * sizeof(zval)); + o = ecalloc(1, sizeof(*o) + zend_object_properties_size(ce)); zend_object_std_init(&o->zo, ce); object_properties_init(&o->zo, ce); @@ -578,7 +602,9 @@ static PHP_METHOD(HttpMessageParser, parse) php_http_buffer_append(&parser_obj->buffer, data_str, data_len); RETVAL_LONG(php_http_message_parser_parse(parser_obj->parser, &parser_obj->buffer, flags, &parser_obj->parser->message)); + ZVAL_DEREF(zmsg); zval_dtor(zmsg); + ZVAL_NULL(zmsg); if (parser_obj->parser->message) { php_http_message_t *msg_cpy = php_http_message_copy(parser_obj->parser->message, NULL); php_http_message_object_t *msg_obj = php_http_message_object_new_ex(php_http_message_class_entry, msg_cpy); @@ -608,7 +634,9 @@ static PHP_METHOD(HttpMessageParser, stream) parser_obj = PHP_HTTP_OBJ(NULL, getThis()); RETVAL_LONG(php_http_message_parser_parse_stream(parser_obj->parser, &parser_obj->buffer, s, flags, &parser_obj->parser->message)); + ZVAL_DEREF(zmsg); zval_dtor(zmsg); + ZVAL_NULL(zmsg); if (parser_obj->parser->message) { php_http_message_t *msg_cpy = php_http_message_copy(parser_obj->parser->message, NULL); php_http_message_object_t *msg_obj = php_http_message_object_new_ex(php_http_message_class_entry, msg_cpy); @@ -649,6 +677,7 @@ PHP_MINIT_FUNCTION(http_message_parser) zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_BODY_LENGTH"), PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_BODY_CHUNKED"), PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED); zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_BODY_DONE"), PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE); + zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_UPDATE_CL"), PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL); zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_DONE"), PHP_HTTP_MESSAGE_PARSER_STATE_DONE); return SUCCESS;