X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=blobdiff_plain;f=php_http_message_parser.c;h=3b55efbab7b295a9cd210fa57cb409769a3a97e6;hp=0d11bf6aed153a2843ec87d6c5af85ae57021e07;hb=refs%2Fheads%2Fv2.4.x;hpb=e7e1fe60a6bd2a521767d87e6d21920fcdad6c1d diff --git a/php_http_message_parser.c b/php_http_message_parser.c index 0d11bf6..3b55efb 100644 --- a/php_http_message_parser.c +++ b/php_http_message_parser.c @@ -23,19 +23,20 @@ typedef struct php_http_message_parser_state_spec { static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = { {PHP_HTTP_MESSAGE_PARSER_STATE_START, 1}, - {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 1}, + {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, 1}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, 1}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, 1}, {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, 0}, + {PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL, 0}, {PHP_HTTP_MESSAGE_PARSER_STATE_DONE, 0} }; #if DBG_PARSER const char *php_http_message_parser_state_name(php_http_message_parser_state_t state) { - const char *states[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "DONE"}; + const char *states[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "UPDATE_CL", "DONE"}; if (state < 0 || state > (sizeof(states)/sizeof(char*))-1) { return "FAILURE"; @@ -115,71 +116,91 @@ void php_http_message_parser_free(php_http_message_parser_t **parser) } } -php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_stream *s, unsigned flags, php_http_message_t **message) +php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_http_buffer_t *buf, php_stream *s, unsigned flags, php_http_message_t **message) { - php_http_buffer_t buf; php_http_message_parser_state_t state = PHP_HTTP_MESSAGE_PARSER_STATE_START; TSRMLS_FETCH_FROM_CTX(parser->ts); - php_http_buffer_init_ex(&buf, 0x1000, PHP_HTTP_BUFFER_INIT_PREALLOC); - - while (!php_stream_eof(s)) { - size_t len = 0; + if (!buf->data) { + php_http_buffer_resize_ex(buf, 0x1000, 1, 0); + } + while (1) { + size_t justread = 0; #if DBG_PARSER fprintf(stderr, "#SP: %s (f:%u)\n", php_http_message_parser_state_name(state), flags); #endif + /* resize if needed */ + if (buf->free < 0x1000) { + php_http_buffer_resize_ex(buf, 0x1000, 1, 0); + } switch (state) { case PHP_HTTP_MESSAGE_PARSER_STATE_START: case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER: case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE: /* read line */ - php_stream_get_line(s, buf.data + buf.used, buf.free, &len); - php_http_buffer_account(&buf, len); + php_stream_get_line(s, buf->data + buf->used, buf->free, &justread); + /* if we fail reading a whole line, try a single char */ + if (!justread) { + int c = php_stream_getc(s); + + if (c != EOF) { + char s[1] = {c}; + justread = php_http_buffer_append(buf, s, 1); + } + } + php_http_buffer_account(buf, justread); break; case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB: /* read all */ - php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, buf.free)); + justread = php_stream_read(s, buf->data + buf->used, buf->free); + php_http_buffer_account(buf, justread); break; case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH: /* read body_length */ - php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, MIN(buf.free, parser->body_length))); + justread = php_stream_read(s, buf->data + buf->used, MIN(buf->free, parser->body_length)); + php_http_buffer_account(buf, justread); break; case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED: /* duh, this is very naive */ - if (len) { - size_t read = php_stream_read(s, buf.data + buf.used, MIN(len, buf.free)); + if (parser->body_length) { + justread = php_stream_read(s, buf->data + buf->used, MIN(parser->body_length, buf->free)); - php_http_buffer_account(&buf, read); + php_http_buffer_account(buf, justread); - len -= read; + parser->body_length -= justread; } else { - php_http_buffer_resize(&buf, 24); - php_stream_get_line(s, buf.data, buf.free, &len); - php_http_buffer_account(&buf, len); + php_http_buffer_resize(buf, 24); + php_stream_get_line(s, buf->data, buf->free, &justread); + php_http_buffer_account(buf, justread); - len = strtoul(buf.data + buf.used - len, NULL, 16); + parser->body_length = strtoul(buf->data + buf->used - justread, NULL, 16); } break; case PHP_HTTP_MESSAGE_PARSER_STATE_BODY: case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE: + case PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL: /* should not occur */ abort(); break; case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE: - php_http_buffer_dtor(&buf); return php_http_message_parser_state_is(parser); } - state = php_http_message_parser_parse(parser, &buf, flags, message); + if (justread) { + state = php_http_message_parser_parse(parser, buf, flags, message); + } else if (php_stream_eof(s)) { + return php_http_message_parser_parse(parser, buf, flags | PHP_HTTP_MESSAGE_PARSER_CLEANUP, message); + } else { + return state; + } } - php_http_buffer_dtor(&buf); return PHP_HTTP_MESSAGE_PARSER_STATE_DONE; } @@ -236,9 +257,10 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p break; default: - php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER); - if (buffer->used) { - return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER; + if (buffer->used || !(flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP)) { + return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER); + } else { + php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE); } } break; @@ -248,23 +270,32 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p { zval *h, *h_loc = NULL, *h_con = NULL, **h_cl = NULL, **h_cr = NULL, **h_te = NULL; + /* Content-Range has higher precedence than Content-Length, + * and content-length denotes the original length of the entity, + * so let's *NOT* remove CR/CL, because that would fundamentally + * change the meaning of the whole message + */ if ((h = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding"), 1))) { - zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), &h, sizeof(zval *), (void *) &h_te); + zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), (void *) &h, sizeof(zval *), (void *) &h_te); zend_hash_del(&(*message)->hdrs, "Transfer-Encoding", sizeof("Transfer-Encoding")); + + /* reset */ + MAKE_STD_ZVAL(h); + ZVAL_LONG(h, 0); + zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), (void *) &h, sizeof(zval *), NULL); + } else if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) { + zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), (void *) &h, sizeof(zval *), (void *) &h_cl); } - if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) { - zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), &h, sizeof(zval *), (void *) &h_cl); - } + if ((h = php_http_message_header(*message, ZEND_STRL("Content-Range"), 1))) { - zend_hash_update(&(*message)->hdrs, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), &h, sizeof(zval *), (void *) &h_cr); - zend_hash_del(&(*message)->hdrs, "Content-Range", sizeof("Content-Range")); + zend_hash_find(&(*message)->hdrs, ZEND_STRS("Content-Range"), (void *) &h_cr); + if (h != *h_cr) { + zend_hash_update(&(*message)->hdrs, "Content-Range", sizeof("Content-Range"), &h, sizeof(zval *), (void *) &h_cr); + } else { + zval_ptr_dtor(&h); + } } - /* default */ - MAKE_STD_ZVAL(h); - ZVAL_LONG(h, 0); - zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &h, sizeof(zval *), NULL); - /* so, if curl sees a 3xx code, a Location header and a Connection:close header * it decides not to read the response body. */ @@ -316,30 +347,13 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p } } - if (h_cl) { - char *stop; - - if (Z_TYPE_PP(h_cl) == IS_STRING) { - parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10); - - if (stop != Z_STRVAL_PP(h_cl)) { - php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); - break; - } - } else if (Z_TYPE_PP(h_cl) == IS_LONG) { - parser->body_length = Z_LVAL_PP(h_cl); - php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); - break; - } - } - if (h_cr) { ulong total = 0, start = 0, end = 0; if (!strncasecmp(Z_STRVAL_PP(h_cr), "bytes", lenof("bytes")) - && ( Z_STRVAL_P(h)[lenof("bytes")] == ':' - || Z_STRVAL_P(h)[lenof("bytes")] == ' ' - || Z_STRVAL_P(h)[lenof("bytes")] == '=' + && ( Z_STRVAL_PP(h_cr)[lenof("bytes")] == ':' + || Z_STRVAL_PP(h_cr)[lenof("bytes")] == ' ' + || Z_STRVAL_PP(h_cr)[lenof("bytes")] == '=' ) ) { char *total_at = NULL, *end_at = NULL; @@ -352,7 +366,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p total = strtoul(total_at + 1, NULL, 10); } - if (end >= start && (!total || end < total)) { + if (end >= start && (!total || end <= total)) { parser->body_length = end + 1 - start; php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); break; @@ -361,6 +375,22 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p } } + if (h_cl) { + char *stop; + + if (Z_TYPE_PP(h_cl) == IS_STRING) { + parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10); + + if (stop != Z_STRVAL_PP(h_cl)) { + php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); + break; + } + } else if (Z_TYPE_PP(h_cl) == IS_LONG) { + parser->body_length = Z_LVAL_PP(h_cl); + php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH); + break; + } + } if ((*message)->type == PHP_HTTP_REQUEST) { php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE); @@ -374,8 +404,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p case PHP_HTTP_MESSAGE_PARSER_STATE_BODY: { if (len) { - zval *zcl; - + /* FIXME: what if we re-use the parser? */ if (parser->inflate) { char *dec_str = NULL; size_t dec_len; @@ -385,7 +414,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p } if (str != buffer->data) { - STR_FREE(str); + PTR_FREE(str); } str = dec_str; len = dec_len; @@ -393,10 +422,6 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p php_stream_write(php_http_message_body_stream((*message)->body), str, len); - /* keep track */ - MAKE_STD_ZVAL(zcl); - ZVAL_LONG(zcl, php_http_message_body_size((*message)->body)); - zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL); } if (cut) { @@ -404,7 +429,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p } if (str != buffer->data) { - STR_FREE(str); + PTR_FREE(str); } str = NULL; @@ -469,7 +494,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p { php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE); - if (parser->dechunk) { + if (parser->dechunk && parser->dechunk->ctx) { char *dec_str = NULL; size_t dec_len; @@ -482,14 +507,24 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p str = dec_str; len = dec_len; cut = 0; - php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY); + php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL, PHP_HTTP_MESSAGE_PARSER_STATE_BODY); } } break; } - case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: { + case PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL: + { + zval *zcl; + MAKE_STD_ZVAL(zcl); + ZVAL_LONG(zcl, php_http_message_body_size((*message)->body)); + zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL); + break; + } + + case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: + { char *ptr = buffer->data; while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) { @@ -588,8 +623,8 @@ static PHP_METHOD(HttpMessageParser, parse) zval_dtor(zmsg); if (parser_obj->parser->message) { - ZVAL_OBJVAL(zmsg, php_http_message_object_new_ex(php_http_message_class_entry, php_http_message_copy(parser_obj->parser->message, NULL), NULL TSRMLS_CC), 0); - } + ZVAL_OBJVAL(zmsg, php_http_message_object_new_ex(php_http_message_class_entry, php_http_message_copy(parser_obj->parser->message, NULL), NULL TSRMLS_CC), 0); + } } ZEND_BEGIN_ARG_INFO_EX(ai_HttpMessageParser_stream, 0, 0, 3) @@ -612,7 +647,7 @@ static PHP_METHOD(HttpMessageParser, stream) zend_restore_error_handling(&zeh TSRMLS_CC); parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC); - RETVAL_LONG(php_http_message_parser_parse_stream(parser_obj->parser, s, flags, &parser_obj->parser->message)); + RETVAL_LONG(php_http_message_parser_parse_stream(parser_obj->parser, parser_obj->buffer, s, flags, &parser_obj->parser->message)); zval_dtor(zmsg); if (parser_obj->parser->message) { @@ -651,6 +686,7 @@ PHP_MINIT_FUNCTION(http_message_parser) zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_BODY_LENGTH"), PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH TSRMLS_CC); zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_BODY_CHUNKED"), PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED TSRMLS_CC); zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_BODY_DONE"), PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE TSRMLS_CC); + zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_UPDATE_CL"), PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL TSRMLS_CC); zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_DONE"), PHP_HTTP_MESSAGE_PARSER_STATE_DONE TSRMLS_CC); return SUCCESS;