release 2.4.0
[m6w6/ext-http] / php_http_message_parser.c
index 0d11bf6aed153a2843ec87d6c5af85ae57021e07..3b55efbab7b295a9cd210fa57cb409769a3a97e6 100644 (file)
@@ -23,19 +23,20 @@ typedef struct php_http_message_parser_state_spec {
 
 static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = {
                {PHP_HTTP_MESSAGE_PARSER_STATE_START,                   1},
-               {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER,                  1},
+               {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER,                  0},
                {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE,             0},
                {PHP_HTTP_MESSAGE_PARSER_STATE_BODY,                    0},
                {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB,               1},
                {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH,             1},
                {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED,    1},
                {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE,               0},
+               {PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL,               0},
                {PHP_HTTP_MESSAGE_PARSER_STATE_DONE,                    0}
 };
 
 #if DBG_PARSER
 const char *php_http_message_parser_state_name(php_http_message_parser_state_t state) {
-       const char *states[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "DONE"};
+       const char *states[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "UPDATE_CL", "DONE"};
        
        if (state < 0 || state > (sizeof(states)/sizeof(char*))-1) {
                return "FAILURE";
@@ -115,71 +116,91 @@ void php_http_message_parser_free(php_http_message_parser_t **parser)
        }
 }
 
-php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_stream *s, unsigned flags, php_http_message_t **message)
+php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_http_buffer_t *buf, php_stream *s, unsigned flags, php_http_message_t **message)
 {
-       php_http_buffer_t buf;
        php_http_message_parser_state_t state = PHP_HTTP_MESSAGE_PARSER_STATE_START;
        TSRMLS_FETCH_FROM_CTX(parser->ts);
 
-       php_http_buffer_init_ex(&buf, 0x1000, PHP_HTTP_BUFFER_INIT_PREALLOC);
-
-       while (!php_stream_eof(s)) {
-               size_t len = 0;
+       if (!buf->data) {
+               php_http_buffer_resize_ex(buf, 0x1000, 1, 0);
+       }
+       while (1) {
+               size_t justread = 0;
 #if DBG_PARSER
                fprintf(stderr, "#SP: %s (f:%u)\n", php_http_message_parser_state_name(state), flags);
 #endif
+               /* resize if needed */
+               if (buf->free < 0x1000) {
+                       php_http_buffer_resize_ex(buf, 0x1000, 1, 0);
+               }
                switch (state) {
                        case PHP_HTTP_MESSAGE_PARSER_STATE_START:
                        case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
                        case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
                                /* read line */
-                               php_stream_get_line(s, buf.data + buf.used, buf.free, &len);
-                               php_http_buffer_account(&buf, len);
+                               php_stream_get_line(s, buf->data + buf->used, buf->free, &justread);
+                               /* if we fail reading a whole line, try a single char */
+                               if (!justread) {
+                                       int c = php_stream_getc(s);
+
+                                       if (c != EOF) {
+                                               char s[1] = {c};
+                                               justread = php_http_buffer_append(buf, s, 1);
+                                       }
+                               }
+                               php_http_buffer_account(buf, justread);
                                break;
 
                        case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
                                /* read all */
-                               php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, buf.free));
+                               justread = php_stream_read(s, buf->data + buf->used, buf->free);
+                               php_http_buffer_account(buf, justread);
                                break;
 
                        case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
                                /* read body_length */
-                               php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, MIN(buf.free, parser->body_length)));
+                               justread = php_stream_read(s, buf->data + buf->used, MIN(buf->free, parser->body_length));
+                               php_http_buffer_account(buf, justread);
                                break;
 
                        case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
                                /* duh, this is very naive */
-                               if (len) {
-                                       size_t read = php_stream_read(s, buf.data + buf.used, MIN(len, buf.free));
+                               if (parser->body_length) {
+                                       justread = php_stream_read(s, buf->data + buf->used, MIN(parser->body_length, buf->free));
 
-                                       php_http_buffer_account(&buf, read);
+                                       php_http_buffer_account(buf, justread);
 
-                                       len -= read;
+                                       parser->body_length -= justread;
                                } else {
-                                       php_http_buffer_resize(&buf, 24);
-                                       php_stream_get_line(s, buf.data, buf.free, &len);
-                                       php_http_buffer_account(&buf, len);
+                                       php_http_buffer_resize(buf, 24);
+                                       php_stream_get_line(s, buf->data, buf->free, &justread);
+                                       php_http_buffer_account(buf, justread);
 
-                                       len = strtoul(buf.data + buf.used - len, NULL, 16);
+                                       parser->body_length = strtoul(buf->data + buf->used - justread, NULL, 16);
                                }
                                break;
 
                        case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
                        case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
+                       case PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL:
                                /* should not occur */
                                abort();
                                break;
 
                        case PHP_HTTP_MESSAGE_PARSER_STATE_DONE:
                        case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
-                               php_http_buffer_dtor(&buf);
                                return php_http_message_parser_state_is(parser);
                }
 
-               state = php_http_message_parser_parse(parser, &buf, flags, message);
+               if (justread) {
+                       state = php_http_message_parser_parse(parser, buf, flags, message);
+               } else if (php_stream_eof(s)) {
+                       return php_http_message_parser_parse(parser, buf, flags | PHP_HTTP_MESSAGE_PARSER_CLEANUP, message);
+               } else {
+                       return state;
+               }
        }
 
-       php_http_buffer_dtor(&buf);
        return PHP_HTTP_MESSAGE_PARSER_STATE_DONE;
 }
 
@@ -236,9 +257,10 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                                                break;
 
                                        default:
-                                               php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
-                                               if (buffer->used) {
-                                                       return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER;
+                                               if (buffer->used || !(flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP)) {
+                                                       return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
+                                               } else {
+                                                       php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
                                                }
                                }
                                break;
@@ -248,23 +270,32 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                        {
                                zval *h, *h_loc = NULL, *h_con = NULL, **h_cl = NULL, **h_cr = NULL, **h_te = NULL;
 
+                               /* Content-Range has higher precedence than Content-Length,
+                                * and content-length denotes the original length of the entity,
+                                * so let's *NOT* remove CR/CL, because that would fundamentally
+                                * change the meaning of the whole message
+                                */
                                if ((h = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding"), 1))) {
-                                       zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), &h, sizeof(zval *), (void *) &h_te);
+                                       zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), (void *) &h, sizeof(zval *), (void *) &h_te);
                                        zend_hash_del(&(*message)->hdrs, "Transfer-Encoding", sizeof("Transfer-Encoding"));
+
+                                       /* reset */
+                                       MAKE_STD_ZVAL(h);
+                                       ZVAL_LONG(h, 0);
+                                       zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), (void *) &h, sizeof(zval *), NULL);
+                               } else if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) {
+                                       zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), (void *) &h, sizeof(zval *), (void *) &h_cl);
                                }
-                               if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) {
-                                       zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), &h, sizeof(zval *), (void *) &h_cl);
-                               }
+
                                if ((h = php_http_message_header(*message, ZEND_STRL("Content-Range"), 1))) {
-                                       zend_hash_update(&(*message)->hdrs, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), &h, sizeof(zval *), (void *) &h_cr);
-                                       zend_hash_del(&(*message)->hdrs, "Content-Range", sizeof("Content-Range"));
+                                       zend_hash_find(&(*message)->hdrs, ZEND_STRS("Content-Range"), (void *) &h_cr);
+                                       if (h != *h_cr) {
+                                               zend_hash_update(&(*message)->hdrs, "Content-Range", sizeof("Content-Range"), &h, sizeof(zval *), (void *) &h_cr);
+                                       } else {
+                                               zval_ptr_dtor(&h);
+                                       }
                                }
 
-                               /* default */
-                               MAKE_STD_ZVAL(h);
-                               ZVAL_LONG(h, 0);
-                               zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &h, sizeof(zval *), NULL);
-
                                /* so, if curl sees a 3xx code, a Location header and a Connection:close header
                                 * it decides not to read the response body.
                                 */
@@ -316,30 +347,13 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                                                }
                                        }
 
-                                       if (h_cl) {
-                                               char *stop;
-
-                                               if (Z_TYPE_PP(h_cl) == IS_STRING) {
-                                                       parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10);
-
-                                                       if (stop != Z_STRVAL_PP(h_cl)) {
-                                                               php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
-                                                               break;
-                                                       }
-                                               } else if (Z_TYPE_PP(h_cl) == IS_LONG) {
-                                                       parser->body_length = Z_LVAL_PP(h_cl);
-                                                       php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
-                                                       break;
-                                               }
-                                       }
-
                                        if (h_cr) {
                                                ulong total = 0, start = 0, end = 0;
 
                                                if (!strncasecmp(Z_STRVAL_PP(h_cr), "bytes", lenof("bytes"))
-                                               && (    Z_STRVAL_P(h)[lenof("bytes")] == ':'
-                                                       ||      Z_STRVAL_P(h)[lenof("bytes")] == ' '
-                                                       ||      Z_STRVAL_P(h)[lenof("bytes")] == '='
+                                               && (    Z_STRVAL_PP(h_cr)[lenof("bytes")] == ':'
+                                                       ||      Z_STRVAL_PP(h_cr)[lenof("bytes")] == ' '
+                                                       ||      Z_STRVAL_PP(h_cr)[lenof("bytes")] == '='
                                                        )
                                                ) {
                                                        char *total_at = NULL, *end_at = NULL;
@@ -352,7 +366,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                                                                        total = strtoul(total_at + 1, NULL, 10);
                                                                }
 
-                                                               if (end >= start && (!total || end < total)) {
+                                                               if (end >= start && (!total || end <= total)) {
                                                                        parser->body_length = end + 1 - start;
                                                                        php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
                                                                        break;
@@ -361,6 +375,22 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                                                }
                                        }
 
+                                       if (h_cl) {
+                                               char *stop;
+
+                                               if (Z_TYPE_PP(h_cl) == IS_STRING) {
+                                                       parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10);
+
+                                                       if (stop != Z_STRVAL_PP(h_cl)) {
+                                                               php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
+                                                               break;
+                                                       }
+                                               } else if (Z_TYPE_PP(h_cl) == IS_LONG) {
+                                                       parser->body_length = Z_LVAL_PP(h_cl);
+                                                       php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
+                                                       break;
+                                               }
+                                       }
 
                                        if ((*message)->type == PHP_HTTP_REQUEST) {
                                                php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
@@ -374,8 +404,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                        case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
                        {
                                if (len) {
-                                       zval *zcl;
-
+                                       /* FIXME: what if we re-use the parser? */
                                        if (parser->inflate) {
                                                char *dec_str = NULL;
                                                size_t dec_len;
@@ -385,7 +414,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                                                }
 
                                                if (str != buffer->data) {
-                                                       STR_FREE(str);
+                                                       PTR_FREE(str);
                                                }
                                                str = dec_str;
                                                len = dec_len;
@@ -393,10 +422,6 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
 
                                        php_stream_write(php_http_message_body_stream((*message)->body), str, len);
 
-                                       /* keep track */
-                                       MAKE_STD_ZVAL(zcl);
-                                       ZVAL_LONG(zcl, php_http_message_body_size((*message)->body));
-                                       zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL);
                                }
 
                                if (cut) {
@@ -404,7 +429,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                                }
 
                                if (str != buffer->data) {
-                                       STR_FREE(str);
+                                       PTR_FREE(str);
                                }
 
                                str = NULL;
@@ -469,7 +494,7 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                        {
                                php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
 
-                               if (parser->dechunk) {
+                               if (parser->dechunk && parser->dechunk->ctx) {
                                        char *dec_str = NULL;
                                        size_t dec_len;
 
@@ -482,14 +507,24 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                                                str = dec_str;
                                                len = dec_len;
                                                cut = 0;
-                                               php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
+                                               php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
                                        }
                                }
 
                                break;
                        }
 
-                       case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: {
+                       case PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL:
+                       {
+                               zval *zcl;
+                               MAKE_STD_ZVAL(zcl);
+                               ZVAL_LONG(zcl, php_http_message_body_size((*message)->body));
+                               zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL);
+                               break;
+                       }
+
+                       case PHP_HTTP_MESSAGE_PARSER_STATE_DONE:
+                       {
                                char *ptr = buffer->data;
 
                                while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
@@ -588,8 +623,8 @@ static PHP_METHOD(HttpMessageParser, parse)
 
        zval_dtor(zmsg);
        if (parser_obj->parser->message) {
-                       ZVAL_OBJVAL(zmsg, php_http_message_object_new_ex(php_http_message_class_entry, php_http_message_copy(parser_obj->parser->message, NULL), NULL TSRMLS_CC), 0);
-               }
+               ZVAL_OBJVAL(zmsg, php_http_message_object_new_ex(php_http_message_class_entry, php_http_message_copy(parser_obj->parser->message, NULL), NULL TSRMLS_CC), 0);
+       }
 }
 
 ZEND_BEGIN_ARG_INFO_EX(ai_HttpMessageParser_stream, 0, 0, 3)
@@ -612,7 +647,7 @@ static PHP_METHOD(HttpMessageParser, stream)
        zend_restore_error_handling(&zeh TSRMLS_CC);
 
        parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC);
-       RETVAL_LONG(php_http_message_parser_parse_stream(parser_obj->parser, s, flags, &parser_obj->parser->message));
+       RETVAL_LONG(php_http_message_parser_parse_stream(parser_obj->parser, parser_obj->buffer, s, flags, &parser_obj->parser->message));
 
        zval_dtor(zmsg);
        if (parser_obj->parser->message) {
@@ -651,6 +686,7 @@ PHP_MINIT_FUNCTION(http_message_parser)
        zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_BODY_LENGTH"), PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH TSRMLS_CC);
        zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_BODY_CHUNKED"), PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED TSRMLS_CC);
        zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_BODY_DONE"), PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE TSRMLS_CC);
+       zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_UPDATE_CL"), PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL TSRMLS_CC);
        zend_declare_class_constant_long(php_http_message_parser_class_entry, ZEND_STRL("STATE_DONE"), PHP_HTTP_MESSAGE_PARSER_STATE_DONE TSRMLS_CC);
 
        return SUCCESS;