expose header parser
authorMichael Wallner <mike@php.net>
Sun, 15 Feb 2015 13:54:12 +0000 (14:54 +0100)
committerMichael Wallner <mike@php.net>
Sun, 15 Feb 2015 13:54:12 +0000 (14:54 +0100)
php_http.c
php_http_header_parser.c
php_http_header_parser.h
php_http_message_body.c
php_http_message_parser.c
tests/headerparser001.phpt [new file with mode: 0644]

index f7a0b8698f03012f2d8460532fcdb37504ea5649..17d9925b5af8d3b01409b613aeb2dca69f76545b 100644 (file)
@@ -141,6 +141,7 @@ PHP_MINIT_FUNCTION(http)
        || SUCCESS != PHP_MINIT_CALL(http_encoding)
        || SUCCESS != PHP_MINIT_CALL(http_filter)
        || SUCCESS != PHP_MINIT_CALL(http_header)
        || SUCCESS != PHP_MINIT_CALL(http_encoding)
        || SUCCESS != PHP_MINIT_CALL(http_filter)
        || SUCCESS != PHP_MINIT_CALL(http_header)
+       || SUCCESS != PHP_MINIT_CALL(http_header_parser)
        || SUCCESS != PHP_MINIT_CALL(http_message)
        || SUCCESS != PHP_MINIT_CALL(http_message_parser)
        || SUCCESS != PHP_MINIT_CALL(http_message_body)
        || SUCCESS != PHP_MINIT_CALL(http_message)
        || SUCCESS != PHP_MINIT_CALL(http_message_parser)
        || SUCCESS != PHP_MINIT_CALL(http_message_body)
index df0837df2727430de080869760daa65f4f7b8969..da02ff5f765237b76ba79d477be637c588691f7c 100644 (file)
 
 #include "php_http_api.h"
 
 
 #include "php_http_api.h"
 
+#ifndef DBG_PARSER /* may be predefined by the build to override the default below */
+#      define DBG_PARSER 0 /* non-zero enables the "#HP:" state trace written to stderr in the parse loop */
+#endif
+
 typedef struct php_http_header_parser_state_spec {
        php_http_header_parser_state_t state;
        unsigned need_data:1;
 typedef struct php_http_header_parser_state_spec {
        php_http_header_parser_state_t state;
        unsigned need_data:1;
@@ -21,7 +25,7 @@ static const php_http_header_parser_state_spec_t php_http_header_parser_states[]
                {PHP_HTTP_HEADER_PARSER_STATE_START,            1},
                {PHP_HTTP_HEADER_PARSER_STATE_KEY,                      1},
                {PHP_HTTP_HEADER_PARSER_STATE_VALUE,            1},
                {PHP_HTTP_HEADER_PARSER_STATE_START,            1},
                {PHP_HTTP_HEADER_PARSER_STATE_KEY,                      1},
                {PHP_HTTP_HEADER_PARSER_STATE_VALUE,            1},
-               {PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX,         1},
+               {PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX,         0},
                {PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE,      0},
                {PHP_HTTP_HEADER_PARSER_STATE_DONE,                     0}
 };
                {PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE,      0},
                {PHP_HTTP_HEADER_PARSER_STATE_DONE,                     0}
 };
@@ -97,9 +101,9 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b
        TSRMLS_FETCH_FROM_CTX(parser->ts);
 
        while (buffer->used || !php_http_header_parser_states[php_http_header_parser_state_is(parser)].need_data) {
        TSRMLS_FETCH_FROM_CTX(parser->ts);
 
        while (buffer->used || !php_http_header_parser_states[php_http_header_parser_state_is(parser)].need_data) {
-#if 0
-               const char *state[] = {"START", "KEY", "VALUE", "HEADER_DONE", "DONE"};
-               fprintf(stderr, "#HP: %s (avail:%zu, num:%d)\n", php_http_header_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_header_parser_state_is(parser)], buffer->used, headers?zend_hash_num_elements(headers):0);
+#if DBG_PARSER
+               const char *state[] = {"START", "KEY", "VALUE", "VALUE_EX", "HEADER_DONE", "DONE"};
+               fprintf(stderr, "#HP: %s (avail:%zu, num:%d cleanup:%u)\n", php_http_header_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_header_parser_state_is(parser)], buffer->used, headers?zend_hash_num_elements(headers):0, flags);
                _dpf(0, buffer->data, buffer->used);
 #endif
                switch (php_http_header_parser_state_pop(parser)) {
                _dpf(0, buffer->data, buffer->used);
 #endif
                switch (php_http_header_parser_state_pop(parser)) {
@@ -140,9 +144,12 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b
                                        while (PHP_HTTP_IS_CTYPE(space, *++colon) && *colon != '\n' && *colon != '\r');
                                        php_http_buffer_cut(buffer, 0, colon - buffer->data);
                                        php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
                                        while (PHP_HTTP_IS_CTYPE(space, *++colon) && *colon != '\n' && *colon != '\r');
                                        php_http_buffer_cut(buffer, 0, colon - buffer->data);
                                        php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
-                               } else {
+                               } else if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) {
                                        /* neither request/response line nor header: string */
                                        return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
                                        /* neither request/response line nor header: string */
                                        return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_FAILURE);
+                               } else {
+                                       /* keep feeding */
+                                       return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_KEY);
                                }
                                break;
                        }
                                }
                                break;
                        }
@@ -181,34 +188,26 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b
 
                                if ((eol_str = php_http_locate_bin_eol(buffer->data, buffer->used, &eol_len))) {
                                        SET_ADD_VAL(eol_str - buffer->data, eol_len);
 
                                if ((eol_str = php_http_locate_bin_eol(buffer->data, buffer->used, &eol_len))) {
                                        SET_ADD_VAL(eol_str - buffer->data, eol_len);
-
-                                       if (buffer->used) {
-                                               if (*buffer->data != '\t' && *buffer->data != ' ') {
-                                                       php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
-                                                       break;
-                                               } else {
-                                                       php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
-                                                       break;
-                                               }
-                                       }
-                               }
-
-                               if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) {
+                                       php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
+                               } else if (flags & PHP_HTTP_HEADER_PARSER_CLEANUP) {
                                        if (buffer->used) {
                                                SET_ADD_VAL(buffer->used, 0);
                                        }
                                        php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
                                } else {
                                        if (buffer->used) {
                                                SET_ADD_VAL(buffer->used, 0);
                                        }
                                        php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
                                } else {
-                                       return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
+                                       return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
                                }
                                break;
                        }
 
                        case PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX:
                                }
                                break;
                        }
 
                        case PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX:
-                               if (*buffer->data == ' ' || *buffer->data == '\t') {
+                               if (buffer->used && (*buffer->data == ' ' || *buffer->data == '\t')) {
                                        php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
                                        php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE);
-                               } else {
+                               } else if (buffer->used || (flags & PHP_HTTP_HEADER_PARSER_CLEANUP)) {
                                        php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
                                        php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE);
+                               } else {
+                                       /* keep feeding */
+                                       return php_http_header_parser_state_push(parser, 1, PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX);
                                }
                                break;
 
                                }
                                break;
 
@@ -245,6 +244,119 @@ STATUS php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_b
        return php_http_header_parser_state_is(parser);
 }
 
        return php_http_header_parser_state_is(parser);
 }
 
+
+zend_class_entry *php_http_header_parser_class_entry; /* class entry of http\Header\Parser; assigned in PHP_MINIT_FUNCTION(http_header_parser) */
+static zend_object_handlers php_http_header_parser_object_handlers; /* copy of the standard handlers with clone_obj set to NULL in MINIT */
+
+/*
+ * create_object handler of http\Header\Parser.
+ * Delegates to php_http_header_parser_object_new_ex() without a pre-existing
+ * parser and without asking for the object pointer.
+ */
+zend_object_value php_http_header_parser_object_new(zend_class_entry *ce TSRMLS_DC)
+{
+       zend_object_value created = php_http_header_parser_object_new_ex(ce, NULL, NULL TSRMLS_CC);
+
+       return created;
+}
+
+/*
+ * Allocate and register a header parser object of class `ce`.
+ *
+ * parser: parser to store in the object; when NULL a new one is created with
+ *         php_http_header_parser_init().
+ * ptr:    optional out parameter receiving the allocated object struct.
+ *
+ * Returns the zend_object_value (handle + handlers) of the new object.
+ */
+zend_object_value php_http_header_parser_object_new_ex(zend_class_entry *ce, php_http_header_parser_t *parser, php_http_header_parser_object_t **ptr TSRMLS_DC)
+{
+       php_http_header_parser_object_t *obj = ecalloc(1, sizeof(*obj));
+
+       /* zo is the leading member, so &obj->zo is the object itself viewed as a zend_object */
+       zend_object_std_init(&obj->zo, ce TSRMLS_CC);
+       object_properties_init(&obj->zo, ce);
+
+       if (ptr) {
+               *ptr = obj;
+       }
+
+       obj->parser = parser ? parser : php_http_header_parser_init(NULL TSRMLS_CC);
+       /* every object gets its own input buffer */
+       obj->buffer = php_http_buffer_new();
+
+       obj->zv.handlers = &php_http_header_parser_object_handlers;
+       obj->zv.handle = zend_objects_store_put(obj, NULL, php_http_header_parser_object_free, NULL TSRMLS_CC);
+
+       return obj->zv;
+}
+
+/*
+ * Storage destructor passed to zend_objects_store_put().
+ * Frees the parser and the buffer when present, then the standard object
+ * part and finally the object struct itself.
+ */
+void php_http_header_parser_object_free(void *object TSRMLS_DC)
+{
+       php_http_header_parser_object_t *obj = object;
+
+       if (obj->parser != NULL) {
+               php_http_header_parser_free(&obj->parser);
+       }
+       if (obj->buffer != NULL) {
+               php_http_buffer_free(&obj->buffer);
+       }
+       zend_object_std_dtor(&obj->zo TSRMLS_CC);
+       efree(obj);
+}
+
+ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_getState, 0, 0, 0)
+ZEND_END_ARG_INFO();
+/*
+ * http\Header\Parser::getState()
+ * Returns the parser's current state as an integer.
+ */
+static PHP_METHOD(HttpHeaderParser, getState)
+{
+       php_http_header_parser_object_t *self;
+
+       /* the outcome of the argument check does not influence the return value */
+       zend_parse_parameters_none();
+       self = zend_object_store_get_object(getThis() TSRMLS_CC);
+       /* always return the real state */
+       RETURN_LONG(php_http_header_parser_state_is(self->parser));
+}
+
+ZEND_BEGIN_ARG_INFO_EX(ai_HttpHeaderParser_parse, 0, 0, 3)
+       ZEND_ARG_INFO(0, data)
+       ZEND_ARG_INFO(0, flags)
+       ZEND_ARG_ARRAY_INFO(1, headers, 1)
+ZEND_END_ARG_INFO();
+/*
+ * http\Header\Parser::parse(data, flags, &headers)
+ *
+ * Appends `data` to the object's buffer and runs the header parser on that
+ * buffer with `flags`, collecting parsed headers into `headers`.
+ * Returns the state reported by php_http_header_parser_parse().
+ * Invalid arguments are reported through php_http_expect(..., invalid_arg, ...).
+ */
+static PHP_METHOD(HttpHeaderParser, parse)
+{
+       php_http_header_parser_object_t *self;
+       zval *zheaders;
+       char *chunk;
+       int chunk_len;
+       long flags;
+       long state;
+
+       php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "slz", &chunk, &chunk_len, &flags, &zheaders), invalid_arg, return);
+
+       /* anything that is not already an array is replaced by an empty array */
+       if (IS_ARRAY != Z_TYPE_P(zheaders)) {
+               zval_dtor(zheaders);
+               array_init(zheaders);
+       }
+
+       self = zend_object_store_get_object(getThis() TSRMLS_CC);
+       /* the buffer belongs to the object, so input from earlier calls is still in it */
+       php_http_buffer_append(self->buffer, chunk, chunk_len);
+       state = php_http_header_parser_parse(self->parser, self->buffer, flags, Z_ARRVAL_P(zheaders), NULL, NULL);
+       RETURN_LONG(state);
+}
+
+
+static zend_function_entry php_http_header_parser_methods[] = { /* method table handed to INIT_NS_CLASS_ENTRY() for http\Header\Parser */
+               PHP_ME(HttpHeaderParser, getState, ai_HttpHeaderParser_getState, ZEND_ACC_PUBLIC)
+               PHP_ME(HttpHeaderParser, parse, ai_HttpHeaderParser_parse, ZEND_ACC_PUBLIC)
+               {NULL, NULL, NULL} /* all-NULL entry closing the table */
+};
+
+/*
+ * Module startup for the header parser class.
+ * Registers http\Header\Parser, installs its object handlers (standard
+ * handlers with cloning disabled) and declares its class constants.
+ * Always returns SUCCESS.
+ */
+PHP_MINIT_FUNCTION(http_header_parser)
+{
+       /* class constants, declared in exactly this order */
+       static const struct {
+               const char *name;
+               size_t name_len;
+               long value;
+       } constants[] = {
+               {ZEND_STRL("CLEANUP"), PHP_HTTP_HEADER_PARSER_CLEANUP},
+               {ZEND_STRL("STATE_FAILURE"), PHP_HTTP_HEADER_PARSER_STATE_FAILURE},
+               {ZEND_STRL("STATE_START"), PHP_HTTP_HEADER_PARSER_STATE_START},
+               {ZEND_STRL("STATE_KEY"), PHP_HTTP_HEADER_PARSER_STATE_KEY},
+               {ZEND_STRL("STATE_VALUE"), PHP_HTTP_HEADER_PARSER_STATE_VALUE},
+               {ZEND_STRL("STATE_VALUE_EX"), PHP_HTTP_HEADER_PARSER_STATE_VALUE_EX},
+               {ZEND_STRL("STATE_HEADER_DONE"), PHP_HTTP_HEADER_PARSER_STATE_HEADER_DONE},
+               {ZEND_STRL("STATE_DONE"), PHP_HTTP_HEADER_PARSER_STATE_DONE}
+       };
+       zend_class_entry tmp_ce;
+       size_t i;
+
+       INIT_NS_CLASS_ENTRY(tmp_ce, "http\\Header", "Parser", php_http_header_parser_methods);
+       php_http_header_parser_class_entry = zend_register_internal_class(&tmp_ce TSRMLS_CC);
+
+       memcpy(&php_http_header_parser_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers));
+       php_http_header_parser_class_entry->create_object = php_http_header_parser_object_new;
+       /* parser objects cannot be cloned */
+       php_http_header_parser_object_handlers.clone_obj = NULL;
+
+       for (i = 0; i < sizeof(constants) / sizeof(constants[0]); ++i) {
+               zend_declare_class_constant_long(php_http_header_parser_class_entry, constants[i].name, constants[i].name_len, constants[i].value TSRMLS_CC);
+       }
+
+       return SUCCESS;
+}
+
 /*
  * Local variables:
  * tab-width: 4
 /*
  * Local variables:
  * tab-width: 4
index 4c60f6ef634b4b262eb9228855588816b19419d2..c4c83a9421a7ad87fb7b9f2ba2176578f24e1233 100644 (file)
@@ -51,6 +51,21 @@ PHP_HTTP_API void php_http_header_parser_dtor(php_http_header_parser_t *parser);
 PHP_HTTP_API void php_http_header_parser_free(php_http_header_parser_t **parser);
 PHP_HTTP_API php_http_header_parser_state_t php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg);
 
 PHP_HTTP_API void php_http_header_parser_free(php_http_header_parser_t **parser);
 PHP_HTTP_API php_http_header_parser_state_t php_http_header_parser_parse(php_http_header_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, HashTable *headers, php_http_info_callback_t callback_func, void *callback_arg);
 
+typedef struct php_http_header_parser_object { /* instance storage of the http\Header\Parser class */
+       zend_object zo; /* must stay the first member: the object code casts this struct to zend_object * */
+       zend_object_value zv; /* handle and handlers assigned in php_http_header_parser_object_new_ex() */
+       php_http_buffer_t *buffer; /* input buffer that parse() appends to and hands to the parser */
+       php_http_header_parser_t *parser; /* the wrapped parser; freed in php_http_header_parser_object_free() */
+} php_http_header_parser_object_t;
+
+/*
+ * Class entry of http\Header\Parser.
+ * Declared extern so that including this header does not create a tentative
+ * definition in every translation unit (which fails to link with -fno-common);
+ * the single definition lives in the header parser's .c file.
+ */
+extern PHP_HTTP_API zend_class_entry *php_http_header_parser_class_entry;
+
+/* Registers the class, its object handlers and its constants at module startup. */
+PHP_MINIT_FUNCTION(http_header_parser);
+
+/* create_object handler: new object with a freshly initialised parser. */
+zend_object_value php_http_header_parser_object_new(zend_class_entry *ce TSRMLS_DC);
+/* As above, but stores `parser` when non-NULL and reports the object struct through `ptr` when non-NULL. */
+zend_object_value php_http_header_parser_object_new_ex(zend_class_entry *ce, php_http_header_parser_t *parser, php_http_header_parser_object_t **ptr TSRMLS_DC);
+/* Object storage destructor: frees parser, buffer and the object struct. */
+void php_http_header_parser_object_free(void *object TSRMLS_DC);
+
+
 #endif /* PHP_HTTP_HEADER_PARSER_H */
 
 /*
 #endif /* PHP_HTTP_HEADER_PARSER_H */
 
 /*
index 84e84e4abece2361aef9bd0727dd87bd9153b3b0..6129caff1a9bc4eb2e4af7171eb267f87f8ed196 100644 (file)
@@ -482,9 +482,11 @@ static size_t splitbody(void *opaque, char *buf, size_t len TSRMLS_DC)
                        }
 
                        if (!first_boundary) {
                        }
 
                        if (!first_boundary) {
+                               int st;
                                /* this is not the first boundary, read rest of this message */
                                php_http_buffer_append(&arg->buf, buf, real_boundary - buf);
                                /* this is not the first boundary, read rest of this message */
                                php_http_buffer_append(&arg->buf, buf, real_boundary - buf);
-                               php_http_message_parser_parse(arg->parser, &arg->buf, 0, &arg->parser->message);
+                               st=php_http_message_parser_parse(arg->parser, &arg->buf, 0, &arg->parser->message);
+                               //fprintf(stderr, "1 st=%d\n",st);
                        }
 
                        /* move after the boundary */
                        }
 
                        /* move after the boundary */
@@ -524,9 +526,11 @@ static size_t splitbody(void *opaque, char *buf, size_t len TSRMLS_DC)
 
        /* let there be room for the next boundary */
        if (len > arg->boundary_len) {
 
        /* let there be room for the next boundary */
        if (len > arg->boundary_len) {
+               int st;
                consumed += len - arg->boundary_len;
                php_http_buffer_append(&arg->buf, buf, len - arg->boundary_len);
                consumed += len - arg->boundary_len;
                php_http_buffer_append(&arg->buf, buf, len - arg->boundary_len);
-               php_http_message_parser_parse(arg->parser, &arg->buf, 0, &arg->parser->message);
+               st=php_http_message_parser_parse(arg->parser, &arg->buf, 0, &arg->parser->message);
+               //fprintf(stderr, "2 st=%d\n", st);
        }
 
        arg->consumed += consumed;
        }
 
        arg->consumed += consumed;
index ce2a515b26e59174c0140e8dcfb90bea7c3c2a8a..3ecad86d0ef23e4f75662972d29f81e3ac6a85c5 100644 (file)
@@ -23,7 +23,7 @@ typedef struct php_http_message_parser_state_spec {
 
 static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = {
                {PHP_HTTP_MESSAGE_PARSER_STATE_START,                   1},
 
 static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = {
                {PHP_HTTP_MESSAGE_PARSER_STATE_START,                   1},
-               {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER,                  1},
+               {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER,                  0},
                {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE,             0},
                {PHP_HTTP_MESSAGE_PARSER_STATE_BODY,                    0},
                {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB,               1},
                {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE,             0},
                {PHP_HTTP_MESSAGE_PARSER_STATE_BODY,                    0},
                {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB,               1},
@@ -181,6 +181,8 @@ php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_me
 
                if (justread) {
                        state = php_http_message_parser_parse(parser, buf, flags, message);
 
                if (justread) {
                        state = php_http_message_parser_parse(parser, buf, flags, message);
+               } else if (php_stream_eof(s)) {
+                       return php_http_message_parser_parse(parser, buf, flags | PHP_HTTP_MESSAGE_PARSER_CLEANUP, message);
                } else  {
                        return state;
                }
                } else  {
                        return state;
                }
@@ -242,9 +244,10 @@ php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_p
                                                break;
 
                                        default:
                                                break;
 
                                        default:
-                                               php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
-                                               if (buffer->used) {
-                                                       return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER;
+                                               if (buffer->used || !(flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP)) {
+                                                       return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
+                                               } else {
+                                                       php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
                                                }
                                }
                                break;
                                                }
                                }
                                break;
diff --git a/tests/headerparser001.phpt b/tests/headerparser001.phpt
new file mode 100644 (file)
index 0000000..0a1eb37
--- /dev/null
@@ -0,0 +1,61 @@
+--TEST--
+header parser
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$headers = [ // chunks fed to the parser one per call; several headers are split across chunks
+       "One: ","header\n",
+       "Two: header\n\tlines\n", // value continued on a tab-indented line
+       "Three",": header\n lines\n here\n", // key delivered before its colon; space-indented continuations
+       "More: than one header\n",
+       "More: ", "than: ", "you: ", "expect\n", // second "More" header, delivered in four pieces
+       "\n", // last chunk; the expected state for it is DONE
+];
+
+$states = [-1=>"FAILURE",0=>"START","KEY","VALUE","VALUE_EX","HEADER_DONE","DONE"]; // state number => name, used for printing only
+$parser = new http\Header\Parser;
+do {
+       $state = $parser->parse($part = array_shift($headers), // take the next chunk off the list and feed it
+               $headers ? 0 : http\Header\Parser::CLEANUP, // CLEANUP is passed only once no chunks remain
+               $result); // receives the parsed headers
+       printf("%2\$-32s | %1\$s\n", $states[$state], addcslashes($part, "\r\n\t\0")); // prints: escaped chunk | state name
+} while ($headers && $state !== http\Header\Parser::STATE_FAILURE);
+
+var_dump($result);
+
+?>
+===DONE===
+--EXPECT--
+Test
+One:                             | VALUE
+header\n                         | VALUE_EX
+Two: header\n\tlines\n           | VALUE_EX
+Three                            | KEY
+: header\n lines\n here\n        | VALUE_EX
+More: than one header\n          | VALUE_EX
+More:                            | VALUE
+than:                            | VALUE
+you:                             | VALUE
+expect\n                         | VALUE_EX
+\n                               | DONE
+array(4) {
+  ["One"]=>
+  string(6) "header"
+  ["Two"]=>
+  string(12) "header lines"
+  ["Three"]=>
+  string(17) "header lines here"
+  ["More"]=>
+  array(2) {
+    [0]=>
+    string(15) "than one header"
+    [1]=>
+    string(17) "than: you: expect"
+  }
+}
+===DONE===