Merge branch 'master' of git.php.net:/pecl/http/pecl_http
author Remi Collet <remi@php.net>
Fri, 7 Nov 2014 06:55:46 +0000 (07:55 +0100)
committer Remi Collet <remi@php.net>
Fri, 7 Nov 2014 06:55:46 +0000 (07:55 +0100)
* 'master' of git.php.net:/pecl/http/pecl_http: (31 commits)
  use our url parser
  back to dev
  prepare 2.1.4
  Fixed bug #66891 (Unexpected HTTP 401 after NTLM authentication)
  Fixed bug #68149 (duplicate content-length with libcurl < 7.23)
  Fixed bug #68353 (QsoSSL support removed in libcurl 7.39)
  fix long/int mismatch
  update missing tests
  test ::PARSE_TOPCT
  strip comments
  prepare for using all constants together
  simplify
  separate url and parser state
  fix edge cases with @
  brain wrecked
  more explicit constant names
  path should be forgotten
  fix query&fragment; add pctenc parser option
  simplify
  update
  ...

30 files changed:
config9.m4
gen_curlinfo.php [changed mode: 0644->0755]
gen_utf8.php [new file with mode: 0755]
package.xml
php_http_api.h
php_http_client_curl.c
php_http_env.c
php_http_env.h
php_http_env_response.c
php_http_message.c
php_http_message_parser.c
php_http_message_parser.h
php_http_misc.c
php_http_url.c
php_http_url.h
php_http_utf8.h [new file with mode: 0644]
php_http_version.c
tests/bug66891.phpt [new file with mode: 0644]
tests/bug67932.phpt [new file with mode: 0644]
tests/messageparser002.phpt [new file with mode: 0644]
tests/urlparser001.phpt [new file with mode: 0644]
tests/urlparser002.phpt [new file with mode: 0644]
tests/urlparser003.phpt [new file with mode: 0644]
tests/urlparser004.phpt [new file with mode: 0644]
tests/urlparser005.phpt [new file with mode: 0644]
tests/urlparser006.phpt [new file with mode: 0644]
tests/urlparser007.phpt [new file with mode: 0644]
tests/urlparser008.phpt [new file with mode: 0644]
tests/urlparser009.phpt [new file with mode: 0644]
tests/urlparser010.phpt [new file with mode: 0644]

index 686b5ea286aaaca9349956f228a55c45eeef4457..3b1f7015554d1683469836e9548efd85b2eb0a08 100644 (file)
@@ -10,6 +10,8 @@ PHP_ARG_WITH([http-libcurl-dir], [],
 [  --with-http-libcurl-dir[=DIR]  HTTP: where to find libcurl], $PHP_HTTP, $PHP_HTTP)
 PHP_ARG_WITH([http-libevent-dir], [],
 [  --with-http-libevent-dir[=DIR] HTTP: where to find libevent], $PHP_HTTP_LIBCURL_DIR, "")
+PHP_ARG_WITH([http-libidn-dir], [],
+[  --with-http-libidn-dir[=DIR]   HTTP: where to find libidn], $PHP_HTTP_LIBCURL_DIR, "")
 
 if test "$PHP_HTTP" != "no"; then
 
@@ -96,10 +98,36 @@ dnl ----
 dnl STDC
 dnl ----
        AC_TYPE_OFF_T
+       AC_TYPE_MBSTATE_T
        dnl getdomainname() is declared in netdb.h on some platforms: AIX, OSF
-       AC_CHECK_HEADERS([netdb.h unistd.h])
+       AC_CHECK_HEADERS([netdb.h unistd.h wchar.h wctype.h arpa/inet.h])
        PHP_CHECK_FUNC(gethostname, nsl)
        PHP_CHECK_FUNC(getdomainname, nsl)
+       PHP_CHECK_FUNC(mbrtowc)
+       PHP_CHECK_FUNC(mbtowc)
+       PHP_CHECK_FUNC(iswalnum)
+       PHP_CHECK_FUNC(inet_pton)
+
+dnl ----
+dnl IDN
+dnl ----
+
+       AC_MSG_CHECKING([for idna.h])
+       IDNA_DIR=
+       for i in "$PHP_HTTP_LIBIDN_DIR" "$IDN_DIR" /usr/local /usr /opt; do
+               if test -f "$i/include/idna.h"; then
+                       IDNA_DIR=$i
+                       break;
+               fi
+       done
+       if test "x$IDNA_DIR" = "x"; then
+               AC_MSG_RESULT([not found])
+       else
+               AC_MSG_RESULT([found in $IDNA_DIR])
+               AC_DEFINE([PHP_HTTP_HAVE_IDN], [1], [Have libidn support])
+               PHP_ADD_INCLUDE($IDNA_DIR/include)
+               PHP_ADD_LIBRARY_WITH_PATH(idn, $IDNA_DIR/$PHP_LIBDIR, HTTP_SHARED_LIBADD)
+       fi
 
 dnl ----
 dnl ZLIB
old mode 100644 (file)
new mode 100755 (executable)
diff --git a/gen_utf8.php b/gen_utf8.php
new file mode 100755 (executable)
index 0000000..865a2f5
--- /dev/null
@@ -0,0 +1,90 @@
+#!/usr/bin/env php
+<?php
+/* Regenerates the table between the BEGIN::UTF8TABLE and END::UTF8TABLE markers
+ * of php_http_utf8.h from the LC_CTYPE "alpha" class of a glibc locale source
+ * (first argument, defaults to /usr/share/i18n/locales/i18n); every entry is
+ * written as {start, end, step}, with end and step 0 for a single code point. */
+
+error_reporting(E_ALL);
+/* turn every warning and notice into an exception, so that failures are fatal */
+set_error_handler(function($c, $e, $f, $l) {
+       throw new Exception("$e in $f on line $l");
+});
+
+$i18n = $argc >= 2 ? $argv[1] : "/usr/share/i18n/locales/i18n";
+
+$f = fopen($i18n, "r");
+$c = false; /* seen the LC_CTYPE header */
+$a = false; /* inside the alpha class */
+$table = ""; /* built in a string: ob_start()'s chunk size flushes big output */
+/* fgets() returns false at EOF or on error; never hand that to the offset below */
+while (false !== ($line = fgets($f))) {
+       if (!$c && $line !== "LC_CTYPE\n") {
+               continue;
+       }
+       $c = true;
+       if ($line === "END LC_CTYPE\n") {
+               break;
+       }
+       switch ($line[0]) { /* the $line{0} offset syntax was removed in PHP 8 */
+       case "%": /* comment line */
+               break;
+       case "\n": /* an empty line ends the alpha class */
+               if ($a) {
+                       break 2;
+               }
+               break;
+       case " ": /* continuation line holding ;-separated entries */
+               if ($a) {
+                       foreach (explode(";", trim($line, "\n/ ;")) as $ranges) {
+                               $range = explode("..", $ranges);
+                               $start = null; /* reset, so nothing leaks in from the previous entry */
+                               $step = $end = 0;
+                               switch (count($range)) {
+                               case 3: /* <Ustart>..(step)..<Uend> */
+                                       sscanf($range[0], "<U%X>", $start);
+                                       sscanf($range[1], "(%d)", $step);
+                                       sscanf($range[2], "<U%X>", $end);
+                                       break;
+                               case 2: /* <Ustart>..<Uend> */
+                                       $step = 1;
+                                       sscanf($range[0], "<U%X>", $start);
+                                       sscanf($range[1], "<U%X>", $end);
+                                       break;
+                               case 1: /* <Ustart> */
+                                       sscanf($range[0], "<U%X>", $start);
+                                       break;
+                               }
+                               if (!is_int($start)) {
+                                       throw new Exception("Cannot parse '$ranges' in $i18n");
+                               }
+                               /* 8 hex digits from 0xFFFF upwards, else 4 digits padded to the same width */
+                               $fmt = $start >= 0xffff ? "0x%08X, " : "    0x%04X, ";
+                               $table .= "\t{" . sprintf($fmt, $start);
+                               $table .= $end ? sprintf($fmt, $end) : "         0, ";
+                               $table .= sprintf("%d},\n", $step);
+                       }
+               }
+               break;
+       default: /* any other line: start collecting at "alpha /", stop at the next one */
+               if ($a) {
+                       break 2;
+               } elseif ($line === "alpha /\n") {
+                       $a = true;
+               }
+               break;
+       }
+}
+fclose($f);
+if ($table === "") {
+       throw new Exception("No alpha class found in LC_CTYPE of $i18n");
+}
+
+/* a callback keeps '$' and '\' in the table from being read as back references */
+$h = preg_replace_callback('/(\/\* BEGIN::UTF8TABLE \*\/\n).*(\n\s*\/\* END::UTF8TABLE \*\/)/s',
+       function($m) use($table) { return $m[1] . $table . $m[2]; },
+       file_get_contents("php_http_utf8.h"), -1, $n);
+if ($h === null || $n !== 1) {
+       throw new Exception("No UTF8TABLE markers found in php_http_utf8.h");
+}
+file_put_contents("php_http_utf8.h", $h);
index 528ca8a960631f6d72682f67b3ce2687e89e514e..c02cad139fb2e098ac73917d020602004954bc18 100644 (file)
@@ -51,6 +51,8 @@ v2: http://dev.iworks.at/ext-http/lcov/ext/http/
 - var_dump(http\Message) no longer automatically creates an empty body
 + Added http\Message\Parser class
 + Made http\Client::once() and http\Client::wait() available when using events
++ Added http\Url::parse() method
++ Added http\Url::PARSE_MBLOC, http\Url::PARSE_MBUTF8, http\Url::PARSE_TOIDN and http\Url::PARSE_TOPCT constants
 ]]></notes>
  <contents>
   <dir name="/">
@@ -122,6 +124,7 @@ v2: http://dev.iworks.at/ext-http/lcov/ext/http/
    <file role="src" name="php_http_strlist.h"/>
    <file role="src" name="php_http_url.c"/>
    <file role="src" name="php_http_url.h"/>
+   <file role="src" name="php_http_utf8.h"/>
    <file role="src" name="php_http_version.c"/>
    <file role="src" name="php_http_version.h"/>
 
@@ -137,6 +140,7 @@ v2: http://dev.iworks.at/ext-http/lcov/ext/http/
     </dir>
      <file role="test" name="bug61444.phpt"/>
      <file role="test" name="bug66388.phpt"/>
+     <file role="test" name="bug67932.phpt"/>
      <file role="test" name="client001.phpt"/>
      <file role="test" name="client002.phpt"/>
      <file role="test" name="client003.phpt"/>
@@ -152,6 +156,7 @@ v2: http://dev.iworks.at/ext-http/lcov/ext/http/
      <file role="test" name="client013.phpt"/>
      <file role="test" name="client014.phpt"/>
      <file role="test" name="client015.phpt"/>
+     <file role="test" name="client016.phpt"/>
      <file role="test" name="clientrequest001.phpt"/>
      <file role="test" name="clientrequest002.phpt"/>
      <file role="test" name="clientrequest003.phpt"/>
@@ -251,6 +256,7 @@ v2: http://dev.iworks.at/ext-http/lcov/ext/http/
      <file role="test" name="messagebody009.phpt"/>
      <file role="test" name="messagebody010.phpt"/>
      <file role="test" name="messageparser001.phpt"/>
+     <file role="test" name="messageparser002.phpt"/>
      <file role="test" name="negotiate001.phpt"/>
      <file role="test" name="params001.phpt"/>
      <file role="test" name="params002.phpt"/>
@@ -271,11 +277,21 @@ v2: http://dev.iworks.at/ext-http/lcov/ext/http/
      <file role="test" name="querystring001.phpt"/>
      <file role="test" name="querystring002.phpt"/>
      <file role="test" name="serialize001.phpt"/>
+     <file role="test" name="url001.phpt"/>
      <file role="test" name="url002.phpt"/>
      <file role="test" name="url003.phpt"/>
      <file role="test" name="url004.phpt"/>
      <file role="test" name="url005.phpt"/>
-     <file role="test" name="url001.phpt"/>
+     <file role="test" name="urlparser001.phpt"/>
+     <file role="test" name="urlparser002.phpt"/>
+     <file role="test" name="urlparser003.phpt"/>
+     <file role="test" name="urlparser004.phpt"/>
+     <file role="test" name="urlparser005.phpt"/>
+     <file role="test" name="urlparser006.phpt"/>
+     <file role="test" name="urlparser007.phpt"/>
+     <file role="test" name="urlparser008.phpt"/>
+     <file role="test" name="urlparser009.phpt"/>
+     <file role="test" name="urlparser010.phpt"/>
      <file role="test" name="version001.phpt"/>
    </dir>
   </dir>
@@ -304,14 +320,11 @@ v2: http://dev.iworks.at/ext-http/lcov/ext/http/
   <optional>
    <extension><name>hash</name></extension>
    <extension><name>iconv</name></extension>
+   <extension><name>json</name></extension>
   </optional>
  </dependencies>
  <providesextension>http</providesextension>
  <extsrcrelease>
-  <configureoption 
-   name="with-http"
-   prompt="Enable extended HTTP support"
-   default="yes" />
   <configureoption
    name="with-http-zlib-dir"
    prompt="where to find zlib"
index 0e65ccb6315f3c9483de784e1abd8cd23909449b..5bddb0c42e670a69fe0bdc6049614b97953354f5 100644 (file)
@@ -68,6 +68,10 @@ typedef int STATUS;
 #      endif
 #endif
 
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTYPE_H) && defined(HAVE_ISWALNUM) && (defined(HAVE_MBRTOWC) || defined(HAVE_MBTOWC))
+#      define PHP_HTTP_HAVE_WCHAR 1
+#endif
+
 #include <ctype.h>
 #define PHP_HTTP_IS_CTYPE(type, c) is##type((int) (unsigned char) (c))
 #define PHP_HTTP_TO_CTYPE(type, c) to##type((int) (unsigned char) (c))
index 4056e9260ab9dbfb261c8be5ab04f13cb3c99b36..48bd0deb02789810778ef25840fc73aa1d6dcd8f 100644 (file)
@@ -282,7 +282,7 @@ static int php_http_curle_raw_callback(CURL *ch, curl_infotype type, char *data,
                        } else if (php_memnstr(data, ZEND_STRL("Operation timed out"), data + length)) {
                                h->progress.info = "timeout";
                        } else {
-#if PHP_DEBUG
+#if 0
                                h->progress.info = data;
                                data[length - 1] = '\0';
 #endif
@@ -539,9 +539,11 @@ static STATUS php_http_curle_get_info(CURL *ch, HashTable *info)
                        case CURLSSLBACKEND_NSS:
                                backend = "nss";
                                break;
+#if !PHP_HTTP_CURL_VERSION(7,39,0)
                        case CURLSSLBACKEND_QSOSSL:
                                backend = "qsossl";
                                break;
+#endif
                        case CURLSSLBACKEND_GSKIT:
                                backend = "gskit";
                                break;
@@ -903,7 +905,7 @@ static STATUS php_http_curle_option_set_lastmodified(php_http_option_t *opt, zva
                                return FAILURE;
                        }
                } else {
-                       if (CURLE_OK != curl_easy_setopt(ch, CURLOPT_TIMEVALUE, (long) PHP_HTTP_G->env.request.time + Z_LVAL_P(val))) {
+                       if (CURLE_OK != curl_easy_setopt(ch, CURLOPT_TIMEVALUE, (long) sapi_get_request_time(TSRMLS_C) + Z_LVAL_P(val))) {
                                return FAILURE;
                        }
                }
@@ -1632,15 +1634,25 @@ static STATUS php_http_client_curl_handler_prepare(php_http_client_curl_handler_
        php_http_message_update_headers(msg);
        if (zend_hash_num_elements(&msg->hdrs)) {
                php_http_array_hashkey_t header_key = php_http_array_hashkey_init(0);
-               zval **header_val;
+               zval **header_val, *header_cpy;
                HashPosition pos;
                php_http_buffer_t header;
+#if !PHP_HTTP_CURL_VERSION(7,23,0)
+               zval **ct = NULL;
+
+               zend_hash_find(&msg->hdrs, ZEND_STRS("Content-Length"), (void *) &ct);
+#endif
 
                php_http_buffer_init(&header);
                FOREACH_HASH_KEYVAL(pos, &msg->hdrs, header_key, header_val) {
                        if (header_key.type == HASH_KEY_IS_STRING) {
-                               zval *header_cpy = php_http_ztyp(IS_STRING, *header_val);
-
+#if !PHP_HTTP_CURL_VERSION(7,23,0)
+                               /* avoid duplicate content-length header */
+                               if (ct && *ct == *header_val) {
+                                       continue;
+                               }
+#endif
+                               header_cpy = php_http_ztyp(IS_STRING, *header_val);
                                php_http_buffer_appendf(&header, "%s: %s", header_key.str, Z_STRVAL_P(header_cpy));
                                php_http_buffer_fix(&header);
                                curl->options.headers = curl_slist_append(curl->options.headers, header.data);
index 10d54fe4019a5a0472ffc95b1c6bfe1758964279..2969d1a4fbe5b19bb0e5daf354e022ccabd948e2 100644 (file)
@@ -15,8 +15,6 @@
 
 PHP_RINIT_FUNCTION(http_env)
 {
-       PHP_HTTP_G->env.request.time = sapi_get_request_time(TSRMLS_C);
-
        /* populate form data on non-POST requests */
        if (SG(request_info).request_method && strcasecmp(SG(request_info).request_method, "POST") && SG(request_info).content_type && *SG(request_info).content_type) {
                uint ct_len = strlen(SG(request_info).content_type);
index ee1afe92feca016dc5de2e63e0b2d1a3e012d430..7556c3efb68e77e4de0129d1ab18b1e0aeffe3aa 100644 (file)
@@ -21,7 +21,6 @@ struct php_http_env_globals {
        char *etag_mode;
 
        struct {
-               time_t time;
                HashTable *headers;
                php_http_message_body_t *body;
        } request;
index c155dde3263bde5cba07c77ec108a4f7abd80077..13a35c6af7054803de8760f6cc688ee2745ab954 100644 (file)
@@ -329,6 +329,17 @@ static STATUS php_http_env_response_send_head(php_http_env_response_t *r, php_ht
                return ret;
        }
 
+       if ((zoption = get_option(options, ZEND_STRL("headers") TSRMLS_CC))) {
+               if (Z_TYPE_P(zoption) == IS_ARRAY) {
+                       php_http_header_to_callback(Z_ARRVAL_P(zoption), 0, (php_http_pass_format_callback_t) r->ops->set_header, r TSRMLS_CC);
+               }
+               zval_ptr_dtor(&zoption);
+       }
+
+       if (ret != SUCCESS) {
+               return ret;
+       }
+
        if ((zoption = get_option(options, ZEND_STRL("responseCode") TSRMLS_CC))) {
                zval *zoption_copy = php_http_ztyp(IS_LONG, zoption);
 
@@ -359,17 +370,6 @@ static STATUS php_http_env_response_send_head(php_http_env_response_t *r, php_ht
                return ret;
        }
 
-       if ((zoption = get_option(options, ZEND_STRL("headers") TSRMLS_CC))) {
-               if (Z_TYPE_P(zoption) == IS_ARRAY) {
-                       php_http_header_to_callback(Z_ARRVAL_P(zoption), 0, (php_http_pass_format_callback_t) r->ops->set_header, r TSRMLS_CC);
-               }
-               zval_ptr_dtor(&zoption);
-       }
-
-       if (ret != SUCCESS) {
-               return ret;
-       }
-
        if ((zoption = get_option(options, ZEND_STRL("contentType") TSRMLS_CC))) {
                zval *zoption_copy = php_http_ztyp(IS_STRING, zoption);
 
index 3141065f8353466ff9edb9003a0400397daa2ed3..c96531748c166ad8c86407e7d4c278650055fcad 100644 (file)
@@ -1041,13 +1041,15 @@ static PHP_METHOD(HttpMessage, __construct)
 
                if (s && php_http_message_parser_init(&p TSRMLS_CC)) {
                        unsigned flags = (greedy ? PHP_HTTP_MESSAGE_PARSER_GREEDY : 0);
+                       php_http_buffer_t buf;
 
-                       if (PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE == php_http_message_parser_parse_stream(&p, s, flags, &msg)) {
+                       php_http_buffer_init_ex(&buf, 0x1000, PHP_HTTP_BUFFER_INIT_PREALLOC);
+                       if (PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE == php_http_message_parser_parse_stream(&p, &buf, s, flags, &msg)) {
                                if (!EG(exception)) {
                                        php_http_throw(bad_message, "Could not parse message from stream", NULL);
                                }
                        }
-
+                       php_http_buffer_dtor(&buf);
                        php_http_message_parser_dtor(&p);
                }
 
index 0d11bf6aed153a2843ec87d6c5af85ae57021e07..e73360eed1920ffb57a740d32fa629bca7d41ad4 100644 (file)
@@ -115,16 +115,16 @@ void php_http_message_parser_free(php_http_message_parser_t **parser)
        }
 }
 
-php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_stream *s, unsigned flags, php_http_message_t **message)
+php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_http_buffer_t *buf, php_stream *s, unsigned flags, php_http_message_t **message)
 {
-       php_http_buffer_t buf;
        php_http_message_parser_state_t state = PHP_HTTP_MESSAGE_PARSER_STATE_START;
        TSRMLS_FETCH_FROM_CTX(parser->ts);
 
-       php_http_buffer_init_ex(&buf, 0x1000, PHP_HTTP_BUFFER_INIT_PREALLOC);
-
+       if (!buf->data) {
+               php_http_buffer_resize_ex(buf, 0x1000, 1, 0);
+       }
        while (!php_stream_eof(s)) {
-               size_t len = 0;
+               size_t justread = 0;
 #if DBG_PARSER
                fprintf(stderr, "#SP: %s (f:%u)\n", php_http_message_parser_state_name(state), flags);
 #endif
@@ -133,34 +133,36 @@ php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_me
                        case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
                        case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
                                /* read line */
-                               php_stream_get_line(s, buf.data + buf.used, buf.free, &len);
-                               php_http_buffer_account(&buf, len);
+                               php_stream_get_line(s, buf->data + buf->used, buf->free, &justread);
+                               php_http_buffer_account(buf, justread);
                                break;
 
                        case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
                                /* read all */
-                               php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, buf.free));
+                               justread = php_stream_read(s, buf->data + buf->used, buf->free);
+                               php_http_buffer_account(buf, justread);
                                break;
 
                        case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
                                /* read body_length */
-                               php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, MIN(buf.free, parser->body_length)));
+                               justread = php_stream_read(s, buf->data + buf->used, MIN(buf->free, parser->body_length));
+                               php_http_buffer_account(buf, justread);
                                break;
 
                        case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
                                /* duh, this is very naive */
-                               if (len) {
-                                       size_t read = php_stream_read(s, buf.data + buf.used, MIN(len, buf.free));
+                               if (parser->body_length) {
+                                       justread = php_stream_read(s, buf->data + buf->used, MIN(parser->body_length, buf->free));
 
-                                       php_http_buffer_account(&buf, read);
+                                       php_http_buffer_account(buf, justread);
 
-                                       len -= read;
+                                       parser->body_length -= justread;
                                } else {
-                                       php_http_buffer_resize(&buf, 24);
-                                       php_stream_get_line(s, buf.data, buf.free, &len);
-                                       php_http_buffer_account(&buf, len);
+                                       php_http_buffer_resize(buf, 24);
+                                       php_stream_get_line(s, buf->data, buf->free, &justread);
+                                       php_http_buffer_account(buf, justread);
 
-                                       len = strtoul(buf.data + buf.used - len, NULL, 16);
+                                       parser->body_length = strtoul(buf->data + buf->used - justread, NULL, 16);
                                }
                                break;
 
@@ -172,14 +174,16 @@ php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_me
 
                        case PHP_HTTP_MESSAGE_PARSER_STATE_DONE:
                        case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
-                               php_http_buffer_dtor(&buf);
                                return php_http_message_parser_state_is(parser);
                }
 
-               state = php_http_message_parser_parse(parser, &buf, flags, message);
+               if (justread) {
+                       state = php_http_message_parser_parse(parser, buf, flags, message);
+               } else  {
+                       return state;
+               }
        }
 
-       php_http_buffer_dtor(&buf);
        return PHP_HTTP_MESSAGE_PARSER_STATE_DONE;
 }
 
@@ -588,8 +592,8 @@ static PHP_METHOD(HttpMessageParser, parse)
 
        zval_dtor(zmsg);
        if (parser_obj->parser->message) {
-                       ZVAL_OBJVAL(zmsg, php_http_message_object_new_ex(php_http_message_class_entry, php_http_message_copy(parser_obj->parser->message, NULL), NULL TSRMLS_CC), 0);
-               }
+               ZVAL_OBJVAL(zmsg, php_http_message_object_new_ex(php_http_message_class_entry, php_http_message_copy(parser_obj->parser->message, NULL), NULL TSRMLS_CC), 0);
+       }
 }
 
 ZEND_BEGIN_ARG_INFO_EX(ai_HttpMessageParser_stream, 0, 0, 3)
@@ -612,7 +616,7 @@ static PHP_METHOD(HttpMessageParser, stream)
        zend_restore_error_handling(&zeh TSRMLS_CC);
 
        parser_obj = zend_object_store_get_object(getThis() TSRMLS_CC);
-       RETVAL_LONG(php_http_message_parser_parse_stream(parser_obj->parser, s, flags, &parser_obj->parser->message));
+       RETVAL_LONG(php_http_message_parser_parse_stream(parser_obj->parser, parser_obj->buffer, s, flags, &parser_obj->parser->message));
 
        zval_dtor(zmsg);
        if (parser_obj->parser->message) {
index 5b04351678e430151e9102de2f40ec86e94bcd10..c2bee1544713ac84c031ff9b2fd9a9bccd9c1352 100644 (file)
@@ -54,7 +54,7 @@ PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_pop(p
 PHP_HTTP_API void php_http_message_parser_dtor(php_http_message_parser_t *parser);
 PHP_HTTP_API void php_http_message_parser_free(php_http_message_parser_t **parser);
 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, php_http_message_t **message);
-PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_stream *s, unsigned flags, php_http_message_t **message);
+PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_http_buffer_t *buffer, php_stream *s, unsigned flags, php_http_message_t **message);
 
 typedef struct php_http_message_parser_object {
        zend_object zo;
index 8fcb82e2449f5f4a2dbb982b5a50a3e1ef60061f..51072cf53da8e59e6578849c28a31bf0d28edb91 100644 (file)
@@ -115,7 +115,7 @@ char *php_http_pretty_key(register char *key, size_t key_len, zend_bool uctitle,
 
 size_t php_http_boundary(char *buf, size_t buf_len TSRMLS_DC)
 {
-       return snprintf(buf, buf_len, "%15.15F", PHP_HTTP_G->env.request.time * php_combined_lcg(TSRMLS_C));
+       return snprintf(buf, buf_len, "%15.15F", sapi_get_request_time(TSRMLS_C) * php_combined_lcg(TSRMLS_C));
 }
 
 int php_http_select_str(const char *cmp, int argc, ...)
index 7c8077b6a31995208a8a16da0ed6789d68bd2878..5aeefa8a3b6b50463fd5757e38171a77bc0cd33a 100644 (file)
 
 #include "php_http_api.h"
 
+#ifdef PHP_HTTP_HAVE_IDN
+#      include <idna.h>
+#endif
+
+#ifdef PHP_HTTP_HAVE_WCHAR
+#      include <wchar.h>
+#      include <wctype.h>
+#endif
+
+#ifdef HAVE_ARPA_INET_H
+#      include <arpa/inet.h>
+#endif
+
+#include "php_http_utf8.h"
+
 static inline char *localhostname(void)
 {
        char hostname[1024] = {0};
@@ -42,111 +57,175 @@ static inline char *localhostname(void)
        return estrndup("localhost", lenof("localhost"));
 }
 
-static php_url *php_http_url_from_env(php_url *url TSRMLS_DC)
+#define url(buf) ((php_http_url_t *) buf.data)
+
+static php_http_url_t *php_http_url_from_env(TSRMLS_D)
 {
        zval *https, *zhost, *zport;
        long port;
+       php_http_buffer_t buf;
 
-       if (!url) {
-               url = ecalloc(1, sizeof(*url));
-       }
-
-       /* port */
-       zport = php_http_env_get_server_var(ZEND_STRL("SERVER_PORT"), 1 TSRMLS_CC);
-       if (zport && IS_LONG == is_numeric_string(Z_STRVAL_P(zport), Z_STRLEN_P(zport), &port, NULL, 0)) {
-               url->port = port;
-       }
+       php_http_buffer_init_ex(&buf, MAX(PHP_HTTP_BUFFER_DEFAULT_SIZE, sizeof(php_http_url_t)<<2), PHP_HTTP_BUFFER_INIT_PREALLOC);
+       php_http_buffer_account(&buf, sizeof(php_http_url_t));
+       memset(buf.data, 0, buf.used);
 
        /* scheme */
+       url(buf)->scheme = &buf.data[buf.used];
        https = php_http_env_get_server_var(ZEND_STRL("HTTPS"), 1 TSRMLS_CC);
        if (https && !strcasecmp(Z_STRVAL_P(https), "ON")) {
-               url->scheme = estrndup("https", lenof("https"));
+               php_http_buffer_append(&buf, "https", sizeof("https"));
        } else {
-               url->scheme = estrndup("http", lenof("http"));
+               php_http_buffer_append(&buf, "http", sizeof("http"));
        }
 
        /* host */
+       url(buf)->host = &buf.data[buf.used];
        if ((((zhost = php_http_env_get_server_var(ZEND_STRL("HTTP_HOST"), 1 TSRMLS_CC)) ||
                        (zhost = php_http_env_get_server_var(ZEND_STRL("SERVER_NAME"), 1 TSRMLS_CC)) ||
                        (zhost = php_http_env_get_server_var(ZEND_STRL("SERVER_ADDR"), 1 TSRMLS_CC)))) && Z_STRLEN_P(zhost)) {
                size_t stop_at = strspn(Z_STRVAL_P(zhost), "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-.");
 
-               url->host = estrndup(Z_STRVAL_P(zhost), stop_at);
+               php_http_buffer_append(&buf, Z_STRVAL_P(zhost), stop_at);
+               php_http_buffer_append(&buf, "", 1);
        } else {
-               url->host = localhostname();
+               char *host_str = localhostname();
+
+               php_http_buffer_append(&buf, host_str, strlen(host_str) + 1);
+               efree(host_str);
+       }
+
+       /* port */
+       zport = php_http_env_get_server_var(ZEND_STRL("SERVER_PORT"), 1 TSRMLS_CC);
+       if (zport && IS_LONG == is_numeric_string(Z_STRVAL_P(zport), Z_STRLEN_P(zport), &port, NULL, 0)) {
+               url(buf)->port = port;
        }
 
        /* path */
        if (SG(request_info).request_uri && SG(request_info).request_uri[0]) {
                const char *q = strchr(SG(request_info).request_uri, '?');
 
+               url(buf)->path = &buf.data[buf.used];
+
                if (q) {
-                       url->path = estrndup(SG(request_info).request_uri, q - SG(request_info).request_uri);
+                       php_http_buffer_append(&buf, SG(request_info).request_uri, q - SG(request_info).request_uri);
+                       php_http_buffer_append(&buf, "", 1);
                } else {
-                       url->path = estrdup(SG(request_info).request_uri);
+                       php_http_buffer_append(&buf, SG(request_info).request_uri, strlen(SG(request_info).request_uri) + 1);
                }
        }
 
        /* query */
        if (SG(request_info).query_string && SG(request_info).query_string[0]) {
-               url->query = estrdup(SG(request_info).query_string);
+               url(buf)->query = &buf.data[buf.used];
+               php_http_buffer_append(&buf, SG(request_info).query_string, strlen(SG(request_info).query_string) + 1);
        }
 
-       return url;
+       return url(buf);
 }
 
 void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC)
 {
-       php_url *url, *tmp_url = NULL;
+       php_http_url_t *url = php_http_url_mod((const php_http_url_t *) old_url, (const php_http_url_t *) new_url, flags TSRMLS_CC);
+
+       if (url_ptr) {
+               *url_ptr = php_http_url_to_php_url(url);
+       }
+       if (url_str) {
+               php_http_url_to_string(url, url_str, url_len TSRMLS_CC);
+       }
+
+       php_http_url_free(&url);
+}
+
+#define url_isset(u,n) \
+       ((u)&&(u)->n)
+#define url_copy(n) do { \
+       if (url_isset(new_url, n)) { \
+               url(buf)->n = &buf.data[buf.used]; \
+               php_http_buffer_append(&buf, new_url->n, strlen(new_url->n) + 1); \
+       } else if (url_isset(old_url, n)) { \
+               url(buf)->n = &buf.data[buf.used]; \
+               php_http_buffer_append(&buf, old_url->n, strlen(old_url->n) + 1); \
+       } \
+} while (0)
+
+php_http_url_t *php_http_url_mod(const php_http_url_t *old_url, const php_http_url_t *new_url, unsigned flags TSRMLS_DC)
+{
+       php_http_url_t *tmp_url = NULL;
+       php_http_buffer_t buf;
+
+       php_http_buffer_init_ex(&buf, MAX(PHP_HTTP_BUFFER_DEFAULT_SIZE, sizeof(php_http_url_t)<<2), PHP_HTTP_BUFFER_INIT_PREALLOC);
+       php_http_buffer_account(&buf, sizeof(php_http_url_t));
+       memset(buf.data, 0, buf.used);
 
        /* set from env if requested */
        if (flags & PHP_HTTP_URL_FROM_ENV) {
-               php_url *env_url = php_http_url_from_env(NULL TSRMLS_CC);
-
-               php_http_url(flags ^ PHP_HTTP_URL_FROM_ENV, env_url, old_url, &tmp_url, NULL, NULL TSRMLS_CC);
+               php_http_url_t *env_url = php_http_url_from_env(TSRMLS_C);
 
-               php_url_free(env_url);
-               old_url = tmp_url;
+               old_url = tmp_url = php_http_url_mod(env_url, old_url, flags ^ PHP_HTTP_URL_FROM_ENV TSRMLS_CC);
+               php_http_url_free(&env_url);
        }
 
-       url = ecalloc(1, sizeof(*url));
+       url_copy(scheme);
 
-#define __URLSET(u,n) \
-       ((u)&&(u)->n)
-#define __URLCPY(n) \
-       url->n = __URLSET(new_url,n) ? estrdup(new_url->n) : (__URLSET(old_url,n) ? estrdup(old_url->n) : NULL)
-       
-       if (!(flags & PHP_HTTP_URL_STRIP_PORT)) {
-               url->port = __URLSET(new_url, port) ? new_url->port : ((old_url) ? old_url->port : 0);
-       }
        if (!(flags & PHP_HTTP_URL_STRIP_USER)) {
-               __URLCPY(user);
+               url_copy(user);
        }
+
        if (!(flags & PHP_HTTP_URL_STRIP_PASS)) {
-               __URLCPY(pass);
+               url_copy(pass);
        }
        
-       __URLCPY(scheme);
-       __URLCPY(host);
+       url_copy(host);
        
+       if (!(flags & PHP_HTTP_URL_STRIP_PORT)) {
+               url(buf)->port = url_isset(new_url, port) ? new_url->port : ((old_url) ? old_url->port : 0);
+       }
+
        if (!(flags & PHP_HTTP_URL_STRIP_PATH)) {
-               if ((flags & PHP_HTTP_URL_JOIN_PATH) && __URLSET(old_url, path) && __URLSET(new_url, path) && *new_url->path != '/') {
+               if ((flags & PHP_HTTP_URL_JOIN_PATH) && url_isset(old_url, path) && url_isset(new_url, path) && *new_url->path != '/') {
                        size_t old_path_len = strlen(old_url->path), new_path_len = strlen(new_url->path);
+                       char *path = ecalloc(1, old_path_len + new_path_len + 1 + 1);
                        
-                       url->path = ecalloc(1, old_path_len + new_path_len + 1 + 1);
+                       strcat(path, old_url->path);
+                       if (path[old_path_len - 1] != '/') {
+                               php_dirname(path, old_path_len);
+                               strcat(path, "/");
+                       }
+                       strcat(path, new_url->path);
                        
-                       strcat(url->path, old_url->path);
-                       if (url->path[old_path_len - 1] != '/') {
-                               php_dirname(url->path, old_path_len);
-                               strcat(url->path, "/");
+                       url(buf)->path = &buf.data[buf.used];
+                       if (path[0] != '/') {
+                               php_http_buffer_append(&buf, "/", 1);
                        }
-                       strcat(url->path, new_url->path);
+                       php_http_buffer_append(&buf, path, strlen(path) + 1);
+                       efree(path);
                } else {
-                       __URLCPY(path);
+                       const char *path = NULL;
+
+                       url(buf)->path = &buf.data[buf.used];
+
+                       if (url_isset(new_url, path)) {
+                               path = new_url->path;
+                       } else if (url_isset(old_url, path)) {
+                               path = old_url->path;
+                       } else {
+                               php_http_buffer_append(&buf, "/", sizeof("/"));
+                       }
+
+                       if (path) {
+                               if (path[0] != '/') {
+                                       php_http_buffer_append(&buf, "/", 1);
+                               }
+                               php_http_buffer_append(&buf, path, strlen(path) + 1);
+                       }
+
+
                }
        }
+
        if (!(flags & PHP_HTTP_URL_STRIP_QUERY)) {
-               if ((flags & PHP_HTTP_URL_JOIN_QUERY) && __URLSET(new_url, query) && __URLSET(old_url, query)) {
+               if ((flags & PHP_HTTP_URL_JOIN_QUERY) && url_isset(new_url, query) && url_isset(old_url, query)) {
                        zval qarr, qstr;
                        
                        INIT_PZVAL(&qstr);
@@ -160,49 +239,50 @@ void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php
                        
                        ZVAL_NULL(&qstr);
                        php_http_querystring_update(&qarr, NULL, &qstr TSRMLS_CC);
-                       url->query = Z_STRVAL(qstr);
+
+                       url(buf)->query = &buf.data[buf.used];
+                       php_http_buffer_append(&buf, Z_STRVAL(qstr), Z_STRLEN(qstr) + 1);
+
+                       zval_dtor(&qstr);
                        zval_dtor(&qarr);
                } else {
-                       __URLCPY(query);
+                       url_copy(query);
                }
        }
+
        if (!(flags & PHP_HTTP_URL_STRIP_FRAGMENT)) {
-               __URLCPY(fragment);
+               url_copy(fragment);
        }
        
        /* done with copy & combine & strip */
 
        if (flags & PHP_HTTP_URL_FROM_ENV) {
                /* free old_url we tainted above */
-               php_url_free(tmp_url);
+               php_http_url_free(&tmp_url);
        }
 
        /* set some sane defaults */
 
-       if (!url->scheme) {
-               url->scheme = estrndup("http", lenof("http"));
+       if (!url(buf)->scheme) {
+               url(buf)->scheme = &buf.data[buf.used];
+               php_http_buffer_append(&buf, "http", sizeof("http"));
        }
 
-       if (!url->host) {
-               url->host = estrndup("localhost", lenof("localhost"));
+       if (!url(buf)->host) {
+               url(buf)->host = &buf.data[buf.used];
+               php_http_buffer_append(&buf, "localhost", sizeof("localhost"));
        }
        
-       if (!url->path) {
-               url->path = estrndup("/", 1);
-       } else if (url->path[0] != '/') {
-               size_t plen = strlen(url->path);
-               char *path = emalloc(plen + 1 + 1);
-
-               path[0] = '/';
-               memcpy(&path[1], url->path, plen + 1);
-               STR_SET(url->path, path);
+       if (!url(buf)->path) {
+               url(buf)->path = &buf.data[buf.used];
+               php_http_buffer_append(&buf, "/", sizeof("/"));
        }
        /* replace directory references if path is not a single slash */
        if ((flags & PHP_HTTP_URL_SANITIZE_PATH)
-       &&      url->path[0] && (url->path[0] != '/' || url->path[1])) {
-               char *ptr, *end = url->path + strlen(url->path) + 1;
+       &&      url(buf)->path[0] && url(buf)->path[1]) {
+               char *ptr, *end = url(buf)->path + strlen(url(buf)->path) + 1;
                        
-               for (ptr = strchr(url->path, '/'); ptr; ptr = strchr(ptr, '/')) {
+               for (ptr = strchr(url(buf)->path, '/'); ptr; ptr = strchr(ptr, '/')) {
                        switch (ptr[1]) {
                                case '/':
                                        memmove(&ptr[1], &ptr[2], end - &ptr[2]);
@@ -221,7 +301,7 @@ void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php
                                                case '.':
                                                        if (ptr[3] == '/') {
                                                                char *pos = &ptr[4];
-                                                               while (ptr != url->path) {
+                                                               while (ptr != url(buf)->path) {
                                                                        if (*--ptr == '/') {
                                                                                break;
                                                                        }
@@ -248,23 +328,185 @@ void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php
                }
        }
        /* unset default ports */
-       if (url->port) {
-               if (    ((url->port == 80) && !strcmp(url->scheme, "http"))
-                       ||      ((url->port ==443) && !strcmp(url->scheme, "https"))
+       if (url(buf)->port) {
+               if (    ((url(buf)->port == 80) && !strcmp(url(buf)->scheme, "http"))
+                       ||      ((url(buf)->port ==443) && !strcmp(url(buf)->scheme, "https"))
                ) {
-                       url->port = 0;
+                       url(buf)->port = 0;
                }
        }
        
+       return url(buf);
+}
+
+void php_http_url_to_string(const php_http_url_t *url, char **url_str, size_t *url_len TSRMLS_DC) /* serialize url as scheme://user:pass@host:port + path + ?query + #fragment; url must be non-NULL, url_str and url_len are optional outputs */
+{
+       php_http_buffer_t buf;
+
+       php_http_buffer_init(&buf);
+
+       if (url->scheme && *url->scheme) {
+               php_http_buffer_appendl(&buf, url->scheme);
+               php_http_buffer_appends(&buf, "://");
+       } else { /* missing or empty scheme: emit a scheme-relative "//" prefix */
+               php_http_buffer_appends(&buf, "//");
+       }
+
+       if (url->user && *url->user) {
+               php_http_buffer_appendl(&buf, url->user);
+               if (url->pass && *url->pass) { /* a password is only written together with a user */
+                       php_http_buffer_appends(&buf, ":");
+                       php_http_buffer_appendl(&buf, url->pass);
+               }
+               php_http_buffer_appends(&buf, "@");
+       }
+
+       if (url->host && *url->host) {
+               php_http_buffer_appendl(&buf, url->host);
+       } else { /* missing or empty host falls back to "localhost" */
+               php_http_buffer_appends(&buf, "localhost");
+       }
+
+       if (url->port) { /* port 0 means "no port" and is omitted */
+               php_http_buffer_appendf(&buf, ":%hu", url->port);
+       }
+
+       if (url->path && *url->path) { /* appended verbatim; no '/' is inserted here */
+               php_http_buffer_appendl(&buf, url->path);
+       }
+
+       if (url->query && *url->query) {
+               php_http_buffer_appends(&buf, "?");
+               php_http_buffer_appendl(&buf, url->query);
+       }
+
+       if (url->fragment && *url->fragment) {
+               php_http_buffer_appends(&buf, "#");
+               php_http_buffer_appendl(&buf, url->fragment);
+       }
+
+       php_http_buffer_shrink(&buf); /* presumably trims spare capacity -- confirm */
+       php_http_buffer_fix(&buf); /* presumably NUL-terminates the data -- confirm */
+
+       if (url_len) {
+               *url_len = buf.used;
+       }
+
         if (url_str) {
-               php_http_url_to_string(url, url_str, url_len TSRMLS_CC);
+               *url_str = buf.data; /* the buffer's storage is handed to the caller; buf is not destroyed on this path */
+       } else {
+               php_http_buffer_dtor(&buf); /* nobody wants the string: release it */
         }
-       
-       if (url_ptr) {
-               *url_ptr = url;
+}
+
+php_http_url_t *php_http_url_from_struct(HashTable *ht TSRMLS_DC) /* build a php_http_url_t from the "scheme", "user", "pass", "host", "port", "path", "query" and "fragment" entries of ht; absent keys leave the member zeroed */
+{
+       zval **e;
+       php_http_buffer_t buf;
+
+       php_http_buffer_init_ex(&buf, MAX(PHP_HTTP_BUFFER_DEFAULT_SIZE, sizeof(php_http_url_t)<<2), PHP_HTTP_BUFFER_INIT_PREALLOC);
+       php_http_buffer_account(&buf, sizeof(php_http_url_t)); /* presumably marks the struct-sized head of the buffer as used -- confirm */
+       memset(buf.data, 0, buf.used); /* zero the bytes accounted so far, i.e. the struct header */
+
+       if (SUCCESS == zend_hash_find(ht, "scheme", sizeof("scheme"), (void *) &e)) { /* key length passed as sizeof(), i.e. including the NUL */
+               zval *cpy = php_http_ztyp(IS_STRING, *e); /* work on a string-typed value obtained from the entry */
+               url(buf)->scheme = &buf.data[buf.used]; /* member points at the current end of buf, where the append below is expected to land */
+               php_http_buffer_append(&buf, Z_STRVAL_P(cpy), Z_STRLEN_P(cpy) + 1); /* + 1 includes the terminating NUL */
+               zval_ptr_dtor(&cpy);
+       }
+       if (SUCCESS == zend_hash_find(ht, "user", sizeof("user"), (void *) &e)) { /* same pattern for every string member below */
+               zval *cpy = php_http_ztyp(IS_STRING, *e);
+               url(buf)->user = &buf.data[buf.used];
+               php_http_buffer_append(&buf, Z_STRVAL_P(cpy), Z_STRLEN_P(cpy) + 1);
+               zval_ptr_dtor(&cpy);
+       }
+       if (SUCCESS == zend_hash_find(ht, "pass", sizeof("pass"), (void *) &e)) {
+               zval *cpy = php_http_ztyp(IS_STRING, *e);
+               url(buf)->pass = &buf.data[buf.used];
+               php_http_buffer_append(&buf, Z_STRVAL_P(cpy), Z_STRLEN_P(cpy) + 1);
+               zval_ptr_dtor(&cpy);
+       }
+       if (SUCCESS == zend_hash_find(ht, "host", sizeof("host"), (void *) &e)) {
+               zval *cpy = php_http_ztyp(IS_STRING, *e);
+               url(buf)->host = &buf.data[buf.used];
+               php_http_buffer_append(&buf, Z_STRVAL_P(cpy), Z_STRLEN_P(cpy) + 1);
+               zval_ptr_dtor(&cpy);
+       }
+       if (SUCCESS == zend_hash_find(ht, "port", sizeof("port"), (void *) &e)) {
+               zval *cpy = php_http_ztyp(IS_LONG, *e);
+               url(buf)->port = (unsigned short) Z_LVAL_P(cpy); /* the cast silently truncates values that do not fit an unsigned short */
+               zval_ptr_dtor(&cpy);
+       }
+       if (SUCCESS == zend_hash_find(ht, "path", sizeof("path"), (void *) &e)) {
+               zval *cpy = php_http_ztyp(IS_STRING, *e);
+               url(buf)->path = &buf.data[buf.used];
+               php_http_buffer_append(&buf, Z_STRVAL_P(cpy), Z_STRLEN_P(cpy) + 1);
+               zval_ptr_dtor(&cpy);
+       }
+       if (SUCCESS == zend_hash_find(ht, "query", sizeof("query"), (void *) &e)) {
+               zval *cpy = php_http_ztyp(IS_STRING, *e);
+               url(buf)->query = &buf.data[buf.used];
+               php_http_buffer_append(&buf, Z_STRVAL_P(cpy), Z_STRLEN_P(cpy) + 1);
+               zval_ptr_dtor(&cpy);
+       }
+       if (SUCCESS == zend_hash_find(ht, "fragment", sizeof("fragment"), (void *) &e)) {
+               zval *cpy = php_http_ztyp(IS_STRING, *e);
+               url(buf)->fragment = &buf.data[buf.used];
+               php_http_buffer_append(&buf, Z_STRVAL_P(cpy), Z_STRLEN_P(cpy) + 1);
+               zval_ptr_dtor(&cpy);
+       }
+
+       return url(buf); /* NOTE(review): member pointers taken before later appends would be stale if an append reallocates buf.data -- confirm */
+}
+
+HashTable *php_http_url_to_struct(const php_http_url_t *url, zval *strct TSRMLS_DC) /* export the set members of url as keyed entries; writes into strct when given, otherwise into a fresh array; returns the hash table that was filled */
+{
+       zval arr;
+
+       if (strct) {
+               switch (Z_TYPE_P(strct)) {
+                       default: /* neither array nor object: discard the old value and turn strct into an empty array */
+                               zval_dtor(strct);
+                               array_init(strct);
+                               /* no break */
+                       case IS_ARRAY:
+                       case IS_OBJECT:
+                               INIT_PZVAL_ARRAY((&arr), HASH_OF(strct)); /* presumably makes arr an array zval over strct's own hash table -- confirm */
+                               break;
+               }
         } else {
-               php_url_free(url);
+               INIT_PZVAL(&arr);
+               array_init(&arr); /* no target given: fill a newly created array */
+       }
+
+       if (url) { /* a NULL url just yields the (possibly empty) table */
+               if (url->scheme) {
+                       add_assoc_string(&arr, "scheme", url->scheme, 1); /* last argument 1: the string is duplicated */
+               }
+               if (url->user) {
+                       add_assoc_string(&arr, "user", url->user, 1);
+               }
+               if (url->pass) {
+                       add_assoc_string(&arr, "pass", url->pass, 1);
+               }
+               if (url->host) {
+                       add_assoc_string(&arr, "host", url->host, 1);
+               }
+               if (url->port) { /* port 0 is treated as unset and not exported */
+                       add_assoc_long(&arr, "port", (long) url->port);
+               }
+               if (url->path) {
+                       add_assoc_string(&arr, "path", url->path, 1);
+               }
+               if (url->query) {
+                       add_assoc_string(&arr, "query", url->query, 1);
+               }
+               if (url->fragment) {
+                       add_assoc_string(&arr, "fragment", url->fragment, 1);
+               }
         }
+
+       return Z_ARRVAL(arr); /* only the hash table leaves this function; the local zval arr itself does not */
 }
 
 STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC)
@@ -299,6 +541,682 @@ STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, con
        return SUCCESS;
 }
 
+struct parse_state { /* working state shared by the parse_* helpers */
+       php_http_url_t url; /* result being filled in; its string members point into buffer */
+#ifdef ZTS
+       void ***ts; /* thread context handed to TSRMLS_FETCH_FROM_CTX() */
+#endif
+       const char *ptr; /* current read position in the input */
+       const char *end; /* upper bound the read position is compared against */
+       size_t maxlen; /* used as the size bound when writing into buffer (see inet_ntop call in parse_hostinfo) */
+       off_t offset; /* next write index into buffer */
+       unsigned flags; /* PHP_HTTP_URL_PARSE_* option bits */
+       char buffer[1]; /* last member; parsed components are written here (presumably over-allocated by the creator -- confirm) */
+};
+
+void php_http_url_free(php_http_url_t **url) /* release *url with a single efree() and reset the caller's pointer; url itself must be non-NULL */
+{
+       if (*url) { /* no-op when already NULL, so repeated calls are harmless */
+               efree(*url);
+               *url = NULL;
+       }
+}
+
+static size_t parse_mb_utf8(unsigned *wc, const char *ptr, const char *end) /* decode one character from [ptr, end) via utf8towc(); returns the bytes consumed, or 0 on failure */
+{
+       unsigned wchar;
+       size_t consumed = utf8towc(&wchar, (const unsigned char *) ptr, end - ptr);
+
+       if (!consumed || consumed == (size_t) -1) { /* both 0 and (size_t) -1 from the decoder are reported as "nothing parsed" */
+               return 0;
+       }
+
+       if (wc) { /* the decoded value is optional output */
+               *wc = wchar;
+       }
+       return consumed;
+}
+
+#ifdef PHP_HTTP_HAVE_WCHAR
+static size_t parse_mb_loc(unsigned *wc, const char *ptr, const char *end) /* locale-based counterpart of parse_mb_utf8(): returns the bytes consumed, or 0 on failure */
+{
+       wchar_t wchar;
+       size_t consumed = 0; /* stays 0 (= failure) when neither mbrtowc nor mbtowc is available */
+#if defined(HAVE_MBRTOWC)
+       mbstate_t ps = {0}; /* fresh conversion state on every call */
+
+       consumed = mbrtowc(&wchar, ptr, end - ptr, &ps); /* NOTE(review): mbrtowc may also return (size_t) -2 for an incomplete sequence, which the check below does not reject -- confirm */
+#elif defined(HAVE_MBTOWC)
+       consumed = mbtowc(&wchar, ptr, end - ptr); /* int result; -1 converts to (size_t) -1 and is caught below */
+#endif
+
+       if (!consumed || consumed == (size_t) -1) {
+               return 0;
+       }
+
+       if (wc) { /* the decoded value is optional output */
+               *wc = wchar;
+       }
+       return consumed;
+}
+#endif /* PHP_HTTP_HAVE_WCHAR */
+
+typedef enum parse_mb_what { /* URL component being parsed when parse_mb() is called; also the index into parse_what[] */
+       PARSE_SCHEME,
+       PARSE_USERINFO,
+       PARSE_HOSTINFO,
+       PARSE_PATH,
+       PARSE_QUERY,
+       PARSE_FRAGMENT
+} parse_mb_what_t;
+
+static const char * const parse_what[] = { /* component names for warnings, indexed by parse_mb_what_t -- keep the order in sync with the enum */
+       "scheme",
+       "userinfo",
+       "hostinfo",
+       "path",
+       "query",
+       "fragment"
+};
+
+static const char parse_xdigits[] = "0123456789ABCDEF"; /* nibble -> upper-case hex digit, used by parse_mb() when percent-encoding */
+
+static size_t parse_mb(struct parse_state *state, parse_mb_what_t what, const char *ptr, const char *end, const char *begin, zend_bool silent) /* handle one multibyte character at ptr and append it (raw or percent-encoded) to state->buffer; returns the bytes consumed, or 0 on failure; begin is only used for the warning text */
+{
+       unsigned wchar;
+       size_t consumed = 0;
+
+       if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { /* the decoder is chosen by flags; with neither flag set consumed stays 0 and the call fails */
+               consumed = parse_mb_utf8(&wchar, ptr, end);
+       }
+#ifdef PHP_HTTP_HAVE_WCHAR
+       else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) {
+               consumed = parse_mb_loc(&wchar, ptr, end);
+       }
+#endif
+
+       while (consumed) { /* executes at most once: the body either breaks (character rejected) or returns */
+               if (!(state->flags & PHP_HTTP_URL_PARSE_TOPCT) || what == PARSE_HOSTINFO || what == PARSE_SCHEME) { /* raw copy: percent-encoding not requested, or the component is scheme/host */
+                       if (what == PARSE_HOSTINFO && (state->flags & PHP_HTTP_URL_PARSE_TOIDN)) {
+                               /* idna */
+                       } else if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
+                               if (!isualnum(wchar)) { /* non-alphanumeric characters are rejected */
+                                       break;
+                               }
+#ifdef PHP_HTTP_HAVE_WCHAR
+                       } else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) {
+                               if (!iswalnum(wchar)) {
+                                       break;
+                               }
+#endif
+                       }
+                       PHP_HTTP_DUFF(consumed, state->buffer[state->offset++] = *ptr++); /* presumably repeats the byte copy once per consumed byte -- confirm against PHP_HTTP_DUFF */
+               } else {
+                       int i = 0; /* index of the input byte currently being written as %XX */
+
+                       PHP_HTTP_DUFF(consumed,
+                                       state->buffer[state->offset++] = '%';
+                                       state->buffer[state->offset++] = parse_xdigits[((unsigned char) ptr[i]) >> 4];
+                                       state->buffer[state->offset++] = parse_xdigits[((unsigned char) ptr[i]) & 0xf];
+                                       ++i;
+                       );
+               }
+
+               return consumed;
+       }
+
+       if (!silent) { /* reached on decode failure or rejected character */
+               TSRMLS_FETCH_FROM_CTX(state->ts);
+               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                               "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
+                               parse_what[what], (unsigned char) *ptr, (unsigned) (ptr - begin), begin);
+       }
+
+       return 0;
+}
+
+static STATUS parse_userinfo(struct parse_state *state, const char *ptr) /* validate and copy the userinfo in [ptr, state->ptr) into state->buffer, splitting user and password at ':'; the do/while requires a non-empty range */
+{
+       size_t mb;
+       const char *password = NULL, *end = state->ptr, *tmp = ptr; /* end: the input position the caller stopped at; tmp: start, kept for error positions */
+       TSRMLS_FETCH_FROM_CTX(state->ts);
+
+       state->url.user = &state->buffer[state->offset];
+
+       do {
+               switch (*ptr) {
+               case ':':
+                       if (password) { /* password is only used as a "separator already seen" marker */
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse password; duplicate ':' at pos %u in '%s'",
+                                               (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       password = ptr + 1;
+                       state->buffer[state->offset++] = 0; /* terminate the user name ... */
+                       state->url.pass = &state->buffer[state->offset]; /* ... and start the password right behind it */
+                       break;
+
+               case '%':
+                       if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) { /* accepted: "%%", or '%' followed by two hex digits inside the range */
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse userinfo; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       state->buffer[state->offset++] = *ptr++; /* three bytes are copied verbatim */
+                       state->buffer[state->offset++] = *ptr++;
+                       state->buffer[state->offset++] = *ptr; /* NOTE(review): on the "%%" path the length check is skipped, so ptr may land on or beyond end and the != end loop test would not stop -- confirm */
+                       break;
+
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       state->buffer[state->offset++] = *ptr;
+                       break;
+
+               default: /* anything else must be a valid multibyte character */
+                       if (!(mb = parse_mb(state, PARSE_USERINFO, ptr, end, tmp, 0))) {
+                               return FAILURE;
+                       }
+                       ptr += mb - 1; /* - 1 because the loop condition advances by one more */
+               }
+       } while(++ptr != end);
+
+
+       state->buffer[state->offset++] = 0; /* terminate the password, or the user when no ':' was seen */
+
+       return SUCCESS;
+}
+
+static STATUS parse_hostinfo(struct parse_state *state, const char *ptr) /* parse host and optional ":port" from [ptr, state->ptr) into state->url.host / state->url.port */
+{
+       size_t mb, len; /* NOTE(review): len is only assigned in the "host not set yet" branch below but is also read by the IDN code -- looks uninitialized for a bracketed IPv6 host, confirm */
+       const char *end = state->ptr, *tmp = ptr, *port = NULL; /* tmp: start of the host text, kept for error positions and the length computation */
+       TSRMLS_FETCH_FROM_CTX(state->ts);
+
+
+#ifdef HAVE_INET_PTON
+       if (*ptr == '[') { /* bracketed IPv6 literal */
+               char *error = NULL, *tmp = memchr(ptr, ']', end - ptr); /* this tmp shadows the outer one inside this block */
+
+               if (tmp) {
+                       size_t addrlen = tmp - ptr + 1; /* length including both brackets */
+                       char buf[16], *addr = estrndup(ptr + 1, addrlen - 2); /* NUL-terminated copy of the text between the brackets */
+                       int rv = inet_pton(AF_INET6, addr, buf);
+
+                       efree(addr);
+                       if (rv == 1) { /* valid address: store it re-formatted by inet_ntop as "[addr]" */
+                               state->buffer[state->offset] = '[';
+                               state->url.host = &state->buffer[state->offset];
+                               inet_ntop(AF_INET6, buf, state->url.host + 1, state->maxlen - state->offset); /* NOTE(review): writes start one byte after offset, so the bound may be one too large -- confirm */
+                               state->offset += strlen(state->url.host);
+                               state->buffer[state->offset++] = ']';
+                               state->buffer[state->offset++] = 0;
+                               ptr = tmp + 1; /* continue behind ']' (an optional ":port" may follow) */
+                       } else if (rv == -1) {
+                               error = strerror(errno);
+                       } else { /* rv == 0: text is not a valid IPv6 address */
+                               error = "unexpected '['";
+                       }
+               } else {
+                       error = "expected ']'";
+               }
+
+               if (error) {
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse hostinfo; %s", error);
+                       return FAILURE;
+               }
+       }
+#endif
+       if (ptr != end) do { /* skipped entirely when the IPv6 literal consumed everything */
+               switch (*ptr) {
+               case ':':
+                       if (port) { /* a second ':' is an error */
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse port; unexpected ':' at pos %u in '%s'",
+                                               (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       port = ptr + 1; /* from here on only digits are accepted */
+                       break;
+
+               case '%':
+                       if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) { /* accepted: "%%", or '%' followed by two hex digits inside the range */
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       state->buffer[state->offset++] = *ptr++; /* three bytes are copied verbatim, even after a port separator */
+                       state->buffer[state->offset++] = *ptr++;
+                       state->buffer[state->offset++] = *ptr;
+                       break;
+
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+                       if (port) { /* non-digits are not allowed inside the port */
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse port; unexpected char '%c' at pos %u in '%s'",
+                                               (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       /* no break */
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       if (port) { /* accumulate the decimal port; NOTE(review): no overflow/range check is visible here -- confirm */
+                               state->url.port *= 10;
+                               state->url.port += *ptr - '0';
+                       } else {
+                               state->buffer[state->offset++] = *ptr;
+                       }
+                       break;
+
+               default:
+                       if (port) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'",
+                                               (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       } else if (!(mb = parse_mb(state, PARSE_HOSTINFO, ptr, end, tmp, 0))) { /* host bytes outside the lists above must form a valid multibyte character */
+                               return FAILURE;
+                       }
+                       ptr += mb - 1; /* - 1 because the loop condition advances by one more */
+               }
+       } while (++ptr != end);
+
+       if (!state->url.host) { /* host was not already stored by the IPv6 branch */
+               len = (port ? port - tmp - 1 : end - tmp); /* input bytes before the ':' (or all of them) */
+               state->url.host = &state->buffer[state->offset - len]; /* rewind by len: relies on exactly len bytes having been appended for the host */
+               state->buffer[state->offset++] = 0;
+       }
+
+#ifdef PHP_HTTP_HAVE_IDN
+       if (state->flags & PHP_HTTP_URL_PARSE_TOIDN) { /* convert the stored host to its ASCII form via libidn */
+               char *idn = NULL;
+               int rv = -1;
+
+               if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
+                       rv = idna_to_ascii_8z(state->url.host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
+               }
+#      ifdef PHP_HTTP_HAVE_WCHAR
+               else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) {
+                       rv = idna_to_ascii_lz(state->url.host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
+               }
+#      endif
+               if (rv != IDNA_SUCCESS) { /* also taken when neither MB flag is set, since rv stays -1 */
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv));
+                       return FAILURE;
+               } else {
+                       size_t idnlen = strlen(idn);
+                       memcpy(state->url.host, idn, idnlen + 1); /* overwrite the host in place; NOTE(review): no bound check against the buffer size is visible -- confirm */
+                       free(idn); /* idn is released with free(), not efree() */
+                       state->offset += idnlen - len; /* move the write index by the length difference */
+               }
+       }
+#endif
+
+       return SUCCESS;
+}
+
+static const char *parse_authority(struct parse_state *state) /* scan the authority from state->ptr; returns the position of the delimiter that ended it, or NULL on error or when no delimiter is found up to state->end */
+{
+       const char *tmp = state->ptr, *host = NULL; /* tmp: start of the pending component; host: set once an '@' was seen */
+
+       do {
+               switch (*state->ptr) {
+               case '@':
+                       /* userinfo delimiter */
+                       if (host) { /* a second '@' is rejected */
+                               TSRMLS_FETCH_FROM_CTX(state->ts);
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse userinfo; unexpected '@'");
+                               return NULL;
+                       }
+                       host = state->ptr + 1;
+                       if (tmp != state->ptr && SUCCESS != parse_userinfo(state, tmp)) { /* empty userinfo (leading '@') is skipped without parsing */
+                               return NULL;
+                       }
+                       tmp = state->ptr + 1; /* the host starts behind the '@' */
+                       break;
+
+               case '/':
+               case '?':
+               case '#':
+               case '\0':
+                       /* host delimiter */
+                       if (tmp != state->ptr && SUCCESS != parse_hostinfo(state, tmp)) { /* an empty host is skipped without parsing */
+                               return NULL;
+                       }
+                       return state->ptr; /* left pointing at the delimiter */
+               }
+       } while (++state->ptr <= state->end);
+
+       return NULL;
+}
+
+static const char *parse_path(struct parse_state *state) /* copy the path starting at state->ptr into state->buffer; returns the position of the terminating '?', '#' or NUL, or NULL on error */
+{
+       size_t mb;
+       const char *tmp;
+       TSRMLS_FETCH_FROM_CTX(state->ts);
+
+       /* is there actually a path to parse? */
+       if (!*state->ptr) {
+               return state->ptr;
+       }
+       tmp = state->ptr; /* start of the path, kept for the emptiness test and error positions */
+       state->url.path = &state->buffer[state->offset];
+
+       do {
+               switch (*state->ptr) {
+               case '#':
+               case '?':
+               case '\0':
+                       /* did we have any path component ? */
+                       if (tmp != state->ptr) {
+                               state->buffer[state->offset++] = 0;
+                       } else { /* delimiter right at the start: report "no path" */
+                               state->url.path = NULL;
+                       }
+                       return state->ptr;
+
+               case '%':
+                       if (state->ptr[1] != '%' && (state->end - state->ptr <= 2 || !isxdigit(*(state->ptr+1)) || !isxdigit(*(state->ptr+2)))) { /* accepted: "%%", or '%' followed by two hex digits inside the input */
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse path; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (state->ptr - tmp), tmp);
+                               return NULL;
+                       }
+                       state->buffer[state->offset++] = *state->ptr++; /* three bytes are copied verbatim */
+                       state->buffer[state->offset++] = *state->ptr++;
+                       state->buffer[state->offset++] = *state->ptr;
+                       break;
+
+               case '/': /* yeah, well */
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case ':': case '@': /* pchar */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       state->buffer[state->offset++] = *state->ptr;
+                       break;
+
+               default: /* anything else must be a valid multibyte character */
+                       if (!(mb = parse_mb(state, PARSE_PATH, state->ptr, state->end, tmp, 0))) {
+                               return NULL;
+                       }
+                       state->ptr += mb - 1; /* - 1 because the loop condition advances by one more */
+               }
+       } while (++state->ptr <= state->end);
+
+       return NULL; /* ran past state->end without meeting a terminator */
+}
+
+/*
+ * Copy the query component into state->buffer.
+ *
+ * Does nothing (and returns state->ptr unchanged) unless state->ptr sits on
+ * a '?'. Otherwise the bytes after the '?' are copied up to the first '#' or
+ * NUL, state->url.query points at the NUL-terminated copy and a pointer to
+ * the terminating delimiter is returned. With PHP_HTTP_URL_PARSE_TOPCT set,
+ * '[' and ']' are written percent-encoded. Returns NULL on a malformed
+ * percent-encoding (after raising a warning), when parse_mb() rejects a byte,
+ * or when the scan runs past state->end.
+ */
+static const char *parse_query(struct parse_state *state)
+{
+       size_t mb;
+       const char *tmp;
+       TSRMLS_FETCH_FROM_CTX(state->ts);
+
+       /* is there actually a query to parse? */
+       if (*state->ptr != '?') {
+               return state->ptr;
+       }
+
+       /* skip initial '?' */
+       tmp = ++state->ptr;
+       state->url.query = &state->buffer[state->offset];
+
+       do {
+               switch (*state->ptr) {
+               case '#':
+               case '\0':
+                       state->buffer[state->offset++] = 0;
+                       return state->ptr;
+
+               case '%':
+                       /*
+                        * "%%" is passed through unchecked; otherwise '%' must be
+                        * followed by two hex digits that lie inside the input.
+                        * The casts are required: handing a negative plain char
+                        * to isxdigit() is undefined behaviour.
+                        */
+                       if (state->ptr[1] != '%' && (state->end - state->ptr <= 2
+                                       || !isxdigit((unsigned char) state->ptr[1])
+                                       || !isxdigit((unsigned char) state->ptr[2]))) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse query; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (state->ptr - tmp), tmp);
+                               return NULL;
+                       }
+                       /* copy '%' and the two bytes following it verbatim */
+                       state->buffer[state->offset++] = *state->ptr++;
+                       state->buffer[state->offset++] = *state->ptr++;
+                       state->buffer[state->offset++] = *state->ptr;
+                       break;
+
+               case ']':
+               case '[':
+                       /* brackets are percent-encoded on request, copied as-is otherwise */
+                       if (state->flags & PHP_HTTP_URL_PARSE_TOPCT) {
+                               state->buffer[state->offset++] = '%';
+                               state->buffer[state->offset++] = parse_xdigits[((unsigned char) *state->ptr) >> 4];
+                               state->buffer[state->offset++] = parse_xdigits[((unsigned char) *state->ptr) & 0xf];
+                               break;
+                       }
+                       /* no break */
+
+               case '?': case '/': /* yeah, well */
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case ':': case '@': /* pchar */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       state->buffer[state->offset++] = *state->ptr;
+                       break;
+
+               default:
+                       /* anything else is handed to the multibyte parser */
+                       if (!(mb = parse_mb(state, PARSE_QUERY, state->ptr, state->end, tmp, 0))) {
+                               return NULL;
+                       }
+                       /* skip the sequence; the loop increment accounts for its last byte */
+                       state->ptr += mb - 1;
+               }
+       } while (++state->ptr <= state->end);
+
+       return NULL;
+}
+
+/*
+ * Copy the fragment component into state->buffer.
+ *
+ * Does nothing (and returns state->ptr unchanged) unless state->ptr sits on
+ * a '#'. Otherwise the bytes after the '#' are copied up to the NUL,
+ * state->url.fragment points at the NUL-terminated copy and a pointer to the
+ * terminating NUL is returned. Returns NULL on a malformed percent-encoding
+ * (after raising a warning), when parse_mb() rejects a byte, or when the scan
+ * runs past state->end.
+ */
+static const char *parse_fragment(struct parse_state *state)
+{
+       size_t mb;
+       const char *tmp;
+       TSRMLS_FETCH_FROM_CTX(state->ts);
+
+       /* is there actually a fragment to parse? */
+       if (*state->ptr != '#') {
+               return state->ptr;
+       }
+
+       /* skip initial '#' */
+       tmp = ++state->ptr;
+       state->url.fragment = &state->buffer[state->offset];
+
+       do {
+               switch (*state->ptr) {
+               case '\0':
+                       state->buffer[state->offset++] = 0;
+                       return state->ptr;
+
+               case '%':
+                       /*
+                        * "%%" is passed through unchecked; otherwise '%' must be
+                        * followed by two hex digits that lie inside the input.
+                        * The casts are required: handing a negative plain char
+                        * to isxdigit() is undefined behaviour.
+                        */
+                       if (state->ptr[1] != '%' && (state->end - state->ptr <= 2
+                                       || !isxdigit((unsigned char) state->ptr[1])
+                                       || !isxdigit((unsigned char) state->ptr[2]))) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse fragment; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (state->ptr - tmp), tmp);
+                               return NULL;
+                       }
+                       /* copy '%' and the two bytes following it verbatim */
+                       state->buffer[state->offset++] = *state->ptr++;
+                       state->buffer[state->offset++] = *state->ptr++;
+                       state->buffer[state->offset++] = *state->ptr;
+                       break;
+
+               case '?': case '/':
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case ':': case '@': /* pchar */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       state->buffer[state->offset++] = *state->ptr;
+                       break;
+
+               default:
+                       /* anything else is handed to the multibyte parser */
+                       if (!(mb = parse_mb(state, PARSE_FRAGMENT, state->ptr, state->end, tmp, 0))) {
+                               return NULL;
+                       }
+                       /* skip the sequence; the loop increment accounts for its last byte */
+                       state->ptr += mb - 1;
+               }
+       } while (++state->ptr <= state->end);
+
+       return NULL;
+}
+
+/*
+ * Parse the hierarchical part: an optional "//authority" followed by the path.
+ * Returns whatever parse_path() returns, or NULL if the authority is rejected.
+ */
+static const char *parse_hier(struct parse_state *state)
+{
+       /* the length check keeps the look-ahead for the second '/' inside the input */
+       int has_authority = *state->ptr == '/'
+                       && state->end - state->ptr > 1
+                       && state->ptr[1] == '/';
+
+       if (has_authority) {
+               /* step over the "//" before handing over to the authority parser */
+               state->ptr += 2;
+               state->ptr = parse_authority(state);
+               if (!state->ptr) {
+                       return NULL;
+               }
+       }
+
+       return parse_path(state);
+}
+
+/*
+ * Try to parse a scheme at the very start of the URL.
+ *
+ * On success (a ':' is found) state->url.scheme points at the NUL-terminated
+ * copy at the start of state->buffer and a pointer past the ':' is returned.
+ *
+ * If the input does not start with a scheme this is a soft failure: the
+ * start of the input is returned so the caller can go on parsing it as
+ * authority/path. In that case both state->ptr and state->offset are rewound,
+ * so that bytes tentatively copied as "scheme" neither get skipped in the
+ * input nor stay behind in the output buffer.
+ */
+static const char *parse_scheme(struct parse_state *state)
+{
+       size_t mb;
+       const char *tmp = state->ptr;
+
+       do {
+               switch (*state->ptr) {
+               case ':':
+                       /* scheme delimiter */
+                       state->url.scheme = &state->buffer[0];
+                       state->buffer[state->offset++] = 0;
+                       return ++state->ptr;
+
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+               case '+': case '-': case '.':
+                       /* these may not start a scheme */
+                       if (state->ptr == tmp) {
+                               goto softfail;
+                       }
+                       /* no break */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+                       /* scheme part */
+                       state->buffer[state->offset++] = *state->ptr;
+                       break;
+
+               default:
+                       if (!(mb = parse_mb(state, PARSE_SCHEME, state->ptr, state->end, tmp, 1))) {
+                               /* soft fail; parse path next */
+                               goto softfail;
+                       }
+                       /* skip the sequence; the loop increment accounts for its last byte */
+                       state->ptr += mb - 1;
+               }
+       } while (++state->ptr != state->end);
+
+softfail:
+       /* the scheme always starts at buffer[0], so discarding it means offset 0 */
+       state->offset = 0;
+       return state->ptr = tmp;
+}
+
+/*
+ * Parse the URL in str (len bytes) into a freshly allocated php_http_url_t.
+ *
+ * The parse state, the resulting URL and the output buffer live in one
+ * allocation; the buffer is sized at three bytes per input byte. Returns NULL
+ * after releasing that allocation if any component fails to parse; scheme,
+ * query and fragment failures additionally raise a warning here.
+ */
+php_http_url_t *php_http_url_parse(const char *str, size_t len, unsigned flags TSRMLS_DC)
+{
+       struct parse_state *state;
+       size_t maxlen = 3 * len;
+
+       state = ecalloc(1, sizeof(*state) + maxlen);
+       state->ptr = str;
+       state->end = str + len;
+       state->maxlen = maxlen;
+       state->flags = flags;
+       TSRMLS_SET_CTX(state->ts);
+
+       if (!parse_scheme(state)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL scheme: '%s'", state->ptr);
+               goto failure;
+       }
+
+       /* no warning is raised here when the hierarchical part fails */
+       if (!parse_hier(state)) {
+               goto failure;
+       }
+
+       if (!parse_query(state)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL query: '%s'", state->ptr);
+               goto failure;
+       }
+
+       if (!parse_fragment(state)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL fragment: '%s'", state->ptr);
+               goto failure;
+       }
+
+       /* the state is handed out as the URL itself */
+       return (php_http_url_t *) state;
+
+failure:
+       efree(state);
+       return NULL;
+}
+
 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct, 0, 0, 0)
        ZEND_ARG_INFO(0, old_url)
        ZEND_ARG_INFO(0, new_url)
@@ -314,18 +1232,18 @@ PHP_METHOD(HttpUrl, __construct)
 
        zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
        {
-               php_url *res_purl, *new_purl = NULL, *old_purl = NULL;
+               php_http_url_t *res_purl, *new_purl = NULL, *old_purl = NULL;
 
                if (new_url) {
                        switch (Z_TYPE_P(new_url)) {
                                case IS_OBJECT:
                                case IS_ARRAY:
-                                       new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC);
+                                       new_purl = php_http_url_from_struct(HASH_OF(new_url) TSRMLS_CC);
                                        break;
                                default: {
                                        zval *cpy = php_http_ztyp(IS_STRING, new_url);
 
-                                       new_purl = php_url_parse(Z_STRVAL_P(cpy));
+                                       new_purl = php_http_url_parse(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy), flags TSRMLS_CC);
                                        zval_ptr_dtor(&cpy);
                                        break;
                                }
@@ -339,34 +1257,34 @@ PHP_METHOD(HttpUrl, __construct)
                        switch (Z_TYPE_P(old_url)) {
                                case IS_OBJECT:
                                case IS_ARRAY:
-                                       old_purl = php_http_url_from_struct(NULL, HASH_OF(old_url) TSRMLS_CC);
+                                       old_purl = php_http_url_from_struct(HASH_OF(old_url) TSRMLS_CC);
                                        break;
                                default: {
                                        zval *cpy = php_http_ztyp(IS_STRING, old_url);
 
-                                       old_purl = php_url_parse(Z_STRVAL_P(cpy));
+                                       old_purl = php_http_url_parse(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy), flags TSRMLS_CC);
                                        zval_ptr_dtor(&cpy);
                                        break;
                                }
                        }
                        if (!old_purl) {
                                if (new_purl) {
-                                       php_url_free(new_purl);
+                                       php_http_url_free(&new_purl);
                                }
                                zend_restore_error_handling(&zeh TSRMLS_CC);
                                return;
                        }
                }
 
-               php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC);
+               res_purl = php_http_url_mod(old_purl, new_purl, flags TSRMLS_CC);
                php_http_url_to_struct(res_purl, getThis() TSRMLS_CC);
 
-               php_url_free(res_purl);
+               php_http_url_free(&res_purl);
                if (old_purl) {
-                       php_url_free(old_purl);
+                       php_http_url_free(&old_purl);
                }
                if (new_purl) {
-                       php_url_free(new_purl);
+                       php_http_url_free(&new_purl);
                }
        }
        zend_restore_error_handling(&zeh TSRMLS_CC);
@@ -386,18 +1304,18 @@ PHP_METHOD(HttpUrl, mod)
 
        zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
        {
-               php_url *new_purl = NULL, *old_purl = NULL;
+               php_http_url_t *new_purl = NULL, *old_purl = NULL;
 
                if (new_url) {
                        switch (Z_TYPE_P(new_url)) {
                                case IS_OBJECT:
                                case IS_ARRAY:
-                                       new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC);
+                                       new_purl = php_http_url_from_struct(HASH_OF(new_url) TSRMLS_CC);
                                        break;
                                default: {
                                        zval *cpy = php_http_ztyp(IS_STRING, new_url);
 
-                                       new_purl = php_url_parse(Z_STRVAL_P(new_url));
+                                       /* parse the string-converted copy; new_url itself need not be a string */
+                                       new_purl = php_http_url_parse(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy), flags TSRMLS_CC);
                                        zval_ptr_dtor(&cpy);
                                        break;
                                }
@@ -408,19 +1326,19 @@ PHP_METHOD(HttpUrl, mod)
                        }
                }
 
-               if ((old_purl = php_http_url_from_struct(NULL, HASH_OF(getThis()) TSRMLS_CC))) {
-                       php_url *res_purl;
+               if ((old_purl = php_http_url_from_struct(HASH_OF(getThis()) TSRMLS_CC))) {
+                       php_http_url_t *res_purl;
 
                        ZVAL_OBJVAL(return_value, zend_objects_clone_obj(getThis() TSRMLS_CC), 0);
 
-                       php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC);
+                       res_purl = php_http_url_mod(old_purl, new_purl, flags TSRMLS_CC);
                        php_http_url_to_struct(res_purl, return_value TSRMLS_CC);
 
-                       php_url_free(res_purl);
-                       php_url_free(old_purl);
+                       php_http_url_free(&res_purl);
+                       php_http_url_free(&old_purl);
                }
                if (new_purl) {
-                       php_url_free(new_purl);
+                       php_http_url_free(&new_purl);
                }
        }
        zend_restore_error_handling(&zeh TSRMLS_CC);
@@ -431,14 +1349,14 @@ ZEND_END_ARG_INFO();
 PHP_METHOD(HttpUrl, toString)
 {
        if (SUCCESS == zend_parse_parameters_none()) {
-               php_url *purl;
+               php_http_url_t *purl;
 
-               if ((purl = php_http_url_from_struct(NULL, HASH_OF(getThis()) TSRMLS_CC))) {
+               if ((purl = php_http_url_from_struct(HASH_OF(getThis()) TSRMLS_CC))) {
                        char *str;
                        size_t len;
 
-                       php_http_url(0, purl, NULL, NULL, &str, &len TSRMLS_CC);
-                       php_url_free(purl);
+                       php_http_url_to_string(purl, &str, &len TSRMLS_CC);
+                       php_http_url_free(&purl);
                        RETURN_STRINGL(str, len, 0);
                }
        }
@@ -449,16 +1367,70 @@ ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toArray, 0, 0, 0)
 ZEND_END_ARG_INFO();
 PHP_METHOD(HttpUrl, toArray)
 {
-       php_url *purl;
+       php_http_url_t *purl;
 
        if (SUCCESS != zend_parse_parameters_none()) {
                return;
        }
 
        /* strip any non-URL properties */
-       purl = php_http_url_from_struct(NULL, HASH_OF(getThis()) TSRMLS_CC);
+       purl = php_http_url_from_struct(HASH_OF(getThis()) TSRMLS_CC);
        php_http_url_to_struct(purl, return_value TSRMLS_CC);
-       php_url_free(purl);
+       php_http_url_free(&purl);
+}
+
+ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_parse, 0, 0, 1)
+       ZEND_ARG_INFO(0, url)
+       ZEND_ARG_INFO(0, flags)
+ZEND_END_ARG_INFO();
+PHP_METHOD(HttpUrl, parse)
+{
+       char *str;
+       int len;
+       long flags = 0;
+       php_http_url_t *url;
+       zend_error_handling zeh;
+
+       php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &len, &flags), invalid_arg, return);
+
+       zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
+       if ((url = php_http_url_parse(str, len, flags TSRMLS_CC))) {
+               object_init_ex(return_value, php_http_url_class_entry);
+               if (url->scheme) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("scheme"), url->scheme TSRMLS_CC);
+               }
+               if (url->user) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("user"), url->user TSRMLS_CC);
+               }
+               if (url->pass) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("pass"), url->pass TSRMLS_CC);
+               }
+               if (url->host) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("host"), url->host TSRMLS_CC);
+               }
+               if (url->port) {
+                       zend_update_property_long(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("port"), url->port TSRMLS_CC);
+               }
+               if (url->path) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("path"), url->path TSRMLS_CC);
+               }
+               if (url->query) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("query"), url->query TSRMLS_CC);
+               }
+               if (url->fragment) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("fragment"), url->fragment TSRMLS_CC);
+               }
+               php_http_url_free(&url);
+       }
+       zend_restore_error_handling(&zeh TSRMLS_CC);
 }
 
 static zend_function_entry php_http_url_methods[] = {
@@ -467,6 +1439,7 @@ static zend_function_entry php_http_url_methods[] = {
        PHP_ME(HttpUrl, toString,     ai_HttpUrl_toString, ZEND_ACC_PUBLIC)
        ZEND_MALIAS(HttpUrl, __toString, toString, ai_HttpUrl_toString, ZEND_ACC_PUBLIC)
        PHP_ME(HttpUrl, toArray,      ai_HttpUrl_toArray, ZEND_ACC_PUBLIC)
+       PHP_ME(HttpUrl, parse,        ai_HttpUrl_parse, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
        EMPTY_FUNCTION_ENTRY
 };
 
@@ -502,6 +1475,15 @@ PHP_MINIT_FUNCTION(http_url)
        zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("FROM_ENV"), PHP_HTTP_URL_FROM_ENV TSRMLS_CC);
        zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("SANITIZE_PATH"), PHP_HTTP_URL_SANITIZE_PATH TSRMLS_CC);
 
+#ifdef PHP_HTTP_HAVE_WCHAR
+       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBLOC"), PHP_HTTP_URL_PARSE_MBLOC TSRMLS_CC);
+#endif
+       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC);
+#ifdef PHP_HTTP_HAVE_IDN
+       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOIDN"), PHP_HTTP_URL_PARSE_TOIDN TSRMLS_CC);
+#endif
+       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOPCT"), PHP_HTTP_URL_PARSE_TOPCT TSRMLS_CC);
+
        return SUCCESS;
 }
 
index 5c61daa4f1dff75b4c8dd03a399c908800010f8f..a6dda5351fee73c6f5b21e127b6d83e2ca54bfd2 100644 (file)
 #define PHP_HTTP_URL_FROM_ENV          0x1000
 #define PHP_HTTP_URL_SANITIZE_PATH     0x2000
 
+/* parse multibyte according to locale */
+#define PHP_HTTP_URL_PARSE_MBLOC       0x10000
+/* parse utf8 multibyte sequences */
+#define PHP_HTTP_URL_PARSE_MBUTF8      0x20000
+/* convert multibyte hostnames to IDNA */
+#define PHP_HTTP_URL_PARSE_TOIDN       0x100000
+/* percent encode multibyte sequences in userinfo, path, query and fragment */
+#define PHP_HTTP_URL_PARSE_TOPCT       0x200000
+
+typedef struct php_http_url {
+       /* parsed URL components; compatible with php_url, but must be released with php_http_url_free(), never with php_url_free() */
+       char *scheme;
+       char *user;
+       char *pass;
+       char *host;
+       unsigned short port; /* 0 is handled as "no port" by the code that reads it (tested with `if (url->port)`) */
+       char *path;
+       char *query;
+       char *fragment;
+} php_http_url_t;
+
+PHP_HTTP_API php_http_url_t *php_http_url_parse(const char *str, size_t len, unsigned flags TSRMLS_DC);
+PHP_HTTP_API void php_http_url_free(php_http_url_t **url);
+
+/* deprecated */
 PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC);
+/* use this instead */
+PHP_HTTP_API php_http_url_t *php_http_url_mod(const php_http_url_t *old_url, const php_http_url_t *new_url, unsigned flags TSRMLS_DC);
 
 PHP_HTTP_API STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC);
 PHP_HTTP_API STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC);
@@ -48,168 +75,24 @@ static inline void php_http_url_argsep(const char **str, size_t *len TSRMLS_DC)
        }
 }
 
-static inline void php_http_url_to_string(php_url *url, char **url_str, size_t *url_len TSRMLS_DC)
+static inline php_url *php_http_url_to_php_url(php_http_url_t *url)
 {
-       php_http_buffer_t buf;
+       php_url *purl = ecalloc(1, sizeof(*purl));
 
-       php_http_buffer_init(&buf);
+       /* deep-copy every component so the result can be released independently of url */
+       if (url->scheme)   purl->scheme   = estrdup(url->scheme);
+       if (url->pass)     purl->pass     = estrdup(url->pass);
+       if (url->user)     purl->user     = estrdup(url->user);
+       if (url->host)     purl->host     = estrdup(url->host);
+       /* the port is a plain number and must be carried over as well */
+       purl->port = url->port;
+       if (url->path)     purl->path     = estrdup(url->path);
+       if (url->query)    purl->query    = estrdup(url->query);
+       if (url->fragment) purl->fragment = estrdup(url->fragment);
 
-       if (url->scheme && *url->scheme) {
-               php_http_buffer_appendl(&buf, url->scheme);
-               php_http_buffer_appends(&buf, "://");
-       } else {
-               php_http_buffer_appends(&buf, "//");
-       }
-
-       if (url->user && *url->user) {
-               php_http_buffer_appendl(&buf, url->user);
-               if (url->pass && *url->pass) {
-                       php_http_buffer_appends(&buf, ":");
-                       php_http_buffer_appendl(&buf, url->pass);
-               }
-               php_http_buffer_appends(&buf, "@");
-       }
-
-       if (url->host && *url->host) {
-               php_http_buffer_appendl(&buf, url->host);
-       } else {
-               php_http_buffer_appends(&buf, "localhost");
-       }
-
-       if (url->port) {
-               php_http_buffer_appendf(&buf, ":%hu", url->port);
-       }
-
-       if (url->path && *url->path) {
-               php_http_buffer_appendl(&buf, url->path);
-       }
-
-       if (url->query && *url->query) {
-               php_http_buffer_appends(&buf, "?");
-               php_http_buffer_appendl(&buf, url->query);
-       }
-
-       if (url->fragment && *url->fragment) {
-               php_http_buffer_appends(&buf, "#");
-               php_http_buffer_appendl(&buf, url->fragment);
-       }
-
-       php_http_buffer_shrink(&buf);
-       php_http_buffer_fix(&buf);
-
-       if (url_len) {
-               *url_len = buf.used;
-       }
-
-       if (url_str) {
-               *url_str = buf.data;
-       } else {
-               php_http_buffer_dtor(&buf);
-       }
+       return purl;
 }
 
-static inline php_url *php_http_url_from_struct(php_url *url, HashTable *ht TSRMLS_DC)
-{
-       zval **e;
-       
-       if (!url) {
-               url = emalloc(sizeof(*url));
-       }
-       memset(url, 0, sizeof(*url));
-       
-       if (SUCCESS == zend_hash_find(ht, "scheme", sizeof("scheme"), (void *) &e)) {
-               zval *cpy = php_http_ztyp(IS_STRING, *e);
-               url->scheme = estrndup(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy));
-               zval_ptr_dtor(&cpy);
-       }
-       if (SUCCESS == zend_hash_find(ht, "user", sizeof("user"), (void *) &e)) {
-               zval *cpy = php_http_ztyp(IS_STRING, *e);
-               url->user = estrndup(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy));
-               zval_ptr_dtor(&cpy);
-       }
-       if (SUCCESS == zend_hash_find(ht, "pass", sizeof("pass"), (void *) &e)) {
-               zval *cpy = php_http_ztyp(IS_STRING, *e);
-               url->pass = estrndup(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy));
-               zval_ptr_dtor(&cpy);
-       }
-       if (SUCCESS == zend_hash_find(ht, "host", sizeof("host"), (void *) &e)) {
-               zval *cpy = php_http_ztyp(IS_STRING, *e);
-               url->host = estrndup(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy));
-               zval_ptr_dtor(&cpy);
-       }
-       if (SUCCESS == zend_hash_find(ht, "path", sizeof("path"), (void *) &e)) {
-               zval *cpy = php_http_ztyp(IS_STRING, *e);
-               url->path = estrndup(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy));
-               zval_ptr_dtor(&cpy);
-       }
-       if (SUCCESS == zend_hash_find(ht, "query", sizeof("query"), (void *) &e)) {
-               zval *cpy = php_http_ztyp(IS_STRING, *e);
-               url->query = estrndup(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy));
-               zval_ptr_dtor(&cpy);
-       }
-       if (SUCCESS == zend_hash_find(ht, "fragment", sizeof("fragment"), (void *) &e)) {
-               zval *cpy = php_http_ztyp(IS_STRING, *e);
-               url->fragment = estrndup(Z_STRVAL_P(cpy), Z_STRLEN_P(cpy));
-               zval_ptr_dtor(&cpy);
-       }
-       if (SUCCESS == zend_hash_find(ht, "port", sizeof("port"), (void *) &e)) {
-               zval *cpy = php_http_ztyp(IS_LONG, *e);
-               url->port = (unsigned short) Z_LVAL_P(cpy);
-               zval_ptr_dtor(&cpy);
-       }
-       
-       return url;
-}
-
-static inline HashTable *php_http_url_to_struct(php_url *url, zval *strct TSRMLS_DC)
-{
-       zval arr;
-       
-       if (strct) {
-               switch (Z_TYPE_P(strct)) {
-                       default:
-                               zval_dtor(strct);
-                               array_init(strct);
-                               /* no break */
-                       case IS_ARRAY:
-                       case IS_OBJECT:
-                               INIT_PZVAL_ARRAY((&arr), HASH_OF(strct));
-                               break;
-               }
-       } else {
-               INIT_PZVAL(&arr);
-               array_init(&arr);
-       }
-       
-       if (url) {
-               if (url->scheme) {
-                       add_assoc_string(&arr, "scheme", url->scheme, 1);
-               }
-               if (url->user) {
-                       add_assoc_string(&arr, "user", url->user, 1);
-               }
-               if (url->pass) {
-                       add_assoc_string(&arr, "pass", url->pass, 1);
-               }
-               if (url->host) {
-                       add_assoc_string(&arr, "host", url->host, 1);
-               }
-               if (url->port) {
-                       add_assoc_long(&arr, "port", (long) url->port);
-               }
-               if (url->path) {
-                       add_assoc_string(&arr, "path", url->path, 1);
-               }
-               if (url->query) {
-                       add_assoc_string(&arr, "query", url->query, 1);
-               }
-               if (url->fragment) {
-                       add_assoc_string(&arr, "fragment", url->fragment, 1);
-               }
-       }
-       
-       return Z_ARRVAL(arr);
-}
+PHP_HTTP_API php_http_url_t *php_http_url_from_struct(HashTable *ht TSRMLS_DC);
+PHP_HTTP_API HashTable *php_http_url_to_struct(const php_http_url_t *url, zval *strct TSRMLS_DC);
+PHP_HTTP_API void php_http_url_to_string(const php_http_url_t *url, char **url_str, size_t *url_len TSRMLS_DC);
 
 PHP_HTTP_API zend_class_entry *php_http_url_class_entry;
 PHP_MINIT_FUNCTION(http_url);
diff --git a/php_http_utf8.h b/php_http_utf8.h
new file mode 100644 (file)
index 0000000..c7bcb49
--- /dev/null
@@ -0,0 +1,636 @@
+/*
+    +--------------------------------------------------------------------+
+    | PECL :: http                                                       |
+    +--------------------------------------------------------------------+
+    | Redistribution and use in source and binary forms, with or without |
+    | modification, are permitted provided that the conditions mentioned |
+    | in the accompanying LICENSE file are met.                          |
+    +--------------------------------------------------------------------+
+    | Copyright (c) 2004-2014, Michael Wallner <mike@php.net>            |
+    +--------------------------------------------------------------------+
+*/
+
+#ifndef PHP_HTTP_UTF8_H
+#define PHP_HTTP_UTF8_H
+
+typedef struct utf8_range { /* one entry of the utf8_ranges code point table */
+       unsigned int start; /* first code point of the range, or the sole code point of a single entry */
+       unsigned int end; /* last code point (inclusive); 0 in single-code-point entries */
+       unsigned char step; /* stride between members; 1 = contiguous, 0 in single-code-point entries */
+} utf8_range_t;
+
+static const unsigned char utf8_mblen[256] = { /* sequence length in bytes, indexed by the first byte */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x80-0xBF are also mapped to length 1 */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xC0-0xDF */
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xE0-0xEF */
+    4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6 /* 0xF0-0xF7: 4, 0xF8-0xFB: 5, 0xFC-0xFF: 6 */
+};
+
+static const unsigned char utf8_mask[] = { /* indexed by sequence length: mask applied to the first byte by utf8towc */
+               0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01
+};
+
+static const utf8_range_t utf8_ranges[] = {
+/* BEGIN::UTF8TABLE */
+       {    0x0041,     0x005A, 1},
+       {    0x0061,     0x007A, 1},
+       {    0x00AA,          0, 0},
+       {    0x00B5,          0, 0},
+       {    0x00BA,          0, 0},
+       {    0x00C0,     0x00D6, 1},
+       {    0x00D8,     0x00F6, 1},
+       {    0x00F8,     0x00FF, 1},
+       {    0x0100,     0x017F, 1},
+       {    0x0180,     0x024F, 1},
+       {    0x0250,     0x02AF, 1},
+       {    0x02B0,     0x02C1, 1},
+       {    0x02C6,     0x02D1, 1},
+       {    0x02E0,     0x02E4, 1},
+       {    0x02EE,          0, 0},
+       {    0x0345,          0, 0},
+       {    0x0370,     0x0373, 1},
+       {    0x0376,     0x0377, 1},
+       {    0x037A,     0x037D, 1},
+       {    0x0386,          0, 0},
+       {    0x0388,     0x038A, 1},
+       {    0x038C,          0, 0},
+       {    0x038E,     0x03A1, 1},
+       {    0x03A3,     0x03CE, 1},
+       {    0x03D0,     0x03F5, 1},
+       {    0x03F7,     0x03FF, 1},
+       {    0x0400,     0x0481, 1},
+       {    0x048A,     0x04FF, 1},
+       {    0x0500,     0x0523, 1},
+       {    0x0531,     0x0556, 1},
+       {    0x0559,          0, 0},
+       {    0x0561,     0x0587, 1},
+       {    0x05D0,     0x05EA, 1},
+       {    0x05F0,     0x05F2, 1},
+       {    0x0621,     0x064A, 1},
+       {    0x066E,     0x066F, 1},
+       {    0x0671,     0x06D3, 1},
+       {    0x06D5,          0, 0},
+       {    0x06E5,     0x06E6, 1},
+       {    0x06EE,     0x06EF, 1},
+       {    0x06FA,     0x06FC, 1},
+       {    0x06FF,          0, 0},
+       {    0x0710,          0, 0},
+       {    0x0712,     0x072F, 1},
+       {    0x074D,     0x074F, 1},
+       {    0x0750,     0x077F, 1},
+       {    0x0780,     0x07A5, 1},
+       {    0x07B1,          0, 0},
+       {    0x07C0,     0x07EA, 1},
+       {    0x07F4,     0x07F5, 1},
+       {    0x07FA,          0, 0},
+       {    0x0901,     0x0939, 1},
+       {    0x093C,     0x094D, 1},
+       {    0x0950,     0x0954, 1},
+       {    0x0958,     0x0961, 1},
+       {    0x0962,          0, 0},
+       {    0x0963,          0, 0},
+       {    0x0972,          0, 0},
+       {    0x097B,     0x097F, 1},
+       {    0x0981,     0x0983, 1},
+       {    0x0985,     0x098C, 1},
+       {    0x098F,          0, 0},
+       {    0x0990,          0, 0},
+       {    0x0993,     0x09A8, 1},
+       {    0x09AA,     0x09B0, 1},
+       {    0x09B2,          0, 0},
+       {    0x09B6,     0x09B9, 1},
+       {    0x09BC,     0x09C4, 1},
+       {    0x09C7,          0, 0},
+       {    0x09C8,          0, 0},
+       {    0x09CB,     0x09CE, 1},
+       {    0x09D7,          0, 0},
+       {    0x09DC,          0, 0},
+       {    0x09DD,          0, 0},
+       {    0x09DF,     0x09E3, 1},
+       {    0x09F0,     0x09FA, 1},
+       {    0x0A01,     0x0A03, 1},
+       {    0x0A05,     0x0A0A, 1},
+       {    0x0A0F,          0, 0},
+       {    0x0A10,          0, 0},
+       {    0x0A13,     0x0A28, 1},
+       {    0x0A2A,     0x0A30, 1},
+       {    0x0A32,          0, 0},
+       {    0x0A33,          0, 0},
+       {    0x0A35,          0, 0},
+       {    0x0A36,          0, 0},
+       {    0x0A38,          0, 0},
+       {    0x0A39,          0, 0},
+       {    0x0A3C,          0, 0},
+       {    0x0A3E,     0x0A42, 1},
+       {    0x0A47,          0, 0},
+       {    0x0A48,          0, 0},
+       {    0x0A4B,     0x0A4D, 1},
+       {    0x0A51,          0, 0},
+       {    0x0A59,     0x0A5C, 1},
+       {    0x0A5E,          0, 0},
+       {    0x0A70,     0x0A75, 1},
+       {    0x0A81,     0x0A83, 1},
+       {    0x0A85,     0x0A8D, 1},
+       {    0x0A8F,     0x0A91, 1},
+       {    0x0A93,     0x0AA8, 1},
+       {    0x0AAA,     0x0AB0, 1},
+       {    0x0AB2,          0, 0},
+       {    0x0AB3,          0, 0},
+       {    0x0AB5,     0x0AB9, 1},
+       {    0x0ABC,     0x0AC5, 1},
+       {    0x0AC7,     0x0AC9, 1},
+       {    0x0ACB,     0x0ACD, 1},
+       {    0x0AD0,          0, 0},
+       {    0x0AE0,     0x0AE3, 1},
+       {    0x0AF1,          0, 0},
+       {    0x0B01,     0x0B03, 1},
+       {    0x0B05,     0x0B0C, 1},
+       {    0x0B0F,          0, 0},
+       {    0x0B10,          0, 0},
+       {    0x0B13,     0x0B28, 1},
+       {    0x0B2A,     0x0B30, 1},
+       {    0x0B32,          0, 0},
+       {    0x0B33,          0, 0},
+       {    0x0B35,     0x0B39, 1},
+       {    0x0B3C,     0x0B44, 1},
+       {    0x0B47,     0x0B48, 1},
+       {    0x0B4B,     0x0B4D, 1},
+       {    0x0B56,     0x0B57, 1},
+       {    0x0B5C,          0, 0},
+       {    0x0B5D,          0, 0},
+       {    0x0B5F,     0x0B63, 1},
+       {    0x0B70,          0, 0},
+       {    0x0B71,          0, 0},
+       {    0x0B82,          0, 0},
+       {    0x0B83,          0, 0},
+       {    0x0B85,     0x0B8A, 1},
+       {    0x0B8E,     0x0B90, 1},
+       {    0x0B92,     0x0B95, 1},
+       {    0x0B99,          0, 0},
+       {    0x0B9A,          0, 0},
+       {    0x0B9C,          0, 0},
+       {    0x0B9E,          0, 0},
+       {    0x0B9F,          0, 0},
+       {    0x0BA3,          0, 0},
+       {    0x0BA4,          0, 0},
+       {    0x0BA8,     0x0BAA, 1},
+       {    0x0BAE,     0x0BB9, 1},
+       {    0x0BBE,     0x0BC2, 1},
+       {    0x0BC6,     0x0BC8, 1},
+       {    0x0BCA,     0x0BCD, 1},
+       {    0x0BD0,          0, 0},
+       {    0x0BD7,          0, 0},
+       {    0x0BF0,     0x0BFA, 1},
+       {    0x0C01,     0x0C03, 1},
+       {    0x0C05,     0x0C0C, 1},
+       {    0x0C0E,     0x0C10, 1},
+       {    0x0C12,     0x0C28, 1},
+       {    0x0C2A,     0x0C33, 1},
+       {    0x0C35,     0x0C39, 1},
+       {    0x0C3D,     0x0C44, 1},
+       {    0x0C46,     0x0C48, 1},
+       {    0x0C4A,     0x0C4D, 1},
+       {    0x0C55,     0x0C56, 1},
+       {    0x0C58,     0x0C59, 1},
+       {    0x0C60,     0x0C63, 1},
+       {    0x0C82,     0x0C83, 1},
+       {    0x0C85,     0x0C8C, 1},
+       {    0x0C8E,     0x0C90, 1},
+       {    0x0C92,     0x0CA8, 1},
+       {    0x0CAA,     0x0CB3, 1},
+       {    0x0CB5,     0x0CB9, 1},
+       {    0x0CBC,     0x0CC4, 1},
+       {    0x0CC6,     0x0CC8, 1},
+       {    0x0CCA,     0x0CCD, 1},
+       {    0x0CD5,     0x0CD6, 1},
+       {    0x0CDE,          0, 0},
+       {    0x0CE0,     0x0CE3, 1},
+       {    0x0CF1,          0, 0},
+       {    0x0CF2,          0, 0},
+       {    0x0D02,     0x0D03, 1},
+       {    0x0D05,     0x0D0C, 1},
+       {    0x0D0E,     0x0D10, 1},
+       {    0x0D12,     0x0D28, 1},
+       {    0x0D2A,     0x0D39, 1},
+       {    0x0D3D,     0x0D44, 1},
+       {    0x0D46,     0x0D48, 1},
+       {    0x0D4A,     0x0D4D, 1},
+       {    0x0D57,          0, 0},
+       {    0x0D60,     0x0D63, 1},
+       {    0x0D79,     0x0D7F, 1},
+       {    0x0D82,     0x0D83, 1},
+       {    0x0D85,     0x0D96, 1},
+       {    0x0D9A,     0x0DB1, 1},
+       {    0x0DB3,     0x0DBB, 1},
+       {    0x0DBD,          0, 0},
+       {    0x0DC0,     0x0DC6, 1},
+       {    0x0DCA,          0, 0},
+       {    0x0DCF,     0x0DD4, 1},
+       {    0x0DD6,          0, 0},
+       {    0x0DD8,     0x0DDF, 1},
+       {    0x0DF2,     0x0DF4, 1},
+       {    0x0E01,     0x0E2E, 1},
+       {    0x0E30,     0x0E3A, 1},
+       {    0x0E40,     0x0E45, 1},
+       {    0x0E47,     0x0E4E, 1},
+       {    0x0E81,     0x0E82, 1},
+       {    0x0E84,          0, 0},
+       {    0x0E87,     0x0E88, 1},
+       {    0x0E8A,          0, 0},
+       {    0x0E8D,          0, 0},
+       {    0x0E94,     0x0E97, 1},
+       {    0x0E99,     0x0E9F, 1},
+       {    0x0EA1,     0x0EA3, 1},
+       {    0x0EA5,          0, 0},
+       {    0x0EA7,          0, 0},
+       {    0x0EAA,     0x0EAB, 1},
+       {    0x0EAD,     0x0EB0, 1},
+       {    0x0EB2,     0x0EB3, 1},
+       {    0x0EBD,          0, 0},
+       {    0x0EC0,     0x0EC4, 1},
+       {    0x0EC6,          0, 0},
+       {    0x0EDC,     0x0EDD, 1},
+       {    0x0F00,          0, 0},
+       {    0x0F40,     0x0F47, 1},
+       {    0x0F49,     0x0F6C, 1},
+       {    0x0F88,     0x0F8B, 1},
+       {    0x1000,     0x102A, 1},
+       {    0x1050,     0x1055, 1},
+       {    0x105A,     0x105D, 1},
+       {    0x1061,          0, 0},
+       {    0x1065,          0, 0},
+       {    0x1066,          0, 0},
+       {    0x106E,     0x1070, 1},
+       {    0x1075,     0x1081, 1},
+       {    0x108E,          0, 0},
+       {    0x10A0,     0x10C5, 1},
+       {    0x10D0,     0x10FA, 1},
+       {    0x10FC,          0, 0},
+       {    0x1100,     0x1159, 1},
+       {    0x115F,     0x11A2, 1},
+       {    0x11A8,     0x11F9, 1},
+       {    0x1200,     0x1248, 1},
+       {    0x124A,     0x124D, 1},
+       {    0x1250,     0x1256, 1},
+       {    0x1258,          0, 0},
+       {    0x125A,     0x125D, 1},
+       {    0x1260,     0x1288, 1},
+       {    0x128A,     0x128D, 1},
+       {    0x1290,     0x12B0, 1},
+       {    0x12B2,     0x12B5, 1},
+       {    0x12B8,     0x12BE, 1},
+       {    0x12C0,          0, 0},
+       {    0x12C2,     0x12C5, 1},
+       {    0x12C8,     0x12D6, 1},
+       {    0x12D8,     0x1310, 1},
+       {    0x1312,     0x1315, 1},
+       {    0x1318,     0x135A, 1},
+       {    0x1380,     0x138F, 1},
+       {    0x13A0,     0x13F4, 1},
+       {    0x1401,     0x166C, 1},
+       {    0x166F,     0x1676, 1},
+       {    0x1681,     0x169A, 1},
+       {    0x16A0,     0x16EA, 1},
+       {    0x16EE,     0x16F0, 1},
+       {    0x1700,     0x170C, 1},
+       {    0x170E,     0x1711, 1},
+       {    0x1720,     0x1731, 1},
+       {    0x1740,     0x1751, 1},
+       {    0x1760,     0x176C, 1},
+       {    0x176E,     0x1770, 1},
+       {    0x1780,     0x17B3, 1},
+       {    0x17D7,          0, 0},
+       {    0x17DC,          0, 0},
+       {    0x1820,     0x1877, 1},
+       {    0x1880,     0x18A8, 1},
+       {    0x18AA,          0, 0},
+       {    0x1900,     0x191C, 1},
+       {    0x1946,     0x194F, 1},
+       {    0x1950,     0x196D, 1},
+       {    0x1970,     0x1974, 1},
+       {    0x1980,     0x19A9, 1},
+       {    0x19C1,     0x19C7, 1},
+       {    0x19D0,     0x19D9, 1},
+       {    0x1A00,     0x1A16, 1},
+       {    0x1B05,     0x1B33, 1},
+       {    0x1B45,     0x1B4B, 1},
+       {    0x1B50,     0x1B59, 1},
+       {    0x1B83,     0x1BA0, 1},
+       {    0x1BAE,     0x1BAF, 1},
+       {    0x1C00,     0x1C23, 1},
+       {    0x1C4D,     0x1C4F, 1},
+       {    0x1C5A,     0x1C7D, 1},
+       {    0x1D00,     0x1DBF, 1},
+       {    0x1E00,     0x1E9F, 1},
+       {    0x1EA0,     0x1EFF, 1},
+       {    0x1F00,     0x1F15, 1},
+       {    0x1F18,     0x1F1D, 1},
+       {    0x1F20,     0x1F45, 1},
+       {    0x1F48,     0x1F4D, 1},
+       {    0x1F50,     0x1F57, 1},
+       {    0x1F59,          0, 0},
+       {    0x1F5B,          0, 0},
+       {    0x1F5D,          0, 0},
+       {    0x1F5F,     0x1F7D, 1},
+       {    0x1F80,     0x1FB4, 1},
+       {    0x1FB6,     0x1FBC, 1},
+       {    0x1FBE,          0, 0},
+       {    0x1FC2,     0x1FC4, 1},
+       {    0x1FC6,     0x1FCC, 1},
+       {    0x1FD0,     0x1FD3, 1},
+       {    0x1FD6,     0x1FDB, 1},
+       {    0x1FE0,     0x1FEC, 1},
+       {    0x1FF2,     0x1FF4, 1},
+       {    0x1FF6,     0x1FFC, 1},
+       {    0x2071,          0, 0},
+       {    0x207F,          0, 0},
+       {    0x2090,     0x2094, 1},
+       {    0x2102,          0, 0},
+       {    0x2107,          0, 0},
+       {    0x210A,     0x2113, 1},
+       {    0x2115,          0, 0},
+       {    0x2119,     0x211D, 1},
+       {    0x2124,          0, 0},
+       {    0x2126,          0, 0},
+       {    0x2128,     0x212D, 1},
+       {    0x212F,     0x2139, 1},
+       {    0x213C,     0x213F, 1},
+       {    0x2145,     0x2149, 1},
+       {    0x214E,          0, 0},
+       {    0x2160,     0x2188, 1},
+       {    0x249C,     0x24E9, 1},
+       {    0x2C00,     0x2C2E, 1},
+       {    0x2C30,     0x2C5E, 1},
+       {    0x2C60,     0x2C6F, 1},
+       {    0x2C71,     0x2C7D, 1},
+       {    0x2C80,     0x2CE4, 1},
+       {    0x2D00,     0x2D25, 1},
+       {    0x2D30,     0x2D65, 1},
+       {    0x2D6F,          0, 0},
+       {    0x2D80,     0x2D96, 1},
+       {    0x2DA0,     0x2DA6, 1},
+       {    0x2DA8,     0x2DAE, 1},
+       {    0x2DB0,     0x2DB6, 1},
+       {    0x2DB8,     0x2DBE, 1},
+       {    0x2DC0,     0x2DC6, 1},
+       {    0x2DC8,     0x2DCE, 1},
+       {    0x2DD0,     0x2DD6, 1},
+       {    0x2DD8,     0x2DDE, 1},
+       {    0x3005,     0x3007, 1},
+       {    0x3021,     0x3029, 1},
+       {    0x3031,     0x3035, 1},
+       {    0x3038,     0x303C, 1},
+       {    0x3041,     0x3096, 1},
+       {    0x309D,     0x309F, 1},
+       {    0x30A1,     0x30FA, 1},
+       {    0x30FC,     0x30FF, 1},
+       {    0x3105,     0x312D, 1},
+       {    0x3131,     0x318E, 1},
+       {    0x31A0,     0x31B7, 1},
+       {    0x31F0,     0x31FF, 1},
+       {    0x3400,     0x4DB5, 1},
+       {    0x4E00,     0x9FBB, 1},
+       {    0xA000,     0xA48C, 1},
+       {    0xA500,     0xA60B, 1},
+       {    0xA610,     0xA61F, 1},
+       {    0xA62A,     0xA62B, 1},
+       {    0xA640,     0xA65F, 1},
+       {    0xA662,     0xA66E, 1},
+       {    0xA680,     0xA697, 1},
+       {    0xA717,     0xA71F, 1},
+       {    0xA722,     0xA78C, 1},
+       {    0xA7FB,     0xA7FF, 1},
+       {    0xA800,          0, 0},
+       {    0xA801,          0, 0},
+       {    0xA803,     0xA805, 1},
+       {    0xA807,     0xA80A, 1},
+       {    0xA80C,     0xA822, 1},
+       {    0xA840,     0xA873, 1},
+       {    0xA882,     0xA8B3, 1},
+       {    0xA90A,     0xA92D, 1},
+       {    0xA930,     0xA946, 1},
+       {    0xAA00,     0xAA28, 1},
+       {    0xAA40,     0xAA42, 1},
+       {    0xAA44,     0xAA4B, 1},
+       {    0xAC00,     0xD7A3, 1},
+       {    0xF900,     0xFA2D, 1},
+       {    0xFA30,     0xFA6A, 1},
+       {    0xFA70,     0xFAD9, 1},
+       {    0xFB00,     0xFB06, 1},
+       {    0xFB13,     0xFB17, 1},
+       {    0xFB1D,          0, 0},
+       {    0xFB1F,     0xFB28, 1},
+       {    0xFB2A,     0xFB36, 1},
+       {    0xFB38,     0xFB3C, 1},
+       {    0xFB3E,          0, 0},
+       {    0xFB40,          0, 0},
+       {    0xFB41,          0, 0},
+       {    0xFB43,          0, 0},
+       {    0xFB44,          0, 0},
+       {    0xFB46,     0xFB4F, 1},
+       {    0xFB50,     0xFBB1, 1},
+       {    0xFBD3,     0xFD3D, 1},
+       {    0xFD50,     0xFD8F, 1},
+       {    0xFD92,     0xFDC7, 1},
+       {    0xFDF0,     0xFDFB, 1},
+       {    0xFE70,     0xFE74, 1},
+       {    0xFE76,     0xFEFC, 1},
+       {    0xFF21,     0xFF3A, 1},
+       {    0xFF41,     0xFF5A, 1},
+       {    0xFF66,     0xFFBE, 1},
+       {    0xFFC2,     0xFFC7, 1},
+       {    0xFFCA,     0xFFCF, 1},
+       {    0xFFD2,     0xFFD7, 1},
+       {    0xFFDA,     0xFFDC, 1},
+       {0x00010000, 0x0001000B, 1},
+       {0x0001000D, 0x00010026, 1},
+       {0x00010028, 0x0001003A, 1},
+       {0x0001003C, 0x0001003D, 1},
+       {0x0001003F, 0x0001004D, 1},
+       {0x00010050, 0x0001005D, 1},
+       {0x00010080, 0x000100FA, 1},
+       {0x00010140, 0x00010174, 1},
+       {0x00010280, 0x0001029C, 1},
+       {0x000102A0, 0x000102D0, 1},
+       {0x00010300, 0x0001031E, 1},
+       {0x00010330, 0x0001034A, 1},
+       {0x00010380, 0x0001039D, 1},
+       {0x000103A0, 0x000103C3, 1},
+       {0x000103C8, 0x000103CF, 1},
+       {0x000103D1, 0x000103D5, 1},
+       {0x00010400, 0x0001044F, 1},
+       {0x00010450, 0x0001047F, 1},
+       {0x00010480, 0x0001049D, 1},
+       {0x000104A0, 0x000104A9, 1},
+       {0x00010800, 0x00010805, 1},
+       {0x00010808,          0, 0},
+       {0x0001080A, 0x00010835, 1},
+       {0x00010837, 0x00010838, 1},
+       {0x0001083C,          0, 0},
+       {0x0001083F,          0, 0},
+       {0x00010900, 0x00010915, 1},
+       {0x00010A00,          0, 0},
+       {0x00010A10, 0x00010A13, 1},
+       {0x00010A15, 0x00010A17, 1},
+       {0x00010A19, 0x00010A33, 1},
+       {0x00012000, 0x0001236E, 1},
+       {0x00012400, 0x00012462, 1},
+       {0x0001D400, 0x0001D454, 1},
+       {0x0001D456, 0x0001D49C, 1},
+       {0x0001D49E, 0x0001D49F, 1},
+       {0x0001D4A2,          0, 0},
+       {0x0001D4A5, 0x0001D4A6, 1},
+       {0x0001D4A9, 0x0001D4AC, 1},
+       {0x0001D4AE, 0x0001D4B9, 1},
+       {0x0001D4BB,          0, 0},
+       {0x0001D4BD, 0x0001D4C3, 1},
+       {0x0001D4C5, 0x0001D505, 1},
+       {0x0001D507, 0x0001D50A, 1},
+       {0x0001D50D, 0x0001D514, 1},
+       {0x0001D516, 0x0001D51C, 1},
+       {0x0001D51E, 0x0001D539, 1},
+       {0x0001D53B, 0x0001D53E, 1},
+       {0x0001D540, 0x0001D544, 1},
+       {0x0001D546,          0, 0},
+       {0x0001D54A, 0x0001D550, 1},
+       {0x0001D552, 0x0001D6A5, 1},
+       {0x0001D6A8, 0x0001D6C0, 1},
+       {0x0001D6C2, 0x0001D6DA, 1},
+       {0x0001D6DC, 0x0001D6FA, 1},
+       {0x0001D6FC, 0x0001D714, 1},
+       {0x0001D716, 0x0001D734, 1},
+       {0x0001D736, 0x0001D74E, 1},
+       {0x0001D750, 0x0001D76E, 1},
+       {0x0001D770, 0x0001D788, 1},
+       {0x0001D78A, 0x0001D7A8, 1},
+       {0x0001D7AA, 0x0001D7C2, 1},
+       {0x0001D7C4, 0x0001D7CB, 1},
+       {0x0001D7CE, 0x0001D7FF, 1},
+       {0x00020000, 0x0002A6D6, 1},
+       {0x0002F800, 0x0002FA1D, 1},
+       {    0x0660,     0x0669, 1},
+       {    0x06F0,     0x06F9, 1},
+       {    0x0966,     0x096F, 1},
+       {    0x09E6,     0x09EF, 1},
+       {    0x0A66,     0x0A6F, 1},
+       {    0x0AE6,     0x0AEF, 1},
+       {    0x0B66,     0x0B6F, 1},
+       {    0x0BE6,     0x0BEF, 1},
+       {    0x0C66,     0x0C6F, 1},
+       {    0x0C78,     0x0C7F, 1},
+       {    0x0CE6,     0x0CEF, 1},
+       {    0x0D66,     0x0D6F, 1},
+       {    0x0D70,     0x0D75, 1},
+       {    0x0E50,     0x0E59, 1},
+       {    0x0ED0,     0x0ED9, 1},
+       {    0x0F20,     0x0F29, 1},
+       {    0x1040,     0x1049, 1},
+       {    0x17E0,     0x17E9, 1},
+       {    0x1810,     0x1819, 1},
+       {    0x1BB0,     0x1BB9, 1},
+       {    0x1C40,     0x1C49, 1},
+       {    0x1C50,     0x1C59, 1},
+       {    0xA620,     0xA629, 1},
+       {    0xA8D0,     0xA8D9, 1},
+       {    0xA900,     0xA909, 1},
+       {    0xAA50,     0xAA59, 1},
+       {    0xFF10,     0xFF19, 1},
+
+/* END::UTF8TABLE */
+};
+
+static inline size_t utf8towc(unsigned *wc, const unsigned char *uc, size_t len) /* decode one sequence at uc into *wc */
+{ /* returns the number of bytes consumed, or 0 if the sequence is truncated, too long or malformed */
+       unsigned char ub = utf8_mblen[*uc]; /* sequence length implied by the first byte */
+
+       if (!ub || ub > len || ub > 4) { /* need ub bytes in the buffer; 5/6-byte forms are rejected, 4-byte ones accepted */
+               return 0;
+       }
+
+       *wc = *uc & utf8_mask[ub]; /* payload bits of the first byte */
+
+       switch (ub) { /* each case folds in one continuation byte, then falls through */
+       case 4:
+               if ((uc[1] & 0xc0) != 0x80) { /* continuation bytes must look like 10xxxxxx */
+                       return 0;
+               }
+               *wc <<= 6;
+               *wc += *++uc & 0x3f;
+               /* no break */
+       case 3:
+               if ((uc[1] & 0xc0) != 0x80) {
+                       return 0;
+               }
+               *wc <<= 6;
+               *wc += *++uc & 0x3f;
+               /* no break */
+       case 2:
+               if ((uc[1] & 0xc0) != 0x80) {
+                       return 0;
+               }
+               *wc <<= 6;
+               *wc += *++uc & 0x3f;
+               /* no break */
+       case 1:
+               break;
+
+       default:
+               return 0;
+       }
+
+       return ub;
+}
+
+static inline zend_bool isualpha(unsigned ch) /* 1 if code point ch is matched by an entry of utf8_ranges, else 0 */
+{
+       unsigned i, j;
+
+       for (i = 0; i < sizeof(utf8_ranges)/sizeof(utf8_range_t); ++i) { /* linear scan over every table entry */
+               if (utf8_ranges[i].start == ch) { /* the only way single-code-point entries (end 0, step 0) can match */
+                       return 1;
+               } else if (utf8_ranges[i].start <= ch && utf8_ranges[i].end >= ch) {
+                       if (utf8_ranges[i].step == 1) { /* contiguous range: every value inside matches */
+                               return 1;
+                       }
+                       for (j = utf8_ranges[i].start; j <= utf8_ranges[i].end; j+= utf8_ranges[i].step) { /* strided range: only start + k*step matches */
+                               if (ch == j) {
+                                       return 1;
+                               }
+                       }
+                       return 0; /* inside a strided range but off-stride: later entries are not consulted */
+               }
+       }
+       return 0;
+}
+
+static inline zend_bool isualnum(unsigned ch) /* 1 if ch is an ASCII digit or is accepted by isualpha, else 0 */
+{
+       /* digits */
+       if (ch >= 0x30 && ch <= 0x39) { /* '0'..'9' */
+               return 1;
+       }
+       return isualpha(ch); /* everything else is decided by the utf8_ranges table */
+}
+
+#endif /* PHP_HTTP_UTF8_H */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */
index fbe94118cf53532bc603b52fbe46f160f14a3951..f4fcfc85367417dee7cb8f711fe39d89f18250b3 100644 (file)
@@ -26,7 +26,7 @@ php_http_version_t *php_http_version_init(php_http_version_t *v, unsigned major,
 
 php_http_version_t *php_http_version_parse(php_http_version_t *v, const char *str TSRMLS_DC)
 {
-       php_http_version_t tmp;
+       long major, minor;
        char separator = 0, *stop = NULL;
        register const char *ptr = str;
 
@@ -40,17 +40,17 @@ php_http_version_t *php_http_version_parse(php_http_version_t *v, const char *st
                ++ptr;
                /* no break */
        default:
-               tmp.major = strtol(ptr, &stop, 10);
-               if (stop && stop != ptr && tmp.major != LONG_MIN && tmp.major != LONG_MAX) {
+               major = strtol(ptr, &stop, 10);
+               if (stop && stop != ptr && major != LONG_MIN && major != LONG_MAX) {
                        separator = *stop;
                        if (separator) {
                                if (separator != '.' && separator != ',') {
                                        php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Non-standard version separator '%c' in HTTP protocol version '%s'", separator, ptr);
                                }
                                ptr = stop + 1;
-                               tmp.minor = strtol(ptr, &stop, 10);
-                               if (tmp.minor != LONG_MIN && tmp.minor != LONG_MAX) {
-                                       return php_http_version_init(v, tmp.major, tmp.minor TSRMLS_CC);
+                               minor = strtol(ptr, &stop, 10);
+                               if (minor != LONG_MIN && minor != LONG_MAX) {
+                                       return php_http_version_init(v, major, minor TSRMLS_CC);
                                }
                        }
                }
diff --git a/tests/bug66891.phpt b/tests/bug66891.phpt
new file mode 100644 (file)
index 0000000..0fd84f8
--- /dev/null
@@ -0,0 +1,18 @@
+--TEST--
+Bug #66891 (Unexpected HTTP 401 after NTLM authentication)
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--GET--
+dummy=1
+--FILE--
+<?php
+header("WWW-Authenticate: none");
+$r = new http\Env\Response;
+$r->setResponseCode(200);
+$r->send();
+var_dump(http_response_code());
+?>
+--EXPECT--
+int(200)
\ No newline at end of file
diff --git a/tests/bug67932.phpt b/tests/bug67932.phpt
new file mode 100644 (file)
index 0000000..7ab8251
--- /dev/null
@@ -0,0 +1,18 @@
+--TEST--
+Bug #67932 (php://input always empty)
+--SKIPIF--
+<?php 
+include "skipif.inc";
+?>
+--PUT--
+Content-Type: text/xml
+
+<?xml version="1.0" encoding="utf-8" ?>
+<body>test</body>
+--FILE--
+<?php
+readfile("php://input");
+?>
+--EXPECT--
+<?xml version="1.0" encoding="utf-8" ?>
+<body>test</body>
\ No newline at end of file
diff --git a/tests/messageparser002.phpt b/tests/messageparser002.phpt
new file mode 100644 (file)
index 0000000..2030e93
--- /dev/null
@@ -0,0 +1,66 @@
+--TEST--
+message parser with nonblocking stream
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php 
+echo "Test\n";
+
+$parser = new http\Message\Parser;
+$socket = stream_socket_pair(STREAM_PF_UNIX, STREAM_SOCK_STREAM, STREAM_IPPROTO_IP);
+stream_set_blocking($socket[0], 0);
+
+$message = array(
+"GET / HTTP/1.1\n",
+"Host: localhost\n",
+"Content-length: 3\n",
+"\n",
+"OK\n" 
+);
+
+while ($message) {
+       $line = array_shift($message);
+       $parser->stream($socket[0], 0, $msg);
+       fwrite($socket[1], $line);
+       $parser->stream($socket[0], 0, $msg);
+}
+
+var_dump($msg, (string) $msg->getBody());
+
+?>
+DONE
+--EXPECTF--
+Test
+object(http\Message)#%d (9) {
+  ["type":protected]=>
+  int(1)
+  ["body":protected]=>
+  object(http\Message\Body)#2 (0) {
+  }
+  ["requestMethod":protected]=>
+  string(3) "GET"
+  ["requestUrl":protected]=>
+  string(1) "/"
+  ["responseStatus":protected]=>
+  string(0) ""
+  ["responseCode":protected]=>
+  int(0)
+  ["httpVersion":protected]=>
+  string(3) "1.1"
+  ["headers":protected]=>
+  array(3) {
+    ["Host"]=>
+    string(9) "localhost"
+    ["Content-Length"]=>
+    int(3)
+    ["X-Original-Content-Length"]=>
+    string(1) "3"
+  }
+  ["parentMessage":protected]=>
+  NULL
+}
+string(3) "OK
+"
+DONE
\ No newline at end of file
diff --git a/tests/urlparser001.phpt b/tests/urlparser001.phpt
new file mode 100644 (file)
index 0000000..73bd9d4
--- /dev/null
@@ -0,0 +1,191 @@
+--TEST--
+url parser
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s:",
+       "ss:",
+       "s:a",
+       "ss:aa",
+       "s://",
+       "ss://",
+       "s://a",
+       "ss://aa",
+);
+
+foreach ($urls as $url) {
+       printf("\n%s\n", $url);
+       var_dump(http\Url::parse($url));
+}
+
+?>
+DONE
+--EXPECTF--
+Test
+
+s:
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+ss:
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s:a
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(1) "a"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+ss:aa
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(2) "aa"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+ss://
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://a
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "a"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+ss://aa
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(2) "aa"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser002.phpt b/tests/urlparser002.phpt
new file mode 100644 (file)
index 0000000..be1cd66
--- /dev/null
@@ -0,0 +1,211 @@
+--TEST--
+url parser with paths
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s:a/",
+       "ss:aa/",
+       "s:/a/",
+       "ss:/aa/",
+       "s://a/",
+       "s://h/a",
+       "ss://hh/aa",
+       "s:///a/b",
+       "ss:///aa/bb",
+);
+
+foreach ($urls as $url) {
+       printf("\n%s\n", $url);
+       var_dump(http\Url::parse($url));
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+s:a/
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(2) "a/"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+ss:aa/
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(3) "aa/"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s:/a/
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(3) "/a/"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+ss:/aa/
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(4) "/aa/"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://a/
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "a"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(1) "/"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://h/a
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(2) "/a"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+ss://hh/aa
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(2) "hh"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(3) "/aa"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s:///a/b
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(4) "/a/b"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+ss:///aa/bb
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(6) "/aa/bb"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser003.phpt b/tests/urlparser003.phpt
new file mode 100644 (file)
index 0000000..68b1e4a
--- /dev/null
@@ -0,0 +1,274 @@
+--TEST--
+url parser with query
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s:?q",
+       "ss:?qq",
+       "s:/?q",
+       "ss:/?qq",
+       "s://?q",
+       "ss://?qq",
+       "s://h?q",
+       "ss://hh?qq",
+       "s://h/p?q",
+       "ss://hh/pp?qq",
+       "s://h:123/p/?q",
+       "ss://hh:123/pp/?qq",
+);
+
+foreach ($urls as $url) {
+       printf("\n%s\n", $url);
+       var_dump(http\Url::parse($url));
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+s:?q
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  string(1) "q"
+  ["fragment"]=>
+  NULL
+}
+
+ss:?qq
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  string(2) "qq"
+  ["fragment"]=>
+  NULL
+}
+
+s:/?q
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(1) "/"
+  ["query"]=>
+  string(1) "q"
+  ["fragment"]=>
+  NULL
+}
+
+ss:/?qq
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(1) "/"
+  ["query"]=>
+  string(2) "qq"
+  ["fragment"]=>
+  NULL
+}
+
+s://?q
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  string(1) "q"
+  ["fragment"]=>
+  NULL
+}
+
+ss://?qq
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  string(2) "qq"
+  ["fragment"]=>
+  NULL
+}
+
+s://h?q
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  string(1) "q"
+  ["fragment"]=>
+  NULL
+}
+
+ss://hh?qq
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(2) "hh"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  string(2) "qq"
+  ["fragment"]=>
+  NULL
+}
+
+s://h/p?q
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(2) "/p"
+  ["query"]=>
+  string(1) "q"
+  ["fragment"]=>
+  NULL
+}
+
+ss://hh/pp?qq
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(2) "hh"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(3) "/pp"
+  ["query"]=>
+  string(2) "qq"
+  ["fragment"]=>
+  NULL
+}
+
+s://h:123/p/?q
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  int(123)
+  ["path"]=>
+  string(3) "/p/"
+  ["query"]=>
+  string(1) "q"
+  ["fragment"]=>
+  NULL
+}
+
+ss://hh:123/pp/?qq
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(2) "ss"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(2) "hh"
+  ["port"]=>
+  int(123)
+  ["path"]=>
+  string(4) "/pp/"
+  ["query"]=>
+  string(2) "qq"
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser004.phpt b/tests/urlparser004.phpt
new file mode 100644 (file)
index 0000000..3aa57fd
--- /dev/null
@@ -0,0 +1,89 @@
+--TEST--
+url parser multibyte/locale
+--SKIPIF--
+<?php
+include "skipif.inc";
+if (!defined("http\\Url::PARSE_MBLOC") or
+       !stristr(setlocale(LC_CTYPE, NULL), "utf")) {
+       die("skip need http\\Url::PARSE_MBLOC support and LC_CTYPE=*.UTF-8");
+}
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s\xc3\xa7heme:",
+       "s\xc3\xa7heme://h\xc6\x9fst",
+       "s\xc3\xa7heme://h\xc6\x9fst:23/päth/öf/fıle"
+);
+
+foreach ($urls as $url) {
+       printf("\n%s\n", $url);
+       var_dump(http\Url::parse($url, http\Url::PARSE_MBLOC));
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+sçheme:
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+sçheme://hƟst
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(5) "hƟst"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+sçheme://hƟst:23/päth/öf/fıle
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(5) "hƟst"
+  ["port"]=>
+  int(23)
+  ["path"]=>
+  string(16) "/päth/öf/fıle"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser005.phpt b/tests/urlparser005.phpt
new file mode 100644 (file)
index 0000000..ff18fe4
--- /dev/null
@@ -0,0 +1,85 @@
+--TEST--
+url parser multibyte/utf-8
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s\xc3\xa7heme:",
+       "s\xc3\xa7heme://h\xc6\x9fst",
+       "s\xc3\xa7heme://h\xc6\x9fst:23/päth/öf/fıle"
+);
+
+foreach ($urls as $url) {
+       printf("\n%s\n", $url);
+       var_dump(http\Url::parse($url, http\Url::PARSE_MBUTF8));
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+sçheme:
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+sçheme://hƟst
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(5) "hƟst"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+sçheme://hƟst:23/päth/öf/fıle
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(5) "hƟst"
+  ["port"]=>
+  int(23)
+  ["path"]=>
+  string(16) "/päth/öf/fıle"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser006.phpt b/tests/urlparser006.phpt
new file mode 100644 (file)
index 0000000..72ee358
--- /dev/null
@@ -0,0 +1,90 @@
+--TEST--
+url parser multibyte/locale/idna
+--SKIPIF--
+<?php
+include "skipif.inc";
+if (!defined("http\\Url::PARSE_MBLOC") or
+       !defined("http\\Url::PARSE_TOIDN") or
+       !stristr(setlocale(LC_CTYPE, NULL), ".utf")) {
+       die("skip need http\\Url::PARSE_MBLOC|http\\Url::PARSE_TOIDN support and LC_CTYPE=*.UTF-8");
+}
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s\xc3\xa7heme:",
+       "s\xc3\xa7heme://h\xc6\x9fst",
+       "s\xc3\xa7heme://h\xc6\x9fst:23/päth/öf/fıle"
+);
+
+foreach ($urls as $url) {
+       printf("\n%s\n", $url);
+       var_dump(http\Url::parse($url, http\Url::PARSE_MBLOC|http\Url::PARSE_TOIDN));
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+sçheme:
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+sçheme://hƟst
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(11) "xn--hst-kwb"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+sçheme://hƟst:23/päth/öf/fıle
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(11) "xn--hst-kwb"
+  ["port"]=>
+  int(23)
+  ["path"]=>
+  string(16) "/päth/öf/fıle"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser007.phpt b/tests/urlparser007.phpt
new file mode 100644 (file)
index 0000000..518bb72
--- /dev/null
@@ -0,0 +1,88 @@
+--TEST--
+url parser multibyte/utf-8/idna
+--SKIPIF--
+<?php
+include "skipif.inc";
+if (!defined("http\\Url::PARSE_TOIDN")) {
+       die("skip need http\\Url::PARSE_TOIDN support");
+}
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s\xc3\xa7heme:",
+       "s\xc3\xa7heme://h\xc6\x9fst",
+       "s\xc3\xa7heme://h\xc6\x9fst:23/päth/öf/fıle"
+);
+
+foreach ($urls as $url) {
+       printf("\n%s\n", $url);
+       var_dump(http\Url::parse($url, http\Url::PARSE_MBUTF8|http\Url::PARSE_TOIDN));
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+sçheme:
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+sçheme://hƟst
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(11) "xn--hst-kwb"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+sçheme://hƟst:23/päth/öf/fıle
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(7) "sçheme"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(11) "xn--hst-kwb"
+  ["port"]=>
+  int(23)
+  ["path"]=>
+  string(16) "/päth/öf/fıle"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser008.phpt b/tests/urlparser008.phpt
new file mode 100644 (file)
index 0000000..98382f4
--- /dev/null
@@ -0,0 +1,76 @@
+--TEST--
+url parser ipv6
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s://[a:80",
+       "s://[0]",
+       "s://[::1]:80",
+       "s://mike@[0:0:0:0:0:FFFF:204.152.189.116]/foo",
+);
+
+foreach ($urls as $url) {
+       try {
+               printf("\n%s\n", $url);
+               var_dump(http\Url::parse($url));
+       } catch (Exception $e) {
+               echo $e->getMessage(),"\n";
+       }
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+s://[a:80
+http\Url::parse(): Failed to parse hostinfo; expected ']'
+
+s://[0]
+http\Url::parse(): Failed to parse hostinfo; unexpected '['
+
+s://[::1]:80
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(5) "[::1]"
+  ["port"]=>
+  int(80)
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://mike@[0:0:0:0:0:FFFF:204.152.189.116]/foo
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "mike"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(24) "[::ffff:204.152.189.116]"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(4) "/foo"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser009.phpt b/tests/urlparser009.phpt
new file mode 100644 (file)
index 0000000..f3e2b83
--- /dev/null
@@ -0,0 +1,278 @@
+--TEST--
+url parser userinfo
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s://:@",
+       "s://u@",
+       "s://u:@",
+       "s://u:p@",
+       "s://user:pass@",
+       "s://user:pass@host",
+       "s://u@h",
+       "s://user@h",
+       "s://u@host",
+       "s://user:p@h",
+       "s://user:pass@h",
+       "s://user:pass@host",
+);
+
+foreach ($urls as $url) {
+       try {
+               printf("\n%s\n", $url);
+               var_dump(http\Url::parse($url));
+       } catch (Exception $e) {
+               echo $e->getMessage(),"\n";
+       }
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+s://:@
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(0) ""
+  ["pass"]=>
+  string(0) ""
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u@
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u:@
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  string(0) ""
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u:p@
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  string(1) "p"
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:pass@
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(4) "pass"
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:pass@host
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(4) "pass"
+  ["host"]=>
+  string(4) "host"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u@h
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user@h
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u@host
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(4) "host"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:p@h
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(1) "p"
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:pass@h
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(4) "pass"
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:pass@host
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(4) "pass"
+  ["host"]=>
+  string(4) "host"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser010.phpt b/tests/urlparser010.phpt
new file mode 100644 (file)
index 0000000..a82b7a8
--- /dev/null
@@ -0,0 +1,40 @@
+--TEST--
+url parser multibyte/utf-8/topct
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "http://mike:paßwort@sörver.net/for/€/?by=¢#ø"
+);
+
+foreach ($urls as $url) {
+       var_dump(http\Url::parse($url, http\Url::PARSE_MBUTF8|http\Url::PARSE_TOPCT));
+}
+?>
+DONE
+--EXPECTF--
+Test
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(4) "http"
+  ["user"]=>
+  string(4) "mike"
+  ["pass"]=>
+  string(12) "pa%C3%9Fwort"
+  ["host"]=>
+  string(11) "sörver.net"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(15) "/for/%E2%82%AC/"
+  ["query"]=>
+  string(9) "by=%C2%A2"
+  ["fragment"]=>
+  string(6) "%C3%B8"
+}
+DONE