simplify
[m6w6/ext-http] / php_http_url.c
index 175886d561f2cffe7ccb423114827dfa5edebd42..3c3fa4d58d8e2c687e7f91216f73c1b81eacda22 100644 (file)
@@ -6,12 +6,27 @@
     | modification, are permitted provided that the conditions mentioned |
     | in the accompanying LICENSE file are met.                          |
     +--------------------------------------------------------------------+
-    | Copyright (c) 2004-2013, Michael Wallner <mike@php.net>            |
+    | Copyright (c) 2004-2014, Michael Wallner <mike@php.net>            |
     +--------------------------------------------------------------------+
 */
 
 #include "php_http_api.h"
 
+#ifdef PHP_HTTP_HAVE_IDN
+#      include <idna.h>
+#endif
+
+#ifdef PHP_HTTP_HAVE_WCHAR
+#      include <wchar.h>
+#      include <wctype.h>
+#endif
+
+#ifdef HAVE_ARPA_INET_H
+#      include <arpa/inet.h>
+#endif
+
+#include "php_http_utf8.h"
+
 static inline char *localhostname(void)
 {
        char hostname[1024] = {0};
@@ -42,94 +57,6 @@ static inline char *localhostname(void)
        return estrndup("localhost", lenof("localhost"));
 }
 
-static inline unsigned port(const char *scheme)
-{
-       unsigned port = 80;
-
-#if defined(ZTS) && defined(HAVE_GETSERVBYPORT_R)
-       int rc;
-       size_t len = 0xff;
-       char *buf = NULL;
-       struct servent *se_res = NULL, se_buf = {0};
-
-       do {
-               buf = erealloc(buf, len);
-               rc = getservbyname_r(scheme, "tcp", &se_buf, buf, len, &se_res);
-               len *= 2;
-       } while (rc == ERANGE && len <= 0xfff);
-
-       if (!rc) {
-               port = ntohs(se_res->s_port);
-       }
-
-       efree(buf);
-#elif !defined(ZTS) && defined(HAVE_GETSERVBYPORT)
-       struct servent *se;
-
-       if ((se = getservbyname(scheme, "tcp")) && se->s_port) {
-               port = ntohs(se->s_port);
-       }
-#endif
-
-       return port;
-}
-static inline char *scheme(unsigned port)
-{
-       char *scheme;
-#if defined(ZTS) && defined(HAVE_GETSERVBYPORT_R)
-       int rc;
-       size_t len = 0xff;
-       char *buf = NULL;
-       struct servent *se_res = NULL, se_buf = {0};
-#elif !defined(ZTS) && defined(HAVE_GETSERVBYPORT)
-       struct servent *se;
-#endif
-
-       switch (port) {
-       case 443:
-               scheme = estrndup("https", lenof("https"));
-               break;
-
-#if defined(ZTS) && !defined(HAVE_GETSERVBYPORT_R)
-       default:
-#elif !defined(ZTS) && !defined(HAVE_GETSERVBYPORT)
-       default:
-#endif
-       case 80:
-       case 0:
-               scheme = estrndup("http", lenof("http"));
-               break;
-
-#if defined(ZTS) && defined(HAVE_GETSERVBYPORT_R)
-       default:
-               do {
-                       buf = erealloc(buf, len);
-                       rc = getservbyport_r(htons(port), "tcp", &se_buf, buf, len, &se_res);
-                       len *= 2;
-               } while (rc == ERANGE && len <= 0xfff);
-
-               if (!rc && se_res) {
-                       scheme = estrdup(se_res->s_name);
-               } else {
-                       scheme = estrndup("http", lenof("http"));
-               }
-
-               efree(buf);
-               break;
-
-#elif !defined(ZTS) && defined(HAVE_GETSERVBYPORT)
-       default:
-               if ((se = getservbyport(htons(port), "tcp")) && se->s_name) {
-                       scheme = estrdup(se->s_name);
-               } else {
-                       scheme = estrndup("http", lenof("http"));
-               }
-               break;
-#endif
-       }
-       return scheme;
-}
-
 static php_url *php_http_url_from_env(php_url *url TSRMLS_DC)
 {
        zval *https, *zhost, *zport;
@@ -150,7 +77,7 @@ static php_url *php_http_url_from_env(php_url *url TSRMLS_DC)
        if (https && !strcasecmp(Z_STRVAL_P(https), "ON")) {
                url->scheme = estrndup("https", lenof("https"));
        } else {
-               url->scheme = scheme(url->port);
+               url->scheme = estrndup("http", lenof("http"));
        }
 
        /* host */
@@ -183,7 +110,7 @@ static php_url *php_http_url_from_env(php_url *url TSRMLS_DC)
        return url;
 }
 
-PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC)
+void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC)
 {
        php_url *url, *tmp_url = NULL;
 
@@ -339,7 +266,6 @@ PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url
        if (url->port) {
                if (    ((url->port == 80) && !strcmp(url->scheme, "http"))
                        ||      ((url->port ==443) && !strcmp(url->scheme, "https"))
-                       ||      ( url->port == port(url->scheme))
                ) {
                        url->port = 0;
                }
@@ -356,7 +282,7 @@ PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url
        }
 }
 
-PHP_HTTP_API STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC)
+STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC)
 {
        const char *arg_sep_str;
        size_t arg_sep_len;
@@ -375,7 +301,7 @@ PHP_HTTP_API STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_en
        return SUCCESS;
 }
 
-PHP_HTTP_API STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC)
+STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC)
 {
        if (pre_encoded_len && pre_encoded_str) {
                php_http_buffer_append(qstr, pre_encoded_str, pre_encoded_len);
@@ -388,6 +314,626 @@ PHP_HTTP_API STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer
        return SUCCESS;
 }
 
+/*
+ * Release a parsed URL and reset the caller's handle to NULL.
+ * Does nothing when *url is already NULL.
+ */
+void php_http_url_free(php_http_url_t **url)
+{
+       php_http_url_t *victim = *url;
+
+       if (!victim) {
+               return;
+       }
+
+       efree(victim);
+       *url = NULL;
+}
+
+/*
+ * Decode one UTF-8 sequence at ptr (bounded by end) with utf8towc() and
+ * return the number of bytes it spans.
+ *
+ * Returns 0 when utf8towc() yields 0 or (size_t) -1, or when idn is false
+ * and isualnum() rejects the decoded code point.  The url argument is not
+ * used by this function.
+ */
+static size_t parse_mb_utf8(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn)
+{
+       unsigned codepoint;
+       size_t nbytes = utf8towc(&codepoint, (const unsigned char *) ptr, end - ptr);
+
+       if (nbytes == 0 || nbytes == (size_t) -1) {
+               return 0;
+       }
+
+       /* for IDN any decodable character is accepted, else only alphanumerics */
+       return (idn || isualnum(codepoint)) ? nbytes : 0;
+}
+
+#ifdef PHP_HTTP_HAVE_WCHAR
+/*
+ * Decode one multibyte character at ptr (bounded by end) according to the
+ * current locale and return the number of bytes it spans.
+ *
+ * Returns 0 when no converter is available, when the conversion yields 0,
+ * when the sequence is invalid or truncated by end, or when idn is false and
+ * iswalnum() rejects the decoded character.  The url argument is not used by
+ * this function.
+ */
+static size_t parse_mb_loc(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn)
+{
+       wchar_t wchar;
+       size_t consumed = 0;
+#if defined(HAVE_MBRTOWC)
+       mbstate_t ps = {0};
+
+       consumed = mbrtowc(&wchar, ptr, end - ptr, &ps);
+#elif defined(HAVE_MBTOWC)
+       consumed = mbtowc(&wchar, ptr, end - ptr);
+#endif
+
+       /*
+        * (size_t) -1 signals an invalid sequence, (size_t) -2 an incomplete one;
+        * neither is a byte count and must never reach the caller's copy loop
+        */
+       if (!consumed || consumed == (size_t) -1 || consumed == (size_t) -2) {
+               return 0;
+       }
+       if (!idn && !iswalnum(wchar)) {
+               return 0;
+       }
+
+       return consumed;
+}
+#endif
+
+/*
+ * Identifies the URL component being parsed when parse_mb() is called.
+ * The enumerators are used as indexes into parse_what[] below, so both
+ * lists have to be kept in the same order.
+ */
+typedef enum parse_mb_what {
+       PARSE_SCHEME,
+       PARSE_USERINFO,
+       PARSE_HOSTINFO,
+       PARSE_PATH,
+       PARSE_QUERY,
+       PARSE_FRAGMENT
+} parse_mb_what_t;
+
+/* Component names for warning messages, indexed by parse_mb_what_t. */
+static const char * const parse_what[] = {
+       "scheme",
+       "userinfo",
+       "hostinfo",
+       "path",
+       "query",
+       "fragment"
+};
+
+/*
+ * Try to accept one multibyte character at ptr (bounded by end) for the URL
+ * component named by what.
+ *
+ * The UTF-8 decoder is used when PHP_HTTP_URL_PARSE_MBUTF8 is set in
+ * url->flags; otherwise, if compiled with PHP_HTTP_HAVE_WCHAR, the locale
+ * decoder is used when PHP_HTTP_URL_PARSE_MBLOC is set.  With neither flag
+ * nothing is consumed.
+ *
+ * begin is only used for the warning (position and the string echoed back).
+ * Returns the value reported by the decoder; 0 means the byte at ptr was
+ * rejected, in which case a warning is raised unless silent is set.
+ */
+static size_t parse_mb(php_http_url_t *url, parse_mb_what_t what, const char *ptr, const char *end, const char *begin, zend_bool silent)
+{
+       size_t consumed = 0;
+       /* relaxed character check applies to host names only, and only with the IDN flag */
+       zend_bool idn = (what == PARSE_HOSTINFO) && (url->flags & PHP_HTTP_URL_PARSE_IDN);
+
+       if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
+               consumed = parse_mb_utf8(url, ptr, end, idn);
+       }
+#ifdef PHP_HTTP_HAVE_WCHAR
+       else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) {
+               consumed = parse_mb_loc(url, ptr, end, idn);
+       }
+#endif
+
+       if (consumed) {
+               /* NOTE(review): presumably repeats the copy statement `consumed` times; confirm against the PHP_HTTP_DUFF definition, including whether it leaves `consumed` intact */
+               PHP_HTTP_DUFF(consumed, url->buffer[url->offset++] = *ptr++);
+       } else if (!silent) {
+               TSRMLS_FETCH_FROM_CTX(url->ts);
+               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                               "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
+                               parse_what[what], (unsigned char) *ptr, (unsigned) (ptr - begin), begin);
+       }
+
+       return consumed;
+}
+
+/*
+ * Validate the userinfo part ("user" or "user:password") and copy it into
+ * url->buffer.
+ *
+ * The part spans ptr up to, but not including, url->ptr (the '@' located by
+ * the caller) and must not be empty.  On success url->user, and url->pass if
+ * a ':' was seen, point at NUL terminated strings inside url->buffer.
+ *
+ * Returns SUCCESS, or FAILURE after raising a warning for a second ':', a
+ * malformed or truncated percent escape, or a byte rejected by parse_mb().
+ */
+static STATUS parse_userinfo(php_http_url_t *url, const char *ptr)
+{
+       size_t mb;
+       const char *password = NULL, *end = url->ptr, *tmp = ptr;
+       TSRMLS_FETCH_FROM_CTX(url->ts);
+
+       do {
+               switch (*ptr) {
+               case ':':
+                       if (password) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse password; duplicate ':' at pos %u in '%s'",
+                                               (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       password = ptr + 1;
+                       url->buffer[url->offset++] = *ptr;
+                       break;
+
+               case '%':
+                       /*
+                        * Three bytes are copied below, so all of them have to lie
+                        * before end, also for the "%%" form; otherwise the delimiter
+                        * would be copied and the loop would run past end.  The
+                        * ctype argument must be representable as unsigned char.
+                        */
+                       if (end - ptr <= 2 || (ptr[1] != '%' && (!isxdigit((unsigned char) ptr[1]) || !isxdigit((unsigned char) ptr[2])))) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse userinfo; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       url->buffer[url->offset++] = *ptr++;
+                       url->buffer[url->offset++] = *ptr++;
+                       url->buffer[url->offset++] = *ptr;
+                       break;
+
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       url->buffer[url->offset++] = *ptr;
+                       break;
+
+               default:
+                       if (!(mb = parse_mb(url, PARSE_USERINFO, ptr, end, tmp, 0))) {
+                               return FAILURE;
+                       }
+                       /* the loop increment accounts for the last consumed byte */
+                       ptr += mb - 1;
+               }
+       } while (++ptr < end);
+
+       /* every source byte was copied 1:1, so the starts can be derived from offset */
+       if (password) {
+               url->user = &url->buffer[url->offset - (end - password) - (password - tmp)];
+               /* overwrite the copied ':' to terminate the user name */
+               url->buffer[url->offset - (end - password) - 1] = 0;
+               url->pass = &url->buffer[url->offset - (end - password)];
+               url->buffer[url->offset++] = 0;
+       } else {
+               url->user = &url->buffer[url->offset - (end - tmp)];
+               url->buffer[url->offset++] = 0;
+       }
+
+       return SUCCESS;
+}
+
+/*
+ * Validate the host part and the optional ":port" and store the result in url.
+ *
+ * The part spans ptr up to, but not including, url->ptr (the delimiter located
+ * by the caller) and must not be empty.  With HAVE_INET_PTON a leading
+ * "[...]" is checked with inet_pton() and stored in the form produced by
+ * inet_ntop().  Port digits are accumulated into url->port and are not copied
+ * into the buffer.  With PHP_HTTP_HAVE_IDN and PHP_HTTP_URL_PARSE_IDN set the
+ * stored host is replaced by the result of the libidn conversion.
+ *
+ * Returns SUCCESS, or FAILURE after raising a warning.
+ *
+ * NOTE(review): url->port is accumulated without a range check; whether large
+ * values can wrap depends on the field's type, which is declared elsewhere.
+ */
+static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr)
+{
+       /* len is the length of the stored host; it is read by the IDN branch */
+       size_t mb, len = 0;
+       const char *end = url->ptr, *tmp = ptr, *port = NULL;
+       TSRMLS_FETCH_FROM_CTX(url->ts);
+
+
+#ifdef HAVE_INET_PTON
+       if (*ptr == '[') {
+               const char *error = NULL, *rbracket = memchr(ptr, ']', end - ptr);
+
+               if (rbracket) {
+                       size_t addrlen = rbracket - ptr + 1;
+                       char buf[16], *addr = estrndup(ptr + 1, addrlen - 2);
+                       int rv = inet_pton(AF_INET6, addr, buf);
+
+                       efree(addr);
+                       if (rv == 1) {
+                               url->buffer[url->offset] = '[';
+                               url->host = &url->buffer[url->offset];
+                               /* the address is written behind the '[', hence one byte less is available */
+                               if (inet_ntop(AF_INET6, buf, url->host + 1, url->maxlen - url->offset - 1)) {
+                                       len = strlen(url->host);
+                                       url->offset += len;
+                                       url->buffer[url->offset++] = ']';
+                                       url->buffer[url->offset++] = 0;
+                                       /* account for the closing bracket */
+                                       ++len;
+                                       ptr = rbracket + 1;
+                               } else {
+                                       error = strerror(errno);
+                               }
+                       } else if (rv == -1) {
+                               error = strerror(errno);
+                       } else {
+                               error = "unexpected '['";
+                       }
+               } else {
+                       error = "expected ']'";
+               }
+
+               if (error) {
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse hostinfo; %s", error);
+                       return FAILURE;
+               }
+       }
+#endif
+       if (ptr < end) do {
+               switch (*ptr) {
+               case ':':
+                       if (port) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse port; duplicate ':' at pos %u in '%s'",
+                                               (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       port = ptr + 1;
+                       break;
+
+               case '%':
+                       /*
+                        * Three bytes are copied below, so all of them have to lie
+                        * before end, also for the "%%" form; otherwise the delimiter
+                        * would be copied and the loop would run past end.  The
+                        * ctype argument must be representable as unsigned char.
+                        */
+                       if (end - ptr <= 2 || (ptr[1] != '%' && (!isxdigit((unsigned char) ptr[1]) || !isxdigit((unsigned char) ptr[2])))) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       url->buffer[url->offset++] = *ptr++;
+                       url->buffer[url->offset++] = *ptr++;
+                       url->buffer[url->offset++] = *ptr;
+                       break;
+
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+                       /* only digits may follow the port delimiter */
+                       if (port) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse port; unexpected char '%c' at pos %u in '%s'",
+                                               (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       }
+                       /* no break */
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       if (port) {
+                               url->port *= 10;
+                               url->port += *ptr - '0';
+                       } else {
+                               url->buffer[url->offset++] = *ptr;
+                       }
+                       break;
+
+               default:
+                       if (port) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'",
+                                               (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp);
+                               return FAILURE;
+                       } else if (!(mb = parse_mb(url, PARSE_HOSTINFO, ptr, end, tmp, 0))) {
+                               return FAILURE;
+                       }
+                       /* the loop increment accounts for the last consumed byte */
+                       ptr += mb - 1;
+               }
+       } while (++ptr < end);
+
+       if (!url->host) {
+               /* host bytes were copied 1:1; the ':' and the port digits were not copied */
+               len = (port ? port - tmp - 1 : end - tmp);
+               url->host = &url->buffer[url->offset - len];
+               url->buffer[url->offset++] = 0;
+       }
+
+#ifdef PHP_HTTP_HAVE_IDN
+       if (url->flags & PHP_HTTP_URL_PARSE_IDN) {
+               char *idn = NULL;
+               int rv = -1;
+
+               if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
+                       rv = idna_to_ascii_8z(url->host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
+               }
+#      ifdef PHP_HTTP_HAVE_WCHAR
+               else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) {
+                       rv = idna_to_ascii_lz(url->host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
+               }
+#      endif
+               if (rv != IDNA_SUCCESS) {
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv));
+                       return FAILURE;
+               } else {
+                       size_t idnlen = strlen(idn);
+                       /* replace the stored host in place and move offset by the length difference */
+                       memcpy(url->host, idn, idnlen + 1);
+                       free(idn);
+                       url->offset += idnlen - len;
+               }
+       }
+#endif
+
+       return SUCCESS;
+}
+
+/*
+ * Scan the authority starting at url->ptr and hand its pieces on: whatever
+ * precedes an '@' goes to parse_userinfo(), whatever precedes the terminating
+ * '/', '?', '#' or NUL goes to parse_hostinfo().  Empty pieces are skipped.
+ *
+ * Returns url->ptr positioned on the terminating byte, or NULL when a piece
+ * fails to parse or no terminator is found up to url->end.
+ */
+static const char *parse_authority(php_http_url_t *url)
+{
+       const char *part = url->ptr;
+
+       for (;;) {
+               const char c = *url->ptr;
+
+               if (c == '@') {
+                       /* userinfo delimiter */
+                       if (part != url->ptr && SUCCESS != parse_userinfo(url, part)) {
+                               return NULL;
+                       }
+                       part = url->ptr + 1;
+               } else if (c == '/' || c == '?' || c == '#' || c == '\0') {
+                       /* host delimiter */
+                       if (part != url->ptr && SUCCESS != parse_hostinfo(url, part)) {
+                               return NULL;
+                       }
+                       return url->ptr;
+               }
+
+               if (++url->ptr > url->end) {
+                       return NULL;
+               }
+       }
+}
+
+/*
+ * Validate the path starting at url->ptr and copy it into url->buffer.
+ *
+ * Parsing stops at '?', '#' or the terminating NUL; url->path is only set
+ * when at least one byte was consumed.
+ *
+ * Returns url->ptr positioned on the terminating byte, or NULL after a
+ * malformed or truncated percent escape or a byte rejected by parse_mb().
+ */
+static const char *parse_path(php_http_url_t *url)
+{
+       size_t mb;
+       const char *tmp;
+       TSRMLS_FETCH_FROM_CTX(url->ts);
+
+       /* is there actually a path to parse? */
+       if (!*url->ptr) {
+               return url->ptr;
+       }
+       tmp = url->ptr;
+
+       do {
+               switch (*url->ptr) {
+               case '#': /* a fragment may follow the path directly */
+               case '?':
+               case '\0':
+                       /* did we have any path component ? */
+                       if (tmp != url->ptr) {
+                               url->path = &url->buffer[url->offset - (url->ptr - tmp)];
+                               url->buffer[url->offset++] = 0;
+                       }
+                       return url->ptr;
+
+               case '%':
+                       /*
+                        * Three bytes are copied below, so all of them have to be
+                        * part of the input, also for the "%%" form.  The ctype
+                        * argument must be representable as unsigned char.
+                        */
+                       if (url->end - url->ptr <= 2 || (url->ptr[1] != '%' && (!isxdigit((unsigned char) url->ptr[1]) || !isxdigit((unsigned char) url->ptr[2])))) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse path; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (url->ptr - tmp), tmp);
+                               return NULL;
+                       }
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr;
+                       break;
+
+               case '/': /* yeah, well */
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case ':': case '@': /* pchar */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       url->buffer[url->offset++] = *url->ptr;
+                       break;
+
+               default:
+                       if (!(mb = parse_mb(url, PARSE_PATH, url->ptr, url->end, tmp, 0))) {
+                               return NULL;
+                       }
+                       /* the loop increment accounts for the last consumed byte */
+                       url->ptr += mb - 1;
+               }
+       } while (++url->ptr <= url->end);
+
+       return NULL;
+}
+
+/*
+ * Validate the query starting at url->ptr and copy it into url->buffer.
+ *
+ * Nothing is done unless url->ptr is on a '?'.  The '?' itself is skipped;
+ * parsing stops at '#' or the terminating NUL and url->query then points at
+ * the NUL terminated copy.
+ *
+ * Returns url->ptr positioned on the terminating byte (or unchanged when no
+ * query is present), or NULL after a malformed or truncated percent escape or
+ * a byte rejected by parse_mb().
+ */
+static const char *parse_query(php_http_url_t *url)
+{
+       size_t mb;
+       const char *tmp;
+       TSRMLS_FETCH_FROM_CTX(url->ts);
+
+       /* is there actually a query to parse ? */
+       if (*url->ptr != '?') {
+               return url->ptr;
+       }
+
+       /* skip initial '?', so that the delimiter is not copied into the buffer */
+       tmp = ++url->ptr;
+
+       do {
+               switch (*url->ptr) {
+               case '#':
+               case '\0':
+                       url->query = &url->buffer[url->offset - (url->ptr - tmp)];
+                       url->buffer[url->offset++] = 0;
+                       return url->ptr;
+
+               case '%':
+                       /*
+                        * Three bytes are copied below, so all of them have to be
+                        * part of the input, also for the "%%" form.  The ctype
+                        * argument must be representable as unsigned char.
+                        */
+                       if (url->end - url->ptr <= 2 || (url->ptr[1] != '%' && (!isxdigit((unsigned char) url->ptr[1]) || !isxdigit((unsigned char) url->ptr[2])))) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse query; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (url->ptr - tmp), tmp);
+                               return NULL;
+                       }
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr;
+                       break;
+
+               case '?': case '/': /* yeah, well */
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case ':': case '@': /* pchar */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       url->buffer[url->offset++] = *url->ptr;
+                       break;
+
+               default:
+                       if (!(mb = parse_mb(url, PARSE_QUERY, url->ptr, url->end, tmp, 0))) {
+                               return NULL;
+                       }
+                       /* the loop increment accounts for the last consumed byte */
+                       url->ptr += mb - 1;
+               }
+       } while (++url->ptr <= url->end);
+
+       return NULL;
+}
+
+/*
+ * Validate the fragment starting at url->ptr and copy it into url->buffer.
+ *
+ * Nothing is done unless url->ptr is on a '#'.  The '#' itself is skipped;
+ * parsing stops at the terminating NUL and url->fragment then points at the
+ * NUL terminated copy.
+ *
+ * Returns url->ptr positioned on the terminating NUL (or unchanged when no
+ * fragment is present), or NULL after a malformed or truncated percent escape
+ * or a byte rejected by parse_mb().
+ */
+static const char *parse_fragment(php_http_url_t *url)
+{
+       size_t mb;
+       const char *tmp;
+       TSRMLS_FETCH_FROM_CTX(url->ts);
+
+       /* is there actually a fragment to parse */
+       if (*url->ptr != '#') {
+               return url->ptr;
+       }
+
+       /*
+        * skip initial '#'; it is not an allowed fragment byte, so starting the
+        * loop on it would reject every fragment
+        */
+       tmp = ++url->ptr;
+
+       do {
+               switch (*url->ptr) {
+               case '\0':
+                       url->fragment = &url->buffer[url->offset - (url->ptr - tmp)];
+                       url->buffer[url->offset++] = 0;
+                       return url->ptr;
+
+               case '%':
+                       /*
+                        * Three bytes are copied below, so all of them have to be
+                        * part of the input, also for the "%%" form.  The ctype
+                        * argument must be representable as unsigned char.
+                        */
+                       if (url->end - url->ptr <= 2 || (url->ptr[1] != '%' && (!isxdigit((unsigned char) url->ptr[1]) || !isxdigit((unsigned char) url->ptr[2])))) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse fragment; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (url->ptr - tmp), tmp);
+                               return NULL;
+                       }
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr;
+                       break;
+
+               case '?': case '/': /* yeah, well */
+               case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+               case '+': case ',': case ';': case '=': /* sub-delims */
+               case '-': case '.': case '_': case '~': /* unreserved */
+               case ':': case '@': /* pchar */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+                       /* allowed */
+                       url->buffer[url->offset++] = *url->ptr;
+                       break;
+
+               default:
+                       if (!(mb = parse_mb(url, PARSE_FRAGMENT, url->ptr, url->end, tmp, 0))) {
+                               return NULL;
+                       }
+                       /* the loop increment accounts for the last consumed byte */
+                       url->ptr += mb - 1;
+               }
+       } while (++url->ptr <= url->end);
+
+       return NULL;
+}
+
+/*
+ * Parse the hierarchical part at url->ptr: an authority when the input
+ * continues with "//", followed by the path.
+ *
+ * Returns what parse_path() returns, or NULL when the authority fails to
+ * parse.
+ */
+static const char *parse_hier(php_http_url_t *url)
+{
+       /* a leading "//" introduces the authority */
+       if (*url->ptr == '/' && url->end - url->ptr > 1 && *(url->ptr + 1) == '/') {
+               url->ptr += 2;
+               url->ptr = parse_authority(url);
+               if (!url->ptr) {
+                       return NULL;
+               }
+       }
+
+       return parse_path(url);
+}
+
+/*
+ * Try to parse a scheme at url->ptr.
+ *
+ * When a ':' terminates a run of scheme characters, url->scheme is set to
+ * the NUL terminated copy at the start of url->buffer and the position
+ * behind the ':' is returned (url->ptr is advanced to it).
+ *
+ * Otherwise the input has no scheme.  This is a soft failure: url->ptr and
+ * url->offset are restored to their values on entry, so that the bytes
+ * scanned so far are parsed again as part of the path and nothing is left
+ * behind in the buffer, and the start position is returned.
+ */
+static const char *parse_scheme(php_http_url_t *url)
+{
+       size_t mb;
+       const size_t start = url->offset;
+       const char *tmp = url->ptr;
+
+       do {
+               switch (*url->ptr) {
+               case ':':
+                       /* scheme delimiter */
+                       url->scheme = &url->buffer[0];
+                       url->buffer[url->offset++] = 0;
+                       return ++url->ptr;
+
+               case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+               case '7': case '8': case '9':
+               case '+': case '-': case '.':
+                       /* these must not start a scheme */
+                       if (url->ptr == tmp) {
+                               goto no_scheme;
+                       }
+                       /* no break */
+               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+               case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+               case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+               case 'V': case 'W': case 'X': case 'Y': case 'Z':
+               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+               case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+               case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+               case 'v': case 'w': case 'x': case 'y': case 'z':
+                       /* scheme part */
+                       url->buffer[url->offset++] = *url->ptr;
+                       break;
+
+               default:
+                       if (!(mb = parse_mb(url, PARSE_SCHEME, url->ptr, url->end, tmp, 1))) {
+                               /* soft fail; parse path next */
+                               goto no_scheme;
+                       }
+                       /* the loop increment accounts for the last consumed byte */
+                       url->ptr += mb - 1;
+               }
+       } while (++url->ptr < url->end);
+
+no_scheme:
+       /* discard what was copied and rewind, the caller continues at url->ptr */
+       url->offset = start;
+       url->ptr = tmp;
+       return tmp;
+}
+
+/*
+ * NOTE(review): empty placeholder; nothing in the visible code refers to it.
+ * A struct without members is a compiler extension rather than standard C.
+ */
+struct parser_state {
+};
+
+/*
+ * Parse the URL in str (len bytes) into a newly allocated php_http_url_t.
+ *
+ * The structure is allocated together with 3 * len trailing bytes that the
+ * component parsers fill through url->buffer.  Scheme, hierarchical part,
+ * query and fragment are parsed in that order.
+ *
+ * Returns the parsed URL, to be released with php_http_url_free(), or NULL
+ * when one of the parsers fails; the allocation is released in that case.
+ */
+php_http_url_t *php_http_url_parse(const char *str, size_t len, unsigned flags TSRMLS_DC)
+{
+       php_http_url_t *url;
+       size_t maxlen = 3 * len;
+
+       url = ecalloc(1, sizeof(*url) + maxlen);
+       url->flags = flags;
+       url->maxlen = maxlen;
+       url->ptr = str;
+       url->end = str + len;
+       TSRMLS_SET_CTX(url->ts);
+
+       if (!parse_scheme(url)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL scheme: '%s'", url->ptr);
+               goto failure;
+       }
+
+       /* no warning here, only the other three steps raise one in this function */
+       if (!parse_hier(url)) {
+               goto failure;
+       }
+
+       if (!parse_query(url)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL query: '%s'", url->ptr);
+               goto failure;
+       }
+
+       if (!parse_fragment(url)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL fragment: '%s'", url->ptr);
+               goto failure;
+       }
+
+       return url;
+
+failure:
+       php_http_url_free(&url);
+       return NULL;
+}
+
 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct, 0, 0, 0)
        ZEND_ARG_INFO(0, old_url)
        ZEND_ARG_INFO(0, new_url)
@@ -395,67 +941,70 @@ ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct, 0, 0, 0)
 ZEND_END_ARG_INFO();
 PHP_METHOD(HttpUrl, __construct)
 {
-       with_error_handling(EH_THROW, php_http_exception_class_entry) {
-               zval *new_url = NULL, *old_url = NULL;
-               long flags = PHP_HTTP_URL_FROM_ENV;
-
-               if (SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|z!z!l", &old_url, &new_url, &flags)) {
-                       with_error_handling(EH_THROW, php_http_exception_class_entry) {
-                               php_url *res_purl, *new_purl = NULL, *old_purl = NULL;
-
-                               if (new_url) {
-                                       switch (Z_TYPE_P(new_url)) {
-                                               case IS_OBJECT:
-                                               case IS_ARRAY:
-                                                       new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC);
-                                                       break;
-                                               default: {
-                                                       zval *cpy = php_http_ztyp(IS_STRING, new_url);
+       zval *new_url = NULL, *old_url = NULL; /* both optional ("z!"); each may be an object, an array, or anything convertible to a string */
+       long flags = PHP_HTTP_URL_FROM_ENV;
+       zend_error_handling zeh; /* saved handler state; restored on every exit path after the swap below */
 
-                                                       new_purl = php_url_parse(Z_STRVAL_P(cpy));
-                                                       zval_ptr_dtor(&cpy);
-                                                       break;
-                                               }
-                                       }
-                                       if (!new_purl) {
-                                               return;
-                                       }
-                               }
-                               if (old_url) {
-                                       switch (Z_TYPE_P(old_url)) {
-                                               case IS_OBJECT:
-                                               case IS_ARRAY:
-                                                       old_purl = php_http_url_from_struct(NULL, HASH_OF(old_url) TSRMLS_CC);
-                                                       break;
-                                               default: {
-                                                       zval *cpy = php_http_ztyp(IS_STRING, old_url);
+       php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|z!z!l", &old_url, &new_url, &flags), invalid_arg, return);
 
-                                                       old_purl = php_url_parse(Z_STRVAL_P(cpy));
-                                                       zval_ptr_dtor(&cpy);
-                                                       break;
-                                               }
-                                       }
-                                       if (!old_purl) {
-                                               if (new_purl) {
-                                                       php_url_free(new_purl);
-                                               }
-                                               return;
-                                       }
-                               }
+       zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC); /* from here on, errors are thrown as the bad_url exception class */
+       {
+               php_url *res_purl, *new_purl = NULL, *old_purl = NULL;
 
-                               php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC);
-                               php_http_url_to_struct(res_purl, getThis() TSRMLS_CC);
+               if (new_url) {
+                       switch (Z_TYPE_P(new_url)) {
+                               case IS_OBJECT:
+                               case IS_ARRAY:
+                                       new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC);
+                                       break;
+                               default: {
+                                       zval *cpy = php_http_ztyp(IS_STRING, new_url);
+
+                                       new_purl = php_url_parse(Z_STRVAL_P(cpy));
+                                       zval_ptr_dtor(&cpy);
+                                       break;
+                               }
+                       }
+                       if (!new_purl) {
+                               zend_restore_error_handling(&zeh TSRMLS_CC); /* undo the handler swap before the early return */
+                               return;
+                       }
+               }
+               if (old_url) {
+                       switch (Z_TYPE_P(old_url)) {
+                               case IS_OBJECT:
+                               case IS_ARRAY:
+                                       old_purl = php_http_url_from_struct(NULL, HASH_OF(old_url) TSRMLS_CC);
+                                       break;
+                               default: {
+                                       zval *cpy = php_http_ztyp(IS_STRING, old_url);
 
-                               php_url_free(res_purl);
-                               if (old_purl) {
-                                       php_url_free(old_purl);
+                                       old_purl = php_url_parse(Z_STRVAL_P(cpy));
+                                       zval_ptr_dtor(&cpy);
+                                       break;
                                 }
+                       }
+                       if (!old_purl) {
                                 if (new_purl) {
                                         php_url_free(new_purl);
                                 }
-                       } end_error_handling();
+                               zend_restore_error_handling(&zeh TSRMLS_CC); /* new_purl was freed just above; undo the handler swap before the early return */
+                               return;
+                       }
+               }
+               /* php_http_url() fills res_purl from flags, old_purl and new_purl; the result is then written to the object being constructed */
+               php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC);
+               php_http_url_to_struct(res_purl, getThis() TSRMLS_CC);
+
+               php_url_free(res_purl);
+               if (old_purl) {
+                       php_url_free(old_purl);
+               }
+               if (new_purl) {
+                       php_url_free(new_purl);
                 }
-       } end_error_handling();
+       }
+       zend_restore_error_handling(&zeh TSRMLS_CC);
 }
 
 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_mod, 0, 0, 1)
@@ -466,8 +1015,12 @@ PHP_METHOD(HttpUrl, mod)
 {
        zval *new_url = NULL;
        long flags = PHP_HTTP_URL_JOIN_PATH | PHP_HTTP_URL_JOIN_QUERY;
+       zend_error_handling zeh;
+
+       php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z!|l", &new_url, &flags), invalid_arg, return);
 
-       if (SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z!|l", &new_url, &flags)) {
+       zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
+       {
                php_url *new_purl = NULL, *old_purl = NULL;
 
                if (new_url) {
@@ -485,6 +1038,7 @@ PHP_METHOD(HttpUrl, mod)
                                }
                        }
                        if (!new_purl) {
+                               zend_restore_error_handling(&zeh TSRMLS_CC);
                                return;
                        }
                }
@@ -504,6 +1058,7 @@ PHP_METHOD(HttpUrl, mod)
                        php_url_free(new_purl);
                }
        }
+       zend_restore_error_handling(&zeh TSRMLS_CC);
 }
 
 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toString, 0, 0, 0)
@@ -541,12 +1096,67 @@ PHP_METHOD(HttpUrl, toArray)
        php_url_free(purl);
 }
 
+ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_parse, 0, 0, 1) /* one required argument (url); flags is optional */
+       ZEND_ARG_INFO(0, url)
+       ZEND_ARG_INFO(0, flags)
+ZEND_END_ARG_INFO();
+PHP_METHOD(HttpUrl, parse) /* parses a URL string with php_http_url_parse() and returns the components as a new URL object */
+{
+       char *str;
+       int len;
+       long flags = 0;
+       php_http_url_t *url;
+       zend_error_handling zeh;
+       /* "s|l": the url string is required, flags keeps its default of 0 when omitted */
+       php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &len, &flags), invalid_arg, return);
+       /* with EH_THROW installed, errors raised while parsing are thrown as the bad_url exception class */
+       zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
+       if ((url = php_http_url_parse(str, len, flags TSRMLS_CC))) {
+               object_init_ex(return_value, php_http_url_class_entry); /* below, only non-NULL components (non-zero for port) are copied onto the object */
+               if (url->scheme) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("scheme"), url->scheme TSRMLS_CC);
+               }
+               if (url->user) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("user"), url->user TSRMLS_CC);
+               }
+               if (url->pass) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("pass"), url->pass TSRMLS_CC);
+               }
+               if (url->host) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("host"), url->host TSRMLS_CC);
+               }
+               if (url->port) {
+                       zend_update_property_long(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("port"), url->port TSRMLS_CC);
+               }
+               if (url->path) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("path"), url->path TSRMLS_CC);
+               }
+               if (url->query) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("query"), url->query TSRMLS_CC);
+               }
+               if (url->fragment) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("fragment"), url->fragment TSRMLS_CC);
+               }
+               php_http_url_free(&url); /* the parsed url is not used past this point */
+       }
+       zend_restore_error_handling(&zeh TSRMLS_CC);
+}
+
 static zend_function_entry php_http_url_methods[] = {
        PHP_ME(HttpUrl, __construct,  ai_HttpUrl___construct, ZEND_ACC_PUBLIC|ZEND_ACC_CTOR)
        PHP_ME(HttpUrl, mod,          ai_HttpUrl_mod, ZEND_ACC_PUBLIC)
        PHP_ME(HttpUrl, toString,     ai_HttpUrl_toString, ZEND_ACC_PUBLIC)
        ZEND_MALIAS(HttpUrl, __toString, toString, ai_HttpUrl_toString, ZEND_ACC_PUBLIC)
        PHP_ME(HttpUrl, toArray,      ai_HttpUrl_toArray, ZEND_ACC_PUBLIC)
+       PHP_ME(HttpUrl, parse,        ai_HttpUrl_parse, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) /* the only entry registered with ZEND_ACC_STATIC */
        EMPTY_FUNCTION_ENTRY
 };
 
@@ -557,7 +1167,7 @@ PHP_MINIT_FUNCTION(http_url)
        zend_class_entry ce = {0};
 
        INIT_NS_CLASS_ENTRY(ce, "http", "Url", php_http_url_methods);
-       php_http_url_class_entry = zend_register_internal_class_ex(&ce, php_http_object_class_entry, NULL TSRMLS_CC);
+       php_http_url_class_entry = zend_register_internal_class(&ce TSRMLS_CC);
 
        zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("scheme"), ZEND_ACC_PUBLIC TSRMLS_CC);
        zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("user"), ZEND_ACC_PUBLIC TSRMLS_CC);
@@ -582,6 +1192,14 @@ PHP_MINIT_FUNCTION(http_url)
        zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("FROM_ENV"), PHP_HTTP_URL_FROM_ENV TSRMLS_CC);
        zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("SANITIZE_PATH"), PHP_HTTP_URL_SANITIZE_PATH TSRMLS_CC);
 
+#ifdef PHP_HTTP_HAVE_WCHAR
+       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBLOC"), PHP_HTTP_URL_PARSE_MBLOC TSRMLS_CC);
+#endif
+       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC);
+#ifdef PHP_HTTP_HAVE_IDN
+       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_IDN"), PHP_HTTP_URL_PARSE_IDN TSRMLS_CC);
+#endif
+
        return SUCCESS;
 }