simplify
author Michael Wallner <mike@php.net>
Thu, 30 Oct 2014 15:36:59 +0000 (16:36 +0100)
committer Michael Wallner <mike@php.net>
Thu, 6 Nov 2014 08:12:56 +0000 (09:12 +0100)
config9.m4
php_http_url.c
php_http_url.h
php_http_utf8.h
tests/urlparser008.phpt [new file with mode: 0644]
tests/urlparser009.phpt [new file with mode: 0644]

index cd0ae7e5f4f8296e83c9c8bb6b3e9b4704d1a3e7..3b1f7015554d1683469836e9548efd85b2eb0a08 100644 (file)
@@ -100,12 +100,13 @@ dnl ----
        AC_TYPE_OFF_T
        AC_TYPE_MBSTATE_T
        dnl getdomainname() is declared in netdb.h on some platforms: AIX, OSF
        AC_TYPE_OFF_T
        AC_TYPE_MBSTATE_T
        dnl getdomainname() is declared in netdb.h on some platforms: AIX, OSF
-       AC_CHECK_HEADERS([netdb.h unistd.h wchar.h wctype.h])
+       AC_CHECK_HEADERS([netdb.h unistd.h wchar.h wctype.h arpa/inet.h])
        PHP_CHECK_FUNC(gethostname, nsl)
        PHP_CHECK_FUNC(getdomainname, nsl)
        PHP_CHECK_FUNC(mbrtowc)
        PHP_CHECK_FUNC(mbtowc)
        PHP_CHECK_FUNC(iswalnum)
        PHP_CHECK_FUNC(gethostname, nsl)
        PHP_CHECK_FUNC(getdomainname, nsl)
        PHP_CHECK_FUNC(mbrtowc)
        PHP_CHECK_FUNC(mbtowc)
        PHP_CHECK_FUNC(iswalnum)
+       PHP_CHECK_FUNC(inet_pton)
 
 dnl ----
 dnl IDN
 
 dnl ----
 dnl IDN
index 5ace03d3432dc681c0b5f0194e98405c615db6c8..3c3fa4d58d8e2c687e7f91216f73c1b81eacda22 100644 (file)
 #      include <wctype.h>
 #endif
 
 #      include <wctype.h>
 #endif
 
+#ifdef HAVE_ARPA_INET_H
+#      include <arpa/inet.h>
+#endif
+
 #include "php_http_utf8.h"
 
 static inline char *localhostname(void)
 #include "php_http_utf8.h"
 
 static inline char *localhostname(void)
@@ -310,21 +314,9 @@ STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, con
        return SUCCESS;
 }
 
        return SUCCESS;
 }
 
-void php_http_url_dtor(php_http_url_t *url)
-{
-       STR_FREE(url->scheme.str);
-       STR_FREE(url->authority.userinfo.username.str);
-       STR_FREE(url->authority.userinfo.password.str);
-       STR_FREE(url->authority.host.str);
-       STR_FREE(url->path.str);
-       STR_FREE(url->query.str);
-       STR_FREE(url->fragment.str);
-}
-
 void php_http_url_free(php_http_url_t **url)
 {
        if (*url) {
 void php_http_url_free(php_http_url_t **url)
 {
        if (*url) {
-               php_http_url_dtor(*url);
                efree(*url);
                *url = NULL;
        }
                efree(*url);
                *url = NULL;
        }
@@ -401,7 +393,9 @@ static size_t parse_mb(php_http_url_t *url, parse_mb_what_t what, const char *pt
        }
 #endif
 
        }
 #endif
 
-       if (!consumed && !silent) {
+       if (consumed) {
+               PHP_HTTP_DUFF(consumed, url->buffer[url->offset++] = *ptr++);
+       } else if (!silent) {
                TSRMLS_FETCH_FROM_CTX(url->ts);
                php_error_docref(NULL TSRMLS_CC, E_WARNING,
                                "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
                TSRMLS_FETCH_FROM_CTX(url->ts);
                php_error_docref(NULL TSRMLS_CC, E_WARNING,
                                "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
@@ -411,10 +405,10 @@ static size_t parse_mb(php_http_url_t *url, parse_mb_what_t what, const char *pt
        return consumed;
 }
 
        return consumed;
 }
 
-static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *end)
+static STATUS parse_userinfo(php_http_url_t *url, const char *ptr)
 {
        size_t mb;
 {
        size_t mb;
-       const char *password = NULL, *tmp = ptr;
+       const char *password = NULL, *end = url->ptr, *tmp = ptr;
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
        do {
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
        do {
@@ -427,6 +421,7 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *e
                                return FAILURE;
                        }
                        password = ptr + 1;
                                return FAILURE;
                        }
                        password = ptr + 1;
+                       url->buffer[url->offset++] = *ptr;
                        break;
 
                case '%':
                        break;
 
                case '%':
@@ -436,7 +431,9 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *e
                                                (unsigned) (ptr - tmp), tmp);
                                return FAILURE;
                        }
                                                (unsigned) (ptr - tmp), tmp);
                                return FAILURE;
                        }
-                       ptr += 2;
+                       url->buffer[url->offset++] = *ptr++;
+                       url->buffer[url->offset++] = *ptr++;
+                       url->buffer[url->offset++] = *ptr;
                        break;
 
                case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
                        break;
 
                case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
@@ -453,6 +450,7 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *e
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                        /* allowed */
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                        /* allowed */
+                       url->buffer[url->offset++] = *ptr;
                        break;
 
                default:
                        break;
 
                default:
@@ -464,32 +462,59 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *e
        } while(++ptr != end);
 
        if (password) {
        } while(++ptr != end);
 
        if (password) {
-               if ((url->authority.userinfo.username.len = password - tmp - 1)) {
-                       url->authority.userinfo.username.str = estrndup(tmp,
-                               url->authority.userinfo.username.len);
-               }
-               if ((url->authority.userinfo.password.len = end - password)) {
-                       url->authority.userinfo.password.str = estrndup(password,
-                               url->authority.userinfo.password.len);
-               }
+               url->user = &url->buffer[url->offset - (end - password) - (password - tmp)];
+               url->buffer[url->offset - (end - password) - 1] = 0;
+               url->pass = &url->buffer[url->offset - (end - password)];
+               url->buffer[url->offset++] = 0;
        } else {
        } else {
-               if ((url->authority.userinfo.username.len = end - tmp)) {
-                       url->authority.userinfo.username.str = estrndup(tmp,
-                               url->authority.userinfo.username.len);
-               }
+               url->user = &url->buffer[url->offset - (end - tmp)];
+               url->buffer[url->offset++] = 0;
        }
 
        return SUCCESS;
 }
 
        }
 
        return SUCCESS;
 }
 
-static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *end)
+static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr)
 {
 {
-       size_t mb;
-       const char *tmp = ptr, *port = NULL;
+       size_t mb, len;
+       const char *end = url->ptr, *tmp = ptr, *port = NULL;
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
-       /* FIXME: IP(v6) addresses */
-       do {
+
+#ifdef HAVE_INET_PTON
+       if (*ptr == '[') {
+               char *error = NULL, *tmp = memchr(ptr, ']', end - ptr);
+
+               if (tmp) {
+                       size_t addrlen = tmp - ptr + 1;
+                       char buf[16], *addr = estrndup(ptr + 1, addrlen - 2);
+                       int rv = inet_pton(AF_INET6, addr, buf);
+
+                       efree(addr);
+                       if (rv == 1) {
+                               url->buffer[url->offset] = '[';
+                               url->host = &url->buffer[url->offset];
+                               inet_ntop(AF_INET6, buf, url->host + 1, url->maxlen - url->offset);
+                               url->offset += strlen(url->host);
+                               url->buffer[url->offset++] = ']';
+                               url->buffer[url->offset++] = 0;
+                               ptr = tmp + 1;
+                       } else if (rv == -1) {
+                               error = strerror(errno);
+                       } else {
+                               error = "unexpected '['";
+                       }
+               } else {
+                       error = "expected ']'";
+               }
+
+               if (error) {
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse hostinfo; %s", error);
+                       return FAILURE;
+               }
+       }
+#endif
+       if (ptr != end) do {
                switch (*ptr) {
                case ':':
                        if (port) {
                switch (*ptr) {
                case ':':
                        if (port) {
@@ -508,7 +533,9 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *e
                                                (unsigned) (ptr - tmp), tmp);
                                return FAILURE;
                        }
                                                (unsigned) (ptr - tmp), tmp);
                                return FAILURE;
                        }
-                       ptr += 2;
+                       url->buffer[url->offset++] = *ptr++;
+                       url->buffer[url->offset++] = *ptr++;
+                       url->buffer[url->offset++] = *ptr;
                        break;
 
                case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
                        break;
 
                case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
@@ -533,8 +560,10 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *e
                case '7': case '8': case '9':
                        /* allowed */
                        if (port) {
                case '7': case '8': case '9':
                        /* allowed */
                        if (port) {
-                               url->authority.port *= 10;
-                               url->authority.port += *ptr - '0';
+                               url->port *= 10;
+                               url->port += *ptr - '0';
+                       } else {
+                               url->buffer[url->offset++] = *ptr;
                        }
                        break;
 
                        }
                        break;
 
@@ -551,34 +580,33 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *e
                }
        } while (++ptr != end);
 
                }
        } while (++ptr != end);
 
-       if (port) {
-               url->authority.host.len = port - tmp - 1;
-       } else {
-               url->authority.host.len = end - tmp;
+       if (!url->host) {
+               len = (port ? port - tmp - 1 : end - tmp);
+               url->host = &url->buffer[url->offset - len];
+               url->buffer[url->offset++] = 0;
        }
 
        }
 
-       url->authority.host.str = estrndup(tmp, url->authority.host.len);
-
 #ifdef PHP_HTTP_HAVE_IDN
        if (url->flags & PHP_HTTP_URL_PARSE_IDN) {
                char *idn = NULL;
                int rv = -1;
 
                if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
 #ifdef PHP_HTTP_HAVE_IDN
        if (url->flags & PHP_HTTP_URL_PARSE_IDN) {
                char *idn = NULL;
                int rv = -1;
 
                if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
-                       rv = idna_to_ascii_8z(url->authority.host.str, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
+                       rv = idna_to_ascii_8z(url->host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
                }
 #      ifdef PHP_HTTP_HAVE_WCHAR
                else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) {
                }
 #      ifdef PHP_HTTP_HAVE_WCHAR
                else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) {
-                       rv = idna_to_ascii_lz(url->authority.host.str, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
+                       rv = idna_to_ascii_lz(url->host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
                }
 #      endif
                if (rv != IDNA_SUCCESS) {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv));
                        return FAILURE;
                } else {
                }
 #      endif
                if (rv != IDNA_SUCCESS) {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv));
                        return FAILURE;
                } else {
-                       STR_SET(url->authority.host.str, estrdup(idn));
-                       url->authority.host.len = strlen(idn);
+                       size_t idnlen = strlen(idn);
+                       memcpy(url->host, idn, idnlen + 1);
                        free(idn);
                        free(idn);
+                       url->offset += idnlen - len;
                }
        }
 #endif
                }
        }
 #endif
@@ -586,18 +614,18 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *e
        return SUCCESS;
 }
 
        return SUCCESS;
 }
 
-static const char *parse_authority(php_http_url_t *url, const char *ptr, const char *end)
+static const char *parse_authority(php_http_url_t *url)
 {
 {
-       const char *tmp = ptr;
+       const char *tmp = url->ptr;
 
        do {
 
        do {
-               switch (*ptr) {
+               switch (*url->ptr) {
                case '@':
                        /* userinfo delimiter */
                case '@':
                        /* userinfo delimiter */
-                       if (tmp != ptr && SUCCESS != parse_userinfo(url, tmp, ptr)) {
+                       if (tmp != url->ptr && SUCCESS != parse_userinfo(url, tmp)) {
                                return NULL;
                        }
                                return NULL;
                        }
-                       tmp = ptr + 1;
+                       tmp = url->ptr + 1;
                        break;
 
                case '/':
                        break;
 
                case '/':
@@ -605,39 +633,49 @@ static const char *parse_authority(php_http_url_t *url, const char *ptr, const c
                case '#':
                case '\0':
                        /* host delimiter */
                case '#':
                case '\0':
                        /* host delimiter */
-                       if (tmp != ptr && SUCCESS != parse_hostinfo(url, tmp, ptr)) {
+                       if (tmp != url->ptr && SUCCESS != parse_hostinfo(url, tmp)) {
                                return NULL;
                        }
                                return NULL;
                        }
-                       return ptr;
+                       return url->ptr;
                }
                }
-       } while (++ptr <= end);
+       } while (++url->ptr <= url->end);
 
        return NULL;
 }
 
 
        return NULL;
 }
 
-static const char *parse_path(php_http_url_t *url, const char *ptr, const char *end)
+static const char *parse_path(php_http_url_t *url)
 {
        size_t mb;
 {
        size_t mb;
-       const char *tmp = ptr;
+       const char *tmp;
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
+       /* is there actually a path to parse? */
+       if (!*url->ptr) {
+               return url->ptr;
+       }
+       tmp = url->ptr;
+
        do {
        do {
-               switch (*ptr) {
+               switch (*url->ptr) {
                case '?':
                case '\0':
                case '?':
                case '\0':
-                       if ((url->path.len = ptr - tmp)) {
-                               url->path.str = estrndup(tmp, url->path.len);
+                       /* did we have any path component ? */
+                       if (tmp != url->ptr) {
+                               url->path = &url->buffer[url->offset - (url->ptr - tmp)];
+                               url->buffer[url->offset++] = 0;
                        }
                        }
-                       return ptr;
+                       return url->ptr;
 
                case '%':
 
                case '%':
-                       if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) {
+                       if (url->ptr[1] != '%' && (url->end - url->ptr <= 2 || !isxdigit(*(url->ptr+1)) || !isxdigit(*(url->ptr+2)))) {
                                php_error_docref(NULL TSRMLS_CC, E_WARNING,
                                                "Failed to parse path; invalid percent encoding at pos %u in '%s'",
                                php_error_docref(NULL TSRMLS_CC, E_WARNING,
                                                "Failed to parse path; invalid percent encoding at pos %u in '%s'",
-                                               (unsigned) (ptr - tmp), tmp);
+                                               (unsigned) (url->ptr - tmp), tmp);
                                return NULL;
                        }
                                return NULL;
                        }
-                       ptr += 2;
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr;
                        break;
 
                case '/': /* yeah, well */
                        break;
 
                case '/': /* yeah, well */
@@ -656,42 +694,52 @@ static const char *parse_path(php_http_url_t *url, const char *ptr, const char *
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                        /* allowed */
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                        /* allowed */
+                       url->buffer[url->offset++] = *url->ptr;
                        break;
 
                default:
                        break;
 
                default:
-                       if (!(mb = parse_mb(url, PARSE_PATH, ptr, end, tmp, 0))) {
+                       if (!(mb = parse_mb(url, PARSE_PATH, url->ptr, url->end, tmp, 0))) {
                                return NULL;
                        }
                                return NULL;
                        }
-                       ptr += mb - 1;
+                       url->ptr += mb - 1;
                }
                }
-       } while (++ptr <= end);
+       } while (++url->ptr <= url->end);
 
        return NULL;
 }
 
 
        return NULL;
 }
 
-static const char *parse_query(php_http_url_t *url, const char *ptr, const char *end)
+static const char *parse_query(php_http_url_t *url)
 {
        size_t mb;
 {
        size_t mb;
-       const char *tmp = ptr + !!*ptr;
+       const char *tmp = url->ptr + !!*url->ptr;
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
+       /* is there actually a query to parse ? */
+       if (!*url->ptr || *url->ptr != '?') {
+               return url->ptr;
+       }
+
+       /* skip initial '?' */
+       tmp = url->ptr + 1;
+
        do {
        do {
-               switch (*ptr) {
+               switch (*url->ptr) {
                case '#':
                case '\0':
                case '#':
                case '\0':
-                       if ((url->query.len = ptr - tmp)) {
-                               url->query.str = estrndup(tmp, url->query.len);
-                       }
-                       return ptr;
+                       url->query = &url->buffer[url->offset - (url->ptr - tmp)];
+                       url->buffer[url->offset++] = 0;
+                       return url->ptr;
 
                case '%':
 
                case '%':
-                       if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) {
+                       if (url->ptr[1] != '%' && (url->end - url->ptr <= 2 || !isxdigit(*(url->ptr+1)) || !isxdigit(*(url->ptr+2)))) {
                                php_error_docref(NULL TSRMLS_CC, E_WARNING,
                                                "Failed to parse query; invalid percent encoding at pos %u in '%s'",
                                php_error_docref(NULL TSRMLS_CC, E_WARNING,
                                                "Failed to parse query; invalid percent encoding at pos %u in '%s'",
-                                               (unsigned) (ptr - tmp), tmp);
+                                               (unsigned) (url->ptr - tmp), tmp);
                                return NULL;
                        }
                                return NULL;
                        }
-                       ptr += 2;
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr;
                        break;
 
                case '?': case '/': /* yeah, well */
                        break;
 
                case '?': case '/': /* yeah, well */
@@ -710,41 +758,51 @@ static const char *parse_query(php_http_url_t *url, const char *ptr, const char
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                        /* allowed */
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                        /* allowed */
+                       url->buffer[url->offset++] = *url->ptr;
                        break;
 
                default:
                        break;
 
                default:
-                       if (!(mb = parse_mb(url, PARSE_QUERY, ptr, end, tmp, 0))) {
+                       if (!(mb = parse_mb(url, PARSE_QUERY, url->ptr, url->end, tmp, 0))) {
                                return NULL;
                        }
                                return NULL;
                        }
-                       ptr += mb - 1;
+                       url->ptr += mb - 1;
                }
                }
-       } while (++ptr <= end);
+       } while (++url->ptr <= url->end);
 
        return NULL;
 }
 
 
        return NULL;
 }
 
-static const char *parse_fragment(php_http_url_t *url, const char *ptr, const char *end)
+static const char *parse_fragment(php_http_url_t *url)
 {
        size_t mb;
 {
        size_t mb;
-       const char *tmp = ptr + !!*ptr;
+       const char *tmp;
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
+       /* is there actually a fragment to parse */
+       if (!*url->ptr || *url->ptr != '#') {
+               return url->ptr;
+       }
+
+       /* skip initial '#' */
+       tmp = url->ptr + 1;
+
        do {
        do {
-               switch (*ptr) {
+               switch (*url->ptr) {
                case '\0':
                case '\0':
-                       if ((url->fragment.len = ptr - tmp)) {
-                               url->fragment.str = estrndup(tmp, url->fragment.len);
-                       }
-                       return ptr;
+                       url->fragment = &url->buffer[url->offset - (url->ptr - tmp)];
+                       url->buffer[url->offset++] = 0;
+                       return url->ptr;
 
                case '%':
 
                case '%':
-                       if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) {
+                       if (url->ptr[1] != '%' && (url->end - url->ptr <= 2 || !isxdigit(*(url->ptr+1)) || !isxdigit(*(url->ptr+2)))) {
                                php_error_docref(NULL TSRMLS_CC, E_WARNING,
                                php_error_docref(NULL TSRMLS_CC, E_WARNING,
-                                               "Failed to parse query; invalid percent encoding at pos %u in '%s'",
-                                               (unsigned) (ptr - tmp), tmp);
+                                               "Failed to parse fragment; invalid percent encoding at pos %u in '%s'",
+                                               (unsigned) (url->ptr - tmp), tmp);
                                return NULL;
                        }
                                return NULL;
                        }
-                       ptr += 2;
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr++;
+                       url->buffer[url->offset++] = *url->ptr;
                        break;
 
                case '?': case '/': /* yeah, well */
                        break;
 
                case '?': case '/': /* yeah, well */
@@ -763,50 +821,52 @@ static const char *parse_fragment(php_http_url_t *url, const char *ptr, const ch
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                        /* allowed */
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                        /* allowed */
+                       url->buffer[url->offset++] = *url->ptr;
                        break;
 
                default:
                        break;
 
                default:
-                       if (!(mb = parse_mb(url, PARSE_FRAGMENT, ptr, end, tmp, 0))) {
+                       if (!(mb = parse_mb(url, PARSE_FRAGMENT, url->ptr, url->end, tmp, 0))) {
                                return NULL;
                        }
                                return NULL;
                        }
-                       ptr += mb - 1;
+                       url->ptr += mb - 1;
                }
                }
-       } while (++ptr <= end);
+       } while (++url->ptr <= url->end);
 
        return NULL;
 }
 
 
        return NULL;
 }
 
-static const char *parse_hier(php_http_url_t *url, const char *ptr, const char *end)
+static const char *parse_hier(php_http_url_t *url)
 {
 {
-       if (*ptr == '/') {
-               if (end - ptr > 1) {
-                       if (*(ptr + 1) == '/') {
-                               if (!(ptr = parse_authority(url, ptr + 2, end))) {
+       if (*url->ptr == '/') {
+               if (url->end - url->ptr > 1) {
+                       if (*(url->ptr + 1) == '/') {
+                               url->ptr += 2;
+                               if (!(url->ptr = parse_authority(url))) {
                                        return NULL;
                                }
                        }
                }
        }
                                        return NULL;
                                }
                        }
                }
        }
-       return parse_path(url, ptr, end);
+       return parse_path(url);
 }
 
 }
 
-static const char *parse_scheme(php_http_url_t *url, const char *ptr, const char *end)
+static const char *parse_scheme(php_http_url_t *url)
 {
        size_t mb;
 {
        size_t mb;
-       const char *tmp = ptr;
+       const char *tmp = url->ptr;
 
        do {
 
        do {
-               switch (*ptr) {
+               switch (*url->ptr) {
                case ':':
                        /* scheme delimiter */
                case ':':
                        /* scheme delimiter */
-                       url->scheme.len = ptr - tmp;
-                       url->scheme.str = estrndup(tmp, url->scheme.len);
-                       return ++ptr;
+                       url->scheme = &url->buffer[0];
+                       url->buffer[url->offset++] = 0;
+                       return ++url->ptr;
 
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                case '+': case '-': case '.':
 
                case '0': case '1': case '2': case '3': case '4': case '5': case '6':
                case '7': case '8': case '9':
                case '+': case '-': case '.':
-                       if (ptr == tmp) {
+                       if (url->ptr == tmp) {
                                return tmp;
                        }
                        /* no break */
                                return tmp;
                        }
                        /* no break */
@@ -819,70 +879,55 @@ static const char *parse_scheme(php_http_url_t *url, const char *ptr, const char
                case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
                case 'v': case 'w': case 'x': case 'y': case 'z':
                        /* scheme part */
                case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
                case 'v': case 'w': case 'x': case 'y': case 'z':
                        /* scheme part */
+                       url->buffer[url->offset++] = *url->ptr;
                        break;
 
                default:
                        break;
 
                default:
-                       if (!(mb = parse_mb(url, PARSE_SCHEME, ptr, end, tmp, 1))) {
+                       if (!(mb = parse_mb(url, PARSE_SCHEME, url->ptr, url->end, tmp, 1))) {
                                /* soft fail; parse path next */
                                return tmp;
                        }
                                /* soft fail; parse path next */
                                return tmp;
                        }
-                       ptr += mb - 1;
+                       url->ptr += mb - 1;
                }
                }
-       } while (++ptr != end);
+       } while (++url->ptr != url->end);
 
        return tmp;
 }
 
 
        return tmp;
 }
 
-php_http_url_t *php_http_url_init(php_http_url_t *url, const char *str, size_t len, unsigned flags TSRMLS_DC)
-{
-       const char *ptr, *end = str + len;
-       zend_bool free_url = !url;
+struct parser_state {
+};
 
 
-       if (url) {
-               memset(url, 0, sizeof(*url));
-       } else {
-               url = ecalloc(1, sizeof(*url));
-       }
+php_http_url_t *php_http_url_parse(const char *str, size_t len, unsigned flags TSRMLS_DC)
+{
+       size_t maxlen = 3 * len;
+       php_http_url_t *url = ecalloc(1, sizeof(*url) + maxlen);
 
 
+       url->end = str + len;
+       url->ptr = str;
        url->flags = flags;
        url->flags = flags;
+       url->maxlen = maxlen;
        TSRMLS_SET_CTX(url->ts);
 
        TSRMLS_SET_CTX(url->ts);
 
-       if ((ptr = str) && !(str = parse_scheme(url, ptr, end))) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL scheme: '%s'", ptr);
-               if (free_url) {
-                       php_http_url_free(&url);
-               } else {
-                       php_http_url_dtor(url);
-               }
+       if (!parse_scheme(url)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL scheme: '%s'", url->ptr);
+               php_http_url_free(&url);
                return NULL;
        }
 
                return NULL;
        }
 
-       if ((ptr = str) && !(str = parse_hier(url, ptr, end))) {
-               if (free_url) {
-                       php_http_url_free(&url);
-               } else {
-                       php_http_url_dtor(url);
-               }
+       if (!parse_hier(url)) {
+               php_http_url_free(&url);
                return NULL;
        }
 
                return NULL;
        }
 
-       if ((ptr = str) && !(str = parse_query(url, ptr, end))) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL query: '%s'", ptr);
-               if (free_url) {
-                       php_http_url_free(&url);
-               } else {
-                       php_http_url_dtor(url);
-               }
+       if (!parse_query(url)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL query: '%s'", url->ptr);
+               php_http_url_free(&url);
                return NULL;
        }
 
                return NULL;
        }
 
-       if ((ptr = str) && !(str = parse_fragment(url, ptr, end))) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL fragment: '%s'", ptr);
-               if (free_url) {
-                       php_http_url_free(&url);
-               } else {
-                       php_http_url_dtor(url);
-               }
+       if (!parse_fragment(url)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL fragment: '%s'", url->ptr);
+               php_http_url_free(&url);
                return NULL;
        }
 
                return NULL;
        }
 
@@ -1060,47 +1105,47 @@ PHP_METHOD(HttpUrl, parse)
        char *str;
        int len;
        long flags = 0;
        char *str;
        int len;
        long flags = 0;
-       php_http_url_t url;
+       php_http_url_t *url;
        zend_error_handling zeh;
 
        php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &len, &flags), invalid_arg, return);
 
        zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
        zend_error_handling zeh;
 
        php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &len, &flags), invalid_arg, return);
 
        zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
-       if (php_http_url_init(&url, str, len, flags TSRMLS_CC)) {
+       if ((url = php_http_url_parse(str, len, flags TSRMLS_CC))) {
                object_init_ex(return_value, php_http_url_class_entry);
                object_init_ex(return_value, php_http_url_class_entry);
-               if (url.scheme.len) {
-                       zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("scheme"),
-                                       url.scheme.str, url.scheme.len TSRMLS_CC);
+               if (url->scheme) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("scheme"), url->scheme TSRMLS_CC);
                }
                }
-               if (url.authority.userinfo.username.len) {
-                       zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("user"),
-                                       url.authority.userinfo.username.str, url.authority.userinfo.username.len TSRMLS_CC);
+               if (url->user) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("user"), url->user TSRMLS_CC);
                }
                }
-               if (url.authority.userinfo.password.len) {
-                       zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("pass"),
-                                       url.authority.userinfo.password.str, url.authority.userinfo.password.len TSRMLS_CC);
+               if (url->pass) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("pass"), url->pass TSRMLS_CC);
                }
                }
-               if (url.authority.host.len) {
-                       zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("host"),
-                                       url.authority.host.str, url.authority.host.len TSRMLS_CC);
+               if (url->host) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("host"), url->host TSRMLS_CC);
                }
                }
-               if (url.authority.port) {
-                       zend_update_property_long(php_http_url_class_entry, return_value, ZEND_STRL("port"),
-                                       url.authority.port TSRMLS_CC);
+               if (url->port) {
+                       zend_update_property_long(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("port"), url->port TSRMLS_CC);
                }
                }
-               if (url.path.len) {
-                       zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("path"),
-                                       url.path.str, url.path.len TSRMLS_CC);
+               if (url->path) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("path"), url->path TSRMLS_CC);
                }
                }
-               if (url.query.len) {
-                       zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("query"),
-                                       url.query.str, url.query.len TSRMLS_CC);
+               if (url->query) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("query"), url->query TSRMLS_CC);
                }
                }
-               if (url.fragment.len) {
-                       zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("fragment"),
-                                       url.fragment.str, url.fragment.len TSRMLS_CC);
+               if (url->fragment) {
+                       zend_update_property_string(php_http_url_class_entry, return_value,
+                                       ZEND_STRL("fragment"), url->fragment TSRMLS_CC);
                }
                }
-               php_http_url_dtor(&url);
+               php_http_url_free(&url);
        }
        zend_restore_error_handling(&zeh TSRMLS_CC);
 }
        }
        zend_restore_error_handling(&zeh TSRMLS_CC);
 }
index b0655e0940d8f69a4c5d4bd1f7c1364437818ddf..df8cae513bae25eaf8179222f619a57677325963 100644 (file)
@@ -40,31 +40,36 @@ typedef struct php_http_url_part {
        size_t len;
 } php_http_url_part_t;
 
        size_t len;
 } php_http_url_part_t;
 
+/* parse multibyte according to locale */
 #define PHP_HTTP_URL_PARSE_MBLOC  0x001
 #define PHP_HTTP_URL_PARSE_MBLOC  0x001
+/* parse utf8 multibyte sequences */
 #define PHP_HTTP_URL_PARSE_MBUTF8 0x002
 #define PHP_HTTP_URL_PARSE_MBUTF8 0x002
+/* convert multibyte hostnames to IDNA */
 #define PHP_HTTP_URL_PARSE_IDN    0x010
 
 typedef struct php_http_url {
 #define PHP_HTTP_URL_PARSE_IDN    0x010
 
 typedef struct php_http_url {
-       php_http_url_part_t scheme;
-       struct {
-               struct {
-                       php_http_url_part_t username;
-                       php_http_url_part_t password;
-               } userinfo;
-               php_http_url_part_t host;
-               unsigned short port;
-       } authority;
-       php_http_url_part_t path;
-       php_http_url_part_t query;
-       php_http_url_part_t fragment;
-       unsigned flags;
+       /* compatible with php_url, but release with php_http_url_free(), not php_url_free() */
+       char *scheme;
+       char *user;
+       char *pass;
+       char *host;
+       unsigned short port;
+       char *path;
+       char *query;
+       char *fragment;
+       /* additional members beyond the php_url-compatible fields above */
 #ifdef ZTS
        void ***ts;
 #endif
 #ifdef ZTS
        void ***ts;
 #endif
+       const char *ptr;
+       const char *end;
+       size_t maxlen;
+       off_t offset;
+       unsigned flags;
+       char buffer[];
 } php_http_url_t;
 
 } php_http_url_t;
 
-PHP_HTTP_API php_http_url_t *php_http_url_init(php_http_url_t *url, const char *str, size_t len, unsigned flags TSRMLS_DC);
-PHP_HTTP_API void php_http_url_dtor(php_http_url_t *url);
+PHP_HTTP_API php_http_url_t *php_http_url_parse(const char *str, size_t len, unsigned flags TSRMLS_DC);
 PHP_HTTP_API void php_http_url_free(php_http_url_t **url);
 
 PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC);
 PHP_HTTP_API void php_http_url_free(php_http_url_t **url);
 
 PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC);
index b9b7b28f270c99d50926e9ed1fd8bb0c385aad14..2baec7dbe8d9cdd0274552d5d3e7a771858ea72c 100644 (file)
@@ -737,7 +737,7 @@ static inline size_t utf8towc(unsigned *wc, const unsigned char *uc, size_t len)
 
 static inline zend_bool isualpha(unsigned ch)
 {
 
 static inline zend_bool isualpha(unsigned ch)
 {
-       unsigned i;
+       unsigned i, j;
 
        for (i = 0; i < sizeof(utf8_ranges)/sizeof(utf8_range_t); ++i) {
                if (utf8_ranges[i].start == ch) {
 
        for (i = 0; i < sizeof(utf8_ranges)/sizeof(utf8_range_t); ++i) {
                if (utf8_ranges[i].start == ch) {
@@ -746,7 +746,11 @@ static inline zend_bool isualpha(unsigned ch)
                        if (utf8_ranges[i].step == 1) {
                                return 1;
                        }
                        if (utf8_ranges[i].step == 1) {
                                return 1;
                        }
-                       /* FIXME step */
+                       for (j = utf8_ranges[i].start; j <= utf8_ranges[i].end; j+= utf8_ranges[i].step) {
+                               if (ch == j) {
+                                       return 1;
+                               }
+                       }
                        return 0;
                }
        }
                        return 0;
                }
        }
diff --git a/tests/urlparser008.phpt b/tests/urlparser008.phpt
new file mode 100644 (file)
index 0000000..98382f4
--- /dev/null
@@ -0,0 +1,76 @@
+--TEST--
+url parser ipv6
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s://[a:80",
+       "s://[0]",
+       "s://[::1]:80",
+       "s://mike@[0:0:0:0:0:FFFF:204.152.189.116]/foo",
+);
+
+foreach ($urls as $url) {
+       try {
+               printf("\n%s\n", $url);
+               var_dump(http\Url::parse($url));
+       } catch (Exception $e) {
+               echo $e->getMessage(),"\n";
+       }
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+s://[a:80
+http\Url::parse(): Failed to parse hostinfo; expected ']'
+
+s://[0]
+http\Url::parse(): Failed to parse hostinfo; unexpected '['
+
+s://[::1]:80
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  NULL
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(5) "[::1]"
+  ["port"]=>
+  int(80)
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://mike@[0:0:0:0:0:FFFF:204.152.189.116]/foo
+object(http\Url)#%d (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "mike"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(24) "[::ffff:204.152.189.116]"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  string(4) "/foo"
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE
diff --git a/tests/urlparser009.phpt b/tests/urlparser009.phpt
new file mode 100644 (file)
index 0000000..f3e2b83
--- /dev/null
@@ -0,0 +1,278 @@
+--TEST--
+url parser userinfo
+--SKIPIF--
+<?php
+include "skipif.inc";
+?>
+--FILE--
+<?php
+echo "Test\n";
+
+$urls = array(
+       "s://:@",
+       "s://u@",
+       "s://u:@",
+       "s://u:p@",
+       "s://user:pass@",
+       "s://user:pass@host",
+       "s://u@h",
+       "s://user@h",
+       "s://u@host",
+       "s://user:p@h",
+       "s://user:pass@h",
+       "s://user:pass@host",
+);
+
+foreach ($urls as $url) {
+       try {
+               printf("\n%s\n", $url);
+               var_dump(http\Url::parse($url));
+       } catch (Exception $e) {
+               echo $e->getMessage(),"\n";
+       }
+}
+?>
+DONE
+--EXPECTF--
+Test
+
+s://:@
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(0) ""
+  ["pass"]=>
+  string(0) ""
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u@
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u:@
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  string(0) ""
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u:p@
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  string(1) "p"
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:pass@
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(4) "pass"
+  ["host"]=>
+  NULL
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:pass@host
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(4) "pass"
+  ["host"]=>
+  string(4) "host"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u@h
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user@h
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://u@host
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(1) "u"
+  ["pass"]=>
+  NULL
+  ["host"]=>
+  string(4) "host"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:p@h
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(1) "p"
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:pass@h
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(4) "pass"
+  ["host"]=>
+  string(1) "h"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+
+s://user:pass@host
+object(http\Url)#1 (8) {
+  ["scheme"]=>
+  string(1) "s"
+  ["user"]=>
+  string(4) "user"
+  ["pass"]=>
+  string(4) "pass"
+  ["host"]=>
+  string(4) "host"
+  ["port"]=>
+  NULL
+  ["path"]=>
+  NULL
+  ["query"]=>
+  NULL
+  ["fragment"]=>
+  NULL
+}
+DONE