X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=blobdiff_plain;f=php_http_url.c;h=3c3fa4d58d8e2c687e7f91216f73c1b81eacda22;hp=d95eb5aeae969459ae4cf23ec7622d1524778781;hb=a760e170d36cc40ce992ebcdb13f0877ddfa3d73;hpb=0b83632b2b0a03eeca090f993259ccd95ab646fb diff --git a/php_http_url.c b/php_http_url.c index d95eb5a..3c3fa4d 100644 --- a/php_http_url.c +++ b/php_http_url.c @@ -6,12 +6,27 @@ | modification, are permitted provided that the conditions mentioned | | in the accompanying LICENSE file are met. | +--------------------------------------------------------------------+ - | Copyright (c) 2004-2013, Michael Wallner | + | Copyright (c) 2004-2014, Michael Wallner | +--------------------------------------------------------------------+ */ #include "php_http_api.h" +#ifdef PHP_HTTP_HAVE_IDN +# include +#endif + +#ifdef PHP_HTTP_HAVE_WCHAR +# include +# include +#endif + +#ifdef HAVE_ARPA_INET_H +# include +#endif + +#include "php_http_utf8.h" + static inline char *localhostname(void) { char hostname[1024] = {0}; @@ -42,94 +57,6 @@ static inline char *localhostname(void) return estrndup("localhost", lenof("localhost")); } -static inline unsigned port(const char *scheme) -{ - unsigned port = 80; - -#if defined(ZTS) && defined(HAVE_GETSERVBYPORT_R) - int rc; - size_t len = 0xff; - char *buf = NULL; - struct servent *se_res = NULL, se_buf = {0}; - - do { - buf = erealloc(buf, len); - rc = getservbyname_r(scheme, "tcp", &se_buf, buf, len, &se_res); - len *= 2; - } while (rc == ERANGE && len <= 0xfff); - - if (!rc) { - port = ntohs(se_res->s_port); - } - - efree(buf); -#elif !defined(ZTS) && defined(HAVE_GETSERVBYPORT) - struct servent *se; - - if ((se = getservbyname(scheme, "tcp")) && se->s_port) { - port = ntohs(se->s_port); - } -#endif - - return port; -} -static inline char *scheme(unsigned port) -{ - char *scheme; -#if defined(ZTS) && defined(HAVE_GETSERVBYPORT_R) - int rc; - size_t len = 0xff; - char *buf = NULL; - struct servent *se_res = NULL, se_buf = {0}; -#elif !defined(ZTS) && defined(HAVE_GETSERVBYPORT) - struct servent *se; -#endif - - switch (port) { - case 443: - scheme = estrndup("https", lenof("https")); - break; - -#if defined(ZTS) && !defined(HAVE_GETSERVBYPORT_R) - default: -#elif !defined(ZTS) && !defined(HAVE_GETSERVBYPORT) - default: -#endif - case 80: - case 0: - scheme = estrndup("http", lenof("http")); - break; - -#if defined(ZTS) && defined(HAVE_GETSERVBYPORT_R) - default: - do { - buf = erealloc(buf, len); - rc = getservbyport_r(htons(port), "tcp", &se_buf, buf, len, &se_res); - len *= 2; - } while (rc == ERANGE && len <= 0xfff); - - if (!rc && se_res) { - scheme = estrdup(se_res->s_name); - } else { - scheme = estrndup("http", lenof("http")); - } - - efree(buf); - break; - -#elif !defined(ZTS) && defined(HAVE_GETSERVBYPORT) - default: - if ((se = getservbyport(htons(port), "tcp")) && se->s_name) { - scheme = estrdup(se->s_name); - } else { - scheme = estrndup("http", lenof("http")); - } - break; -#endif - } - return scheme; -} - static php_url *php_http_url_from_env(php_url *url TSRMLS_DC) { zval *https, *zhost, *zport; @@ -150,7 +77,7 @@ static php_url *php_http_url_from_env(php_url *url TSRMLS_DC) if (https && !strcasecmp(Z_STRVAL_P(https), "ON")) { url->scheme = estrndup("https", lenof("https")); } else { - url->scheme = scheme(url->port); + url->scheme = estrndup("http", lenof("http")); } /* host */ @@ -183,7 +110,7 @@ static php_url *php_http_url_from_env(php_url *url TSRMLS_DC) return url; } -PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC) +void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC) { php_url *url, *tmp_url = NULL; @@ -339,7 +266,6 @@ PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url if (url->port) { if ( ((url->port == 80) && !strcmp(url->scheme, "http")) || ((url->port ==443) && !strcmp(url->scheme, "https")) - || ( url->port == port(url->scheme)) ) { url->port = 0; } @@ -356,7 +282,7 @@ PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url } } -PHP_HTTP_API STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC) +STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC) { const char *arg_sep_str; size_t arg_sep_len; @@ -375,7 +301,7 @@ PHP_HTTP_API STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_en return SUCCESS; } -PHP_HTTP_API STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC) +STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC) { if (pre_encoded_len && pre_encoded_str) { php_http_buffer_append(qstr, pre_encoded_str, pre_encoded_len); @@ -388,6 +314,626 @@ PHP_HTTP_API STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer return SUCCESS; } +void php_http_url_free(php_http_url_t **url) +{ + if (*url) { + efree(*url); + *url = NULL; + } +} + +static size_t parse_mb_utf8(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn) +{ + unsigned wchar; + size_t consumed = utf8towc(&wchar, (const unsigned char *) ptr, end - ptr); + + if (!consumed || consumed == (size_t) -1) { + return 0; + } + if (!idn && !isualnum(wchar)) { + return 0; + } + + return consumed; +} + +#ifdef PHP_HTTP_HAVE_WCHAR +static size_t parse_mb_loc(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn) +{ + wchar_t wchar; + size_t consumed = 0; +#if defined(HAVE_MBRTOWC) + mbstate_t ps = {0}; + + consumed = mbrtowc(&wchar, ptr, end - ptr, &ps); +#elif defined(HAVE_MBTOWC) + consumed = mbtowc(&wchar, ptr, end - ptr); +#endif + + if (!consumed || consumed == (size_t) -1) { + return 0; + } + if (!idn && !iswalnum(wchar)) { + return 0; + } + + return consumed; +} +#endif + +typedef enum parse_mb_what { + PARSE_SCHEME, + PARSE_USERINFO, + PARSE_HOSTINFO, + PARSE_PATH, + PARSE_QUERY, + PARSE_FRAGMENT +} parse_mb_what_t; + +static const char * const parse_what[] = { + "scheme", + "userinfo", + "hostinfo", + "path", + "query", + "fragment" +}; + +static size_t parse_mb(php_http_url_t *url, parse_mb_what_t what, const char *ptr, const char *end, const char *begin, zend_bool silent) +{ + size_t consumed = 0; + zend_bool idn = (what == PARSE_HOSTINFO) && (url->flags & PHP_HTTP_URL_PARSE_IDN); + + if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) { + consumed = parse_mb_utf8(url, ptr, end, idn); + } +#ifdef PHP_HTTP_HAVE_WCHAR + else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) { + consumed = parse_mb_loc(url, ptr, end, idn); + } +#endif + + if (consumed) { + PHP_HTTP_DUFF(consumed, url->buffer[url->offset++] = *ptr++); + } else if (!silent) { + TSRMLS_FETCH_FROM_CTX(url->ts); + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'", + parse_what[what], (unsigned char) *ptr, (unsigned) (ptr - begin), begin); + } + + return consumed; +} + +static STATUS parse_userinfo(php_http_url_t *url, const char *ptr) +{ + size_t mb; + const char *password = NULL, *end = url->ptr, *tmp = ptr; + TSRMLS_FETCH_FROM_CTX(url->ts); + + do { + switch (*ptr) { + case ':': + if (password) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse password; duplicate ':' at pos %u in '%s'", + (unsigned) (ptr - tmp), tmp); + return FAILURE; + } + password = ptr + 1; + url->buffer[url->offset++] = *ptr; + break; + + case '%': + if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse userinfo; invalid percent encoding at pos %u in '%s'", + (unsigned) (ptr - tmp), tmp); + return FAILURE; + } + url->buffer[url->offset++] = *ptr++; + url->buffer[url->offset++] = *ptr++; + url->buffer[url->offset++] = *ptr; + break; + + case '!': case '$': case '&': case '\'': case '(': case ')': case '*': + case '+': case ',': case ';': case '=': /* sub-delims */ + case '-': case '.': case '_': case '~': /* unreserved */ + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': + /* allowed */ + url->buffer[url->offset++] = *ptr; + break; + + default: + if (!(mb = parse_mb(url, PARSE_USERINFO, ptr, end, tmp, 0))) { + return FAILURE; + } + ptr += mb - 1; + } + } while(++ptr != end); + + if (password) { + url->user = &url->buffer[url->offset - (end - password) - (password - tmp)]; + url->buffer[url->offset - (end - password) - 1] = 0; + url->pass = &url->buffer[url->offset - (end - password)]; + url->buffer[url->offset++] = 0; + } else { + url->user = &url->buffer[url->offset - (end - tmp)]; + url->buffer[url->offset++] = 0; + } + + return SUCCESS; +} + +static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr) +{ + size_t mb, len; + const char *end = url->ptr, *tmp = ptr, *port = NULL; + TSRMLS_FETCH_FROM_CTX(url->ts); + + +#ifdef HAVE_INET_PTON + if (*ptr == '[') { + char *error = NULL, *tmp = memchr(ptr, ']', end - ptr); + + if (tmp) { + size_t addrlen = tmp - ptr + 1; + char buf[16], *addr = estrndup(ptr + 1, addrlen - 2); + int rv = inet_pton(AF_INET6, addr, buf); + + efree(addr); + if (rv == 1) { + url->buffer[url->offset] = '['; + url->host = &url->buffer[url->offset]; + inet_ntop(AF_INET6, buf, url->host + 1, url->maxlen - url->offset); + url->offset += strlen(url->host); + url->buffer[url->offset++] = ']'; + url->buffer[url->offset++] = 0; + ptr = tmp + 1; + } else if (rv == -1) { + error = strerror(errno); + } else { + error = "unexpected '['"; + } + } else { + error = "expected ']'"; + } + + if (error) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse hostinfo; %s", error); + return FAILURE; + } + } +#endif + if (ptr != end) do { + switch (*ptr) { + case ':': + if (port) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse port; duplicate ':' at pos %u in '%s'", + (unsigned) (ptr - tmp), tmp); + return FAILURE; + } + port = ptr + 1; + break; + + case '%': + if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'", + (unsigned) (ptr - tmp), tmp); + return FAILURE; + } + url->buffer[url->offset++] = *ptr++; + url->buffer[url->offset++] = *ptr++; + url->buffer[url->offset++] = *ptr; + break; + + case '!': case '$': case '&': case '\'': case '(': case ')': case '*': + case '+': case ',': case ';': case '=': /* sub-delims */ + case '-': case '.': case '_': case '~': /* unreserved */ + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + if (port) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse port; unexpected char '%c' at pos %u in '%s'", + (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); + return FAILURE; + } + /* no break */ + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': + /* allowed */ + if (port) { + url->port *= 10; + url->port += *ptr - '0'; + } else { + url->buffer[url->offset++] = *ptr; + } + break; + + default: + if (port) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'", + (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); + return FAILURE; + } else if (!(mb = parse_mb(url, PARSE_HOSTINFO, ptr, end, tmp, 0))) { + return FAILURE; + } + ptr += mb - 1; + } + } while (++ptr != end); + + if (!url->host) { + len = (port ? port - tmp - 1 : end - tmp); + url->host = &url->buffer[url->offset - len]; + url->buffer[url->offset++] = 0; + } + +#ifdef PHP_HTTP_HAVE_IDN + if (url->flags & PHP_HTTP_URL_PARSE_IDN) { + char *idn = NULL; + int rv = -1; + + if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) { + rv = idna_to_ascii_8z(url->host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES); + } +# ifdef PHP_HTTP_HAVE_WCHAR + else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) { + rv = idna_to_ascii_lz(url->host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES); + } +# endif + if (rv != IDNA_SUCCESS) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv)); + return FAILURE; + } else { + size_t idnlen = strlen(idn); + memcpy(url->host, idn, idnlen + 1); + free(idn); + url->offset += idnlen - len; + } + } +#endif + + return SUCCESS; +} + +static const char *parse_authority(php_http_url_t *url) +{ + const char *tmp = url->ptr; + + do { + switch (*url->ptr) { + case '@': + /* userinfo delimiter */ + if (tmp != url->ptr && SUCCESS != parse_userinfo(url, tmp)) { + return NULL; + } + tmp = url->ptr + 1; + break; + + case '/': + case '?': + case '#': + case '\0': + /* host delimiter */ + if (tmp != url->ptr && SUCCESS != parse_hostinfo(url, tmp)) { + return NULL; + } + return url->ptr; + } + } while (++url->ptr <= url->end); + + return NULL; +} + +static const char *parse_path(php_http_url_t *url) +{ + size_t mb; + const char *tmp; + TSRMLS_FETCH_FROM_CTX(url->ts); + + /* is there actually a path to parse? */ + if (!*url->ptr) { + return url->ptr; + } + tmp = url->ptr; + + do { + switch (*url->ptr) { + case '?': + case '\0': + /* did we have any path component ? */ + if (tmp != url->ptr) { + url->path = &url->buffer[url->offset - (url->ptr - tmp)]; + url->buffer[url->offset++] = 0; + } + return url->ptr; + + case '%': + if (url->ptr[1] != '%' && (url->end - url->ptr <= 2 || !isxdigit(*(url->ptr+1)) || !isxdigit(*(url->ptr+2)))) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse path; invalid percent encoding at pos %u in '%s'", + (unsigned) (url->ptr - tmp), tmp); + return NULL; + } + url->buffer[url->offset++] = *url->ptr++; + url->buffer[url->offset++] = *url->ptr++; + url->buffer[url->offset++] = *url->ptr; + break; + + case '/': /* yeah, well */ + case '!': case '$': case '&': case '\'': case '(': case ')': case '*': + case '+': case ',': case ';': case '=': /* sub-delims */ + case '-': case '.': case '_': case '~': /* unreserved */ + case ':': case '@': /* pchar */ + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': + /* allowed */ + url->buffer[url->offset++] = *url->ptr; + break; + + default: + if (!(mb = parse_mb(url, PARSE_PATH, url->ptr, url->end, tmp, 0))) { + return NULL; + } + url->ptr += mb - 1; + } + } while (++url->ptr <= url->end); + + return NULL; +} + +static const char *parse_query(php_http_url_t *url) +{ + size_t mb; + const char *tmp = url->ptr + !!*url->ptr; + TSRMLS_FETCH_FROM_CTX(url->ts); + + /* is there actually a query to parse ? */ + if (!*url->ptr || *url->ptr != '?') { + return url->ptr; + } + + /* skip initial '?' */ + tmp = url->ptr + 1; + + do { + switch (*url->ptr) { + case '#': + case '\0': + url->query = &url->buffer[url->offset - (url->ptr - tmp)]; + url->buffer[url->offset++] = 0; + return url->ptr; + + case '%': + if (url->ptr[1] != '%' && (url->end - url->ptr <= 2 || !isxdigit(*(url->ptr+1)) || !isxdigit(*(url->ptr+2)))) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse query; invalid percent encoding at pos %u in '%s'", + (unsigned) (url->ptr - tmp), tmp); + return NULL; + } + url->buffer[url->offset++] = *url->ptr++; + url->buffer[url->offset++] = *url->ptr++; + url->buffer[url->offset++] = *url->ptr; + break; + + case '?': case '/': /* yeah, well */ + case '!': case '$': case '&': case '\'': case '(': case ')': case '*': + case '+': case ',': case ';': case '=': /* sub-delims */ + case '-': case '.': case '_': case '~': /* unreserved */ + case ':': case '@': /* pchar */ + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': + /* allowed */ + url->buffer[url->offset++] = *url->ptr; + break; + + default: + if (!(mb = parse_mb(url, PARSE_QUERY, url->ptr, url->end, tmp, 0))) { + return NULL; + } + url->ptr += mb - 1; + } + } while (++url->ptr <= url->end); + + return NULL; +} + +static const char *parse_fragment(php_http_url_t *url) +{ + size_t mb; + const char *tmp; + TSRMLS_FETCH_FROM_CTX(url->ts); + + /* is there actually a fragment to parse */ + if (!*url->ptr || *url->ptr != '#') { + return url->ptr; + } + + /* skip initial '#' */ + tmp = url->ptr + 1; + + do { + switch (*url->ptr) { + case '\0': + url->fragment = &url->buffer[url->offset - (url->ptr - tmp)]; + url->buffer[url->offset++] = 0; + return url->ptr; + + case '%': + if (url->ptr[1] != '%' && (url->end - url->ptr <= 2 || !isxdigit(*(url->ptr+1)) || !isxdigit(*(url->ptr+2)))) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse fragment; invalid percent encoding at pos %u in '%s'", + (unsigned) (url->ptr - tmp), tmp); + return NULL; + } + url->buffer[url->offset++] = *url->ptr++; + url->buffer[url->offset++] = *url->ptr++; + url->buffer[url->offset++] = *url->ptr; + break; + + case '?': case '/': /* yeah, well */ + case '!': case '$': case '&': case '\'': case '(': case ')': case '*': + case '+': case ',': case ';': case '=': /* sub-delims */ + case '-': case '.': case '_': case '~': /* unreserved */ + case ':': case '@': /* pchar */ + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': + /* allowed */ + url->buffer[url->offset++] = *url->ptr; + break; + + default: + if (!(mb = parse_mb(url, PARSE_FRAGMENT, url->ptr, url->end, tmp, 0))) { + return NULL; + } + url->ptr += mb - 1; + } + } while (++url->ptr <= url->end); + + return NULL; +} + +static const char *parse_hier(php_http_url_t *url) +{ + if (*url->ptr == '/') { + if (url->end - url->ptr > 1) { + if (*(url->ptr + 1) == '/') { + url->ptr += 2; + if (!(url->ptr = parse_authority(url))) { + return NULL; + } + } + } + } + return parse_path(url); +} + +static const char *parse_scheme(php_http_url_t *url) +{ + size_t mb; + const char *tmp = url->ptr; + + do { + switch (*url->ptr) { + case ':': + /* scheme delimiter */ + url->scheme = &url->buffer[0]; + url->buffer[url->offset++] = 0; + return ++url->ptr; + + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': + case '+': case '-': case '.': + if (url->ptr == tmp) { + return tmp; + } + /* no break */ + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + /* scheme part */ + url->buffer[url->offset++] = *url->ptr; + break; + + default: + if (!(mb = parse_mb(url, PARSE_SCHEME, url->ptr, url->end, tmp, 1))) { + /* soft fail; parse path next */ + return tmp; + } + url->ptr += mb - 1; + } + } while (++url->ptr != url->end); + + return tmp; +} + +struct parser_state { +}; + +php_http_url_t *php_http_url_parse(const char *str, size_t len, unsigned flags TSRMLS_DC) +{ + size_t maxlen = 3 * len; + php_http_url_t *url = ecalloc(1, sizeof(*url) + maxlen); + + url->end = str + len; + url->ptr = str; + url->flags = flags; + url->maxlen = maxlen; + TSRMLS_SET_CTX(url->ts); + + if (!parse_scheme(url)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL scheme: '%s'", url->ptr); + php_http_url_free(&url); + return NULL; + } + + if (!parse_hier(url)) { + php_http_url_free(&url); + return NULL; + } + + if (!parse_query(url)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL query: '%s'", url->ptr); + php_http_url_free(&url); + return NULL; + } + + if (!parse_fragment(url)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL fragment: '%s'", url->ptr); + php_http_url_free(&url); + return NULL; + } + + return url; +} + ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct, 0, 0, 0) ZEND_ARG_INFO(0, old_url) ZEND_ARG_INFO(0, new_url) @@ -395,67 +941,70 @@ ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct, 0, 0, 0) ZEND_END_ARG_INFO(); PHP_METHOD(HttpUrl, __construct) { - with_error_handling(EH_THROW, php_http_exception_class_entry) { - zval *new_url = NULL, *old_url = NULL; - long flags = PHP_HTTP_URL_FROM_ENV; - - if (SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|z!z!l", &old_url, &new_url, &flags)) { - with_error_handling(EH_THROW, php_http_exception_class_entry) { - php_url *res_purl, *new_purl = NULL, *old_purl = NULL; - - if (new_url) { - switch (Z_TYPE_P(new_url)) { - case IS_OBJECT: - case IS_ARRAY: - new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC); - break; - default: { - zval *cpy = php_http_ztyp(IS_STRING, new_url); + zval *new_url = NULL, *old_url = NULL; + long flags = PHP_HTTP_URL_FROM_ENV; + zend_error_handling zeh; - new_purl = php_url_parse(Z_STRVAL_P(cpy)); - zval_ptr_dtor(&cpy); - break; - } - } - if (!new_purl) { - return; - } - } - if (old_url) { - switch (Z_TYPE_P(old_url)) { - case IS_OBJECT: - case IS_ARRAY: - old_purl = php_http_url_from_struct(NULL, HASH_OF(old_url) TSRMLS_CC); - break; - default: { - zval *cpy = php_http_ztyp(IS_STRING, old_url); + php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|z!z!l", &old_url, &new_url, &flags), invalid_arg, return); - old_purl = php_url_parse(Z_STRVAL_P(cpy)); - zval_ptr_dtor(&cpy); - break; - } - } - if (!old_purl) { - if (new_purl) { - php_url_free(new_purl); - } - return; - } - } + zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC); + { + php_url *res_purl, *new_purl = NULL, *old_purl = NULL; - php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC); - php_http_url_to_struct(res_purl, getThis() TSRMLS_CC); + if (new_url) { + switch (Z_TYPE_P(new_url)) { + case IS_OBJECT: + case IS_ARRAY: + new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC); + break; + default: { + zval *cpy = php_http_ztyp(IS_STRING, new_url); - php_url_free(res_purl); - if (old_purl) { - php_url_free(old_purl); + new_purl = php_url_parse(Z_STRVAL_P(cpy)); + zval_ptr_dtor(&cpy); + break; + } + } + if (!new_purl) { + zend_restore_error_handling(&zeh TSRMLS_CC); + return; + } + } + if (old_url) { + switch (Z_TYPE_P(old_url)) { + case IS_OBJECT: + case IS_ARRAY: + old_purl = php_http_url_from_struct(NULL, HASH_OF(old_url) TSRMLS_CC); + break; + default: { + zval *cpy = php_http_ztyp(IS_STRING, old_url); + + old_purl = php_url_parse(Z_STRVAL_P(cpy)); + zval_ptr_dtor(&cpy); + break; } + } + if (!old_purl) { if (new_purl) { php_url_free(new_purl); } - } end_error_handling(); + zend_restore_error_handling(&zeh TSRMLS_CC); + return; + } + } + + php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC); + php_http_url_to_struct(res_purl, getThis() TSRMLS_CC); + + php_url_free(res_purl); + if (old_purl) { + php_url_free(old_purl); + } + if (new_purl) { + php_url_free(new_purl); } - } end_error_handling(); + } + zend_restore_error_handling(&zeh TSRMLS_CC); } ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_mod, 0, 0, 1) @@ -466,8 +1015,12 @@ PHP_METHOD(HttpUrl, mod) { zval *new_url = NULL; long flags = PHP_HTTP_URL_JOIN_PATH | PHP_HTTP_URL_JOIN_QUERY; + zend_error_handling zeh; + + php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z!|l", &new_url, &flags), invalid_arg, return); - if (SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z!|l", &new_url, &flags)) { + zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC); + { php_url *new_purl = NULL, *old_purl = NULL; if (new_url) { @@ -485,6 +1038,7 @@ PHP_METHOD(HttpUrl, mod) } } if (!new_purl) { + zend_restore_error_handling(&zeh TSRMLS_CC); return; } } @@ -504,6 +1058,7 @@ PHP_METHOD(HttpUrl, mod) php_url_free(new_purl); } } + zend_restore_error_handling(&zeh TSRMLS_CC); } ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toString, 0, 0, 0) @@ -529,11 +1084,70 @@ ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toArray, 0, 0, 0) ZEND_END_ARG_INFO(); PHP_METHOD(HttpUrl, toArray) { + php_url *purl; + if (SUCCESS != zend_parse_parameters_none()) { - RETURN_FALSE; + return; } - array_init(return_value); - array_copy(HASH_OF(getThis()), HASH_OF(return_value)); + + /* strip any non-URL properties */ + purl = php_http_url_from_struct(NULL, HASH_OF(getThis()) TSRMLS_CC); + php_http_url_to_struct(purl, return_value TSRMLS_CC); + php_url_free(purl); +} + +ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_parse, 0, 0, 1) + ZEND_ARG_INFO(0, url) + ZEND_ARG_INFO(0, flags) +ZEND_END_ARG_INFO(); +PHP_METHOD(HttpUrl, parse) +{ + char *str; + int len; + long flags = 0; + php_http_url_t *url; + zend_error_handling zeh; + + php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &len, &flags), invalid_arg, return); + + zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC); + if ((url = php_http_url_parse(str, len, flags TSRMLS_CC))) { + object_init_ex(return_value, php_http_url_class_entry); + if (url->scheme) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("scheme"), url->scheme TSRMLS_CC); + } + if (url->user) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("user"), url->user TSRMLS_CC); + } + if (url->pass) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("pass"), url->pass TSRMLS_CC); + } + if (url->host) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("host"), url->host TSRMLS_CC); + } + if (url->port) { + zend_update_property_long(php_http_url_class_entry, return_value, + ZEND_STRL("port"), url->port TSRMLS_CC); + } + if (url->path) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("path"), url->path TSRMLS_CC); + } + if (url->query) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("query"), url->query TSRMLS_CC); + } + if (url->fragment) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("fragment"), url->fragment TSRMLS_CC); + } + php_http_url_free(&url); + } + zend_restore_error_handling(&zeh TSRMLS_CC); } static zend_function_entry php_http_url_methods[] = { @@ -542,6 +1156,7 @@ static zend_function_entry php_http_url_methods[] = { PHP_ME(HttpUrl, toString, ai_HttpUrl_toString, ZEND_ACC_PUBLIC) ZEND_MALIAS(HttpUrl, __toString, toString, ai_HttpUrl_toString, ZEND_ACC_PUBLIC) PHP_ME(HttpUrl, toArray, ai_HttpUrl_toArray, ZEND_ACC_PUBLIC) + PHP_ME(HttpUrl, parse, ai_HttpUrl_parse, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) EMPTY_FUNCTION_ENTRY }; @@ -552,7 +1167,7 @@ PHP_MINIT_FUNCTION(http_url) zend_class_entry ce = {0}; INIT_NS_CLASS_ENTRY(ce, "http", "Url", php_http_url_methods); - php_http_url_class_entry = zend_register_internal_class_ex(&ce, php_http_object_class_entry, NULL TSRMLS_CC); + php_http_url_class_entry = zend_register_internal_class(&ce TSRMLS_CC); zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("scheme"), ZEND_ACC_PUBLIC TSRMLS_CC); zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("user"), ZEND_ACC_PUBLIC TSRMLS_CC); @@ -577,6 +1192,14 @@ PHP_MINIT_FUNCTION(http_url) zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("FROM_ENV"), PHP_HTTP_URL_FROM_ENV TSRMLS_CC); zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("SANITIZE_PATH"), PHP_HTTP_URL_SANITIZE_PATH TSRMLS_CC); +#ifdef PHP_HTTP_HAVE_WCHAR + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBLOC"), PHP_HTTP_URL_PARSE_MBLOC TSRMLS_CC); +#endif + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC); +#ifdef PHP_HTTP_HAVE_IDN + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_IDN"), PHP_HTTP_URL_PARSE_IDN TSRMLS_CC); +#endif + return SUCCESS; }