X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=blobdiff_plain;f=src%2Fphp_http_url.c;h=5e8592ed8c7c0ae28adb95c19ba068ed172e40b2;hp=3ed2e3cbf2fd6beeee72a41ca50345b957efbdf6;hb=294724730ce2865ecb887ffbb3a1ea8afdea37f5;hpb=ab1eba311be2f8aab98eed7a6164e79d69b402b2 diff --git a/src/php_http_url.c b/src/php_http_url.c index 3ed2e3c..5e8592e 100644 --- a/src/php_http_url.c +++ b/src/php_http_url.c @@ -12,18 +12,26 @@ #include "php_http_api.h" -#if PHP_HTTP_HAVE_IDN2 +#if PHP_HTTP_HAVE_LIBIDN2 # include -#elif PHP_HTTP_HAVE_IDN +#endif +#if PHP_HTTP_HAVE_LIBIDN # include #endif +#if PHP_HTTP_HAVE_LIBICU +# include +#endif +#if PHP_HTTP_HAVE_LIBIDNKIT || PHP_HTTP_HAVE_LIBIDNKIT2 +# include +# include +#endif -#ifdef PHP_HTTP_HAVE_WCHAR +#if PHP_HTTP_HAVE_WCHAR # include # include #endif -#ifdef HAVE_ARPA_INET_H +#if HAVE_ARPA_INET_H # include #endif @@ -33,13 +41,13 @@ static inline char *localhostname(void) { char hostname[1024] = {0}; -#ifdef PHP_WIN32 +#if PHP_WIN32 if (SUCCESS == gethostname(hostname, lenof(hostname))) { return estrdup(hostname); } -#elif defined(HAVE_GETHOSTNAME) +#elif HAVE_GETHOSTNAME if (SUCCESS == gethostname(hostname, lenof(hostname))) { -# if defined(HAVE_GETDOMAINNAME) +# if HAVE_GETDOMAINNAME size_t hlen = strlen(hostname); if (hlen <= lenof(hostname) - lenof("(none)")) { hostname[hlen++] = '.'; @@ -503,7 +511,7 @@ php_http_url_t *php_http_url_from_struct(HashTable *ht) HashTable *php_http_url_to_struct(const php_http_url_t *url, zval *strct) { - HashTable *ht; + HashTable *ht = NULL; zval tmp; if (strct) { @@ -523,8 +531,8 @@ HashTable *php_http_url_to_struct(const php_http_url_t *url, zval *strct) } #define url_struct_add(part) \ - if (Z_TYPE_P(strct) == IS_ARRAY) { \ - zend_hash_str_update(Z_ARRVAL_P(strct), part, lenof(part), &tmp); \ + if (!strct || Z_TYPE_P(strct) == IS_ARRAY) { \ + zend_hash_str_update(ht, part, lenof(part), &tmp); \ } else { \ zend_update_property(Z_OBJCE_P(strct), strct, part, lenof(part), &tmp); \ zval_ptr_dtor(&tmp); \ @@ -670,17 +678,17 @@ static size_t parse_mb_utf8(unsigned *wc, const char *ptr, const char *end) return consumed; } -#ifdef PHP_HTTP_HAVE_WCHAR +#if PHP_HTTP_HAVE_WCHAR static size_t parse_mb_loc(unsigned *wc, const char *ptr, const char *end) { wchar_t wchar; size_t consumed = 0; -#if defined(HAVE_MBRTOWC) +#if HAVE_MBRTOWC mbstate_t ps; memset(&ps, 0, sizeof(ps)); consumed = mbrtowc(&wchar, ptr, end - ptr, &ps); -#elif defined(HAVE_MBTOWC) +#elif HAVE_MBTOWC consumed = mbtowc(&wchar, ptr, end - ptr); #endif @@ -723,7 +731,7 @@ static size_t parse_mb(struct parse_state *state, parse_mb_what_t what, const ch if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { consumed = parse_mb_utf8(&wchar, ptr, end); } -#ifdef PHP_HTTP_HAVE_WCHAR +#if PHP_HTTP_HAVE_WCHAR else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { consumed = parse_mb_loc(&wchar, ptr, end); } @@ -737,7 +745,7 @@ static size_t parse_mb(struct parse_state *state, parse_mb_what_t what, const ch if (!isualnum(wchar)) { break; } -#ifdef PHP_HTTP_HAVE_WCHAR +#if PHP_HTTP_HAVE_WCHAR } else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { if (!iswalnum(wchar)) { break; @@ -785,10 +793,16 @@ static ZEND_RESULT_CODE parse_userinfo(struct parse_state *state, const char *pt switch (*ptr) { case ':': if (password) { - php_error_docref(NULL, E_WARNING, - "Failed to parse password; duplicate ':' at pos %u in '%s'", - (unsigned) (ptr - tmp), tmp); - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse password; duplicate ':' at pos %u in '%s'", + (unsigned) (ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + state->buffer[state->offset++] = *ptr; + break; } password = ptr + 1; state->buffer[state->offset++] = 0; @@ -797,16 +811,31 @@ static ZEND_RESULT_CODE parse_userinfo(struct parse_state *state, const char *pt case '%': if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) { - php_error_docref(NULL, E_WARNING, - "Failed to parse userinfo; invalid percent encoding at pos %u in '%s'", - (unsigned) (ptr - tmp), tmp); - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse userinfo; invalid percent encoding at pos %u in '%s'", + (unsigned) (ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + state->buffer[state->offset++] = *ptr++; + break; } state->buffer[state->offset++] = *ptr++; state->buffer[state->offset++] = *ptr++; state->buffer[state->offset++] = *ptr; break; + default: + if ((mb = parse_mb(state, PARSE_USERINFO, ptr, end, tmp, state->flags & PHP_HTTP_URL_SILENT_ERRORS))) { + ptr += mb - 1; + break; + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + /* no break */ case '!': case '$': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': case ';': case '=': /* sub-delims */ case '-': case '.': case '_': case '~': /* unreserved */ @@ -824,13 +853,8 @@ static ZEND_RESULT_CODE parse_userinfo(struct parse_state *state, const char *pt state->buffer[state->offset++] = *ptr; break; - default: - if (!(mb = parse_mb(state, PARSE_USERINFO, ptr, end, tmp, 0))) { - return FAILURE; - } - ptr += mb - 1; } - } while(++ptr != end); + } while(++ptr < end); state->buffer[state->offset++] = 0; @@ -838,7 +862,7 @@ static ZEND_RESULT_CODE parse_userinfo(struct parse_state *state, const char *pt return SUCCESS; } -#if defined(PHP_WIN32) || defined(HAVE_UIDNA_IDNTOASCII) +#if PHP_WIN32 || HAVE_UIDNA_IDNTOASCII typedef size_t (*parse_mb_func)(unsigned *wc, const char *ptr, const char *end); static ZEND_RESULT_CODE to_utf16(parse_mb_func fn, const char *u8, uint16_t **u16, size_t *len) { @@ -879,12 +903,11 @@ static ZEND_RESULT_CODE to_utf16(parse_mb_func fn, const char *u8, uint16_t **u1 } #endif -#ifndef MAXHOSTNAMELEN -# define MAXHOSTNAMELEN 256 -#endif - -#if PHP_HTTP_HAVE_IDN2 -static ZEND_RESULT_CODE parse_idn2(struct parse_state *state, size_t prev_len) +#if PHP_HTTP_HAVE_LIBIDN2 +# if __GNUC__ +__attribute__ ((unused)) +# endif +static ZEND_RESULT_CODE parse_gidn_2008(struct parse_state *state, size_t prev_len) { char *idn = NULL; int rv = -1; @@ -892,103 +915,240 @@ static ZEND_RESULT_CODE parse_idn2(struct parse_state *state, size_t prev_len) if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { rv = idn2_lookup_u8((const unsigned char *) state->url.host, (unsigned char **) &idn, IDN2_NFC_INPUT); } -# ifdef PHP_HTTP_HAVE_WCHAR +# if PHP_HTTP_HAVE_WCHAR else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { rv = idn2_lookup_ul(state->url.host, &idn, 0); } # endif if (rv != IDN2_OK) { - php_error_docref(NULL, E_WARNING, "Failed to parse IDN; %s", idn2_strerror(rv)); - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, "Failed to parse IDN (IDNA2008); %s", idn2_strerror(rv)); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } } else { size_t idnlen = strlen(idn); memcpy(state->url.host, idn, idnlen + 1); free(idn); state->offset += idnlen - prev_len; - return SUCCESS; } + return SUCCESS; } -#elif PHP_HTTP_HAVE_IDN -static ZEND_RESULT_CODE parse_idn(struct parse_state *state, size_t prev_len) +#endif + +#if PHP_HTTP_HAVE_LIBIDN +# if __GNUC__ +__attribute__ ((unused)) +# endif +static ZEND_RESULT_CODE parse_gidn_2003(struct parse_state *state, size_t prev_len) { char *idn = NULL; int rv = -1; if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { - rv = idna_to_ascii_8z(state->url.host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES); + rv = idna_to_ascii_8z(state->url.host, &idn, IDNA_ALLOW_UNASSIGNED); } -# ifdef PHP_HTTP_HAVE_WCHAR +# if PHP_HTTP_HAVE_WCHAR else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { - rv = idna_to_ascii_lz(state->url.host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES); + rv = idna_to_ascii_lz(state->url.host, &idn, IDNA_ALLOW_UNASSIGNED); } # endif if (rv != IDNA_SUCCESS) { - php_error_docref(NULL, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv)); - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, "Failed to parse IDN (IDNA2003); %s", idna_strerror(rv)); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } } else { size_t idnlen = strlen(idn); memcpy(state->url.host, idn, idnlen + 1); free(idn); state->offset += idnlen - prev_len; - return SUCCESS; } + return SUCCESS; } #endif -#ifdef HAVE_UIDNA_IDNTOASCII -# if HAVE_UNICODE_UIDNA_H +#if HAVE_UIDNA_IDNTOASCII +# if PHP_HTTP_HAVE_LIBICU # include # else typedef uint16_t UChar; typedef enum { U_ZERO_ERROR = 0 } UErrorCode; int32_t uidna_IDNToASCII(const UChar *src, int32_t srcLength, UChar *dest, int32_t destCapacity, int32_t options, void *parseError, UErrorCode *status); # endif -static ZEND_RESULT_CODE parse_uidn(struct parse_state *state) +static ZEND_RESULT_CODE parse_uidn_2003(struct parse_state *state) { - char *host_ptr; - uint16_t *uhost_str, ahost_str[MAXHOSTNAMELEN], *ahost_ptr; + char *host_ptr = state->url.host, ebuf[64] = {0}, *error = NULL; + uint16_t *uhost_str, ahost_str[256], *ahost_ptr; size_t uhost_len, ahost_len; - UErrorCode error = U_ZERO_ERROR; + UErrorCode rc = U_ZERO_ERROR; if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { if (SUCCESS != to_utf16(parse_mb_utf8, state->url.host, &uhost_str, &uhost_len)) { - return FAILURE; + error = "failed to convert to UTF-16"; + goto error; } -#ifdef PHP_HTTP_HAVE_WCHAR +#if PHP_HTTP_HAVE_WCHAR } else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { if (SUCCESS != to_utf16(parse_mb_loc, state->url.host, &uhost_str, &uhost_len)) { - return FAILURE; + error = "failed to convert to UTF-16"; + goto error; } #endif } else { - php_error_docref(NULL, E_WARNING, "Failed to parse IDN; codepage not specified"); - return FAILURE; + error = "codepage not specified"; + goto error; } - ahost_len = uidna_IDNToASCII(uhost_str, uhost_len, ahost_str, MAXHOSTNAMELEN, 3, NULL, &error); +# if __GNUC__ >= 5 +# pragma GCC diagnostic ignored "-Wdeprecated-declarations" +# endif + ahost_len = uidna_IDNToASCII(uhost_str, uhost_len, ahost_str, 256, 3, NULL, &rc); +# if __GNUC__ >= 5 +# pragma GCC diagnostic pop +# endif + efree(uhost_str); + if (error > U_ZERO_ERROR) { + goto error; + } + + ahost_ptr = ahost_str; + PHP_HTTP_DUFF(ahost_len, *host_ptr++ = *ahost_ptr++); + *host_ptr = '\0'; + state->offset += host_ptr - state->url.host; + + return SUCCESS; + + error: + if (!error) { + slprintf(ebuf, sizeof(ebuf)-1, "errorcode: %d", rc); + error = ebuf; + } + php_error_docref(NULL, E_WARNING, "Failed to parse IDN (ICU IDNA2003); %s", error); - if (error != U_ZERO_ERROR) { - php_error_docref(NULL, E_WARNING, "Failed to parse IDN; ICU error %d", error); + return FAILURE; +} +#endif + +#if HAVE_UIDNA_IDNTOASCII +# if PHP_HTTP_HAVE_LIBICU +# include +# endif +static ZEND_RESULT_CODE parse_uidn_2008(struct parse_state *state) +{ + char *host_ptr, *error = NULL, ebuf[64] = {0}; + UErrorCode rc = U_ZERO_ERROR; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + UIDNA *uidna = uidna_openUTS46(UIDNA_ALLOW_UNASSIGNED, &rc); + + if (!uidna || U_FAILURE(rc)) { return FAILURE; } host_ptr = state->url.host; - ahost_ptr = ahost_str; - PHP_HTTP_DUFF(ahost_len, *host_ptr++ = *ahost_ptr++); + + if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { + char ahost_str[256], *ahost_ptr = &ahost_str[0]; + size_t ahost_len = uidna_nameToASCII_UTF8(uidna, host_ptr, -1, ahost_str, sizeof(ahost_str)-1, &info, &rc); + + if (U_FAILURE(rc) || info.errors) { + goto error; + } + PHP_HTTP_DUFF(ahost_len, *host_ptr++ = *ahost_ptr++); +#if PHP_HTTP_HAVE_WCHAR + } else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { + uint16_t *uhost_str, whost_str[256], *whost_ptr = &whost_str[0]; + size_t uhost_len, whost_len; + + if (SUCCESS != to_utf16(parse_mb_loc, host_ptr, &uhost_str, &uhost_len)) { + error = "could not convert to UTF-16"; + goto error; + } + + whost_len = uidna_nameToASCII(uidna, uhost_str, uhost_len, whost_str, sizeof(whost_str)-1, &info, &rc); + whost_ptr = whost_str; + efree(uhost_str); + if (U_FAILURE(rc) || info.errors) { + goto error; + } + PHP_HTTP_DUFF(whost_len, *host_ptr++ = *whost_ptr++); +#endif + } else { + error = "codepage not specified"; + goto error; + } *host_ptr = '\0'; state->offset += host_ptr - state->url.host; + uidna_close(uidna); return SUCCESS; + + error: + if (!error) { + if (U_FAILURE(rc)) { + slprintf(ebuf, sizeof(ebuf)-1, "%s", u_errorName(rc)); + error = ebuf; + } else if (info.errors) { + slprintf(ebuf, sizeof(ebuf)-1, "ICU IDNA error codes: 0x%x", info.errors); + error = ebuf; + } else { + error = "unknown error"; + } + } + php_error_docref(NULL, E_WARNING, "Failed to parse IDN (ICU IDNA2008); %s", error); + + uidna_close(uidna); + return FAILURE; } #endif -#if 0 && defined(PHP_WIN32) -static ZEND_RESULT_CODE parse_widn(struct parse_state *state) +#if PHP_HTTP_HAVE_LIBIDNKIT || PHP_HTTP_HAVE_LIBIDNKIT2 +# if __GNUC__ +__attribute__ ((unused)) +# endif +static ZEND_RESULT_CODE parse_kidn(struct parse_state *state) +{ + idn_result_t rc; +#if PHP_HTTP_HAVE_LIBIDNKIT + int actions = IDN_DELIMMAP|IDN_LOCALMAP|IDN_NAMEPREP|IDN_IDNCONV|IDN_LENCHECK; +#elif PHP_HTTP_HAVE_LIBIDNKIT2 + int actions = IDN_MAP|IDN_ASCLOWER|IDN_RTCONV|IDN_PROHCHECK|IDN_NFCCHECK|IDN_PREFCHECK|IDN_COMBCHECK|IDN_CTXOLITECHECK|IDN_BIDICHECK|IDN_LOCALCHECK|IDN_IDNCONV|IDN_LENCHECK|IDN_RTCHECK; +#endif + char ahost_str[256] = {0}, *ahost_ptr = &ahost_str[0], *host_ptr = state->url.host; + + if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { +#if PHP_HTTP_HAVE_LIBIDNKIT + actions |= IDN_LOCALCONV; +#elif PHP_HTTP_HAVE_LIBIDNKIT2 + actions |= IDN_UNICODECONV; +#endif + } + + rc = idn_encodename(actions, state->url.host, ahost_str, 256); + if (rc == idn_success) { + PHP_HTTP_DUFF(strlen(ahost_str), *host_ptr++ = *ahost_ptr++); + + *host_ptr = '\0'; + state->offset += host_ptr - state->url.host; + + return SUCCESS; + } else { + php_error_docref(NULL, E_WARNING, "Failed to parse IDN; %s", idn_result_tostring(rc)); + return FAILURE; + } +} +#endif + +#if 0 && PHP_WIN32 +static ZEND_RESULT_CODE parse_widn_2003(struct parse_state *state) { char *host_ptr; - uint16_t *uhost_str, ahost_str[MAXHOSTNAMELEN], *ahost_ptr; + uint16_t *uhost_str, ahost_str[256], *ahost_ptr; size_t uhost_len; if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { @@ -996,7 +1156,7 @@ static ZEND_RESULT_CODE parse_widn(struct parse_state *state) php_error_docref(NULL, E_WARNING, "Failed to parse IDN"); return FAILURE; } -#ifdef PHP_HTTP_HAVE_WCHAR +#if PHP_HTTP_HAVE_WCHAR } else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { if (SUCCESS != to_utf16(parse_mb_loc, state->url.host, &uhost_str, &uhost_len)) { php_error_docref(NULL, E_WARNING, "Failed to parse IDN"); @@ -1008,7 +1168,7 @@ static ZEND_RESULT_CODE parse_widn(struct parse_state *state) return FAILURE; } - if (!IdnToAscii(IDN_ALLOW_UNASSIGNED|IDN_USE_STD3_ASCII_RULES, uhost_str, uhost_len, ahost_str, MAXHOSTNAMELEN)) { + if (!IdnToAscii(IDN_ALLOW_UNASSIGNED, uhost_str, uhost_len, ahost_str, 256)) { efree(uhost_str); php_error_docref(NULL, E_WARNING, "Failed to parse IDN"); return FAILURE; @@ -1018,7 +1178,6 @@ static ZEND_RESULT_CODE parse_widn(struct parse_state *state) host_ptr = state->url.host; ahost_ptr = ahost_str; PHP_HTTP_DUFF(wcslen(ahost_str), *host_ptr++ = *ahost_ptr++); - efree(ahost_str); *host_ptr = '\0'; state->offset += host_ptr - state->url.host; @@ -1027,9 +1186,65 @@ static ZEND_RESULT_CODE parse_widn(struct parse_state *state) } #endif -#ifdef HAVE_INET_PTON +static ZEND_RESULT_CODE parse_idna(struct parse_state *state, size_t len) +{ +#if PHP_HTTP_HAVE_IDNA2008 + if ((state->flags & PHP_HTTP_URL_PARSE_TOIDN_2008) == PHP_HTTP_URL_PARSE_TOIDN_2008 +# if PHP_HTTP_HAVE_IDNA2003 + || (state->flags & PHP_HTTP_URL_PARSE_TOIDN_2003) != PHP_HTTP_URL_PARSE_TOIDN_2003 +# endif + ) { +#if HAVE_UIDNA_NAMETOASCII_UTF8 + return parse_uidn_2008(state); +#elif PHP_HTTP_HAVE_LIBIDN2 + return parse_gidn_2008(state, len); +#elif PHP_HTTP_HAVE_LIBIDNKIT2 + return parse_kidn(state); +#endif + } +#endif + +#if PHP_HTTP_HAVE_IDNA2003 + if ((state->flags & PHP_HTTP_URL_PARSE_TOIDN_2003) == PHP_HTTP_URL_PARSE_TOIDN_2003 +# if PHP_HTTP_HAVE_IDNA2008 + || (state->flags & PHP_HTTP_URL_PARSE_TOIDN_2008) != PHP_HTTP_URL_PARSE_TOIDN_2008 +#endif + ) { +#if HAVE_UIDNA_IDNTOASCII + return parse_uidn_2003(state); +#elif PHP_HTTP_HAVE_LIBIDN + return parse_gidn_2003(state, len); +#elif PHP_HTTP_HAVE_LIBIDNKIT + return parse_kidn(state); +#endif + } +#endif + +#if 0 && PHP_WIN32 + return parse_widn_2003(state); +#endif + +#if HAVE_UIDNA_NAMETOASCII_UTF8 + return parse_uidn_2008(state); +#elif PHP_HTTP_HAVE_LIBIDN2 + return parse_gidn_2008(state, len); +#elif PHP_HTTP_HAVE_LIBIDNKIT2 + return parse_kidn(state); +#elif HAVE_UIDNA_IDNTOASCII + return parse_uidn_2003(state); +#elif PHP_HTTP_HAVE_LIBIDN + return parse_gidn_2003(state, len); +#elif PHP_HTTP_HAVE_LIBIDNKIT + return parse_kidn(state); +#endif + + return SUCCESS; +} + +#if HAVE_INET_PTON static const char *parse_ip6(struct parse_state *state, const char *ptr) { + unsigned pos = 0; const char *error = NULL, *end = state->ptr, *tmp = memchr(ptr, ']', end - ptr); if (tmp) { @@ -1046,17 +1261,21 @@ static const char *parse_ip6(struct parse_state *state, const char *ptr) state->buffer[state->offset++] = 0; ptr = tmp + 1; } else if (rv == -1) { + pos = 1; error = strerror(errno); } else { error = "unexpected '['"; } efree(addr); } else { + pos = end - ptr; error = "expected ']'"; } if (error) { - php_error_docref(NULL, E_WARNING, "Failed to parse hostinfo; %s", error); + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, "Failed to parse hostinfo; %s at pos %u in '%s'", error, pos, ptr); + } return NULL; } @@ -1066,12 +1285,15 @@ static const char *parse_ip6(struct parse_state *state, const char *ptr) static ZEND_RESULT_CODE parse_hostinfo(struct parse_state *state, const char *ptr) { - size_t mb, len; + size_t mb, len = state->offset; const char *end = state->ptr, *tmp = ptr, *port = NULL, *label = NULL; -#ifdef HAVE_INET_PTON +#if HAVE_INET_PTON if (*ptr == '[' && !(ptr = parse_ip6(state, ptr))) { - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + ptr = tmp; } #endif @@ -1079,43 +1301,76 @@ static ZEND_RESULT_CODE parse_hostinfo(struct parse_state *state, const char *pt switch (*ptr) { case ':': if (port) { - php_error_docref(NULL, E_WARNING, - "Failed to parse port; unexpected ':' at pos %u in '%s'", - (unsigned) (ptr - tmp), tmp); - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse port; unexpected ':' at pos %u in '%s'", + (unsigned) (ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } } port = ptr + 1; break; case '%': if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) { - php_error_docref(NULL, E_WARNING, - "Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'", - (unsigned) (ptr - tmp), tmp); - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'", + (unsigned) (ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + state->buffer[state->offset++] = *ptr++; + break; } state->buffer[state->offset++] = *ptr++; state->buffer[state->offset++] = *ptr++; state->buffer[state->offset++] = *ptr; break; - case '!': case '$': case '&': case '\'': case '(': case ')': case '*': - case '+': case ',': case ';': case '=': /* sub-delims */ - case '-': case '.': case '_': case '~': /* unreserved */ + case '.': if (port || !label) { /* sort of a compromise, just ensure we don't end up * with a dot at the beginning or two consecutive dots */ - php_error_docref(NULL, E_WARNING, - "Failed to parse %s; unexpected '%c' at pos %u in '%s'", - port ? "port" : "host", - (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse %s; unexpected '%c' at pos %u in '%s'", + port ? "port" : "host", + (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + break; } state->buffer[state->offset++] = *ptr; label = NULL; break; + case '-': + if (!label) { + /* sort of a compromise, just ensure we don't end up + * with a hyphen at the beginning + */ + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse %s; unexpected '%c' at pos %u in '%s'", + port ? "port" : "host", + (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + break; + } + /* no break */ + case '_': case '~': /* unreserved */ + case '!': case '$': case '&': case '\'': case '(': case ')': case '*': + case '+': case ',': case ';': case '=': /* sub-delims */ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': @@ -1125,10 +1380,15 @@ static ZEND_RESULT_CODE parse_hostinfo(struct parse_state *state, const char *pt case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': if (port) { - php_error_docref(NULL, E_WARNING, - "Failed to parse port; unexpected char '%c' at pos %u in '%s'", - (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse port; unexpected char '%c' at pos %u in '%s'", + (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + break; } /* no break */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': @@ -1147,36 +1407,34 @@ static ZEND_RESULT_CODE parse_hostinfo(struct parse_state *state, const char *pt if (ptr == end) { break; } else if (port) { - php_error_docref(NULL, E_WARNING, - "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'", - (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); - return FAILURE; - } else if (!(mb = parse_mb(state, PARSE_HOSTINFO, ptr, end, tmp, 0))) { - return FAILURE; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'", + (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + break; + } else if (!(mb = parse_mb(state, PARSE_HOSTINFO, ptr, end, tmp, state->flags & PHP_HTTP_URL_SILENT_ERRORS))) { + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return FAILURE; + } + break; } label = ptr; ptr += mb - 1; } - } while (++ptr != end); + } while (++ptr < end); if (!state->url.host) { - len = (port ? port - tmp - 1 : end - tmp); + len = state->offset - len; state->url.host = &state->buffer[state->offset - len]; state->buffer[state->offset++] = 0; } if (state->flags & PHP_HTTP_URL_PARSE_TOIDN) { -#if PHP_HTTP_HAVE_IDN2 - return parse_idn2(state, len); -#elif PHP_HTTP_HAVE_IDN - return parse_idn(state, len); -#endif -#ifdef HAVE_UIDNA_IDNTOASCII - return parse_uidn(state); -#endif -#if 0 && defined(PHP_WIN32) - return parse_widn(state); -#endif + return parse_idna(state, len); } return SUCCESS; @@ -1191,9 +1449,14 @@ static const char *parse_authority(struct parse_state *state) case '@': /* userinfo delimiter */ if (host) { - php_error_docref(NULL, E_WARNING, - "Failed to parse userinfo; unexpected '@'"); - return NULL; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse userinfo; unexpected '@'"); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return NULL; + } + break; } host = state->ptr + 1; if (tmp != state->ptr && SUCCESS != parse_userinfo(state, tmp)) { @@ -1239,10 +1502,16 @@ static const char *parse_path(struct parse_state *state) case '%': if (state->ptr[1] != '%' && (state->end - state->ptr <= 2 || !isxdigit(*(state->ptr+1)) || !isxdigit(*(state->ptr+2)))) { - php_error_docref(NULL, E_WARNING, - "Failed to parse path; invalid percent encoding at pos %u in '%s'", - (unsigned) (state->ptr - tmp), tmp); - return NULL; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse path; invalid percent encoding at pos %u in '%s'", + (unsigned) (state->ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return NULL; + } + state->buffer[state->offset++] = *state->ptr; + break; } state->buffer[state->offset++] = *state->ptr++; state->buffer[state->offset++] = *state->ptr++; @@ -1269,8 +1538,11 @@ static const char *parse_path(struct parse_state *state) break; default: - if (!(mb = parse_mb(state, PARSE_PATH, state->ptr, state->end, tmp, 0))) { - return NULL; + if (!(mb = parse_mb(state, PARSE_PATH, state->ptr, state->end, tmp, state->flags & PHP_HTTP_URL_SILENT_ERRORS))) { + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return NULL; + } + break; } state->ptr += mb - 1; } @@ -1307,21 +1579,27 @@ static const char *parse_query(struct parse_state *state) case '%': if (state->ptr[1] != '%' && (state->end - state->ptr <= 2 || !isxdigit(*(state->ptr+1)) || !isxdigit(*(state->ptr+2)))) { - php_error_docref(NULL, E_WARNING, - "Failed to parse query; invalid percent encoding at pos %u in '%s'", - (unsigned) (state->ptr - tmp), tmp); - return NULL; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse query; invalid percent encoding at pos %u in '%s'", + (unsigned) (state->ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return NULL; + } + /* fallthrough, pct-encode the percent sign */ + } else { + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr; + break; } - state->buffer[state->offset++] = *state->ptr++; - state->buffer[state->offset++] = *state->ptr++; - state->buffer[state->offset++] = *state->ptr; - break; - - /* RFC1738 unsafe */ + /* no break */ case '{': case '}': case '<': case '>': case '[': case ']': case '|': case '\\': case '^': case '`': case '"': case ' ': + /* RFC1738 unsafe */ if (state->flags & PHP_HTTP_URL_PARSE_TOPCT) { state->buffer[state->offset++] = '%'; state->buffer[state->offset++] = parse_xdigits[((unsigned char) *state->ptr) >> 4]; @@ -1350,8 +1628,11 @@ static const char *parse_query(struct parse_state *state) break; default: - if (!(mb = parse_mb(state, PARSE_QUERY, state->ptr, state->end, tmp, 0))) { - return NULL; + if (!(mb = parse_mb(state, PARSE_QUERY, state->ptr, state->end, tmp, state->flags & PHP_HTTP_URL_SILENT_ERRORS))) { + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return NULL; + } + break; } state->ptr += mb - 1; } @@ -1380,23 +1661,42 @@ static const char *parse_fragment(struct parse_state *state) do { switch (*state->ptr) { - case '%': - if (state->ptr[1] != '%' && (state->end - state->ptr <= 2 || !isxdigit(*(state->ptr+1)) || !isxdigit(*(state->ptr+2)))) { + case '#': + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { php_error_docref(NULL, E_WARNING, - "Failed to parse fragment; invalid percent encoding at pos %u in '%s'", + "Failed to parse fragment; invalid fragment identifier at pos %u in '%s'", (unsigned) (state->ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { return NULL; } - state->buffer[state->offset++] = *state->ptr++; - state->buffer[state->offset++] = *state->ptr++; state->buffer[state->offset++] = *state->ptr; break; - /* RFC1738 unsafe */ + case '%': + if (state->ptr[1] != '%' && (state->end - state->ptr <= 2 || !isxdigit(*(state->ptr+1)) || !isxdigit(*(state->ptr+2)))) { + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse fragment; invalid percent encoding at pos %u in '%s'", + (unsigned) (state->ptr - tmp), tmp); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return NULL; + } + /* fallthrough */ + } else { + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr; + break; + } + /* no break */ + case '{': case '}': case '<': case '>': case '[': case ']': case '|': case '\\': case '^': case '`': case '"': case ' ': + /* RFC1738 unsafe */ if (state->flags & PHP_HTTP_URL_PARSE_TOPCT) { state->buffer[state->offset++] = '%'; state->buffer[state->offset++] = parse_xdigits[((unsigned char) *state->ptr) >> 4]; @@ -1425,8 +1725,11 @@ static const char *parse_fragment(struct parse_state *state) break; default: - if (!(mb = parse_mb(state, PARSE_FRAGMENT, state->ptr, state->end, tmp, 0))) { - return NULL; + if (!(mb = parse_mb(state, PARSE_FRAGMENT, state->ptr, state->end, tmp, state->flags & PHP_HTTP_URL_SILENT_ERRORS))) { + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + return NULL; + } + break; } state->ptr += mb - 1; } @@ -1489,7 +1792,7 @@ static const char *parse_scheme(struct parse_state *state) } state->ptr += mb - 1; } - } while (++state->ptr != state->end); + } while (++state->ptr < state->end); softfail: state->offset = 0; @@ -1548,11 +1851,15 @@ php_http_url_t *php_http_url_parse_authority(const char *str, size_t len, unsign } if (state->ptr != state->end) { - php_error_docref(NULL, E_WARNING, - "Failed to parse URL authority, unexpected character at pos %u in '%s'", - (unsigned) (state->ptr - str), str); - efree(state); - return NULL; + if (!(state->flags & PHP_HTTP_URL_SILENT_ERRORS)) { + php_error_docref(NULL, E_WARNING, + "Failed to parse URL authority, unexpected character at pos %u in '%s'", + (unsigned) (state->ptr - str), str); + } + if (!(state->flags & PHP_HTTP_URL_IGNORE_ERRORS)) { + efree(state); + return NULL; + } } return (php_http_url_t *) state; @@ -1589,7 +1896,13 @@ PHP_METHOD(HttpUrl, __construct) flags |= PHP_HTTP_URL_FROM_ENV; } - zend_replace_error_handling(EH_THROW, php_http_get_exception_bad_url_class_entry(), &zeh); + if (flags & PHP_HTTP_URL_SILENT_ERRORS) { + zend_replace_error_handling(EH_SUPPRESS, NULL, &zeh); + } else if (flags & PHP_HTTP_URL_IGNORE_ERRORS) { + zend_replace_error_handling(EH_NORMAL, NULL, &zeh); + } else { + zend_replace_error_handling(EH_THROW, php_http_get_exception_bad_url_class_entry(), &zeh); + } { php_http_url_t *res_purl, *new_purl = NULL, *old_purl = NULL; @@ -1637,7 +1950,13 @@ PHP_METHOD(HttpUrl, mod) php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS(), "z!|l", &new_url, &flags), invalid_arg, return); - zend_replace_error_handling(EH_THROW, php_http_get_exception_bad_url_class_entry(), &zeh); + if (flags & PHP_HTTP_URL_SILENT_ERRORS) { + zend_replace_error_handling(EH_SUPPRESS, NULL, &zeh); + } else if (flags & PHP_HTTP_URL_IGNORE_ERRORS) { + zend_replace_error_handling(EH_NORMAL, NULL, &zeh); + } else { + zend_replace_error_handling(EH_THROW, php_http_get_exception_bad_url_class_entry(), &zeh); + } { php_http_url_t *new_purl = NULL, *old_purl = NULL; @@ -1741,18 +2060,29 @@ PHP_MINIT_FUNCTION(http_url) zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("FROM_ENV"), PHP_HTTP_URL_FROM_ENV); zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("SANITIZE_PATH"), PHP_HTTP_URL_SANITIZE_PATH); -#ifdef PHP_HTTP_HAVE_WCHAR +#if PHP_HTTP_HAVE_WCHAR zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBLOC"), PHP_HTTP_URL_PARSE_MBLOC); #endif zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8); -#if defined(PHP_HTTP_HAVE_IDN2) || defined(PHP_HTTP_HAVE_IDN) || defined(HAVE_UIDNA_IDNTOASCII) +#if PHP_HTTP_HAVE_LIBIDN2 || PHP_HTTP_HAVE_LIBIDN || PHP_HTTP_HAVE_LIBIDNKIT || PHP_HTTP_HAVE_LIBIDNKIT2 || HAVE_UIDNA_IDNTOASCII || HAVE_UIDNA_NAMETOASCII_UTF8 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOIDN"), PHP_HTTP_URL_PARSE_TOIDN); +# if PHP_HTTP_HAVE_IDNA2003 + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOIDN_2003"), PHP_HTTP_URL_PARSE_TOIDN_2003); +# endif +# if PHP_HTTP_HAVE_IDNA2008 + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOIDN_2008"), PHP_HTTP_URL_PARSE_TOIDN_2008); +# endif #endif zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOPCT"), PHP_HTTP_URL_PARSE_TOPCT); INIT_NS_CLASS_ENTRY(ce, "http\\Env", "Url", php_http_url_methods); php_http_env_url_class_entry = zend_register_internal_class_ex(&ce, php_http_url_class_entry); + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("IGNORE_ERRORS"), PHP_HTTP_URL_IGNORE_ERRORS); + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("SILENT_ERRORS"), PHP_HTTP_URL_SILENT_ERRORS); + + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("STDFLAGS"), PHP_HTTP_URL_STDFLAGS); + return SUCCESS; }