release 2.4.0
[m6w6/ext-http] / php_http_url.c
index 3c82e6102d58f628a04d50ef6ff6679f47a72d58..35178dc882a8b2314c821681088e9b6d0b43d3b9 100644 (file)
@@ -550,7 +550,7 @@ HashTable *php_http_url_to_struct(const php_http_url_t *url, zval *strct TSRMLS_
        return Z_ARRVAL(arr);
 }
 
-STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC)
+ZEND_RESULT_CODE php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC)
 {
        const char *arg_sep_str;
        size_t arg_sep_len;
@@ -569,7 +569,7 @@ STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, si
        return SUCCESS;
 }
 
-STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC)
+ZEND_RESULT_CODE php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC)
 {
        if (pre_encoded_len && pre_encoded_str) {
                php_http_buffer_append(qstr, pre_encoded_str, pre_encoded_len);
@@ -746,15 +746,21 @@ static size_t parse_mb(struct parse_state *state, parse_mb_what_t what, const ch
 
        if (!silent) {
                TSRMLS_FETCH_FROM_CTX(state->ts);
-               php_error_docref(NULL TSRMLS_CC, E_WARNING,
-                               "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
-                               parse_what[what], (unsigned char) *ptr, (unsigned) (ptr - begin), begin);
+               if (consumed) {
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                       "Failed to parse %s; unexpected multibyte sequence 0x%x at pos %u in '%s'",
+                                       parse_what[what], wchar, (unsigned) (ptr - begin), begin);
+               } else {
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                       "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
+                                       parse_what[what], (unsigned char) *ptr, (unsigned) (ptr - begin), begin);
+               }
        }
 
        return 0;
 }
 
-static STATUS parse_userinfo(struct parse_state *state, const char *ptr)
+static ZEND_RESULT_CODE parse_userinfo(struct parse_state *state, const char *ptr)
 {
        size_t mb;
        const char *password = NULL, *end = state->ptr, *tmp = ptr;
@@ -819,11 +825,57 @@ static STATUS parse_userinfo(struct parse_state *state, const char *ptr)
        return SUCCESS;
 }
 
+#if defined(PHP_WIN32) || defined(HAVE_UIDNA_IDNTOASCII)
+typedef size_t (*parse_mb_func)(unsigned *wc, const char *ptr, const char *end);
+/*
+ * Convert the NUL-terminated multibyte string `u8` into UTF-16.
+ *
+ * `fn` decodes one character per call: it receives the current position and
+ * the end of the input, stores the decoded code point through its first
+ * argument and returns the number of bytes it consumed. A return value of 0
+ * is treated as a decoding failure.
+ *
+ * On SUCCESS `*u16` points to a zero-filled buffer obtained from ecalloc()
+ * which the caller has to efree(), and `*len` holds the number of UTF-16
+ * code units stored in it.
+ * On FAILURE a warning has been raised, the buffer has already been freed
+ * and `*u16` is reset to NULL so that it cannot be freed a second time.
+ */
+static ZEND_RESULT_CODE to_utf16(parse_mb_func fn, const char *u8, uint16_t **u16, size_t *len)
+{
+       size_t offset = 0, u8_len = strlen(u8);
+       /*
+        * This function takes no TSRMLS_DC parameter, so the thread context that
+        * php_error_docref() needs has to be fetched here; otherwise TSRMLS_CC
+        * refers to an undeclared tsrm_ls in thread-safe (ZTS) builds.
+        */
+       TSRMLS_FETCH();
+
+       /* u8_len + 1 elements of 4 UTF-16 units each; every decoded character
+        * consumes at least one byte and yields at most two units */
+       *u16 = ecalloc(u8_len + 1, 4 * sizeof(uint16_t));
+       *len = 0;
+
+       while (offset < u8_len) {
+               unsigned wc;
+               uint16_t buf[2], *ptr = buf;
+               size_t consumed = fn(&wc, &u8[offset], &u8[u8_len]);
+
+               if (!consumed) {
+                       efree(*u16);
+                       *u16 = NULL;
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse UTF-8 at pos %zu of '%s'", offset, u8);
+                       return FAILURE;
+               }
+               offset += consumed;
+
+               switch (wctoutf16(buf, wc)) {
+               case 2:
+                       /* two code units: store the first, then fall through for the second */
+                       (*u16)[(*len)++] = *ptr++;
+                       /* no break */
+               case 1:
+                       (*u16)[(*len)++] = *ptr++;
+                       break;
+               case 0:
+               default:
+                       efree(*u16);
+                       *u16 = NULL;
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to convert UTF-32 'U+%X' to UTF-16", wc);
+                       return FAILURE;
+               }
+       }
+
+       return SUCCESS;
+}
+#endif
+
+#ifndef MAXHOSTNAMELEN
+#      define MAXHOSTNAMELEN 256 /* fallback, only used if not defined already; element count of the UTF-16 stack buffers that receive the ASCII host */
+#endif
+
 #ifdef PHP_HTTP_HAVE_IDN
-static STATUS parse_idn(struct parse_state *state)
+static ZEND_RESULT_CODE parse_idn(struct parse_state *state, size_t prev_len)
 {
        char *idn = NULL;
        int rv = -1;
+       TSRMLS_FETCH_FROM_CTX(state->ts);
 
        if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
                rv = idna_to_ascii_8z(state->url.host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
@@ -840,7 +892,7 @@ static STATUS parse_idn(struct parse_state *state)
                size_t idnlen = strlen(idn);
                memcpy(state->url.host, idn, idnlen + 1);
                free(idn);
-               state->offset += idnlen - len;
+               state->offset += idnlen - prev_len;
                return SUCCESS;
        }
 }
@@ -854,57 +906,64 @@ typedef uint16_t UChar;
 typedef enum { U_ZERO_ERROR = 0 } UErrorCode;
 int32_t uidna_IDNToASCII(const UChar *src, int32_t srcLength, UChar *dest, int32_t destCapacity, int32_t options, void *parseError, UErrorCode *status);
 #      endif
-typedef size_t (*parse_mb_func)(unsigned *wc, const char *ptr, const char *end);
-static STATUS toutf16(parse_mb_func fn, const char *u8, uint16_t **u16, size_t *len)
+static ZEND_RESULT_CODE parse_uidn(struct parse_state *state) /* overwrites state->url.host in place with the output of uidna_IDNToASCII(); returns SUCCESS, or FAILURE once an E_WARNING has been raised */
 {
-       size_t offset = 0, u8_len = strlen(u8);
-
-       *u16 = ecalloc(4 * sizeof(uint16_t), u8_len + 1);
-       *len = 0;
-
-       while (offset < u8_len) {
-               unsigned wc;
-               uint16_t buf[2], *ptr = buf;
-               size_t consumed = fn(&wc, &u8[offset], &u8[u8_len]);
+       char *host_ptr;
+       uint16_t *uhost_str, ahost_str[MAXHOSTNAMELEN], *ahost_ptr; /* uhost_str: UTF-16 input allocated by to_utf16(); ahost_str: fixed-size output buffer for uidna_IDNToASCII() */
+       size_t uhost_len, ahost_len;
+       UErrorCode error = U_ZERO_ERROR;
+       TSRMLS_FETCH_FROM_CTX(state->ts);
 
-               if (!consumed) {
-                       efree(*u16);
+       if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
+               if (SUCCESS != to_utf16(parse_mb_utf8, state->url.host, &uhost_str, &uhost_len)) { /* to_utf16() frees its buffer and raises the warning itself on failure */
                         return FAILURE;
-               } else {
-                       offset += consumed;
                 }
-
-               switch (wctoutf16(buf, wc)) {
-               case 2:
-                       (*u16)[(*len)++] = *ptr++;
-                       /* no break */
-               case 1:
-                       (*u16)[(*len)++] = *ptr++;
-                       break;
-               case 0:
-               default:
-                       efree(*u16);
+#ifdef PHP_HTTP_HAVE_WCHAR
+       } else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) {
+               if (SUCCESS != to_utf16(parse_mb_loc, state->url.host, &uhost_str, &uhost_len)) {
                         return FAILURE;
                 }
+#endif
+       } else {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; codepage not specified");
+               return FAILURE;
         }
 
+       ahost_len = uidna_IDNToASCII(uhost_str, uhost_len, ahost_str, MAXHOSTNAMELEN, 3, NULL, &error); /* options 3: presumably UIDNA_ALLOW_UNASSIGNED|UIDNA_USE_STD3_RULES -- TODO confirm against ICU's uidna.h */
+       efree(uhost_str); /* the UTF-16 input is released before the status is checked, so both outcomes are covered */
+
+       if (error != U_ZERO_ERROR) { /* NOTE(review): every status other than U_ZERO_ERROR is treated as a failure, not only error codes */
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; ICU error %d", error);
+               return FAILURE;
+       }
+
+       host_ptr = state->url.host;
+       ahost_ptr = ahost_str;
+       PHP_HTTP_DUFF(ahost_len, *host_ptr++ = *ahost_ptr++); /* stores each uint16_t unit of the result into a char of the host; PHP_HTTP_DUFF is defined elsewhere, presumably repeating the statement ahost_len times */
+
+       *host_ptr = '\0';
+       state->offset += host_ptr - state->url.host; /* NOTE(review): adds the whole new host length, whereas parse_idn() adds idnlen - prev_len -- confirm this difference is intended */
+
         return SUCCESS;
 }
-static STATUS parse_uidn(struct parse_state *state)
+#endif
+
+#if 0 && defined(PHP_WIN32)
+static ZEND_RESULT_CODE parse_widn(struct parse_state *state)
 {
        char *host_ptr;
-       uint16_t *uhost_str = NULL, *ahost_str, *ahost_ptr;
-       size_t uhost_len, ahost_len;
-       UErrorCode error = U_ZERO_ERROR;
+       uint16_t *uhost_str, ahost_str[MAXHOSTNAMELEN], *ahost_ptr;
+       size_t uhost_len;
+       TSRMLS_FETCH_FROM_CTX(state->ts);
 
        if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
-               if (SUCCESS != toutf16(parse_mb_utf8, state->url.host, &uhost_str, &uhost_len)) {
+               if (SUCCESS != to_utf16(parse_mb_utf8, state->url.host, &uhost_str, &uhost_len)) {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN");
                        return FAILURE;
                }
 #ifdef PHP_HTTP_HAVE_WCHAR
        } else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) {
-               if (SUCCESS != toutf16(parse_mb_loc, state->url.host, &uhost_str, &uhost_len)) {
+               if (SUCCESS != to_utf16(parse_mb_loc, state->url.host, &uhost_str, &uhost_len)) {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN");
                        return FAILURE;
                }
@@ -914,20 +973,16 @@ static STATUS parse_uidn(struct parse_state *state)
                return FAILURE;
        }
 
-       ahost_len = uhost_len * 3;
-       ahost_str = ecalloc(sizeof(uint16_t), ahost_len);
-
-       ahost_len = uidna_IDNToASCII(uhost_str, uhost_len, ahost_str, ahost_len, 3, NULL, &error);
-       efree(uhost_str);
-
-       if (error != U_ZERO_ERROR) {
+       if (!IdnToAscii(IDN_ALLOW_UNASSIGNED|IDN_USE_STD3_ASCII_RULES, uhost_str, uhost_len, ahost_str, MAXHOSTNAMELEN)) {
+               efree(uhost_str);
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN");
                return FAILURE;
        }
 
+       efree(uhost_str);
        host_ptr = state->url.host;
        ahost_ptr = ahost_str;
-       PHP_HTTP_DUFF(ahost_len, *host_ptr++ = *ahost_ptr++);
+       PHP_HTTP_DUFF(wcslen(ahost_str), *host_ptr++ = *ahost_ptr++);
        efree(ahost_str);
 
        *host_ptr = '\0';
@@ -937,13 +992,12 @@ static STATUS parse_uidn(struct parse_state *state)
 }
 #endif
 
-static STATUS parse_hostinfo(struct parse_state *state, const char *ptr)
+static ZEND_RESULT_CODE parse_hostinfo(struct parse_state *state, const char *ptr)
 {
        size_t mb, len;
        const char *end = state->ptr, *tmp = ptr, *port = NULL;
        TSRMLS_FETCH_FROM_CTX(state->ts);
 
-
 #ifdef HAVE_INET_PTON
        if (*ptr == '[') {
                char *error = NULL, *tmp = memchr(ptr, ']', end - ptr);
@@ -1053,10 +1107,13 @@ static STATUS parse_hostinfo(struct parse_state *state, const char *ptr)
 
        if (state->flags & PHP_HTTP_URL_PARSE_TOIDN) {
 #ifdef PHP_HTTP_HAVE_IDN
-               return parse_idn(state);
+               return parse_idn(state, len);
 #endif
 #ifdef HAVE_UIDNA_IDNTOASCII
                return parse_uidn(state);
+#endif
+#if 0 && defined(PHP_WIN32)
+               return parse_widn(state);
 #endif
        }