X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=blobdiff_plain;f=php_http_url.c;h=05850e8c42a74482c90a8725147ebf4d4d36f751;hp=2bf40b77c49af552a2c226c0e335acca7d6e35b2;hb=31636a8ec8578e7e4d45da180d55e1260bb527db;hpb=fbd9ab2bcfe653d1189a025cc75c38ba3f89f19d diff --git a/php_http_url.c b/php_http_url.c index 2bf40b7..05850e8 100644 --- a/php_http_url.c +++ b/php_http_url.c @@ -21,6 +21,12 @@ # include #endif +#ifdef HAVE_ARPA_INET_H +# include +#endif + +#include "php_http_utf8.h" + static inline char *localhostname(void) { char hostname[1024] = {0}; @@ -308,114 +314,28 @@ STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, con return SUCCESS; } -void php_http_url_dtor(php_http_url_t *url) -{ - STR_FREE(url->scheme.str); - STR_FREE(url->authority.userinfo.username.str); - STR_FREE(url->authority.userinfo.password.str); - STR_FREE(url->authority.host.str); - STR_FREE(url->path.str); - STR_FREE(url->query.str); - STR_FREE(url->fragment.str); -} +struct parse_state { + php_http_url_t url; +#ifdef ZTS + void ***ts; +#endif + const char *ptr; + const char *end; + size_t maxlen; + off_t offset; + unsigned flags; + char buffer[1]; /* last member */ +}; void php_http_url_free(php_http_url_t **url) { if (*url) { - php_http_url_dtor(*url); efree(*url); *url = NULL; } } -static const unsigned char utf8mblen[256] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6 -}; -static const unsigned char utf8mask[] = { - 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 -}; - -static inline size_t utf8towc(unsigned *wc, const unsigned char *uc, size_t len) -{ - unsigned char ub = utf8mblen[*uc]; - - if (!ub || ub > len || ub > 3) { - return 0; - } - - *wc = *uc & utf8mask[ub]; - - switch (ub) { - case 4: - if ((uc[1] & 0xc0) != 0x80) { - return 0; - } - *wc <<= 6; - *wc += *++uc & 0x3f; - /* no break */ - case 3: - if ((uc[1] & 0xc0) != 0x80) { - return 0; - } - *wc <<= 6; - *wc += *++uc & 0x3f; - /* no break */ - case 2: - if ((uc[1] & 0xc0) != 0x80) { - return 0; - } - *wc <<= 6; - *wc += *++uc & 0x3f; - break; - - default: - return 0; - } - - return ub; -} - -#include "ualpha.h" - -static inline zend_bool isualnum(unsigned ch) -{ - unsigned i; - - /* digits */ - if (ch >= 0x30 && ch <= 0x39) { - return 1; - } - - for (i = 0; i < sizeof(utf8_ranges)/sizeof(utf8_range_t); ++i) { - if (utf8_ranges[i].start == ch) { - return 1; - } else if (utf8_ranges[i].start <= ch && utf8_ranges[i].end >= ch) { - if (utf8_ranges[i].step == 1) { - return 1; - } - /* FIXME step */ - return 0; - } - } - return 0; -} - -static size_t parse_mb_utf8(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn) +static size_t parse_mb_utf8(unsigned *wc, const char *ptr, const char *end) { unsigned wchar; size_t consumed = utf8towc(&wchar, (const unsigned char *) ptr, end - ptr); @@ -423,15 +343,15 @@ static size_t parse_mb_utf8(php_http_url_t *url, const char *ptr, const char *en if (!consumed || consumed == (size_t) -1) { return 0; } - if (!idn && !isualnum(wchar)) { - return 0; - } + if (wc) { + *wc = wchar; + } return consumed; } #ifdef PHP_HTTP_HAVE_WCHAR -static size_t parse_mb_loc(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn) +static size_t parse_mb_loc(unsigned *wc, const char *ptr, const char *end) { wchar_t wchar; size_t consumed = 0; @@ -446,10 +366,10 @@ static size_t parse_mb_loc(php_http_url_t *url, const char *ptr, const char *end if (!consumed || consumed == (size_t) -1) { return 0; } - if (!idn && !iswalnum(wchar)) { - return 0; - } + if (wc) { + *wc = wchar; + } return consumed; } #endif @@ -472,35 +392,69 @@ static const char * const parse_what[] = { "fragment" }; -static size_t parse_mb(php_http_url_t *url, parse_mb_what_t what, const char *ptr, const char *end, const char *begin, zend_bool silent) +static const char parse_xdigits[] = "0123456789ABCDEF"; + +static size_t parse_mb(struct parse_state *state, parse_mb_what_t what, const char *ptr, const char *end, const char *begin, zend_bool silent) { + unsigned wchar; size_t consumed = 0; - zend_bool idn = (what == PARSE_HOSTINFO) && (url->flags & PHP_HTTP_URL_PARSE_IDN); - if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) { - consumed = parse_mb_utf8(url, ptr, end, idn); + if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { + consumed = parse_mb_utf8(&wchar, ptr, end); } #ifdef PHP_HTTP_HAVE_WCHAR - else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) { - consumed = parse_mb_loc(url, ptr, end, idn); + else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { + consumed = parse_mb_loc(&wchar, ptr, end); } #endif - if (!consumed && !silent) { - TSRMLS_FETCH_FROM_CTX(url->ts); + while (consumed) { + if (!(state->flags & PHP_HTTP_URL_PARSE_TOPCT) || what == PARSE_HOSTINFO || what == PARSE_SCHEME) { + if (what == PARSE_HOSTINFO && (state->flags & PHP_HTTP_URL_PARSE_TOIDN)) { + /* idna */ + } else if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { + if (!isualnum(wchar)) { + break; + } +#ifdef PHP_HTTP_HAVE_WCHAR + } else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { + if (!iswalnum(wchar)) { + break; + } +#endif + } + PHP_HTTP_DUFF(consumed, state->buffer[state->offset++] = *ptr++); + } else { + int i = 0; + + PHP_HTTP_DUFF(consumed, + state->buffer[state->offset++] = '%'; + state->buffer[state->offset++] = parse_xdigits[((unsigned char) ptr[i]) >> 4]; + state->buffer[state->offset++] = parse_xdigits[((unsigned char) ptr[i]) & 0xf]; + ++i; + ); + } + + return consumed; + } + + if (!silent) { + TSRMLS_FETCH_FROM_CTX(state->ts); php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'", parse_what[what], (unsigned char) *ptr, (unsigned) (ptr - begin), begin); } - return consumed; + return 0; } -static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *end) +static STATUS parse_userinfo(struct parse_state *state, const char *ptr) { size_t mb; - const char *password = NULL, *tmp = ptr; - TSRMLS_FETCH_FROM_CTX(url->ts); + const char *password = NULL, *end = state->ptr, *tmp = ptr; + TSRMLS_FETCH_FROM_CTX(state->ts); + + state->url.user = &state->buffer[state->offset]; do { switch (*ptr) { @@ -512,6 +466,8 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *e return FAILURE; } password = ptr + 1; + state->buffer[state->offset++] = 0; + state->url.pass = &state->buffer[state->offset]; break; case '%': @@ -521,7 +477,9 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *e (unsigned) (ptr - tmp), tmp); return FAILURE; } - ptr += 2; + state->buffer[state->offset++] = *ptr++; + state->buffer[state->offset++] = *ptr++; + state->buffer[state->offset++] = *ptr; break; case '!': case '$': case '&': case '\'': case '(': case ')': case '*': @@ -538,48 +496,69 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *e case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* allowed */ + state->buffer[state->offset++] = *ptr; break; default: - if (!(mb = parse_mb(url, PARSE_USERINFO, ptr, end, tmp, 0))) { + if (!(mb = parse_mb(state, PARSE_USERINFO, ptr, end, tmp, 0))) { return FAILURE; } ptr += mb - 1; } } while(++ptr != end); - if (password) { - if ((url->authority.userinfo.username.len = password - tmp - 1)) { - url->authority.userinfo.username.str = estrndup(tmp, - url->authority.userinfo.username.len); - } - if ((url->authority.userinfo.password.len = end - password)) { - url->authority.userinfo.password.str = estrndup(password, - url->authority.userinfo.password.len); - } - } else { - if ((url->authority.userinfo.username.len = end - tmp)) { - url->authority.userinfo.username.str = estrndup(tmp, - url->authority.userinfo.username.len); - } - } + + state->buffer[state->offset++] = 0; return SUCCESS; } -static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *end) +static STATUS parse_hostinfo(struct parse_state *state, const char *ptr) { - size_t mb; - const char *tmp = ptr, *port = NULL; - TSRMLS_FETCH_FROM_CTX(url->ts); + size_t mb, len; + const char *end = state->ptr, *tmp = ptr, *port = NULL; + TSRMLS_FETCH_FROM_CTX(state->ts); + + +#ifdef HAVE_INET_PTON + if (*ptr == '[') { + char *error = NULL, *tmp = memchr(ptr, ']', end - ptr); + + if (tmp) { + size_t addrlen = tmp - ptr + 1; + char buf[16], *addr = estrndup(ptr + 1, addrlen - 2); + int rv = inet_pton(AF_INET6, addr, buf); + + efree(addr); + if (rv == 1) { + state->buffer[state->offset] = '['; + state->url.host = &state->buffer[state->offset]; + inet_ntop(AF_INET6, buf, state->url.host + 1, state->maxlen - state->offset); + state->offset += strlen(state->url.host); + state->buffer[state->offset++] = ']'; + state->buffer[state->offset++] = 0; + ptr = tmp + 1; + } else if (rv == -1) { + error = strerror(errno); + } else { + error = "unexpected '['"; + } + } else { + error = "expected ']'"; + } - /* FIXME: IP(v6) addresses */ - do { + if (error) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse hostinfo; %s", error); + return FAILURE; + } + } +#endif + if (ptr != end) do { switch (*ptr) { case ':': if (port) { php_error_docref(NULL TSRMLS_CC, E_WARNING, - "Failed to parse port; duplicate ':' at pos %u in '%s'", + "Failed to parse port; unexpected ':' at pos %u in '%s'", (unsigned) (ptr - tmp), tmp); return FAILURE; } @@ -593,7 +572,9 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *e (unsigned) (ptr - tmp), tmp); return FAILURE; } - ptr += 2; + state->buffer[state->offset++] = *ptr++; + state->buffer[state->offset++] = *ptr++; + state->buffer[state->offset++] = *ptr; break; case '!': case '$': case '&': case '\'': case '(': case ')': case '*': @@ -618,8 +599,10 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *e case '7': case '8': case '9': /* allowed */ if (port) { - url->authority.port *= 10; - url->authority.port += *ptr - '0'; + state->url.port *= 10; + state->url.port += *ptr - '0'; + } else { + state->buffer[state->offset++] = *ptr; } break; @@ -629,69 +612,66 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *e "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'", (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp); return FAILURE; - } else if (!(mb = parse_mb(url, PARSE_HOSTINFO, ptr, end, tmp, 0))) { + } else if (!(mb = parse_mb(state, PARSE_HOSTINFO, ptr, end, tmp, 0))) { return FAILURE; } ptr += mb - 1; } } while (++ptr != end); - if (port) { - url->authority.host.len = port - tmp - 1; - } else { - url->authority.host.len = end - tmp; + if (!state->url.host) { + len = (port ? port - tmp - 1 : end - tmp); + state->url.host = &state->buffer[state->offset - len]; + state->buffer[state->offset++] = 0; } - url->authority.host.str = estrndup(tmp, url->authority.host.len); - #ifdef PHP_HTTP_HAVE_IDN - if (url->flags & PHP_HTTP_URL_PARSE_IDN) { - if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) { - char *idn = NULL; - int rv = idna_to_ascii_8z(url->authority.host.str, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES); + if (state->flags & PHP_HTTP_URL_PARSE_TOIDN) { + char *idn = NULL; + int rv = -1; - if (rv != IDNA_SUCCESS) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv)); - return FAILURE; - } else { - STR_SET(url->authority.host.str, estrdup(idn)); - url->authority.host.len = strlen(idn); - free(idn); - } + if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) { + rv = idna_to_ascii_8z(state->url.host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES); } # ifdef PHP_HTTP_HAVE_WCHAR - else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) { - char *idn = NULL; - int rv = idna_to_ascii_lz(url->authority.host.str, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES); - - if (rv != IDNA_SUCCESS) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv)); - return FAILURE; - } else { - STR_SET(url->authority.host.str, estrdup(idn)); - url->authority.host.len = strlen(idn); - free(idn); - } + else if (state->flags & PHP_HTTP_URL_PARSE_MBLOC) { + rv = idna_to_ascii_lz(state->url.host, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES); } - } # endif + if (rv != IDNA_SUCCESS) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv)); + return FAILURE; + } else { + size_t idnlen = strlen(idn); + memcpy(state->url.host, idn, idnlen + 1); + free(idn); + state->offset += idnlen - len; + } + } #endif return SUCCESS; } -static const char *parse_authority(php_http_url_t *url, const char *ptr, const char *end) +static const char *parse_authority(struct parse_state *state) { - const char *tmp = ptr; + const char *tmp = state->ptr, *host = NULL; do { - switch (*ptr) { + switch (*state->ptr) { case '@': /* userinfo delimiter */ - if (tmp != ptr && SUCCESS != parse_userinfo(url, tmp, ptr)) { + if (host) { + TSRMLS_FETCH_FROM_CTX(state->ts); + php_error_docref(NULL TSRMLS_CC, E_WARNING, + "Failed to parse userinfo; unexpected '@'"); return NULL; } - tmp = ptr + 1; + host = state->ptr + 1; + if (tmp != state->ptr && SUCCESS != parse_userinfo(state, tmp)) { + return NULL; + } + tmp = state->ptr + 1; break; case '/': @@ -699,39 +679,52 @@ static const char *parse_authority(php_http_url_t *url, const char *ptr, const c case '#': case '\0': /* host delimiter */ - if (tmp != ptr && SUCCESS != parse_hostinfo(url, tmp, ptr)) { + if (tmp != state->ptr && SUCCESS != parse_hostinfo(state, tmp)) { return NULL; } - return ptr; + return state->ptr; } - } while (++ptr <= end); + } while (++state->ptr <= state->end); return NULL; } -static const char *parse_path(php_http_url_t *url, const char *ptr, const char *end) +static const char *parse_path(struct parse_state *state) { size_t mb; - const char *tmp = ptr; - TSRMLS_FETCH_FROM_CTX(url->ts); + const char *tmp; + TSRMLS_FETCH_FROM_CTX(state->ts); + + /* is there actually a path to parse? */ + if (!*state->ptr) { + return state->ptr; + } + tmp = state->ptr; + state->url.path = &state->buffer[state->offset]; do { - switch (*ptr) { + switch (*state->ptr) { + case '#': case '?': case '\0': - if ((url->path.len = ptr - tmp)) { - url->path.str = estrndup(tmp, url->path.len); + /* did we have any path component ? */ + if (tmp != state->ptr) { + state->buffer[state->offset++] = 0; + } else { + state->url.path = NULL; } - return ptr; + return state->ptr; case '%': - if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) { + if (state->ptr[1] != '%' && (state->end - state->ptr <= 2 || !isxdigit(*(state->ptr+1)) || !isxdigit(*(state->ptr+2)))) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse path; invalid percent encoding at pos %u in '%s'", - (unsigned) (ptr - tmp), tmp); + (unsigned) (state->ptr - tmp), tmp); return NULL; } - ptr += 2; + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr; break; case '/': /* yeah, well */ @@ -750,42 +743,52 @@ static const char *parse_path(php_http_url_t *url, const char *ptr, const char * case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* allowed */ + state->buffer[state->offset++] = *state->ptr; break; default: - if (!(mb = parse_mb(url, PARSE_PATH, ptr, end, tmp, 0))) { + if (!(mb = parse_mb(state, PARSE_PATH, state->ptr, state->end, tmp, 0))) { return NULL; } - ptr += mb - 1; + state->ptr += mb - 1; } - } while (++ptr <= end); + } while (++state->ptr <= state->end); return NULL; } -static const char *parse_query(php_http_url_t *url, const char *ptr, const char *end) +static const char *parse_query(struct parse_state *state) { size_t mb; - const char *tmp = ptr + !!*ptr; - TSRMLS_FETCH_FROM_CTX(url->ts); + const char *tmp = state->ptr + !!*state->ptr; + TSRMLS_FETCH_FROM_CTX(state->ts); + + /* is there actually a query to parse? */ + if (*state->ptr != '?') { + return state->ptr; + } + + /* skip initial '?' */ + tmp = ++state->ptr; + state->url.query = &state->buffer[state->offset]; do { - switch (*ptr) { + switch (*state->ptr) { case '#': case '\0': - if ((url->query.len = ptr - tmp)) { - url->query.str = estrndup(tmp, url->query.len); - } - return ptr; + state->buffer[state->offset++] = 0; + return state->ptr; case '%': - if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) { + if (state->ptr[1] != '%' && (state->end - state->ptr <= 2 || !isxdigit(*(state->ptr+1)) || !isxdigit(*(state->ptr+2)))) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse query; invalid percent encoding at pos %u in '%s'", - (unsigned) (ptr - tmp), tmp); + (unsigned) (state->ptr - tmp), tmp); return NULL; } - ptr += 2; + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr; break; case '?': case '/': /* yeah, well */ @@ -804,44 +807,54 @@ static const char *parse_query(php_http_url_t *url, const char *ptr, const char case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* allowed */ + state->buffer[state->offset++] = *state->ptr; break; default: - if (!(mb = parse_mb(url, PARSE_QUERY, ptr, end, tmp, 0))) { + if (!(mb = parse_mb(state, PARSE_QUERY, state->ptr, state->end, tmp, 0))) { return NULL; } - ptr += mb - 1; + state->ptr += mb - 1; } - } while (++ptr <= end); + } while (++state->ptr <= state->end); return NULL; } -static const char *parse_fragment(php_http_url_t *url, const char *ptr, const char *end) +static const char *parse_fragment(struct parse_state *state) { size_t mb; - const char *tmp = ptr + !!*ptr; - TSRMLS_FETCH_FROM_CTX(url->ts); + const char *tmp; + TSRMLS_FETCH_FROM_CTX(state->ts); + + /* is there actually a fragment to parse? */ + if (*state->ptr != '#') { + return state->ptr; + } + + /* skip initial '#' */ + tmp = ++state->ptr; + state->url.fragment = &state->buffer[state->offset]; do { - switch (*ptr) { + switch (*state->ptr) { case '\0': - if ((url->fragment.len = ptr - tmp)) { - url->fragment.str = estrndup(tmp, url->fragment.len); - } - return ptr; + state->buffer[state->offset++] = 0; + return state->ptr; case '%': - if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) { + if (state->ptr[1] != '%' && (state->end - state->ptr <= 2 || !isxdigit(*(state->ptr+1)) || !isxdigit(*(state->ptr+2)))) { php_error_docref(NULL TSRMLS_CC, E_WARNING, - "Failed to parse query; invalid percent encoding at pos %u in '%s'", - (unsigned) (ptr - tmp), tmp); + "Failed to parse fragment; invalid percent encoding at pos %u in '%s'", + (unsigned) (state->ptr - tmp), tmp); return NULL; } - ptr += 2; + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr++; + state->buffer[state->offset++] = *state->ptr; break; - case '?': case '/': /* yeah, well */ + case '?': case '/': case '!': case '$': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': case ';': case '=': /* sub-delims */ case '-': case '.': case '_': case '~': /* unreserved */ @@ -857,50 +870,52 @@ static const char *parse_fragment(php_http_url_t *url, const char *ptr, const ch case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* allowed */ + state->buffer[state->offset++] = *state->ptr; break; default: - if (!(mb = parse_mb(url, PARSE_FRAGMENT, ptr, end, tmp, 0))) { + if (!(mb = parse_mb(state, PARSE_FRAGMENT, state->ptr, state->end, tmp, 0))) { return NULL; } - ptr += mb - 1; + state->ptr += mb - 1; } - } while (++ptr <= end); + } while (++state->ptr <= state->end); return NULL; } -static const char *parse_hier(php_http_url_t *url, const char *ptr, const char *end) +static const char *parse_hier(struct parse_state *state) { - if (*ptr == '/') { - if (end - ptr > 1) { - if (*(ptr + 1) == '/') { - if (!(ptr = parse_authority(url, ptr + 2, end))) { + if (*state->ptr == '/') { + if (state->end - state->ptr > 1) { + if (*(state->ptr + 1) == '/') { + state->ptr += 2; + if (!(state->ptr = parse_authority(state))) { return NULL; } } } } - return parse_path(url, ptr, end); + return parse_path(state); } -static const char *parse_scheme(php_http_url_t *url, const char *ptr, const char *end) +static const char *parse_scheme(struct parse_state *state) { size_t mb; - const char *tmp = ptr; + const char *tmp = state->ptr; do { - switch (*ptr) { + switch (*state->ptr) { case ':': /* scheme delimiter */ - url->scheme.len = ptr - tmp; - url->scheme.str = estrndup(tmp, url->scheme.len); - return ++ptr; + state->url.scheme = &state->buffer[0]; + state->buffer[state->offset++] = 0; + return ++state->ptr; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '+': case '-': case '.': - if (ptr == tmp) { + if (state->ptr == tmp) { return tmp; } /* no break */ @@ -913,74 +928,56 @@ static const char *parse_scheme(php_http_url_t *url, const char *ptr, const char case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': /* scheme part */ + state->buffer[state->offset++] = *state->ptr; break; default: - if (!(mb = parse_mb(url, PARSE_SCHEME, ptr, end, tmp, 1))) { + if (!(mb = parse_mb(state, PARSE_SCHEME, state->ptr, state->end, tmp, 1))) { /* soft fail; parse path next */ return tmp; } - ptr += mb - 1; + state->ptr += mb - 1; } - } while (++ptr != end); + } while (++state->ptr != state->end); return tmp; } -php_http_url_t *php_http_url_init(php_http_url_t *url, const char *str, size_t len, unsigned flags TSRMLS_DC) +php_http_url_t *php_http_url_parse(const char *str, size_t len, unsigned flags TSRMLS_DC) { - const char *ptr, *end = str + len; - zend_bool free_url = !url; - - if (url) { - memset(url, 0, sizeof(*url)); - } else { - url = ecalloc(1, sizeof(*url)); - } - - url->flags = flags; - TSRMLS_SET_CTX(url->ts); - - if ((ptr = str) && !(str = parse_scheme(url, ptr, end))) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL scheme: '%s'", ptr); - if (free_url) { - php_http_url_free(&url); - } else { - php_http_url_dtor(url); - } + size_t maxlen = 3 * len; + struct parse_state *state = ecalloc(1, sizeof(*state) + maxlen); + + state->end = str + len; + state->ptr = str; + state->flags = flags; + state->maxlen = maxlen; + TSRMLS_SET_CTX(state->ts); + + if (!parse_scheme(state)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL scheme: '%s'", state->ptr); + efree(state); return NULL; } - if ((ptr = str) && !(str = parse_hier(url, ptr, end))) { - if (free_url) { - php_http_url_free(&url); - } else { - php_http_url_dtor(url); - } + if (!parse_hier(state)) { + efree(state); return NULL; } - if ((ptr = str) && !(str = parse_query(url, ptr, end))) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL query: '%s'", ptr); - if (free_url) { - php_http_url_free(&url); - } else { - php_http_url_dtor(url); - } + if (!parse_query(state)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL query: '%s'", state->ptr); + efree(state); return NULL; } - if ((ptr = str) && !(str = parse_fragment(url, ptr, end))) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL fragment: '%s'", ptr); - if (free_url) { - php_http_url_free(&url); - } else { - php_http_url_dtor(url); - } + if (!parse_fragment(state)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL fragment: '%s'", state->ptr); + efree(state); return NULL; } - return url; + return (php_http_url_t *) state; } ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct, 0, 0, 0) @@ -1154,47 +1151,47 @@ PHP_METHOD(HttpUrl, parse) char *str; int len; long flags = 0; - php_http_url_t url; + php_http_url_t *url; zend_error_handling zeh; php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &len, &flags), invalid_arg, return); zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC); - if (php_http_url_init(&url, str, len, flags TSRMLS_CC)) { + if ((url = php_http_url_parse(str, len, flags TSRMLS_CC))) { object_init_ex(return_value, php_http_url_class_entry); - if (url.scheme.len) { - zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("scheme"), - url.scheme.str, url.scheme.len TSRMLS_CC); + if (url->scheme) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("scheme"), url->scheme TSRMLS_CC); } - if (url.authority.userinfo.username.len) { - zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("user"), - url.authority.userinfo.username.str, url.authority.userinfo.username.len TSRMLS_CC); + if (url->user) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("user"), url->user TSRMLS_CC); } - if (url.authority.userinfo.password.len) { - zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("pass"), - url.authority.userinfo.password.str, url.authority.userinfo.password.len TSRMLS_CC); + if (url->pass) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("pass"), url->pass TSRMLS_CC); } - if (url.authority.host.len) { - zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("host"), - url.authority.host.str, url.authority.host.len TSRMLS_CC); + if (url->host) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("host"), url->host TSRMLS_CC); } - if (url.authority.port) { - zend_update_property_long(php_http_url_class_entry, return_value, ZEND_STRL("port"), - url.authority.port TSRMLS_CC); + if (url->port) { + zend_update_property_long(php_http_url_class_entry, return_value, + ZEND_STRL("port"), url->port TSRMLS_CC); } - if (url.path.len) { - zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("path"), - url.path.str, url.path.len TSRMLS_CC); + if (url->path) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("path"), url->path TSRMLS_CC); } - if (url.query.len) { - zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("query"), - url.query.str, url.query.len TSRMLS_CC); + if (url->query) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("query"), url->query TSRMLS_CC); } - if (url.fragment.len) { - zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("fragment"), - url.fragment.str, url.fragment.len TSRMLS_CC); + if (url->fragment) { + zend_update_property_string(php_http_url_class_entry, return_value, + ZEND_STRL("fragment"), url->fragment TSRMLS_CC); } - php_http_url_dtor(&url); + php_http_url_free(&url); } zend_restore_error_handling(&zeh TSRMLS_CC); } @@ -1246,8 +1243,9 @@ PHP_MINIT_FUNCTION(http_url) #endif zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC); #ifdef PHP_HTTP_HAVE_IDN - zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_IDN"), PHP_HTTP_URL_PARSE_IDN TSRMLS_CC); + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOIDN"), PHP_HTTP_URL_PARSE_TOIDN TSRMLS_CC); #endif + zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOPCT"), PHP_HTTP_URL_PARSE_TOPCT TSRMLS_CC); return SUCCESS; }