| modification, are permitted provided that the conditions mentioned |
| in the accompanying LICENSE file are met. |
+--------------------------------------------------------------------+
- | Copyright (c) 2004-2011, Michael Wallner <mike@php.net> |
+ | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
+--------------------------------------------------------------------+
*/
#include "php_http_api.h"
+#ifdef PHP_HTTP_HAVE_IDN
+# include <idna.h>
+#endif
+
+#ifdef PHP_HTTP_HAVE_WCHAR
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
static inline char *localhostname(void)
{
char hostname[1024] = {0};
{
zval *https, *zhost, *zport;
long port;
-#ifdef HAVE_GETSERVBYPORT
- struct servent *se;
-#endif
if (!url) {
url = ecalloc(1, sizeof(*url));
https = php_http_env_get_server_var(ZEND_STRL("HTTPS"), 1 TSRMLS_CC);
if (https && !strcasecmp(Z_STRVAL_P(https), "ON")) {
url->scheme = estrndup("https", lenof("https"));
- } else switch (url->port) {
- case 443:
- url->scheme = estrndup("https", lenof("https"));
- break;
-
-#ifndef HAVE_GETSERVBYPORT
- default:
-#endif
- case 80:
- case 0:
- url->scheme = estrndup("http", lenof("http"));
- break;
-
-#ifdef HAVE_GETSERVBYPORT
- default:
- if ((se = getservbyport(htons(url->port), "tcp")) && se->s_name) {
- url->scheme = estrdup(se->s_name);
- } else {
- url->scheme = estrndup("http", lenof("http"));
- }
- break;
-#endif
+ } else {
+ url->scheme = estrndup("http", lenof("http"));
}
/* host */
return url;
}
-PHP_HTTP_API void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC)
+void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC)
{
php_url *url, *tmp_url = NULL;
-#ifdef HAVE_GETSERVBYNAME
- struct servent *se;
-#endif
/* set from env if requested */
if (flags & PHP_HTTP_URL_FROM_ENV) {
if (url->port) {
if ( ((url->port == 80) && !strcmp(url->scheme, "http"))
|| ((url->port ==443) && !strcmp(url->scheme, "https"))
-#ifdef HAVE_GETSERVBYNAME
- || ((se = getservbyname(url->scheme, "tcp")) && se->s_port &&
- (url->port == ntohs(se->s_port)))
-#endif
) {
url->port = 0;
}
}
}
-PHP_HTTP_API STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC)
+STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC)
{
const char *arg_sep_str;
size_t arg_sep_len;
return SUCCESS;
}
-PHP_HTTP_API STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC)
+STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC)
{
if (pre_encoded_len && pre_encoded_str) {
php_http_buffer_append(qstr, pre_encoded_str, pre_encoded_len);
return SUCCESS;
}
-#define PHP_HTTP_BEGIN_ARGS(method, req_args) PHP_HTTP_BEGIN_ARGS_EX(HttpUrl, method, 0, req_args)
-#define PHP_HTTP_EMPTY_ARGS(method) PHP_HTTP_EMPTY_ARGS_EX(HttpUrl, method, 0)
-#define PHP_HTTP_URL_ME(method, visibility) PHP_ME(HttpUrl, method, PHP_HTTP_ARGS(HttpUrl, method), visibility)
+/*
+ * Release every string owned by *url; the structure itself is not freed.
+ *
+ * Each member is reset after it has been released, so running the destructor
+ * a second time on the same structure is harmless instead of a double free.
+ */
+void php_http_url_dtor(php_http_url_t *url)
+{
+	STR_FREE(url->scheme.str);
+	url->scheme.str = NULL;
+	url->scheme.len = 0;
+
+	STR_FREE(url->authority.userinfo.username.str);
+	url->authority.userinfo.username.str = NULL;
+	url->authority.userinfo.username.len = 0;
+
+	STR_FREE(url->authority.userinfo.password.str);
+	url->authority.userinfo.password.str = NULL;
+	url->authority.userinfo.password.len = 0;
+
+	STR_FREE(url->authority.host.str);
+	url->authority.host.str = NULL;
+	url->authority.host.len = 0;
+
+	STR_FREE(url->path.str);
+	url->path.str = NULL;
+	url->path.len = 0;
+
+	STR_FREE(url->query.str);
+	url->query.str = NULL;
+	url->query.len = 0;
+
+	STR_FREE(url->fragment.str);
+	url->fragment.str = NULL;
+	url->fragment.len = 0;
+}
+
+/*
+ * Destroy and free the URL *url points to, then reset *url to NULL.
+ * Nothing happens when *url is already NULL.
+ */
+void php_http_url_free(php_http_url_t **url)
+{
+	php_http_url_t *victim = *url;
+
+	if (!victim) {
+		return;
+	}
+
+	php_http_url_dtor(victim);
+	efree(victim);
+	*url = NULL;
+}
+
+static const unsigned char utf8mblen[256] = { /* sequence length in bytes, indexed by the value of the first byte */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00-0x0f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x10-0x1f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20-0x2f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x30-0x3f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40-0x4f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x50-0x5f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60-0x6f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x70-0x7f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x80-0x8f: note that 0x80-0xbf are listed with length 1 as well */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x90-0x9f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xa0-0xaf */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xb0-0xbf */
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xc0-0xcf */
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xd0-0xdf */
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xe0-0xef */
+ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6 /* 0xf0-0xf7: 4, 0xf8-0xfb: 5, 0xfc-0xff: 6 */
+};
+static const unsigned char utf8mask[] = { /* mask applied to the first byte to extract its payload bits, indexed by sequence length */
+ 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01
+};
+
+/*
+ * Decode one multibyte UTF-8 sequence into a code point.
+ *
+ * wc  receives the decoded value
+ * uc  points at the first byte of the sequence
+ * len number of bytes available at uc
+ *
+ * Returns the number of bytes consumed (2, 3 or 4), or 0 if the sequence is
+ * longer than len, longer than four bytes, has a malformed continuation byte
+ * or is a single byte (single bytes are not decoded by this helper).
+ */
+static inline size_t utf8towc(unsigned *wc, const unsigned char *uc, size_t len)
+{
+	unsigned char ub = utf8mblen[*uc];
+
+	/* four byte sequences are valid UTF-8; only the 5 and 6 byte forms are not */
+	if (!ub || ub > len || ub > 4) {
+		return 0;
+	}
+
+	*wc = *uc & utf8mask[ub];
+
+	/* every case consumes one continuation byte and falls through to the next */
+	switch (ub) {
+	case 4:
+		if ((uc[1] & 0xc0) != 0x80) {
+			return 0;
+		}
+		*wc <<= 6;
+		*wc += *++uc & 0x3f;
+		/* no break */
+	case 3:
+		if ((uc[1] & 0xc0) != 0x80) {
+			return 0;
+		}
+		*wc <<= 6;
+		*wc += *++uc & 0x3f;
+		/* no break */
+	case 2:
+		if ((uc[1] & 0xc0) != 0x80) {
+			return 0;
+		}
+		*wc <<= 6;
+		*wc += *++uc & 0x3f;
+		break;
+
+	default:
+		return 0;
+	}
+
+	return ub;
+}
+
+#include "ualpha.h"
-PHP_HTTP_BEGIN_ARGS(__construct, 0)
- PHP_HTTP_ARG_VAL(old_url, 0)
- PHP_HTTP_ARG_VAL(new_url, 0)
- PHP_HTTP_ARG_VAL(flags, 0)
-PHP_HTTP_END_ARGS;
-PHP_HTTP_EMPTY_ARGS(toString);
-PHP_HTTP_EMPTY_ARGS(toArray);
+static inline zend_bool isualnum(unsigned ch) /* returns 1 if code point ch is an ASCII digit or is matched by an entry of utf8_ranges, 0 otherwise */
+{
+ unsigned i;
-PHP_HTTP_BEGIN_ARGS(mod, 1)
- PHP_HTTP_ARG_VAL(more_url_parts, 0)
- PHP_HTTP_ARG_VAL(flags, 0)
-PHP_HTTP_END_ARGS;
+ /* digits */
+ if (ch >= 0x30 && ch <= 0x39) {
+ return 1;
+ }
-static zend_class_entry *php_http_url_class_entry;
+ for (i = 0; i < sizeof(utf8_ranges)/sizeof(utf8_range_t); ++i) { /* utf8_ranges is presumably provided by ualpha.h - confirm */
+ if (utf8_ranges[i].start == ch) { /* the first code point of a range always matches, whatever the step */
+ return 1;
+ } else if (utf8_ranges[i].start <= ch && utf8_ranges[i].end >= ch) {
+ if (utf8_ranges[i].step == 1) {
+ return 1;
+ }
+ /* FIXME step */
+ return 0; /* inside a range whose step is not 1: currently always rejected */
+ }
+ }
+ return 0;
+}
-zend_class_entry *php_http_url_get_class_entry(void)
+static size_t parse_mb_utf8(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn) /* returns the byte length of the UTF-8 character at ptr, or 0 if it cannot be decoded or is not acceptable; url is not used here */
{
- return php_http_url_class_entry;
+ unsigned wchar;
+ size_t consumed = utf8towc(&wchar, (const unsigned char *) ptr, end - ptr);
+
+ if (!consumed || consumed == (size_t) -1) {
+ return 0;
+ }
+ if (!idn && !isualnum(wchar)) { /* with idn set any decodable character is accepted */
+ return 0;
+ }
+
+ return consumed;
}
-static zend_function_entry php_http_url_method_entry[] = {
- PHP_HTTP_URL_ME(__construct, ZEND_ACC_PUBLIC|ZEND_ACC_CTOR)
- PHP_HTTP_URL_ME(mod, ZEND_ACC_PUBLIC)
- PHP_HTTP_URL_ME(toString, ZEND_ACC_PUBLIC)
- ZEND_MALIAS(HttpUrl, __toString, toString, PHP_HTTP_ARGS(HttpUrl, toString), ZEND_ACC_PUBLIC)
- PHP_HTTP_URL_ME(toArray, ZEND_ACC_PUBLIC)
- EMPTY_FUNCTION_ENTRY
+#ifdef PHP_HTTP_HAVE_WCHAR
+/*
+ * Measure one multibyte character at ptr using the current locale.
+ *
+ * Returns the number of bytes the character occupies, or 0 if it cannot be
+ * converted, is incomplete within [ptr, end), is the NUL character, or is not
+ * alphanumeric according to iswalnum() while idn is not set.
+ * The url argument is not used here.
+ */
+static size_t parse_mb_loc(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn)
+{
+	wchar_t wchar = 0;
+	size_t consumed = 0;
+#if defined(HAVE_MBRTOWC)
+	mbstate_t ps = {0};
+
+	consumed = mbrtowc(&wchar, ptr, end - ptr, &ps);
+#elif defined(HAVE_MBTOWC)
+	consumed = mbtowc(&wchar, ptr, end - ptr);
+#endif
+
+	/*
+	 * (size_t) -1 signals an invalid sequence, (size_t) -2 (mbrtowc only) an
+	 * incomplete one; neither must ever be used as a byte count by the caller
+	 */
+	if (!consumed || consumed == (size_t) -1 || consumed == (size_t) -2) {
+		return 0;
+	}
+	if (!idn && !iswalnum(wchar)) {
+		return 0;
+	}
+
+	return consumed;
+}
+#endif
+
+typedef enum parse_mb_what { /* URL component being parsed; the values index parse_what[] */
+ PARSE_SCHEME,
+ PARSE_USERINFO,
+ PARSE_HOSTINFO,
+ PARSE_PATH,
+ PARSE_QUERY,
+ PARSE_FRAGMENT
+} parse_mb_what_t;
+
+static const char * const parse_what[] = { /* component names used in warnings, in the order of parse_mb_what_t */
+ "scheme",
+ "userinfo",
+ "hostinfo",
+ "path",
+ "query",
+ "fragment"
};
-PHP_METHOD(HttpUrl, __construct)
+static size_t parse_mb(php_http_url_t *url, parse_mb_what_t what, const char *ptr, const char *end, const char *begin, zend_bool silent) /* returns the byte length of the multibyte character at ptr, or 0 if it is not acceptable; begin is only used for the warning text */
{
- with_error_handling(EH_THROW, php_http_exception_get_class_entry()) {
- zval *new_url = NULL, *old_url = NULL;
- long flags = PHP_HTTP_URL_FROM_ENV;
-
- if (SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|z!z!l", &old_url, &new_url, &flags)) {
- with_error_handling(EH_THROW, php_http_exception_get_class_entry()) {
- php_url *res_purl, *new_purl = NULL, *old_purl = NULL;
-
- if (new_url) {
- switch (Z_TYPE_P(new_url)) {
- case IS_OBJECT:
- case IS_ARRAY:
- new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC);
- break;
- default: {
- zval *cpy = php_http_ztyp(IS_STRING, new_url);
+ size_t consumed = 0;
+ zend_bool idn = (what == PARSE_HOSTINFO) && (url->flags & PHP_HTTP_URL_PARSE_IDN); /* only host names are treated as IDN, and only when requested */
- new_purl = php_url_parse(Z_STRVAL_P(cpy));
- zval_ptr_dtor(&cpy);
- break;
- }
- }
- if (!new_purl) {
- return;
- }
- }
- if (old_url) {
- switch (Z_TYPE_P(old_url)) {
- case IS_OBJECT:
- case IS_ARRAY:
- old_purl = php_http_url_from_struct(NULL, HASH_OF(old_url) TSRMLS_CC);
- break;
- default: {
- zval *cpy = php_http_ztyp(IS_STRING, old_url);
+ if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) { /* the UTF-8 flag takes precedence over the locale flag */
+ consumed = parse_mb_utf8(url, ptr, end, idn);
+ }
+#ifdef PHP_HTTP_HAVE_WCHAR
+ else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) {
+ consumed = parse_mb_loc(url, ptr, end, idn);
+ }
+#endif
- old_purl = php_url_parse(Z_STRVAL_P(cpy));
- zval_ptr_dtor(&cpy);
- break;
- }
- }
- if (!old_purl) {
- if (new_purl) {
- php_url_free(new_purl);
- }
- return;
- }
+ if (!consumed && !silent) { /* also reached when neither multibyte flag is set, since consumed is still 0 */
+ TSRMLS_FETCH_FROM_CTX(url->ts);
+ php_error_docref(NULL TSRMLS_CC, E_WARNING,
+ "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
+ parse_what[what], (unsigned char) *ptr, (unsigned) (ptr - begin), begin);
+ }
+
+ return consumed;
+}
+
+/*
+ * Validate the userinfo part of an authority and store user name and
+ * password in url->authority.userinfo.
+ *
+ * ptr points at the first userinfo byte and end at the byte following the
+ * last one; the range must not be empty.  The first ':' separates the user
+ * name from the password, a second ':' is an error.  Bytes outside of the
+ * allowed ASCII set are handed to parse_mb().
+ *
+ * Returns SUCCESS, or FAILURE after a warning has been raised.
+ */
+static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *end)
+{
+	size_t mb;
+	const char *password = NULL, *tmp = ptr;
+	TSRMLS_FETCH_FROM_CTX(url->ts);
+
+	do {
+		switch (*ptr) {
+		case ':':
+			if (password) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING,
+						"Failed to parse password; duplicate ':' at pos %u in '%s'",
+						(unsigned) (ptr - tmp), tmp);
+				return FAILURE;
+			}
+			password = ptr + 1;
+			break;
+
+		case '%':
+			if (end - ptr > 1 && ptr[1] == '%') {
+				/*
+				 * "%%" is let through unvalidated; consume exactly the two
+				 * percent signs, so that neither the following byte nor the
+				 * end of the range is skipped
+				 */
+				++ptr;
+				break;
+			}
+			if (end - ptr <= 2 || !isxdigit((unsigned char) ptr[1]) || !isxdigit((unsigned char) ptr[2])) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING,
+						"Failed to parse userinfo; invalid percent encoding at pos %u in '%s'",
+						(unsigned) (ptr - tmp), tmp);
+				return FAILURE;
+			}
+			/* skip the two hex digits */
+			ptr += 2;
+			break;
+
+		case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+		case '+': case ',': case ';': case '=': /* sub-delims */
+		case '-': case '.': case '_': case '~': /* unreserved */
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+		case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+		case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+		case 'V': case 'W': case 'X': case 'Y': case 'Z':
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+		case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+		case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+		case 'v': case 'w': case 'x': case 'y': case 'z':
+		case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+		case '7': case '8': case '9':
+			/* allowed */
+			break;
+
+		default:
+			if (!(mb = parse_mb(url, PARSE_USERINFO, ptr, end, tmp, 0))) {
+				return FAILURE;
+			}
+			/* the loop increment accounts for the last byte of the character */
+			ptr += mb - 1;
+		}
+	} while (++ptr < end); /* '<' rather than '!=': never run past the end of the range */
+
+	if (password) {
+		if ((url->authority.userinfo.username.len = password - tmp - 1)) {
+			url->authority.userinfo.username.str = estrndup(tmp,
+					url->authority.userinfo.username.len);
+		}
+		if ((url->authority.userinfo.password.len = end - password)) {
+			url->authority.userinfo.password.str = estrndup(password,
+					url->authority.userinfo.password.len);
+		}
+	} else {
+		if ((url->authority.userinfo.username.len = end - tmp)) {
+			url->authority.userinfo.username.str = estrndup(tmp,
+					url->authority.userinfo.username.len);
+		}
+	}
+
+	return SUCCESS;
+}
+
+/*
+ * Validate the "host[:port]" part of an authority and store the result in
+ * url->authority.host and url->authority.port.
+ *
+ * ptr points at the first byte and end at the byte following the last one;
+ * the range must not be empty.  Everything following the first ':' has to
+ * consist of decimal digits, which are accumulated into the port.  Bytes
+ * outside of the allowed ASCII set are handed to parse_mb().
+ *
+ * If built with PHP_HTTP_HAVE_IDN and PHP_HTTP_URL_PARSE_IDN is set, the
+ * host is converted with idna_to_ascii_8z() (PHP_HTTP_URL_PARSE_MBUTF8) or
+ * idna_to_ascii_lz() (PHP_HTTP_URL_PARSE_MBLOC, needs PHP_HTTP_HAVE_WCHAR).
+ *
+ * Returns SUCCESS, or FAILURE after a warning has been raised.
+ */
+static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *end)
+{
+	size_t mb;
+	const char *tmp = ptr, *port = NULL;
+	TSRMLS_FETCH_FROM_CTX(url->ts);
+
+	/* FIXME: IP(v6) addresses */
+	do {
+		switch (*ptr) {
+		case ':':
+			if (port) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING,
+						"Failed to parse port; duplicate ':' at pos %u in '%s'",
+						(unsigned) (ptr - tmp), tmp);
+				return FAILURE;
+			}
+			port = ptr + 1;
+			break;
+
+		case '%':
+			if (end - ptr > 1 && ptr[1] == '%') {
+				/*
+				 * "%%" is let through unvalidated; consume exactly the two
+				 * percent signs, so that neither the following byte nor the
+				 * end of the range is skipped
+				 */
+				++ptr;
+				break;
+			}
+			if (end - ptr <= 2 || !isxdigit((unsigned char) ptr[1]) || !isxdigit((unsigned char) ptr[2])) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING,
+						"Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'",
+						(unsigned) (ptr - tmp), tmp);
+				return FAILURE;
+			}
+			/* skip the two hex digits */
+			ptr += 2;
+			break;
+
+		case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+		case '+': case ',': case ';': case '=': /* sub-delims */
+		case '-': case '.': case '_': case '~': /* unreserved */
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+		case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+		case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+		case 'V': case 'W': case 'X': case 'Y': case 'Z':
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+		case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+		case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+		case 'v': case 'w': case 'x': case 'y': case 'z':
+			/* none of these may follow the port delimiter */
+			if (port) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING,
+						"Failed to parse port; unexpected char '%c' at pos %u in '%s'",
+						(unsigned char) *ptr, (unsigned) (ptr - tmp), tmp);
+				return FAILURE;
+			}
+			/* no break */
+		case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+		case '7': case '8': case '9':
+			/* allowed */
+			if (port) {
+				url->authority.port *= 10;
+				url->authority.port += *ptr - '0';
+			}
+			break;
+
+		default:
+			if (port) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING,
+						"Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'",
+						(unsigned char) *ptr, (unsigned) (ptr - tmp), tmp);
+				return FAILURE;
+			} else if (!(mb = parse_mb(url, PARSE_HOSTINFO, ptr, end, tmp, 0))) {
+				return FAILURE;
+			}
+			/* the loop increment accounts for the last byte of the character */
+			ptr += mb - 1;
+		}
+	} while (++ptr < end); /* '<' rather than '!=': never run past the end of the range */
+
+	/* the host ends in front of the port delimiter, if there is one */
+	if (port) {
+		url->authority.host.len = port - tmp - 1;
+	} else {
+		url->authority.host.len = end - tmp;
+	}
+
+	url->authority.host.str = estrndup(tmp, url->authority.host.len);
+
+#ifdef PHP_HTTP_HAVE_IDN
+	if (url->flags & PHP_HTTP_URL_PARSE_IDN) {
+		if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
+			char *idn = NULL;
+			int rv = idna_to_ascii_8z(url->authority.host.str, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
+
+			if (rv != IDNA_SUCCESS) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv));
+				return FAILURE;
+			} else {
+				/* replace the host with an emalloc'ed copy of the converted name */
+				STR_SET(url->authority.host.str, estrdup(idn));
+				url->authority.host.len = strlen(idn);
+				free(idn);
+			}
+		}
+#	ifdef PHP_HTTP_HAVE_WCHAR
+		else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) {
+			char *idn = NULL;
+			int rv = idna_to_ascii_lz(url->authority.host.str, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
+
+			if (rv != IDNA_SUCCESS) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv));
+				return FAILURE;
+			} else {
+				/* replace the host with an emalloc'ed copy of the converted name */
+				STR_SET(url->authority.host.str, estrdup(idn));
+				url->authority.host.len = strlen(idn);
+				free(idn);
+			}
+		}
+#	endif
+	/* closes the PARSE_IDN block; must stay outside of the WCHAR conditional */
+	}
+#endif
+
+	return SUCCESS;
+}
+
+/*
+ * Split the authority starting at ptr into userinfo and hostinfo.
+ *
+ * Everything in front of an '@' is passed to parse_userinfo(); the remainder
+ * up to the next '/', '?', '#' or NUL byte is passed to parse_hostinfo().
+ * Empty parts are not passed on.
+ *
+ * Returns a pointer to the delimiter that ended the authority, or NULL if a
+ * part failed to parse or no delimiter was found up to and including end.
+ */
+static const char *parse_authority(php_http_url_t *url, const char *ptr, const char *end)
+{
+	const char *part = ptr;
+	const char *cur = ptr;
+
+	for (;;) {
+		const char c = *cur;
+
+		if (c == '@') {
+			/* userinfo delimiter */
+			if (part != cur && SUCCESS != parse_userinfo(url, part, cur)) {
+				return NULL;
+			}
+			part = cur + 1;
+		} else if (c == '/' || c == '?' || c == '#' || c == '\0') {
+			/* host delimiter */
+			if (part != cur && SUCCESS != parse_hostinfo(url, part, cur)) {
+				return NULL;
+			}
+			return cur;
+		}
+
+		if (++cur > end) {
+			return NULL;
+		}
+	}
+}
+
+/*
+ * Validate the path starting at ptr and store it in url->path.
+ *
+ * The path extends up to the next '?' or NUL byte; an empty path is not
+ * stored.  Bytes outside of the allowed ASCII set are handed to parse_mb().
+ * NOTE(review): '#' is not handled as a delimiter here, so it ends up in
+ * parse_mb() - confirm whether that is intended.
+ *
+ * Returns a pointer to the delimiter that ended the path, or NULL if the
+ * path is invalid or no delimiter was found up to and including end.
+ */
+static const char *parse_path(php_http_url_t *url, const char *ptr, const char *end)
+{
+	size_t mb;
+	const char *tmp = ptr;
+	TSRMLS_FETCH_FROM_CTX(url->ts);
+
+	do {
+		switch (*ptr) {
+		case '?':
+		case '\0':
+			if ((url->path.len = ptr - tmp)) {
+				url->path.str = estrndup(tmp, url->path.len);
+			}
+			return ptr;
+
+		case '%':
+			if (end - ptr > 1 && ptr[1] == '%') {
+				/*
+				 * "%%" is let through unvalidated; consume exactly the two
+				 * percent signs, so that a following delimiter is not
+				 * skipped
+				 */
+				++ptr;
+				break;
+			}
+			if (end - ptr <= 2 || !isxdigit((unsigned char) ptr[1]) || !isxdigit((unsigned char) ptr[2])) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING,
+						"Failed to parse path; invalid percent encoding at pos %u in '%s'",
+						(unsigned) (ptr - tmp), tmp);
+				return NULL;
+			}
+			/* skip the two hex digits */
+			ptr += 2;
+			break;
+
+		case '/': /* yeah, well */
+		case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+		case '+': case ',': case ';': case '=': /* sub-delims */
+		case '-': case '.': case '_': case '~': /* unreserved */
+		case ':': case '@': /* pchar */
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+		case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+		case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+		case 'V': case 'W': case 'X': case 'Y': case 'Z':
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+		case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+		case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+		case 'v': case 'w': case 'x': case 'y': case 'z':
+		case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+		case '7': case '8': case '9':
+			/* allowed */
+			break;
+
+		default:
+			if (!(mb = parse_mb(url, PARSE_PATH, ptr, end, tmp, 0))) {
+				return NULL;
+			}
+			/* the loop increment accounts for the last byte of the character */
+			ptr += mb - 1;
+		}
+	} while (++ptr <= end);
+
+	return NULL;
+}
+
+/*
+ * Validate the query starting at ptr and store it in url->query.
+ *
+ * ptr points at the '?' introducing the query, or at the terminating NUL
+ * byte if there is none; the '?' itself is not stored.  The query extends
+ * up to the next '#' or NUL byte.  Bytes outside of the allowed ASCII set
+ * are handed to parse_mb().
+ *
+ * Returns a pointer to the delimiter that ended the query, or NULL if the
+ * query is invalid or no delimiter was found up to and including end.
+ */
+static const char *parse_query(php_http_url_t *url, const char *ptr, const char *end)
+{
+	size_t mb;
+	/* start of the stored query: one past the introducing delimiter */
+	const char *tmp = ptr + !!*ptr;
+	TSRMLS_FETCH_FROM_CTX(url->ts);
+
+	do {
+		switch (*ptr) {
+		case '#':
+		case '\0':
+			if ((url->query.len = ptr - tmp)) {
+				url->query.str = estrndup(tmp, url->query.len);
+			}
+			return ptr;
+
+		case '%':
+			if (end - ptr > 1 && ptr[1] == '%') {
+				/*
+				 * "%%" is let through unvalidated; consume exactly the two
+				 * percent signs, so that a following delimiter is not
+				 * skipped
+				 */
+				++ptr;
+				break;
+			}
+			if (end - ptr <= 2 || !isxdigit((unsigned char) ptr[1]) || !isxdigit((unsigned char) ptr[2])) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING,
+						"Failed to parse query; invalid percent encoding at pos %u in '%s'",
+						(unsigned) (ptr - tmp), tmp);
+				return NULL;
+			}
+			/* skip the two hex digits */
+			ptr += 2;
+			break;
+
+		case '?': case '/': /* yeah, well */
+		case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+		case '+': case ',': case ';': case '=': /* sub-delims */
+		case '-': case '.': case '_': case '~': /* unreserved */
+		case ':': case '@': /* pchar */
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+		case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+		case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+		case 'V': case 'W': case 'X': case 'Y': case 'Z':
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+		case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+		case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+		case 'v': case 'w': case 'x': case 'y': case 'z':
+		case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+		case '7': case '8': case '9':
+			/* allowed */
+			break;
+
+		default:
+			if (!(mb = parse_mb(url, PARSE_QUERY, ptr, end, tmp, 0))) {
+				return NULL;
+			}
+			/* the loop increment accounts for the last byte of the character */
+			ptr += mb - 1;
+		}
+	} while (++ptr <= end);
+
+	return NULL;
+}
+
+/*
+ * Validate the fragment starting at ptr and store it in url->fragment.
+ *
+ * ptr points at the '#' introducing the fragment, or at the terminating NUL
+ * byte if there is none; the '#' itself is not stored.  The fragment extends
+ * up to the NUL byte.  Bytes outside of the allowed ASCII set are handed to
+ * parse_mb().
+ *
+ * Returns a pointer to the terminating NUL byte, or NULL if the fragment is
+ * invalid or no terminator was found up to and including end.
+ */
+static const char *parse_fragment(php_http_url_t *url, const char *ptr, const char *end)
+{
+	size_t mb;
+	/* start of the stored fragment: one past the introducing delimiter */
+	const char *tmp = ptr + !!*ptr;
+	TSRMLS_FETCH_FROM_CTX(url->ts);
+
+	/*
+	 * Step over the introducing '#'; it is not part of the allowed set below
+	 * and would otherwise be rejected as an unexpected byte right away.
+	 */
+	ptr = tmp;
+
+	do {
+		switch (*ptr) {
+		case '\0':
+			if ((url->fragment.len = ptr - tmp)) {
+				url->fragment.str = estrndup(tmp, url->fragment.len);
+			}
+			return ptr;
+
+		case '%':
+			if (end - ptr > 1 && ptr[1] == '%') {
+				/*
+				 * "%%" is let through unvalidated; consume exactly the two
+				 * percent signs, so that the terminator is not skipped
+				 */
+				++ptr;
+				break;
+			}
+			if (end - ptr <= 2 || !isxdigit((unsigned char) ptr[1]) || !isxdigit((unsigned char) ptr[2])) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING,
+						"Failed to parse fragment; invalid percent encoding at pos %u in '%s'",
+						(unsigned) (ptr - tmp), tmp);
+				return NULL;
+			}
+			/* skip the two hex digits */
+			ptr += 2;
+			break;
+
+		case '?': case '/': /* yeah, well */
+		case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
+		case '+': case ',': case ';': case '=': /* sub-delims */
+		case '-': case '.': case '_': case '~': /* unreserved */
+		case ':': case '@': /* pchar */
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+		case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+		case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+		case 'V': case 'W': case 'X': case 'Y': case 'Z':
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+		case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+		case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+		case 'v': case 'w': case 'x': case 'y': case 'z':
+		case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+		case '7': case '8': case '9':
+			/* allowed */
+			break;
+
+		default:
+			if (!(mb = parse_mb(url, PARSE_FRAGMENT, ptr, end, tmp, 0))) {
+				return NULL;
+			}
+			/* the loop increment accounts for the last byte of the character */
+			ptr += mb - 1;
+		}
+	} while (++ptr <= end);
+
+	return NULL;
+}
+
+static const char *parse_hier(php_http_url_t *url, const char *ptr, const char *end) /* parses an optional "//authority" followed by the path; returns the result of parse_path(), or NULL if the authority failed to parse */
+{
+ if (*ptr == '/') {
+ if (end - ptr > 1) {
+ if (*(ptr + 1) == '/') { /* "//" introduces an authority */
+ if (!(ptr = parse_authority(url, ptr + 2, end))) {
+ return NULL;
}
+ }
+ }
+ }
+ return parse_path(url, ptr, end); /* ptr is either unchanged or the delimiter parse_authority() stopped at */
+}
+
+/*
+ * Try to read a scheme ("name:") from the start of the URL.
+ *
+ * On success the scheme is stored in url->scheme and a pointer to the byte
+ * following the ':' is returned.  If the input does not start with a scheme,
+ * nothing is stored and the original ptr is returned, so that the caller can
+ * go on parsing from there.  Multibyte characters are checked silently with
+ * parse_mb().
+ */
+static const char *parse_scheme(php_http_url_t *url, const char *ptr, const char *end)
+{
+	const char *start = ptr;
+
+	for (;;) {
+		const char c = *ptr;
+
+		if (c == ':') {
+			/* scheme delimiter */
+			url->scheme.len = ptr - start;
+			url->scheme.str = estrndup(start, url->scheme.len);
+			return ptr + 1;
+		}
+
+		if ((c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.') {
+			/* these must not open a scheme */
+			if (ptr == start) {
+				return start;
+			}
+		} else if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) {
+			size_t mb = parse_mb(url, PARSE_SCHEME, ptr, end, start, 1);
+
+			if (!mb) {
+				/* soft fail; parse path next */
+				return start;
+			}
+			ptr += mb - 1;
+		}
+
+		if (++ptr == end) {
+			break;
+		}
+	}
+
+	return start;
+}
+
+php_http_url_t *php_http_url_init(php_http_url_t *url, const char *str, size_t len, unsigned flags TSRMLS_DC) /* parses str into url (allocated here if url is NULL); returns the url, or NULL after releasing it on a parse failure */
+{
+ const char *ptr, *end = str + len;
+ zend_bool free_url = !url; /* only free the structure itself if it was allocated here */
+
+ if (url) {
+ memset(url, 0, sizeof(*url));
+ } else {
+ url = ecalloc(1, sizeof(*url));
+ }
+
+ url->flags = flags;
+ TSRMLS_SET_CTX(url->ts);
+
+ if ((ptr = str) && !(str = parse_scheme(url, ptr, end))) { /* each stage continues where the previous one stopped; all stages are skipped if str is NULL */
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL scheme: '%s'", ptr);
+ if (free_url) {
+ php_http_url_free(&url);
+ } else {
+ php_http_url_dtor(url);
+ }
+ return NULL;
+ }
- php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC);
- php_http_url_to_struct(res_purl, getThis() TSRMLS_CC);
+ if ((ptr = str) && !(str = parse_hier(url, ptr, end))) { /* no warning is raised here for this stage */
+ if (free_url) {
+ php_http_url_free(&url);
+ } else {
+ php_http_url_dtor(url);
+ }
+ return NULL;
+ }
- php_url_free(res_purl);
- if (old_purl) {
- php_url_free(old_purl);
+ if ((ptr = str) && !(str = parse_query(url, ptr, end))) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL query: '%s'", ptr);
+ if (free_url) {
+ php_http_url_free(&url);
+ } else {
+ php_http_url_dtor(url);
+ }
+ return NULL;
+ }
+
+ if ((ptr = str) && !(str = parse_fragment(url, ptr, end))) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL fragment: '%s'", ptr);
+ if (free_url) {
+ php_http_url_free(&url);
+ } else {
+ php_http_url_dtor(url);
+ }
+ return NULL;
+ }
+
+ return url;
+}
+
+ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct, 0, 0, 0)
+ ZEND_ARG_INFO(0, old_url)
+ ZEND_ARG_INFO(0, new_url)
+ ZEND_ARG_INFO(0, flags)
+ZEND_END_ARG_INFO();
+PHP_METHOD(HttpUrl, __construct) /* builds this object's URL parts from the optional old_url and new_url, combined by php_http_url() according to flags */
+{
+ zval *new_url = NULL, *old_url = NULL;
+ long flags = PHP_HTTP_URL_FROM_ENV;
+ zend_error_handling zeh;
+
+ php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|z!z!l", &old_url, &new_url, &flags), invalid_arg, return);
+
+ zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC); /* must be restored on every path that leaves the function below */
+ {
+ php_url *res_purl, *new_purl = NULL, *old_purl = NULL;
+
+ if (new_url) {
+ switch (Z_TYPE_P(new_url)) {
+ case IS_OBJECT:
+ case IS_ARRAY:
+ new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC);
+ break;
+ default: { /* anything else is converted to a string and parsed */
+ zval *cpy = php_http_ztyp(IS_STRING, new_url);
+
+ new_purl = php_url_parse(Z_STRVAL_P(cpy));
+ zval_ptr_dtor(&cpy);
+ break;
}
+ }
+ if (!new_purl) {
+ zend_restore_error_handling(&zeh TSRMLS_CC);
+ return;
+ }
+ }
+ if (old_url) {
+ switch (Z_TYPE_P(old_url)) {
+ case IS_OBJECT:
+ case IS_ARRAY:
+ old_purl = php_http_url_from_struct(NULL, HASH_OF(old_url) TSRMLS_CC);
+ break;
+ default: { /* anything else is converted to a string and parsed */
+ zval *cpy = php_http_ztyp(IS_STRING, old_url);
+
+ old_purl = php_url_parse(Z_STRVAL_P(cpy));
+ zval_ptr_dtor(&cpy);
+ break;
+ }
+ }
+ if (!old_purl) {
if (new_purl) {
php_url_free(new_purl);
}
- } end_error_handling();
+ zend_restore_error_handling(&zeh TSRMLS_CC);
+ return;
+ }
+ }
+
+ php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC);
+ php_http_url_to_struct(res_purl, getThis() TSRMLS_CC); /* copy the resulting parts onto this object */
+
+ php_url_free(res_purl);
+ if (old_purl) {
+ php_url_free(old_purl);
+ }
+ if (new_purl) {
+ php_url_free(new_purl);
}
- } end_error_handling();
+ }
+ zend_restore_error_handling(&zeh TSRMLS_CC);
}
+ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_mod, 0, 0, 1)
+ ZEND_ARG_INFO(0, more_url_parts)
+ ZEND_ARG_INFO(0, flags)
+ZEND_END_ARG_INFO();
PHP_METHOD(HttpUrl, mod)
{
zval *new_url = NULL;
long flags = PHP_HTTP_URL_JOIN_PATH | PHP_HTTP_URL_JOIN_QUERY;
+ zend_error_handling zeh;
- if (SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z!|l", &new_url, &flags)) {
+ php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z!|l", &new_url, &flags), invalid_arg, return);
+
+ zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
+ {
php_url *new_purl = NULL, *old_purl = NULL;
if (new_url) {
}
}
if (!new_purl) {
+ zend_restore_error_handling(&zeh TSRMLS_CC);
return;
}
}
php_url_free(new_purl);
}
}
+ zend_restore_error_handling(&zeh TSRMLS_CC);
}
+ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toString, 0, 0, 0)
+ZEND_END_ARG_INFO();
PHP_METHOD(HttpUrl, toString)
{
if (SUCCESS == zend_parse_parameters_none()) {
RETURN_EMPTY_STRING();
}
+ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toArray, 0, 0, 0)
+ZEND_END_ARG_INFO();
PHP_METHOD(HttpUrl, toArray)
{
+ php_url *purl;
+
if (SUCCESS != zend_parse_parameters_none()) {
- RETURN_FALSE;
+ return; /* return_value is left untouched on a parameter error */
}
- array_init(return_value);
- array_copy(HASH_OF(getThis()), HASH_OF(return_value));
+
+ /* strip any non-URL properties */
+ purl = php_http_url_from_struct(NULL, HASH_OF(getThis()) TSRMLS_CC); /* round trip through a php_url */
+ php_http_url_to_struct(purl, return_value TSRMLS_CC);
+ php_url_free(purl);
}
+ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_parse, 0, 0, 1)
+	ZEND_ARG_INFO(0, url)
+	ZEND_ARG_INFO(0, flags)
+ZEND_END_ARG_INFO();
+/*
+ * Static method: parse the URL string with php_http_url_init() and return a
+ * new object of the URL class, setting a property for every non-empty part.
+ * While parsing, errors are thrown as php_http_exception_bad_url_class_entry.
+ * The return value is left untouched if parsing fails.
+ */
+PHP_METHOD(HttpUrl, parse)
+{
+	char *url_str;
+	int url_len;
+	long flags = 0;
+	php_http_url_t parsed;
+	zend_error_handling zeh;
+
+	php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &url_str, &url_len, &flags), invalid_arg, return);
+
+	zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
+
+	if (!php_http_url_init(&parsed, url_str, url_len, flags TSRMLS_CC)) {
+		/* the parser has already released whatever it had stored */
+		zend_restore_error_handling(&zeh TSRMLS_CC);
+		return;
+	}
+
+	object_init_ex(return_value, php_http_url_class_entry);
+
+	if (parsed.scheme.len) {
+		zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("scheme"),
+				parsed.scheme.str, parsed.scheme.len TSRMLS_CC);
+	}
+	if (parsed.authority.userinfo.username.len) {
+		zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("user"),
+				parsed.authority.userinfo.username.str, parsed.authority.userinfo.username.len TSRMLS_CC);
+	}
+	if (parsed.authority.userinfo.password.len) {
+		zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("pass"),
+				parsed.authority.userinfo.password.str, parsed.authority.userinfo.password.len TSRMLS_CC);
+	}
+	if (parsed.authority.host.len) {
+		zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("host"),
+				parsed.authority.host.str, parsed.authority.host.len TSRMLS_CC);
+	}
+	if (parsed.authority.port) {
+		zend_update_property_long(php_http_url_class_entry, return_value, ZEND_STRL("port"),
+				parsed.authority.port TSRMLS_CC);
+	}
+	if (parsed.path.len) {
+		zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("path"),
+				parsed.path.str, parsed.path.len TSRMLS_CC);
+	}
+	if (parsed.query.len) {
+		zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("query"),
+				parsed.query.str, parsed.query.len TSRMLS_CC);
+	}
+	if (parsed.fragment.len) {
+		zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("fragment"),
+				parsed.fragment.str, parsed.fragment.len TSRMLS_CC);
+	}
+
+	php_http_url_dtor(&parsed);
+	zend_restore_error_handling(&zeh TSRMLS_CC);
+}
+
+static zend_function_entry php_http_url_methods[] = { /* method table handed to INIT_NS_CLASS_ENTRY() in MINIT */
+ PHP_ME(HttpUrl, __construct, ai_HttpUrl___construct, ZEND_ACC_PUBLIC|ZEND_ACC_CTOR)
+ PHP_ME(HttpUrl, mod, ai_HttpUrl_mod, ZEND_ACC_PUBLIC)
+ PHP_ME(HttpUrl, toString, ai_HttpUrl_toString, ZEND_ACC_PUBLIC)
+ ZEND_MALIAS(HttpUrl, __toString, toString, ai_HttpUrl_toString, ZEND_ACC_PUBLIC) /* __toString is an alias of toString */
+ PHP_ME(HttpUrl, toArray, ai_HttpUrl_toArray, ZEND_ACC_PUBLIC)
+ PHP_ME(HttpUrl, parse, ai_HttpUrl_parse, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) /* the only static method */
+ EMPTY_FUNCTION_ENTRY
+};
+
+zend_class_entry *php_http_url_class_entry;
+
PHP_MINIT_FUNCTION(http_url)
{
- PHP_HTTP_REGISTER_CLASS(http, Url, http_url, php_http_object_get_class_entry(), 0);
+ zend_class_entry ce = {0};
+
+ INIT_NS_CLASS_ENTRY(ce, "http", "Url", php_http_url_methods);
+ php_http_url_class_entry = zend_register_internal_class(&ce TSRMLS_CC);
zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("scheme"), ZEND_ACC_PUBLIC TSRMLS_CC);
zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("user"), ZEND_ACC_PUBLIC TSRMLS_CC);
zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("FROM_ENV"), PHP_HTTP_URL_FROM_ENV TSRMLS_CC);
zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("SANITIZE_PATH"), PHP_HTTP_URL_SANITIZE_PATH TSRMLS_CC);
+#ifdef PHP_HTTP_HAVE_WCHAR
+ zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBLOC"), PHP_HTTP_URL_PARSE_MBLOC TSRMLS_CC);
+#endif
+ zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC);
+#ifdef PHP_HTTP_HAVE_IDN
+ zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_IDN"), PHP_HTTP_URL_PARSE_IDN TSRMLS_CC);
+#endif
+
return SUCCESS;
}