fix edge cases with @
[m6w6/ext-http] / php_http_url.c
index 3c3fa4d58d8e2c687e7f91216f73c1b81eacda22..1f06271f9527183f12c9c5c2a46506b14a674b74 100644 (file)
@@ -379,10 +379,12 @@ static const char * const parse_what[] = {
        "fragment"
 };
 
+static const char parse_xdigits[] = "0123456789ABCDEF"; /* uppercase hex digit lookup, indexed by a nibble value (0..15); used to emit the two digits of a "%XX" escape */
+
 static size_t parse_mb(php_http_url_t *url, parse_mb_what_t what, const char *ptr, const char *end, const char *begin, zend_bool silent)
 {
        size_t consumed = 0;
-       zend_bool idn = (what == PARSE_HOSTINFO) && (url->flags & PHP_HTTP_URL_PARSE_IDN);
+       zend_bool idn = (what == PARSE_HOSTINFO) && (url->flags & PHP_HTTP_URL_PARSE_TOIDN);
 
        if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
                consumed = parse_mb_utf8(url, ptr, end, idn);
@@ -394,7 +396,18 @@ static size_t parse_mb(php_http_url_t *url, parse_mb_what_t what, const char *pt
 #endif
 
        if (consumed) {
-               PHP_HTTP_DUFF(consumed, url->buffer[url->offset++] = *ptr++);
+               if (!(url->flags & PHP_HTTP_URL_PARSE_TOPCT) || what == PARSE_HOSTINFO || what == PARSE_SCHEME) {
+                       PHP_HTTP_DUFF(consumed, url->buffer[url->offset++] = *ptr++);
+               } else {
+                       int i = 0;
+
+                       PHP_HTTP_DUFF(consumed,
+                                       url->buffer[url->offset++] = '%';
+                                       url->buffer[url->offset++] = parse_xdigits[((unsigned char) ptr[i]) >> 4];
+                                       url->buffer[url->offset++] = parse_xdigits[((unsigned char) ptr[i]) & 0xf];
+                                       ++i;
+                       );
+               }
        } else if (!silent) {
                TSRMLS_FETCH_FROM_CTX(url->ts);
                php_error_docref(NULL TSRMLS_CC, E_WARNING,
@@ -411,6 +424,8 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr)
        const char *password = NULL, *end = url->ptr, *tmp = ptr;
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
+       url->user = &url->buffer[url->offset];
+
        do {
                switch (*ptr) {
                case ':':
@@ -421,7 +436,8 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr)
                                return FAILURE;
                        }
                        password = ptr + 1;
-                       url->buffer[url->offset++] = *ptr;
+                       url->buffer[url->offset++] = 0;
+                       url->pass = &url->buffer[url->offset];
                        break;
 
                case '%':
@@ -461,15 +477,8 @@ static STATUS parse_userinfo(php_http_url_t *url, const char *ptr)
                }
        } while(++ptr != end);
 
-       if (password) {
-               url->user = &url->buffer[url->offset - (end - password) - (password - tmp)];
-               url->buffer[url->offset - (end - password) - 1] = 0;
-               url->pass = &url->buffer[url->offset - (end - password)];
-               url->buffer[url->offset++] = 0;
-       } else {
-               url->user = &url->buffer[url->offset - (end - tmp)];
-               url->buffer[url->offset++] = 0;
-       }
+
+       url->buffer[url->offset++] = 0;
 
        return SUCCESS;
 }
@@ -519,7 +528,7 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr)
                case ':':
                        if (port) {
                                php_error_docref(NULL TSRMLS_CC, E_WARNING,
-                                               "Failed to parse port; duplicate ':' at pos %u in '%s'",
+                                               "Failed to parse port; unexpected ':' at pos %u in '%s'",
                                                (unsigned) (ptr - tmp), tmp);
                                return FAILURE;
                        }
@@ -587,7 +596,7 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr)
        }
 
 #ifdef PHP_HTTP_HAVE_IDN
-       if (url->flags & PHP_HTTP_URL_PARSE_IDN) {
+       if (url->flags & PHP_HTTP_URL_PARSE_TOIDN) {
                char *idn = NULL;
                int rv = -1;
 
@@ -616,12 +625,19 @@ static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr)
 
 static const char *parse_authority(php_http_url_t *url)
 {
-       const char *tmp = url->ptr;
+       const char *tmp = url->ptr, *host = NULL;
 
        do {
                switch (*url->ptr) {
                case '@':
                        /* userinfo delimiter */
+                       if (host) {
+                               TSRMLS_FETCH_FROM_CTX(url->ts);
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING,
+                                               "Failed to parse userinfo; unexpected '@'");
+                               return NULL;
+                       }
+                       host = url->ptr + 1;
                        if (tmp != url->ptr && SUCCESS != parse_userinfo(url, tmp)) {
                                return NULL;
                        }
@@ -654,15 +670,18 @@ static const char *parse_path(php_http_url_t *url)
                return url->ptr;
        }
        tmp = url->ptr;
+       url->path = &url->buffer[url->offset];
 
        do {
                switch (*url->ptr) {
+               case '#':
                case '?':
                case '\0':
                        /* did we have any path component ? */
                        if (tmp != url->ptr) {
-                               url->path = &url->buffer[url->offset - (url->ptr - tmp)];
                                url->buffer[url->offset++] = 0;
+                       } else {
+                               url->path = NULL;
                        }
                        return url->ptr;
 
@@ -714,19 +733,19 @@ static const char *parse_query(php_http_url_t *url)
        const char *tmp = url->ptr + !!*url->ptr;
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
-       /* is there actually a query to parse ? */
-       if (!*url->ptr || *url->ptr != '?') {
+       /* is there actually a query to parse? */
+       if (*url->ptr != '?') {
                return url->ptr;
        }
 
        /* skip initial '?' */
-       tmp = url->ptr + 1;
+       tmp = ++url->ptr;
+       url->query = &url->buffer[url->offset];
 
        do {
                switch (*url->ptr) {
                case '#':
                case '\0':
-                       url->query = &url->buffer[url->offset - (url->ptr - tmp)];
                        url->buffer[url->offset++] = 0;
                        return url->ptr;
 
@@ -778,18 +797,18 @@ static const char *parse_fragment(php_http_url_t *url)
        const char *tmp;
        TSRMLS_FETCH_FROM_CTX(url->ts);
 
-       /* is there actually a fragment to parse */
-       if (!*url->ptr || *url->ptr != '#') {
+       /* is there actually a fragment to parse? */
+       if (*url->ptr != '#') {
                return url->ptr;
        }
 
        /* skip initial '#' */
-       tmp = url->ptr + 1;
+       tmp = ++url->ptr;
+       url->fragment = &url->buffer[url->offset];
 
        do {
                switch (*url->ptr) {
                case '\0':
-                       url->fragment = &url->buffer[url->offset - (url->ptr - tmp)];
                        url->buffer[url->offset++] = 0;
                        return url->ptr;
 
@@ -805,7 +824,7 @@ static const char *parse_fragment(php_http_url_t *url)
                        url->buffer[url->offset++] = *url->ptr;
                        break;
 
-               case '?': case '/': /* yeah, well */
+               case '?': case '/':
                case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
                case '+': case ',': case ';': case '=': /* sub-delims */
                case '-': case '.': case '_': case '~': /* unreserved */
@@ -1197,8 +1216,9 @@ PHP_MINIT_FUNCTION(http_url)
 #endif
        zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC);
 #ifdef PHP_HTTP_HAVE_IDN
-       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_IDN"), PHP_HTTP_URL_PARSE_IDN TSRMLS_CC);
+       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOIDN"), PHP_HTTP_URL_PARSE_TOIDN TSRMLS_CC);
 #endif
+       zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_TOPCT"), PHP_HTTP_URL_PARSE_TOPCT TSRMLS_CC);
 
        return SUCCESS;
 }