- add HTTP_URL_STRIP_ALL constant
[m6w6/ext-http] / http_url_api.c
index 9c35a5c4a989f906191c929ad522cbf865429db6..827c7b6a2c5d6c1db62c459deb9af71ed7aaa011 100644 (file)
     | modification, are permitted provided that the conditions mentioned |
     | in the accompanying LICENSE file are met.                          |
     +--------------------------------------------------------------------+
-    | Copyright (c) 2004-2005, Michael Wallner <mike@php.net>            |
+    | Copyright (c) 2004-2006, Michael Wallner <mike@php.net>            |
     +--------------------------------------------------------------------+
 */
 
 /* $Id$ */
 
-#ifdef HAVE_CONFIG_H
-#      include "config.h"
-#endif
-#include "php.h"
+#define HTTP_WANT_SAPI
+#define HTTP_WANT_NETDB
+#include "php_http.h"
 
-#include "SAPI.h"
 #include "zend_ini.h"
 #include "php_output.h"
-#include "ext/standard/url.h"
+#include "ext/standard/php_string.h"
 
-#include "php_http.h"
 #include "php_http_api.h"
+#include "php_http_querystring_api.h"
 #include "php_http_url_api.h"
-#include "php_http_std_defs.h"
-
-#include "phpstr/phpstr.h"
 
+/*
+ * Returns an estrdup()'ed name for the local machine.
+ *
+ * PHP_WIN32:      the gethostname() result.
+ * HAVE_UNISTD_H:  the gethostname() result followed by "." and the
+ *                 getdomainname() result; the dot is dropped again when the
+ *                 domain name is empty or the literal "(none)".
+ * Fallback:       "localhost", used whenever no usable host name was
+ *                 obtained (lookup failure, empty host name, host name too
+ *                 long to append a domain, or getdomainname() failure).
+ *
+ * NOTE(review): lenof() is presumably sizeof() - 1, which together with the
+ * zero-initialised buffer keeps hostname NUL-terminated -- confirm.
+ */
+static inline char *localhostname(void)
+{
+       char hostname[1024] = {0};
+       
 #ifdef PHP_WIN32
-#      include <winsock2.h>
-#elif defined(HAVE_NETDB_H)
-#      include <netdb.h>
+       /* an empty host name is useless as URL host; fall through to "localhost" */
+       if (SUCCESS == gethostname(hostname, lenof(hostname)) && *hostname) {
+               return estrdup(hostname);
+       }
+#elif defined(HAVE_UNISTD_H)
+       if (SUCCESS == gethostname(hostname, lenof(hostname)) && *hostname) {
+               size_t hlen = strlen(hostname);
+               
+               /* only append a domain if at least "(none)" would still fit */
+               if (hlen <= lenof(hostname) - lenof("(none)")) {
+                       hostname[hlen++] = '.';
+                       if (SUCCESS == getdomainname(&hostname[hlen], lenof(hostname) - hlen)) {
+                               /* no domain configured: remove the dot appended above */
+                               if (!hostname[hlen] || !strcmp(&hostname[hlen], "(none)")) {
+                                       hostname[hlen - 1] = '\0';
+                               }
+                               return estrdup(hostname);
+                       }
+               }
+       }
 #endif
+       return estrdup("localhost");
+}
+
+PHP_MINIT_FUNCTION(http_url) /* init hook of the URL API: hands every HTTP_URL_* flag to HTTP_LONG_CONSTANT (presumably registering it as a PHP constant -- macro defined elsewhere) */
+{
+       HTTP_LONG_CONSTANT("HTTP_URL_REPLACE", HTTP_URL_REPLACE);
+       HTTP_LONG_CONSTANT("HTTP_URL_JOIN_PATH", HTTP_URL_JOIN_PATH); /* tested by _http_build_url() when combining old and new path */
+       HTTP_LONG_CONSTANT("HTTP_URL_JOIN_QUERY", HTTP_URL_JOIN_QUERY); /* tested by _http_build_url() when combining old and new query */
+       HTTP_LONG_CONSTANT("HTTP_URL_STRIP_USER", HTTP_URL_STRIP_USER);
+       HTTP_LONG_CONSTANT("HTTP_URL_STRIP_PASS", HTTP_URL_STRIP_PASS);
+       HTTP_LONG_CONSTANT("HTTP_URL_STRIP_AUTH", HTTP_URL_STRIP_AUTH); /* _http_build_url() skips both user and pass when this is set */
+       HTTP_LONG_CONSTANT("HTTP_URL_STRIP_PORT", HTTP_URL_STRIP_PORT);
+       HTTP_LONG_CONSTANT("HTTP_URL_STRIP_PATH", HTTP_URL_STRIP_PATH);
+       HTTP_LONG_CONSTANT("HTTP_URL_STRIP_QUERY", HTTP_URL_STRIP_QUERY);
+       HTTP_LONG_CONSTANT("HTTP_URL_STRIP_FRAGMENT", HTTP_URL_STRIP_FRAGMENT);
+       HTTP_LONG_CONSTANT("HTTP_URL_STRIP_ALL", HTTP_URL_STRIP_ALL); /* the constant named in this commit's subject line */
+       return SUCCESS; /* unconditional: no result of the calls above is checked */
+}
 
-ZEND_EXTERN_MODULE_GLOBALS(http);
+PHP_HTTP_API char *_http_absolute_url(const char *url TSRMLS_DC) /* returns the string http_build_url() produces for url (or for no URL at all when url is NULL); NULL if url cannot be parsed */
+{
+       char *abs = NULL;
+       php_url *purl = NULL;
+       
+       if (url) {
+               purl = php_url_parse(abs = estrdup(url)); /* parse a private copy, leaving the caller's const string untouched */
+               STR_SET(abs, NULL); /* presumably frees the copy and resets abs to NULL -- macro defined elsewhere, confirm */
+               if (!purl) {
+                       http_error_ex(HE_WARNING, HTTP_E_URL, "Could not parse URL (%s)", url);
+                       return NULL;
+               }
+       }
+       
+       http_build_url(0, purl, NULL, NULL, &abs, NULL); /* no flags, no overriding URL; abs receives the built string, purl may be NULL here */
+       
+       if (purl) {
+               php_url_free(purl);
+       }
+       
+       return abs; /* allocated inside http_build_url(); ownership passes to the caller */
+}
 
-/* {{{ char *http_absolute_url(char *) */
-PHP_HTTP_API char *_http_absolute_url_ex(
-       const char *url,        size_t url_len,
-       const char *proto,      size_t proto_len,
-       const char *host,       size_t host_len,
-       unsigned port TSRMLS_DC)
+/* {{{ void http_build_url(int flags, const php_url *, const php_url *, php_url **, char **, size_t *) */
+/*
+ * Builds a URL out of old_url with the parts of new_url laid over it.
+ *
+ * flags    HTTP_URL_STRIP_* bits suppress the respective part;
+ *          HTTP_URL_JOIN_PATH appends a relative new path to the directory
+ *          of the old path; HTTP_URL_JOIN_QUERY merges both query strings.
+ * old_url  base URL, may be NULL.
+ * new_url  overriding URL, may be NULL; a part set here wins over old_url.
+ * url_ptr  if non-NULL receives the resulting php_url, which the caller
+ *          then owns; otherwise the structure is freed before returning.
+ * url_str  if non-NULL receives an emalloc()'ed string form of the URL,
+ *          capped by HTTP_URL_MAXLEN (a notice is raised at the cap).
+ * url_len  if non-NULL (and url_str is non-NULL) receives strlen(*url_str).
+ *
+ * Parts still missing afterwards are filled in: the scheme from the HTTPS
+ * server variable or the port, the host from HTTP_HOST / SERVER_NAME or
+ * localhostname(), the path from the request URI (or "/").  A relative path
+ * is resolved against the directory of the request URI, "/./" and "/../"
+ * segments are collapsed, and a port that is the default for the scheme is
+ * reset to 0 so that it is not printed.
+ */
+PHP_HTTP_API void _http_build_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC)
 {
-#if defined(PHP_WIN32) || defined(HAVE_NETDB_H)
+#ifdef HTTP_HAVE_NETDB
        struct servent *se;
 #endif
-       php_url *purl = NULL, furl;
-       size_t full_len = 0;
-       zval *zhost = NULL;
-       char *scheme = NULL, *uri, *URL;
+       php_url *url = ecalloc(1, sizeof(php_url));
 
-       if ((!url || !url_len) && (
-                       (!(url = SG(request_info).request_uri)) ||
-                       (!(url_len = strlen(SG(request_info).request_uri))))) {
-               http_error(HE_WARNING, HTTP_E_RUNTIME, "Cannot build an absolute URI if supplied URL and REQUEST_URI is empty");
-               return NULL;
+#define __URLSET(u,n) \
+       ((u)&&(u)->n)
+#define __URLCPY(n) \
+       url->n = __URLSET(new_url,n) ? estrdup(new_url->n) : (__URLSET(old_url,n) ? estrdup(old_url->n) : NULL)
+       
+       if (!(flags & HTTP_URL_STRIP_PORT)) {
+               url->port = (new_url&&new_url->port) ? new_url->port : ((old_url) ? old_url->port : 0);
        }
-
-       URL = ecalloc(1, HTTP_URI_MAXLEN + 1);
-       uri = estrndup(url, url_len);
-       if (!(purl = php_url_parse(uri))) {
-               http_error_ex(HE_WARNING, HTTP_E_URL, "Could not parse supplied URL: %s", url);
-               return NULL;
+       if ((!(flags & HTTP_URL_STRIP_AUTH)) && (!(flags & HTTP_URL_STRIP_USER))) {
+               __URLCPY(user);
        }
-
-       furl.user               = purl->user;
-       furl.pass               = purl->pass;
-       furl.path               = purl->path;
-       furl.query              = purl->query;
-       furl.fragment   = purl->fragment;
-
-       if (proto && proto_len) {
-               furl.scheme = scheme = estrdup(proto);
-       } else if (purl->scheme) {
-               furl.scheme = purl->scheme;
-#if defined(PHP_WIN32) || defined(HAVE_NETDB_H)
-       } else if (port && (se = getservbyport(port, "tcp"))) {
-               furl.scheme = (scheme = estrdup(se->s_name));
-#endif
-       } else {
-               furl.scheme = "http";
+       if ((!(flags & HTTP_URL_STRIP_AUTH)) && (!(flags & HTTP_URL_STRIP_PASS))) {
+               __URLCPY(pass);
        }
-
-       if (port) {
-               furl.port = port;
-       } else if (purl->port) {
-               furl.port = purl->port;
-       } else if (strncmp(furl.scheme, "http", 4)) {
-#if defined(PHP_WIN32) || defined(HAVE_NETDB_H)
-               if ((se = getservbyname(furl.scheme, "tcp"))) {
-                       furl.port = se->s_port;
+       
+       __URLCPY(scheme);
+       __URLCPY(host);
+       
+       if (!(flags & HTTP_URL_STRIP_PATH)) {
+               if ((flags & HTTP_URL_JOIN_PATH) && __URLSET(old_url, path) && __URLSET(new_url, path) && *new_url->path != '/') {
+                       size_t old_path_len = strlen(old_url->path), new_path_len = strlen(new_url->path);
+                       
+                       url->path = ecalloc(1, old_path_len + new_path_len + 1 + 1);
+                       
+                       strcat(url->path, old_url->path);
+                       /* guard the index: an empty old path has no last character */
+                       if (old_path_len && url->path[old_path_len - 1] != '/') {
+                               size_t dir_len;
+                               
+                               php_dirname(url->path, old_path_len);
+                               dir_len = strlen(url->path);
+                               /* the directory of "/file" already is "/"; do not double the slash */
+                               if (!dir_len || url->path[dir_len - 1] != '/') {
+                                       strcat(url->path, "/");
+                               }
+                       }
+                       strcat(url->path, new_url->path);
+               } else {
+                       __URLCPY(path);
                }
-#endif
-       } else {
-               furl.port = (furl.scheme[4] == 's') ? 443 : 80;
        }
-
-       if (host) {
-               furl.host = (char *) host;
-       } else if (purl->host) {
-               furl.host = purl->host;
-       } else if (     (zhost = http_get_server_var("HTTP_HOST")) ||
-                               (zhost = http_get_server_var("SERVER_NAME"))) {
-               furl.host = Z_STRVAL_P(zhost);
-       } else {
-               furl.host = "localhost";
+       if (!(flags & HTTP_URL_STRIP_QUERY)) {
+               if ((flags & HTTP_URL_JOIN_QUERY) && __URLSET(new_url, query) && __URLSET(old_url, query)) {
+                       zval qarr, qstr;
+                       
+                       INIT_PZVAL(&qstr);
+                       INIT_PZVAL(&qarr);
+                       array_init(&qarr);
+                       
+                       /* feed the old, then the new query string into qarr */
+                       ZVAL_STRING(&qstr, old_url->query, 0);
+                       http_querystring_modify(&qarr, &qstr);
+                       ZVAL_STRING(&qstr, new_url->query, 0);
+                       http_querystring_modify(&qarr, &qstr);
+                       
+                       ZVAL_NULL(&qstr);
+                       http_querystring_update(&qarr, &qstr);
+                       url->query = Z_STRVAL(qstr);
+                       zval_dtor(&qarr);
+               } else {
+                       __URLCPY(query);
+               }
        }
-
-#define HTTP_URI_STRLCATS(URL, full_len, add_string) HTTP_URI_STRLCAT(URL, full_len, add_string, sizeof(add_string)-1)
-#define HTTP_URI_STRLCATL(URL, full_len, add_string) HTTP_URI_STRLCAT(URL, full_len, add_string, strlen(add_string))
-#define HTTP_URI_STRLCAT(URL, full_len, add_string, add_len) \
-       if ((full_len += add_len) > HTTP_URI_MAXLEN) { \
-               http_error_ex(HE_NOTICE, HTTP_E_URL, \
-                       "Absolute URI would have exceeded max URI length (%d bytes) - " \
-                       "tried to add %d bytes ('%s')", \
-                       HTTP_URI_MAXLEN, add_len, add_string); \
-               if (scheme) { \
-                       efree(scheme); \
-               } \
-               php_url_free(purl); \
-               efree(uri); \
-               return URL; \
-       } else { \
-               strcat(URL, add_string); \
+       if (!(flags & HTTP_URL_STRIP_FRAGMENT)) {
+               __URLCPY(fragment);
        }
+       
+       if (!url->scheme) {
+               zval *https = http_get_server_var("HTTPS");
+               if (https && !strcasecmp(Z_STRVAL_P(https), "ON")) {
+                       url->scheme = estrndup("https", lenof("https"));
+               } else
+               switch (url->port)
+               {
+                       case 443:
+                               url->scheme = estrndup("https", lenof("https"));
+                       break;
 
-       HTTP_URI_STRLCATL(URL, full_len, furl.scheme);
-       HTTP_URI_STRLCATS(URL, full_len, "://");
-
-       if (furl.user) {
-               HTTP_URI_STRLCATL(URL, full_len, furl.user);
-               if (furl.pass) {
-                       HTTP_URI_STRLCATS(URL, full_len, ":");
-                       HTTP_URI_STRLCATL(URL, full_len, furl.pass);
+#ifndef HTTP_HAVE_NETDB
+                       default:
+#endif
+                       case 80:
+                               url->scheme = estrndup("http", lenof("http"));
+                       break;
+                       
+#ifdef HTTP_HAVE_NETDB
+                       default:
+                               if ((se = getservbyport(htons(url->port), "tcp")) && se->s_name) {
+                                       url->scheme = estrdup(se->s_name);
+                               } else {
+                                       url->scheme = estrndup("http", lenof("http"));
+                               }
+                       break;
+#endif
                }
-               HTTP_URI_STRLCATS(URL, full_len, "@");
        }
 
-       HTTP_URI_STRLCATL(URL, full_len, furl.host);
-
-       if (    (!strcmp(furl.scheme, "http") && (furl.port != 80)) ||
-                       (!strcmp(furl.scheme, "https") && (furl.port != 443))) {
-               char port_string[8] = {0};
-               snprintf(port_string, 7, ":%u", furl.port);
-               HTTP_URI_STRLCATL(URL, full_len, port_string);
+       if (!url->host) {
+               zval *zhost;
+               
+               if ((((zhost = http_get_server_var("HTTP_HOST")) || 
+                               (zhost = http_get_server_var("SERVER_NAME")))) && Z_STRLEN_P(zhost)) {
+                       url->host = estrndup(Z_STRVAL_P(zhost), Z_STRLEN_P(zhost));
+               } else {
+                       url->host = localhostname();
+               }
        }
-
-       if (furl.path) {
-               if (furl.path[0] != '/') {
-                       HTTP_URI_STRLCATS(URL, full_len, "/");
+       
+       if (!url->path) {
+               if (SG(request_info).request_uri && SG(request_info).request_uri[0]) {
+                       const char *q = strchr(SG(request_info).request_uri, '?');
+                       
+                       if (q) {
+                               url->path = estrndup(SG(request_info).request_uri, q - SG(request_info).request_uri);
+                       } else {
+                               url->path = estrdup(SG(request_info).request_uri);
+                       }
+               } else {
+                       url->path = estrndup("/", 1);
                }
-               HTTP_URI_STRLCATL(URL, full_len, furl.path);
-       } else {
-               HTTP_URI_STRLCATS(URL, full_len, "/");
+       } else if (url->path[0] != '/' && SG(request_info).request_uri && SG(request_info).request_uri[0]) {
+               size_t ulen = strlen(SG(request_info).request_uri);
+               size_t plen = strlen(url->path);
+               char *path;
+               
+               /* cut the request URI back to its last '/' (inclusive) */
+               if (SG(request_info).request_uri[ulen-1] != '/') {
+                       for (--ulen; ulen && SG(request_info).request_uri[ulen - 1] != '/'; --ulen);
+               }
+               
+               path = emalloc(ulen + plen + 1);
+               memcpy(path, SG(request_info).request_uri, ulen);
+               memcpy(path + ulen, url->path, plen);
+               path[ulen + plen] = '\0';
+               STR_SET(url->path, path);
        }
-
-       if (furl.query) {
-               HTTP_URI_STRLCATS(URL, full_len, "?");
-               HTTP_URI_STRLCATL(URL, full_len, furl.query);
+       /* replace directory references if path is not a single slash */
+       if (url->path[0] && (url->path[0] != '/' || url->path[1])) {
+               char *ptr, *end = url->path + strlen(url->path) + 1;
+                       
+               /* every pass either shortens the string or advances ptr, so the scan terminates */
+               for (ptr = strstr(url->path, "/."); ptr; ptr = strstr(ptr, "/.")) {
+                       switch (ptr[2])
+                       {
+                               case '\0':
+                                       ptr[1] = '\0';
+                               break;
+                               
+                               case '/':
+                                       memmove(&ptr[1], &ptr[3], end - &ptr[3]);
+                               break;
+                                       
+                               case '.':
+                                       if (ptr[3] == '/' || ptr[3] == '\0') {
+                                               /* "/../" or a trailing "/..": drop the preceding segment */
+                                               char *pos = ptr[3] ? &ptr[4] : &ptr[3];
+                                               while (ptr != url->path) {
+                                                       if (*--ptr == '/') {
+                                                               break;
+                                                       }
+                                               }
+                                               memmove(&ptr[1], pos, end - pos);
+                                       } else {
+                                               /* a name that merely starts with ".."; step over it */
+                                               ++ptr;
+                                       }
+                               break;
+                               
+                               default:
+                                       /* something else */
+                                       ++ptr;
+                               break;
+                       }
+               }
        }
-
-       if (furl.fragment) {
-               HTTP_URI_STRLCATS(URL, full_len, "#");
-               HTTP_URI_STRLCATL(URL, full_len, furl.fragment);
+       
+       if (url->port) {
+               if (    ((url->port == 80) && !strcmp(url->scheme, "http"))
+                       ||      ((url->port ==443) && !strcmp(url->scheme, "https"))
+#ifdef HTTP_HAVE_NETDB
+                       ||      ((se = getservbyname(url->scheme, "tcp")) && se->s_port && 
+                                       (url->port == ntohs(se->s_port)))
+#endif
+               ) {
+                       url->port = 0;
+               }
        }
-
-       if (scheme) {
-               efree(scheme);
+       
+       if (url_str) {
+               size_t len;
+               
+               *url_str = emalloc(HTTP_URL_MAXLEN + 1);
+               
+               **url_str = '\0';
+               strlcat(*url_str, url->scheme, HTTP_URL_MAXLEN);
+               strlcat(*url_str, "://", HTTP_URL_MAXLEN);
+               
+               if (url->user && *url->user) {
+                       strlcat(*url_str, url->user, HTTP_URL_MAXLEN);
+                       if (url->pass && *url->pass) {
+                               strlcat(*url_str, ":", HTTP_URL_MAXLEN);
+                               strlcat(*url_str, url->pass, HTTP_URL_MAXLEN);
+                       }
+                       strlcat(*url_str, "@", HTTP_URL_MAXLEN);
+               }
+               
+               strlcat(*url_str, url->host, HTTP_URL_MAXLEN);
+               
+               if (url->port) {
+                       char port_str[8] = {0};
+                       
+                       snprintf(port_str, lenof(port_str), "%d", (int) url->port);
+                       strlcat(*url_str, ":", HTTP_URL_MAXLEN);
+                       strlcat(*url_str, port_str, HTTP_URL_MAXLEN);
+               }
+               
+               if (*url->path != '/') {
+                       strlcat(*url_str, "/", HTTP_URL_MAXLEN);
+               }
+               strlcat(*url_str, url->path, HTTP_URL_MAXLEN);
+               
+               if (url->query && *url->query) {
+                       strlcat(*url_str, "?", HTTP_URL_MAXLEN);
+                       strlcat(*url_str, url->query, HTTP_URL_MAXLEN);
+               }
+               
+               if (url->fragment && *url->fragment) {
+                       strlcat(*url_str, "#", HTTP_URL_MAXLEN);
+                       strlcat(*url_str, url->fragment, HTTP_URL_MAXLEN);
+               }
+               
+               /* strlcat() stores at most HTTP_URL_MAXLEN - 1 characters, so a
+                  string of that length has hit the cap and may be truncated */
+               if ((len = strlen(*url_str)) >= HTTP_URL_MAXLEN - 1) {
+                       http_error(HE_NOTICE, HTTP_E_URL, "Length of URL exceeds HTTP_URL_MAXLEN");
+               }
+               if (url_len) {
+                       *url_len = len;
+               }
+       }
+       
+       if (url_ptr) {
+               *url_ptr = url;
+       } else {
+               php_url_free(url);
        }
-       php_url_free(purl);
-       efree(uri);
-
-       return URL;
 }
 /* }}} */
 
@@ -230,6 +383,7 @@ PHP_HTTP_API STATUS _http_urlencode_hash_recursive(HashTable *ht, phpstr *str, c
                phpstr new_prefix;
                
                if (!data || !*data) {
+                       phpstr_dtor(str);
                        return FAILURE;
                }
                
@@ -247,14 +401,14 @@ PHP_HTTP_API STATUS _http_urlencode_hash_recursive(HashTable *ht, phpstr *str, c
                        phpstr_init(&new_prefix);
                        if (prefix && prefix_len) {
                                phpstr_append(&new_prefix, prefix, prefix_len);
-                               phpstr_appends(&new_prefix, "[");
+                               phpstr_appends(&new_prefix, "%5B");
                        }
                        
                        phpstr_append(&new_prefix, encoded_key, encoded_len);
                        efree(encoded_key);
                        
                        if (prefix && prefix_len) {
-                               phpstr_appends(&new_prefix, "]");
+                               phpstr_appends(&new_prefix, "%5D");
                        }
                        phpstr_fix(&new_prefix);
                }
@@ -266,12 +420,11 @@ PHP_HTTP_API STATUS _http_urlencode_hash_recursive(HashTable *ht, phpstr *str, c
                        --ht->nApplyCount;
                        if (SUCCESS != status) {
                                phpstr_dtor(&new_prefix);
+                               phpstr_dtor(str);
                                return FAILURE;
                        }
                } else {
-                       char *encoded_val;
-                       int encoded_len;
-                       zval *cpy, *val = convert_to_type_ex(IS_STRING, *data, &cpy);
+                       zval *val = zval_copy(IS_STRING, *data);
                        
                        if (PHPSTR_LEN(str)) {
                                phpstr_append(str, arg_sep, arg_sep_len);
@@ -279,15 +432,17 @@ PHP_HTTP_API STATUS _http_urlencode_hash_recursive(HashTable *ht, phpstr *str, c
                        phpstr_append(str, PHPSTR_VAL(&new_prefix), PHPSTR_LEN(&new_prefix));
                        phpstr_appends(str, "=");
                        
-                       encoded_val = php_url_encode(Z_STRVAL_P(val), Z_STRLEN_P(val), &encoded_len);
-                       phpstr_append(str, encoded_val, encoded_len);
-                       efree(encoded_val);
-                       
-                       if (cpy) {
-                               zval_ptr_dtor(&cpy);
+                       if (Z_STRLEN_P(val) && Z_STRVAL_P(val)) {
+                               char *encoded_val;
+                               int encoded_len;
+                               
+                               encoded_val = php_url_encode(Z_STRVAL_P(val), Z_STRLEN_P(val), &encoded_len);
+                               phpstr_append(str, encoded_val, encoded_len);
+                               efree(encoded_val);
                        }
+                       
+                       zval_free(&val);
                }
-               
                phpstr_dtor(&new_prefix);
        }
        return SUCCESS;