2 +--------------------------------------------------------------------+
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
13 #include "php_http_api.h"
15 #ifdef PHP_HTTP_HAVE_IDN
19 #ifdef PHP_HTTP_HAVE_WCHAR
24 static inline char *localhostname(void)
26 char hostname
[1024] = {0};
29 if (SUCCESS
== gethostname(hostname
, lenof(hostname
))) {
30 return estrdup(hostname
);
32 #elif defined(HAVE_GETHOSTNAME)
33 if (SUCCESS
== gethostname(hostname
, lenof(hostname
))) {
34 # if defined(HAVE_GETDOMAINNAME)
35 size_t hlen
= strlen(hostname
);
36 if (hlen
<= lenof(hostname
) - lenof("(none)")) {
37 hostname
[hlen
++] = '.';
38 if (SUCCESS
== getdomainname(&hostname
[hlen
], lenof(hostname
) - hlen
)) {
39 if (!strcmp(&hostname
[hlen
], "(none)")) {
40 hostname
[hlen
- 1] = '\0';
42 return estrdup(hostname
);
46 if (strcmp(hostname
, "(none)")) {
47 return estrdup(hostname
);
51 return estrndup("localhost", lenof("localhost"));
54 static php_url
*php_http_url_from_env(php_url
*url TSRMLS_DC
)
56 zval
*https
, *zhost
, *zport
;
60 url
= ecalloc(1, sizeof(*url
));
64 zport
= php_http_env_get_server_var(ZEND_STRL("SERVER_PORT"), 1 TSRMLS_CC
);
65 if (zport
&& IS_LONG
== is_numeric_string(Z_STRVAL_P(zport
), Z_STRLEN_P(zport
), &port
, NULL
, 0)) {
70 https
= php_http_env_get_server_var(ZEND_STRL("HTTPS"), 1 TSRMLS_CC
);
71 if (https
&& !strcasecmp(Z_STRVAL_P(https
), "ON")) {
72 url
->scheme
= estrndup("https", lenof("https"));
74 url
->scheme
= estrndup("http", lenof("http"));
78 if ((((zhost
= php_http_env_get_server_var(ZEND_STRL("HTTP_HOST"), 1 TSRMLS_CC
)) ||
79 (zhost
= php_http_env_get_server_var(ZEND_STRL("SERVER_NAME"), 1 TSRMLS_CC
)) ||
80 (zhost
= php_http_env_get_server_var(ZEND_STRL("SERVER_ADDR"), 1 TSRMLS_CC
)))) && Z_STRLEN_P(zhost
)) {
81 size_t stop_at
= strspn(Z_STRVAL_P(zhost
), "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-.");
83 url
->host
= estrndup(Z_STRVAL_P(zhost
), stop_at
);
85 url
->host
= localhostname();
89 if (SG(request_info
).request_uri
&& SG(request_info
).request_uri
[0]) {
90 const char *q
= strchr(SG(request_info
).request_uri
, '?');
93 url
->path
= estrndup(SG(request_info
).request_uri
, q
- SG(request_info
).request_uri
);
95 url
->path
= estrdup(SG(request_info
).request_uri
);
100 if (SG(request_info
).query_string
&& SG(request_info
).query_string
[0]) {
101 url
->query
= estrdup(SG(request_info
).query_string
);
107 void php_http_url(int flags
, const php_url
*old_url
, const php_url
*new_url
, php_url
**url_ptr
, char **url_str
, size_t *url_len TSRMLS_DC
)
109 php_url
*url
, *tmp_url
= NULL
;
111 /* set from env if requested */
112 if (flags
& PHP_HTTP_URL_FROM_ENV
) {
113 php_url
*env_url
= php_http_url_from_env(NULL TSRMLS_CC
);
115 php_http_url(flags
^ PHP_HTTP_URL_FROM_ENV
, env_url
, old_url
, &tmp_url
, NULL
, NULL TSRMLS_CC
);
117 php_url_free(env_url
);
121 url
= ecalloc(1, sizeof(*url
));
123 #define __URLSET(u,n) \
125 #define __URLCPY(n) \
126 url->n = __URLSET(new_url,n) ? estrdup(new_url->n) : (__URLSET(old_url,n) ? estrdup(old_url->n) : NULL)
128 if (!(flags
& PHP_HTTP_URL_STRIP_PORT
)) {
129 url
->port
= __URLSET(new_url
, port
) ? new_url
->port
: ((old_url
) ? old_url
->port
: 0);
131 if (!(flags
& PHP_HTTP_URL_STRIP_USER
)) {
134 if (!(flags
& PHP_HTTP_URL_STRIP_PASS
)) {
141 if (!(flags
& PHP_HTTP_URL_STRIP_PATH
)) {
142 if ((flags
& PHP_HTTP_URL_JOIN_PATH
) && __URLSET(old_url
, path
) && __URLSET(new_url
, path
) && *new_url
->path
!= '/') {
143 size_t old_path_len
= strlen(old_url
->path
), new_path_len
= strlen(new_url
->path
);
145 url
->path
= ecalloc(1, old_path_len
+ new_path_len
+ 1 + 1);
147 strcat(url
->path
, old_url
->path
);
148 if (url
->path
[old_path_len
- 1] != '/') {
149 php_dirname(url
->path
, old_path_len
);
150 strcat(url
->path
, "/");
152 strcat(url
->path
, new_url
->path
);
157 if (!(flags
& PHP_HTTP_URL_STRIP_QUERY
)) {
158 if ((flags
& PHP_HTTP_URL_JOIN_QUERY
) && __URLSET(new_url
, query
) && __URLSET(old_url
, query
)) {
165 ZVAL_STRING(&qstr
, old_url
->query
, 0);
166 php_http_querystring_update(&qarr
, &qstr
, NULL TSRMLS_CC
);
167 ZVAL_STRING(&qstr
, new_url
->query
, 0);
168 php_http_querystring_update(&qarr
, &qstr
, NULL TSRMLS_CC
);
171 php_http_querystring_update(&qarr
, NULL
, &qstr TSRMLS_CC
);
172 url
->query
= Z_STRVAL(qstr
);
178 if (!(flags
& PHP_HTTP_URL_STRIP_FRAGMENT
)) {
182 /* done with copy & combine & strip */
184 if (flags
& PHP_HTTP_URL_FROM_ENV
) {
185 /* free old_url we tainted above */
186 php_url_free(tmp_url
);
189 /* set some sane defaults */
192 url
->scheme
= estrndup("http", lenof("http"));
196 url
->host
= estrndup("localhost", lenof("localhost"));
200 url
->path
= estrndup("/", 1);
201 } else if (url
->path
[0] != '/') {
202 size_t plen
= strlen(url
->path
);
203 char *path
= emalloc(plen
+ 1 + 1);
206 memcpy(&path
[1], url
->path
, plen
+ 1);
207 STR_SET(url
->path
, path
);
209 /* replace directory references if path is not a single slash */
210 if ((flags
& PHP_HTTP_URL_SANITIZE_PATH
)
211 && url
->path
[0] && (url
->path
[0] != '/' || url
->path
[1])) {
212 char *ptr
, *end
= url
->path
+ strlen(url
->path
) + 1;
214 for (ptr
= strchr(url
->path
, '/'); ptr
; ptr
= strchr(ptr
, '/')) {
217 memmove(&ptr
[1], &ptr
[2], end
- &ptr
[2]);
227 memmove(&ptr
[1], &ptr
[3], end
- &ptr
[3]);
233 while (ptr
!= url
->path
) {
238 memmove(&ptr
[1], pos
, end
- pos
);
240 } else if (!ptr
[3]) {
259 /* unset default ports */
261 if ( ((url
->port
== 80) && !strcmp(url
->scheme
, "http"))
262 || ((url
->port
==443) && !strcmp(url
->scheme
, "https"))
269 php_http_url_to_string(url
, url_str
, url_len TSRMLS_CC
);
279 STATUS
php_http_url_encode_hash(HashTable
*hash
, const char *pre_encoded_str
, size_t pre_encoded_len
, char **encoded_str
, size_t *encoded_len TSRMLS_DC
)
281 const char *arg_sep_str
;
283 php_http_buffer_t
*qstr
= php_http_buffer_new();
285 php_http_url_argsep(&arg_sep_str
, &arg_sep_len TSRMLS_CC
);
287 if (SUCCESS
!= php_http_url_encode_hash_ex(hash
, qstr
, arg_sep_str
, arg_sep_len
, "=", 1, pre_encoded_str
, pre_encoded_len TSRMLS_CC
)) {
288 php_http_buffer_free(&qstr
);
292 php_http_buffer_data(qstr
, encoded_str
, encoded_len
);
293 php_http_buffer_free(&qstr
);
298 STATUS
php_http_url_encode_hash_ex(HashTable
*hash
, php_http_buffer_t
*qstr
, const char *arg_sep_str
, size_t arg_sep_len
, const char *val_sep_str
, size_t val_sep_len
, const char *pre_encoded_str
, size_t pre_encoded_len TSRMLS_DC
)
300 if (pre_encoded_len
&& pre_encoded_str
) {
301 php_http_buffer_append(qstr
, pre_encoded_str
, pre_encoded_len
);
304 if (!php_http_params_to_string(qstr
, hash
, arg_sep_str
, arg_sep_len
, "", 0, val_sep_str
, val_sep_len
, PHP_HTTP_PARAMS_QUERY TSRMLS_CC
)) {
311 void php_http_url_dtor(php_http_url_t
*url
)
313 STR_FREE(url
->scheme
.str
);
314 STR_FREE(url
->authority
.userinfo
.username
.str
);
315 STR_FREE(url
->authority
.userinfo
.password
.str
);
316 STR_FREE(url
->authority
.host
.str
);
317 STR_FREE(url
->path
.str
);
318 STR_FREE(url
->query
.str
);
319 STR_FREE(url
->fragment
.str
);
322 void php_http_url_free(php_http_url_t
**url
)
325 php_http_url_dtor(*url
);
331 static const unsigned char utf8mblen
[256] = {
332 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
333 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
334 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
335 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
336 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
337 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
338 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
339 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
340 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
341 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
342 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
343 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
344 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
345 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
346 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
347 4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6
349 static const unsigned char utf8mask
[] = {
350 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01
353 static inline size_t utf8towc(unsigned *wc
, const unsigned char *uc
, size_t len
)
355 unsigned char ub
= utf8mblen
[*uc
];
357 if (!ub
|| ub
> len
|| ub
> 3) {
361 *wc
= *uc
& utf8mask
[ub
];
365 if ((uc
[1] & 0xc0) != 0x80) {
372 if ((uc
[1] & 0xc0) != 0x80) {
379 if ((uc
[1] & 0xc0) != 0x80) {
395 static inline zend_bool
isualnum(unsigned ch
)
400 if (ch
>= 0x30 && ch
<= 0x39) {
404 for (i
= 0; i
< sizeof(utf8_ranges
)/sizeof(utf8_range_t
); ++i
) {
405 if (utf8_ranges
[i
].start
== ch
) {
407 } else if (utf8_ranges
[i
].start
<= ch
&& utf8_ranges
[i
].end
>= ch
) {
408 if (utf8_ranges
[i
].step
== 1) {
418 static size_t parse_mb_utf8(php_http_url_t
*url
, const char *ptr
, const char *end
, zend_bool idn
)
421 size_t consumed
= utf8towc(&wchar
, (const unsigned char *) ptr
, end
- ptr
);
423 if (!consumed
|| consumed
== (size_t) -1) {
426 if (!idn
&& !isualnum(wchar
)) {
433 #ifdef PHP_HTTP_HAVE_WCHAR
434 static size_t parse_mb_loc(php_http_url_t
*url
, const char *ptr
, const char *end
, zend_bool idn
)
438 #if defined(HAVE_MBRTOWC)
441 consumed
= mbrtowc(&wchar
, ptr
, end
- ptr
, &ps
);
442 #elif defined(HAVE_MBTOWC)
443 consumed
= mbtowc(&wchar
, ptr
, end
- ptr
);
446 if (!consumed
|| consumed
== (size_t) -1) {
449 if (!idn
&& !iswalnum(wchar
)) {
457 typedef enum parse_mb_what
{
466 static const char * const parse_what
[] = {
475 static size_t parse_mb(php_http_url_t
*url
, parse_mb_what_t what
, const char *ptr
, const char *end
, const char *begin
, zend_bool silent
)
478 zend_bool idn
= (what
== PARSE_HOSTINFO
) && (url
->flags
& PHP_HTTP_URL_PARSE_IDN
);
480 if (url
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
481 consumed
= parse_mb_utf8(url
, ptr
, end
, idn
);
483 #ifdef PHP_HTTP_HAVE_WCHAR
484 else if (url
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
485 consumed
= parse_mb_loc(url
, ptr
, end
, idn
);
489 if (!consumed
&& !silent
) {
490 TSRMLS_FETCH_FROM_CTX(url
->ts
);
491 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
492 "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
493 parse_what
[what
], (unsigned char) *ptr
, (unsigned) (ptr
- begin
), begin
);
499 static STATUS
parse_userinfo(php_http_url_t
*url
, const char *ptr
, const char *end
)
502 const char *password
= NULL
, *tmp
= ptr
;
503 TSRMLS_FETCH_FROM_CTX(url
->ts
);
509 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
510 "Failed to parse password; duplicate ':' at pos %u in '%s'",
511 (unsigned) (ptr
- tmp
), tmp
);
518 if (ptr
[1] != '%' && (end
- ptr
<= 2 || !isxdigit(*(ptr
+1)) || !isxdigit(*(ptr
+2)))) {
519 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
520 "Failed to parse userinfo; invalid percent encoding at pos %u in '%s'",
521 (unsigned) (ptr
- tmp
), tmp
);
527 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
528 case '+': case ',': case ';': case '=': /* sub-delims */
529 case '-': case '.': case '_': case '~': /* unreserved */
530 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
531 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
532 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
533 case 'V': case 'W': case 'X': case 'Y': case 'Z':
534 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
535 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
536 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
537 case 'v': case 'w': case 'x': case 'y': case 'z':
538 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
539 case '7': case '8': case '9':
544 if (!(mb
= parse_mb(url
, PARSE_USERINFO
, ptr
, end
, tmp
, 0))) {
549 } while(++ptr
!= end
);
552 if ((url
->authority
.userinfo
.username
.len
= password
- tmp
- 1)) {
553 url
->authority
.userinfo
.username
.str
= estrndup(tmp
,
554 url
->authority
.userinfo
.username
.len
);
556 if ((url
->authority
.userinfo
.password
.len
= end
- password
)) {
557 url
->authority
.userinfo
.password
.str
= estrndup(password
,
558 url
->authority
.userinfo
.password
.len
);
561 if ((url
->authority
.userinfo
.username
.len
= end
- tmp
)) {
562 url
->authority
.userinfo
.username
.str
= estrndup(tmp
,
563 url
->authority
.userinfo
.username
.len
);
570 static STATUS
parse_hostinfo(php_http_url_t
*url
, const char *ptr
, const char *end
)
573 const char *tmp
= ptr
, *port
= NULL
;
574 TSRMLS_FETCH_FROM_CTX(url
->ts
);
576 /* FIXME: IP(v6) addresses */
581 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
582 "Failed to parse port; duplicate ':' at pos %u in '%s'",
583 (unsigned) (ptr
- tmp
), tmp
);
590 if (ptr
[1] != '%' && (end
- ptr
<= 2 || !isxdigit(*(ptr
+1)) || !isxdigit(*(ptr
+2)))) {
591 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
592 "Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'",
593 (unsigned) (ptr
- tmp
), tmp
);
599 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
600 case '+': case ',': case ';': case '=': /* sub-delims */
601 case '-': case '.': case '_': case '~': /* unreserved */
602 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
603 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
604 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
605 case 'V': case 'W': case 'X': case 'Y': case 'Z':
606 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
607 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
608 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
609 case 'v': case 'w': case 'x': case 'y': case 'z':
611 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
612 "Failed to parse port; unexpected char '%c' at pos %u in '%s'",
613 (unsigned char) *ptr
, (unsigned) (ptr
- tmp
), tmp
);
617 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
618 case '7': case '8': case '9':
621 url
->authority
.port
*= 10;
622 url
->authority
.port
+= *ptr
- '0';
628 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
629 "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'",
630 (unsigned char) *ptr
, (unsigned) (ptr
- tmp
), tmp
);
632 } else if (!(mb
= parse_mb(url
, PARSE_HOSTINFO
, ptr
, end
, tmp
, 0))) {
637 } while (++ptr
!= end
);
640 url
->authority
.host
.len
= port
- tmp
- 1;
642 url
->authority
.host
.len
= end
- tmp
;
645 url
->authority
.host
.str
= estrndup(tmp
, url
->authority
.host
.len
);
647 #ifdef PHP_HTTP_HAVE_IDN
648 if (url
->flags
& PHP_HTTP_URL_PARSE_IDN
) {
649 if (url
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
651 int rv
= idna_to_ascii_8z(url
->authority
.host
.str
, &idn
, IDNA_ALLOW_UNASSIGNED
|IDNA_USE_STD3_ASCII_RULES
);
653 if (rv
!= IDNA_SUCCESS
) {
654 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN; %s", idna_strerror(rv
));
657 STR_SET(url
->authority
.host
.str
, estrdup(idn
));
658 url
->authority
.host
.len
= strlen(idn
);
662 # ifdef PHP_HTTP_HAVE_WCHAR
663 else if (url
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
665 int rv
= idna_to_ascii_lz(url
->authority
.host
.str
, &idn
, IDNA_ALLOW_UNASSIGNED
|IDNA_USE_STD3_ASCII_RULES
);
667 if (rv
!= IDNA_SUCCESS
) {
668 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN; %s", idna_strerror(rv
));
671 STR_SET(url
->authority
.host
.str
, estrdup(idn
));
672 url
->authority
.host
.len
= strlen(idn
);
683 static const char *parse_authority(php_http_url_t
*url
, const char *ptr
, const char *end
)
685 const char *tmp
= ptr
;
690 /* userinfo delimiter */
691 if (tmp
!= ptr
&& SUCCESS
!= parse_userinfo(url
, tmp
, ptr
)) {
702 if (tmp
!= ptr
&& SUCCESS
!= parse_hostinfo(url
, tmp
, ptr
)) {
707 } while (++ptr
<= end
);
712 static const char *parse_path(php_http_url_t
*url
, const char *ptr
, const char *end
)
715 const char *tmp
= ptr
;
716 TSRMLS_FETCH_FROM_CTX(url
->ts
);
722 if ((url
->path
.len
= ptr
- tmp
)) {
723 url
->path
.str
= estrndup(tmp
, url
->path
.len
);
728 if (ptr
[1] != '%' && (end
- ptr
<= 2 || !isxdigit(*(ptr
+1)) || !isxdigit(*(ptr
+2)))) {
729 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
730 "Failed to parse path; invalid percent encoding at pos %u in '%s'",
731 (unsigned) (ptr
- tmp
), tmp
);
737 case '/': /* yeah, well */
738 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
739 case '+': case ',': case ';': case '=': /* sub-delims */
740 case '-': case '.': case '_': case '~': /* unreserved */
741 case ':': case '@': /* pchar */
742 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
743 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
744 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
745 case 'V': case 'W': case 'X': case 'Y': case 'Z':
746 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
747 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
748 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
749 case 'v': case 'w': case 'x': case 'y': case 'z':
750 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
751 case '7': case '8': case '9':
756 if (!(mb
= parse_mb(url
, PARSE_PATH
, ptr
, end
, tmp
, 0))) {
761 } while (++ptr
<= end
);
766 static const char *parse_query(php_http_url_t
*url
, const char *ptr
, const char *end
)
769 const char *tmp
= ptr
+ !!*ptr
;
770 TSRMLS_FETCH_FROM_CTX(url
->ts
);
776 if ((url
->query
.len
= ptr
- tmp
)) {
777 url
->query
.str
= estrndup(tmp
, url
->query
.len
);
782 if (ptr
[1] != '%' && (end
- ptr
<= 2 || !isxdigit(*(ptr
+1)) || !isxdigit(*(ptr
+2)))) {
783 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
784 "Failed to parse query; invalid percent encoding at pos %u in '%s'",
785 (unsigned) (ptr
- tmp
), tmp
);
791 case '?': case '/': /* yeah, well */
792 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
793 case '+': case ',': case ';': case '=': /* sub-delims */
794 case '-': case '.': case '_': case '~': /* unreserved */
795 case ':': case '@': /* pchar */
796 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
797 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
798 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
799 case 'V': case 'W': case 'X': case 'Y': case 'Z':
800 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
801 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
802 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
803 case 'v': case 'w': case 'x': case 'y': case 'z':
804 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
805 case '7': case '8': case '9':
810 if (!(mb
= parse_mb(url
, PARSE_QUERY
, ptr
, end
, tmp
, 0))) {
815 } while (++ptr
<= end
);
820 static const char *parse_fragment(php_http_url_t
*url
, const char *ptr
, const char *end
)
823 const char *tmp
= ptr
+ !!*ptr
;
824 TSRMLS_FETCH_FROM_CTX(url
->ts
);
829 if ((url
->fragment
.len
= ptr
- tmp
)) {
830 url
->fragment
.str
= estrndup(tmp
, url
->fragment
.len
);
835 if (ptr
[1] != '%' && (end
- ptr
<= 2 || !isxdigit(*(ptr
+1)) || !isxdigit(*(ptr
+2)))) {
836 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
837 "Failed to parse query; invalid percent encoding at pos %u in '%s'",
838 (unsigned) (ptr
- tmp
), tmp
);
844 case '?': case '/': /* yeah, well */
845 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
846 case '+': case ',': case ';': case '=': /* sub-delims */
847 case '-': case '.': case '_': case '~': /* unreserved */
848 case ':': case '@': /* pchar */
849 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
850 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
851 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
852 case 'V': case 'W': case 'X': case 'Y': case 'Z':
853 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
854 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
855 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
856 case 'v': case 'w': case 'x': case 'y': case 'z':
857 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
858 case '7': case '8': case '9':
863 if (!(mb
= parse_mb(url
, PARSE_FRAGMENT
, ptr
, end
, tmp
, 0))) {
868 } while (++ptr
<= end
);
873 static const char *parse_hier(php_http_url_t
*url
, const char *ptr
, const char *end
)
877 if (*(ptr
+ 1) == '/') {
878 if (!(ptr
= parse_authority(url
, ptr
+ 2, end
))) {
884 return parse_path(url
, ptr
, end
);
887 static const char *parse_scheme(php_http_url_t
*url
, const char *ptr
, const char *end
)
890 const char *tmp
= ptr
;
895 /* scheme delimiter */
896 url
->scheme
.len
= ptr
- tmp
;
897 url
->scheme
.str
= estrndup(tmp
, url
->scheme
.len
);
900 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
901 case '7': case '8': case '9':
902 case '+': case '-': case '.':
907 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
908 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
909 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
910 case 'V': case 'W': case 'X': case 'Y': case 'Z':
911 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
912 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
913 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
914 case 'v': case 'w': case 'x': case 'y': case 'z':
919 if (!(mb
= parse_mb(url
, PARSE_SCHEME
, ptr
, end
, tmp
, 1))) {
920 /* soft fail; parse path next */
925 } while (++ptr
!= end
);
930 php_http_url_t
*php_http_url_init(php_http_url_t
*url
, const char *str
, size_t len
, unsigned flags TSRMLS_DC
)
932 const char *ptr
, *end
= str
+ len
;
933 zend_bool free_url
= !url
;
936 memset(url
, 0, sizeof(*url
));
938 url
= ecalloc(1, sizeof(*url
));
942 TSRMLS_SET_CTX(url
->ts
);
944 if ((ptr
= str
) && !(str
= parse_scheme(url
, ptr
, end
))) {
945 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse URL scheme: '%s'", ptr
);
947 php_http_url_free(&url
);
949 php_http_url_dtor(url
);
954 if ((ptr
= str
) && !(str
= parse_hier(url
, ptr
, end
))) {
956 php_http_url_free(&url
);
958 php_http_url_dtor(url
);
963 if ((ptr
= str
) && !(str
= parse_query(url
, ptr
, end
))) {
964 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse URL query: '%s'", ptr
);
966 php_http_url_free(&url
);
968 php_http_url_dtor(url
);
973 if ((ptr
= str
) && !(str
= parse_fragment(url
, ptr
, end
))) {
974 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse URL fragment: '%s'", ptr
);
976 php_http_url_free(&url
);
978 php_http_url_dtor(url
);
986 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct
, 0, 0, 0)
987 ZEND_ARG_INFO(0, old_url
)
988 ZEND_ARG_INFO(0, new_url
)
989 ZEND_ARG_INFO(0, flags
)
991 PHP_METHOD(HttpUrl
, __construct
)
993 zval
*new_url
= NULL
, *old_url
= NULL
;
994 long flags
= PHP_HTTP_URL_FROM_ENV
;
995 zend_error_handling zeh
;
997 php_http_expect(SUCCESS
== zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC
, "|z!z!l", &old_url
, &new_url
, &flags
), invalid_arg
, return);
999 zend_replace_error_handling(EH_THROW
, php_http_exception_bad_url_class_entry
, &zeh TSRMLS_CC
);
1001 php_url
*res_purl
, *new_purl
= NULL
, *old_purl
= NULL
;
1004 switch (Z_TYPE_P(new_url
)) {
1007 new_purl
= php_http_url_from_struct(NULL
, HASH_OF(new_url
) TSRMLS_CC
);
1010 zval
*cpy
= php_http_ztyp(IS_STRING
, new_url
);
1012 new_purl
= php_url_parse(Z_STRVAL_P(cpy
));
1013 zval_ptr_dtor(&cpy
);
1018 zend_restore_error_handling(&zeh TSRMLS_CC
);
1023 switch (Z_TYPE_P(old_url
)) {
1026 old_purl
= php_http_url_from_struct(NULL
, HASH_OF(old_url
) TSRMLS_CC
);
1029 zval
*cpy
= php_http_ztyp(IS_STRING
, old_url
);
1031 old_purl
= php_url_parse(Z_STRVAL_P(cpy
));
1032 zval_ptr_dtor(&cpy
);
1038 php_url_free(new_purl
);
1040 zend_restore_error_handling(&zeh TSRMLS_CC
);
1045 php_http_url(flags
, old_purl
, new_purl
, &res_purl
, NULL
, NULL TSRMLS_CC
);
1046 php_http_url_to_struct(res_purl
, getThis() TSRMLS_CC
);
1048 php_url_free(res_purl
);
1050 php_url_free(old_purl
);
1053 php_url_free(new_purl
);
1056 zend_restore_error_handling(&zeh TSRMLS_CC
);
1059 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_mod
, 0, 0, 1)
1060 ZEND_ARG_INFO(0, more_url_parts
)
1061 ZEND_ARG_INFO(0, flags
)
1062 ZEND_END_ARG_INFO();
1063 PHP_METHOD(HttpUrl
, mod
)
1065 zval
*new_url
= NULL
;
1066 long flags
= PHP_HTTP_URL_JOIN_PATH
| PHP_HTTP_URL_JOIN_QUERY
;
1067 zend_error_handling zeh
;
1069 php_http_expect(SUCCESS
== zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC
, "z!|l", &new_url
, &flags
), invalid_arg
, return);
1071 zend_replace_error_handling(EH_THROW
, php_http_exception_bad_url_class_entry
, &zeh TSRMLS_CC
);
1073 php_url
*new_purl
= NULL
, *old_purl
= NULL
;
1076 switch (Z_TYPE_P(new_url
)) {
1079 new_purl
= php_http_url_from_struct(NULL
, HASH_OF(new_url
) TSRMLS_CC
);
1082 zval
*cpy
= php_http_ztyp(IS_STRING
, new_url
);
1084 new_purl
= php_url_parse(Z_STRVAL_P(new_url
));
1085 zval_ptr_dtor(&cpy
);
1090 zend_restore_error_handling(&zeh TSRMLS_CC
);
1095 if ((old_purl
= php_http_url_from_struct(NULL
, HASH_OF(getThis()) TSRMLS_CC
))) {
1098 ZVAL_OBJVAL(return_value
, zend_objects_clone_obj(getThis() TSRMLS_CC
), 0);
1100 php_http_url(flags
, old_purl
, new_purl
, &res_purl
, NULL
, NULL TSRMLS_CC
);
1101 php_http_url_to_struct(res_purl
, return_value TSRMLS_CC
);
1103 php_url_free(res_purl
);
1104 php_url_free(old_purl
);
1107 php_url_free(new_purl
);
1110 zend_restore_error_handling(&zeh TSRMLS_CC
);
1113 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toString
, 0, 0, 0)
1114 ZEND_END_ARG_INFO();
1115 PHP_METHOD(HttpUrl
, toString
)
1117 if (SUCCESS
== zend_parse_parameters_none()) {
1120 if ((purl
= php_http_url_from_struct(NULL
, HASH_OF(getThis()) TSRMLS_CC
))) {
1124 php_http_url(0, purl
, NULL
, NULL
, &str
, &len TSRMLS_CC
);
1126 RETURN_STRINGL(str
, len
, 0);
1129 RETURN_EMPTY_STRING();
1132 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toArray
, 0, 0, 0)
1133 ZEND_END_ARG_INFO();
1134 PHP_METHOD(HttpUrl
, toArray
)
1138 if (SUCCESS
!= zend_parse_parameters_none()) {
1142 /* strip any non-URL properties */
1143 purl
= php_http_url_from_struct(NULL
, HASH_OF(getThis()) TSRMLS_CC
);
1144 php_http_url_to_struct(purl
, return_value TSRMLS_CC
);
1148 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_parse
, 0, 0, 1)
1149 ZEND_ARG_INFO(0, url
)
1150 ZEND_ARG_INFO(0, flags
)
1151 ZEND_END_ARG_INFO();
1152 PHP_METHOD(HttpUrl
, parse
)
1158 zend_error_handling zeh
;
1160 php_http_expect(SUCCESS
== zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC
, "s|l", &str
, &len
, &flags
), invalid_arg
, return);
1162 zend_replace_error_handling(EH_THROW
, php_http_exception_bad_url_class_entry
, &zeh TSRMLS_CC
);
1163 if (php_http_url_init(&url
, str
, len
, flags TSRMLS_CC
)) {
1164 object_init_ex(return_value
, php_http_url_class_entry
);
1165 if (url
.scheme
.len
) {
1166 zend_update_property_stringl(php_http_url_class_entry
, return_value
, ZEND_STRL("scheme"),
1167 url
.scheme
.str
, url
.scheme
.len TSRMLS_CC
);
1169 if (url
.authority
.userinfo
.username
.len
) {
1170 zend_update_property_stringl(php_http_url_class_entry
, return_value
, ZEND_STRL("user"),
1171 url
.authority
.userinfo
.username
.str
, url
.authority
.userinfo
.username
.len TSRMLS_CC
);
1173 if (url
.authority
.userinfo
.password
.len
) {
1174 zend_update_property_stringl(php_http_url_class_entry
, return_value
, ZEND_STRL("pass"),
1175 url
.authority
.userinfo
.password
.str
, url
.authority
.userinfo
.password
.len TSRMLS_CC
);
1177 if (url
.authority
.host
.len
) {
1178 zend_update_property_stringl(php_http_url_class_entry
, return_value
, ZEND_STRL("host"),
1179 url
.authority
.host
.str
, url
.authority
.host
.len TSRMLS_CC
);
1181 if (url
.authority
.port
) {
1182 zend_update_property_long(php_http_url_class_entry
, return_value
, ZEND_STRL("port"),
1183 url
.authority
.port TSRMLS_CC
);
1186 zend_update_property_stringl(php_http_url_class_entry
, return_value
, ZEND_STRL("path"),
1187 url
.path
.str
, url
.path
.len TSRMLS_CC
);
1189 if (url
.query
.len
) {
1190 zend_update_property_stringl(php_http_url_class_entry
, return_value
, ZEND_STRL("query"),
1191 url
.query
.str
, url
.query
.len TSRMLS_CC
);
1193 if (url
.fragment
.len
) {
1194 zend_update_property_stringl(php_http_url_class_entry
, return_value
, ZEND_STRL("fragment"),
1195 url
.fragment
.str
, url
.fragment
.len TSRMLS_CC
);
1197 php_http_url_dtor(&url
);
1199 zend_restore_error_handling(&zeh TSRMLS_CC
);
1202 static zend_function_entry php_http_url_methods
[] = {
1203 PHP_ME(HttpUrl
, __construct
, ai_HttpUrl___construct
, ZEND_ACC_PUBLIC
|ZEND_ACC_CTOR
)
1204 PHP_ME(HttpUrl
, mod
, ai_HttpUrl_mod
, ZEND_ACC_PUBLIC
)
1205 PHP_ME(HttpUrl
, toString
, ai_HttpUrl_toString
, ZEND_ACC_PUBLIC
)
1206 ZEND_MALIAS(HttpUrl
, __toString
, toString
, ai_HttpUrl_toString
, ZEND_ACC_PUBLIC
)
1207 PHP_ME(HttpUrl
, toArray
, ai_HttpUrl_toArray
, ZEND_ACC_PUBLIC
)
1208 PHP_ME(HttpUrl
, parse
, ai_HttpUrl_parse
, ZEND_ACC_PUBLIC
|ZEND_ACC_STATIC
)
1209 EMPTY_FUNCTION_ENTRY
1212 zend_class_entry
*php_http_url_class_entry
;
1214 PHP_MINIT_FUNCTION(http_url
)
1216 zend_class_entry ce
= {0};
1218 INIT_NS_CLASS_ENTRY(ce
, "http", "Url", php_http_url_methods
);
1219 php_http_url_class_entry
= zend_register_internal_class(&ce TSRMLS_CC
);
1221 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("scheme"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1222 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("user"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1223 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("pass"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1224 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("host"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1225 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("port"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1226 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("path"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1227 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("query"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1228 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("fragment"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1230 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("REPLACE"), PHP_HTTP_URL_REPLACE TSRMLS_CC
);
1231 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("JOIN_PATH"), PHP_HTTP_URL_JOIN_PATH TSRMLS_CC
);
1232 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("JOIN_QUERY"), PHP_HTTP_URL_JOIN_QUERY TSRMLS_CC
);
1233 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_USER"), PHP_HTTP_URL_STRIP_USER TSRMLS_CC
);
1234 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_PASS"), PHP_HTTP_URL_STRIP_PASS TSRMLS_CC
);
1235 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_AUTH"), PHP_HTTP_URL_STRIP_AUTH TSRMLS_CC
);
1236 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_PORT"), PHP_HTTP_URL_STRIP_PORT TSRMLS_CC
);
1237 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_PATH"), PHP_HTTP_URL_STRIP_PATH TSRMLS_CC
);
1238 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_QUERY"), PHP_HTTP_URL_STRIP_QUERY TSRMLS_CC
);
1239 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_FRAGMENT"), PHP_HTTP_URL_STRIP_FRAGMENT TSRMLS_CC
);
1240 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_ALL"), PHP_HTTP_URL_STRIP_ALL TSRMLS_CC
);
1241 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("FROM_ENV"), PHP_HTTP_URL_FROM_ENV TSRMLS_CC
);
1242 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("SANITIZE_PATH"), PHP_HTTP_URL_SANITIZE_PATH TSRMLS_CC
);
1244 #ifdef PHP_HTTP_HAVE_WCHAR
1245 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_MBLOC"), PHP_HTTP_URL_PARSE_MBLOC TSRMLS_CC
);
1247 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC
);
1248 #ifdef PHP_HTTP_HAVE_IDN
1249 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_IDN"), PHP_HTTP_URL_PARSE_IDN TSRMLS_CC
);
1261 * vim600: noet sw=4 ts=4 fdm=marker
1262 * vim<600: noet sw=4 ts=4