2 +--------------------------------------------------------------------+
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
13 #include "php_http_api.h"
15 #if PHP_HTTP_HAVE_IDN2
17 #elif PHP_HTTP_HAVE_IDN
21 #ifdef PHP_HTTP_HAVE_WCHAR
26 #ifdef HAVE_ARPA_INET_H
27 # include <arpa/inet.h>
30 #include "php_http_utf8.h"
32 static inline char *localhostname(void)
34 char hostname
[1024] = {0};
37 if (SUCCESS
== gethostname(hostname
, lenof(hostname
))) {
38 return estrdup(hostname
);
40 #elif defined(HAVE_GETHOSTNAME)
41 if (SUCCESS
== gethostname(hostname
, lenof(hostname
))) {
42 # if defined(HAVE_GETDOMAINNAME)
43 size_t hlen
= strlen(hostname
);
44 if (hlen
<= lenof(hostname
) - lenof("(none)")) {
45 hostname
[hlen
++] = '.';
46 if (SUCCESS
== getdomainname(&hostname
[hlen
], lenof(hostname
) - hlen
)) {
47 if (!strcmp(&hostname
[hlen
], "(none)")) {
48 hostname
[hlen
- 1] = '\0';
50 return estrdup(hostname
);
54 if (strcmp(hostname
, "(none)")) {
55 return estrdup(hostname
);
59 return estrndup("localhost", lenof("localhost"));
62 #define url(buf) ((php_http_url_t *) (buf).data)
64 static php_http_url_t
*php_http_url_from_env(TSRMLS_D
)
66 zval
*https
, *zhost
, *zport
;
68 php_http_buffer_t buf
;
70 php_http_buffer_init_ex(&buf
, MAX(PHP_HTTP_BUFFER_DEFAULT_SIZE
, sizeof(php_http_url_t
)<<2), PHP_HTTP_BUFFER_INIT_PREALLOC
);
71 php_http_buffer_account(&buf
, sizeof(php_http_url_t
));
72 memset(buf
.data
, 0, buf
.used
);
75 url(buf
)->scheme
= &buf
.data
[buf
.used
];
76 https
= php_http_env_get_server_var(ZEND_STRL("HTTPS"), 1 TSRMLS_CC
);
77 if (https
&& !strcasecmp(Z_STRVAL_P(https
), "ON")) {
78 php_http_buffer_append(&buf
, "https", sizeof("https"));
80 php_http_buffer_append(&buf
, "http", sizeof("http"));
84 url(buf
)->host
= &buf
.data
[buf
.used
];
85 if ((((zhost
= php_http_env_get_server_var(ZEND_STRL("HTTP_HOST"), 1 TSRMLS_CC
)) ||
86 (zhost
= php_http_env_get_server_var(ZEND_STRL("SERVER_NAME"), 1 TSRMLS_CC
)) ||
87 (zhost
= php_http_env_get_server_var(ZEND_STRL("SERVER_ADDR"), 1 TSRMLS_CC
)))) && Z_STRLEN_P(zhost
)) {
88 size_t stop_at
= strspn(Z_STRVAL_P(zhost
), "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-.");
90 php_http_buffer_append(&buf
, Z_STRVAL_P(zhost
), stop_at
);
91 php_http_buffer_append(&buf
, "", 1);
93 char *host_str
= localhostname();
95 php_http_buffer_append(&buf
, host_str
, strlen(host_str
) + 1);
100 zport
= php_http_env_get_server_var(ZEND_STRL("SERVER_PORT"), 1 TSRMLS_CC
);
101 if (zport
&& IS_LONG
== is_numeric_string(Z_STRVAL_P(zport
), Z_STRLEN_P(zport
), &port
, NULL
, 0)) {
102 url(buf
)->port
= port
;
106 if (SG(request_info
).request_uri
&& SG(request_info
).request_uri
[0]) {
107 const char *q
= strchr(SG(request_info
).request_uri
, '?');
109 url(buf
)->path
= &buf
.data
[buf
.used
];
112 php_http_buffer_append(&buf
, SG(request_info
).request_uri
, q
- SG(request_info
).request_uri
);
113 php_http_buffer_append(&buf
, "", 1);
115 php_http_buffer_append(&buf
, SG(request_info
).request_uri
, strlen(SG(request_info
).request_uri
) + 1);
120 if (SG(request_info
).query_string
&& SG(request_info
).query_string
[0]) {
121 url(buf
)->query
= &buf
.data
[buf
.used
];
122 php_http_buffer_append(&buf
, SG(request_info
).query_string
, strlen(SG(request_info
).query_string
) + 1);
128 #define url_isset(u,n) \
130 #define url_append(buf, append) do { \
131 char *_ptr = (buf)->data; \
132 php_http_url_t *_url = (php_http_url_t *) _ptr, _mem = *_url; \
135 if (_ptr != (buf)->data) { \
136 ptrdiff_t diff = (buf)->data - _ptr; \
137 _url = (php_http_url_t *) (buf)->data; \
138 if (_mem.scheme) _url->scheme += diff; \
139 if (_mem.user) _url->user += diff; \
140 if (_mem.pass) _url->pass += diff; \
141 if (_mem.host) _url->host += diff; \
142 if (_mem.path) _url->path += diff; \
143 if (_mem.query) _url->query += diff; \
144 if (_mem.fragment) _url->fragment += diff; \
147 #define url_copy(n) do { \
148 if (url_isset(new_url, n)) { \
149 url(buf)->n = &buf.data[buf.used]; \
150 url_append(&buf, php_http_buffer_append(&buf, new_url->n, strlen(new_url->n) + 1)); \
151 } else if (url_isset(old_url, n)) { \
152 url(buf)->n = &buf.data[buf.used]; \
153 url_append(&buf, php_http_buffer_append(&buf, old_url->n, strlen(old_url->n) + 1)); \
157 php_http_url_t
*php_http_url_mod(const php_http_url_t
*old_url
, const php_http_url_t
*new_url
, unsigned flags TSRMLS_DC
)
159 php_http_url_t
*tmp_url
= NULL
;
160 php_http_buffer_t buf
;
162 php_http_buffer_init_ex(&buf
, MAX(PHP_HTTP_BUFFER_DEFAULT_SIZE
, sizeof(php_http_url_t
)<<2), PHP_HTTP_BUFFER_INIT_PREALLOC
);
163 php_http_buffer_account(&buf
, sizeof(php_http_url_t
));
164 memset(buf
.data
, 0, buf
.used
);
166 /* set from env if requested */
167 if (flags
& PHP_HTTP_URL_FROM_ENV
) {
168 php_http_url_t
*env_url
= php_http_url_from_env(TSRMLS_C
);
170 old_url
= tmp_url
= php_http_url_mod(env_url
, old_url
, flags
^ PHP_HTTP_URL_FROM_ENV TSRMLS_CC
);
171 php_http_url_free(&env_url
);
176 if (!(flags
& PHP_HTTP_URL_STRIP_USER
)) {
180 if (!(flags
& PHP_HTTP_URL_STRIP_PASS
)) {
186 if (!(flags
& PHP_HTTP_URL_STRIP_PORT
)) {
187 url(buf
)->port
= url_isset(new_url
, port
) ? new_url
->port
: ((old_url
) ? old_url
->port
: 0);
190 if (!(flags
& PHP_HTTP_URL_STRIP_PATH
)) {
191 if ((flags
& PHP_HTTP_URL_JOIN_PATH
) && url_isset(old_url
, path
) && url_isset(new_url
, path
) && *new_url
->path
!= '/') {
192 size_t old_path_len
= strlen(old_url
->path
), new_path_len
= strlen(new_url
->path
);
193 char *path
= ecalloc(1, old_path_len
+ new_path_len
+ 1 + 1);
195 strcat(path
, old_url
->path
);
196 if (path
[old_path_len
- 1] != '/') {
197 php_dirname(path
, old_path_len
);
200 strcat(path
, new_url
->path
);
202 url(buf
)->path
= &buf
.data
[buf
.used
];
203 if (path
[0] != '/') {
204 url_append(&buf
, php_http_buffer_append(&buf
, "/", 1));
206 url_append(&buf
, php_http_buffer_append(&buf
, path
, strlen(path
) + 1));
209 const char *path
= NULL
;
211 if (url_isset(new_url
, path
)) {
212 path
= new_url
->path
;
213 } else if (url_isset(old_url
, path
)) {
214 path
= old_url
->path
;
218 url(buf
)->path
= &buf
.data
[buf
.used
];
220 url_append(&buf
, php_http_buffer_append(&buf
, path
, strlen(path
) + 1));
227 if (!(flags
& PHP_HTTP_URL_STRIP_QUERY
)) {
228 if ((flags
& PHP_HTTP_URL_JOIN_QUERY
) && url_isset(new_url
, query
) && url_isset(old_url
, query
)) {
235 ZVAL_STRING(&qstr
, old_url
->query
, 0);
236 php_http_querystring_update(&qarr
, &qstr
, NULL TSRMLS_CC
);
237 ZVAL_STRING(&qstr
, new_url
->query
, 0);
238 php_http_querystring_update(&qarr
, &qstr
, NULL TSRMLS_CC
);
241 php_http_querystring_update(&qarr
, NULL
, &qstr TSRMLS_CC
);
243 url(buf
)->query
= &buf
.data
[buf
.used
];
244 url_append(&buf
, php_http_buffer_append(&buf
, Z_STRVAL(qstr
), Z_STRLEN(qstr
) + 1));
253 if (!(flags
& PHP_HTTP_URL_STRIP_FRAGMENT
)) {
257 /* done with copy & combine & strip */
259 if (flags
& PHP_HTTP_URL_FROM_ENV
) {
260 /* free old_url we tainted above */
261 php_http_url_free(&tmp_url
);
264 /* replace directory references if path is not a single slash */
265 if ((flags
& PHP_HTTP_URL_SANITIZE_PATH
)
267 && url(buf
)->path
[0] && url(buf
)->path
[1]) {
268 char *ptr
, *end
= url(buf
)->path
+ strlen(url(buf
)->path
) + 1;
270 for (ptr
= strchr(url(buf
)->path
, '/'); ptr
; ptr
= strchr(ptr
, '/')) {
273 memmove(&ptr
[1], &ptr
[2], end
- &ptr
[2]);
283 memmove(&ptr
[1], &ptr
[3], end
- &ptr
[3]);
289 while (ptr
!= url(buf
)->path
) {
294 memmove(&ptr
[1], pos
, end
- pos
);
296 } else if (!ptr
[3]) {
315 /* unset default ports */
316 if (url(buf
)->port
) {
317 if ( ((url(buf
)->port
== 80) && url(buf
)->scheme
&& !strcmp(url(buf
)->scheme
, "http"))
318 || ((url(buf
)->port
==443) && url(buf
)->scheme
&& !strcmp(url(buf
)->scheme
, "https"))
327 char *php_http_url_to_string(const php_http_url_t
*url
, char **url_str
, size_t *url_len
, zend_bool persistent
)
329 php_http_buffer_t buf
;
331 php_http_buffer_init_ex(&buf
, PHP_HTTP_BUFFER_DEFAULT_SIZE
, persistent
?
332 PHP_HTTP_BUFFER_INIT_PERSISTENT
: 0);
334 if (url
->scheme
&& *url
->scheme
) {
335 php_http_buffer_appendl(&buf
, url
->scheme
);
336 php_http_buffer_appends(&buf
, "://");
337 } else if ((url
->user
&& *url
->user
) || (url
->host
&& *url
->host
)) {
338 php_http_buffer_appends(&buf
, "//");
341 if (url
->user
&& *url
->user
) {
342 php_http_buffer_appendl(&buf
, url
->user
);
343 if (url
->pass
&& *url
->pass
) {
344 php_http_buffer_appends(&buf
, ":");
345 php_http_buffer_appendl(&buf
, url
->pass
);
347 php_http_buffer_appends(&buf
, "@");
350 if (url
->host
&& *url
->host
) {
351 php_http_buffer_appendl(&buf
, url
->host
);
353 php_http_buffer_appendf(&buf
, ":%hu", url
->port
);
357 if (url
->path
&& *url
->path
) {
358 if (*url
->path
!= '/') {
359 php_http_buffer_appends(&buf
, "/");
361 php_http_buffer_appendl(&buf
, url
->path
);
362 } else if (buf
.used
) {
363 php_http_buffer_appends(&buf
, "/");
366 if (url
->query
&& *url
->query
) {
367 php_http_buffer_appends(&buf
, "?");
368 php_http_buffer_appendl(&buf
, url
->query
);
371 if (url
->fragment
&& *url
->fragment
) {
372 php_http_buffer_appends(&buf
, "#");
373 php_http_buffer_appendl(&buf
, url
->fragment
);
376 php_http_buffer_shrink(&buf
);
377 php_http_buffer_fix(&buf
);
390 char *php_http_url_authority_to_string(const php_http_url_t
*url
, char **url_str
, size_t *url_len
)
392 php_http_buffer_t buf
;
394 php_http_buffer_init(&buf
);
396 if (url
->user
&& *url
->user
) {
397 php_http_buffer_appendl(&buf
, url
->user
);
398 if (url
->pass
&& *url
->pass
) {
399 php_http_buffer_appends(&buf
, ":");
400 php_http_buffer_appendl(&buf
, url
->pass
);
402 php_http_buffer_appends(&buf
, "@");
405 if (url
->host
&& *url
->host
) {
406 php_http_buffer_appendl(&buf
, url
->host
);
408 php_http_buffer_appendf(&buf
, ":%hu", url
->port
);
412 php_http_buffer_shrink(&buf
);
413 php_http_buffer_fix(&buf
);
426 php_http_url_t
*php_http_url_from_zval(zval
*value
, unsigned flags TSRMLS_DC
)
429 php_http_url_t
*purl
;
431 switch (Z_TYPE_P(value
)) {
434 purl
= php_http_url_from_struct(HASH_OF(value
));
438 zcpy
= php_http_ztyp(IS_STRING
, value
);
439 purl
= php_http_url_parse(Z_STRVAL_P(zcpy
), Z_STRLEN_P(zcpy
), flags TSRMLS_CC
);
440 zval_ptr_dtor(&zcpy
);
446 php_http_url_t
*php_http_url_from_struct(HashTable
*ht
)
449 php_http_buffer_t buf
;
451 php_http_buffer_init_ex(&buf
, MAX(PHP_HTTP_BUFFER_DEFAULT_SIZE
, sizeof(php_http_url_t
)<<2), PHP_HTTP_BUFFER_INIT_PREALLOC
);
452 php_http_buffer_account(&buf
, sizeof(php_http_url_t
));
453 memset(buf
.data
, 0, buf
.used
);
455 if (SUCCESS
== zend_hash_find(ht
, "scheme", sizeof("scheme"), (void *) &e
)) {
456 zval
*cpy
= php_http_ztyp(IS_STRING
, *e
);
457 url(buf
)->scheme
= &buf
.data
[buf
.used
];
458 url_append(&buf
, php_http_buffer_append(&buf
, Z_STRVAL_P(cpy
), Z_STRLEN_P(cpy
) + 1));
461 if (SUCCESS
== zend_hash_find(ht
, "user", sizeof("user"), (void *) &e
)) {
462 zval
*cpy
= php_http_ztyp(IS_STRING
, *e
);
463 url(buf
)->user
= &buf
.data
[buf
.used
];
464 url_append(&buf
, php_http_buffer_append(&buf
, Z_STRVAL_P(cpy
), Z_STRLEN_P(cpy
) + 1));
467 if (SUCCESS
== zend_hash_find(ht
, "pass", sizeof("pass"), (void *) &e
)) {
468 zval
*cpy
= php_http_ztyp(IS_STRING
, *e
);
469 url(buf
)->pass
= &buf
.data
[buf
.used
];
470 url_append(&buf
, php_http_buffer_append(&buf
, Z_STRVAL_P(cpy
), Z_STRLEN_P(cpy
) + 1));
473 if (SUCCESS
== zend_hash_find(ht
, "host", sizeof("host"), (void *) &e
)) {
474 zval
*cpy
= php_http_ztyp(IS_STRING
, *e
);
475 url(buf
)->host
= &buf
.data
[buf
.used
];
476 url_append(&buf
, php_http_buffer_append(&buf
, Z_STRVAL_P(cpy
), Z_STRLEN_P(cpy
) + 1));
479 if (SUCCESS
== zend_hash_find(ht
, "port", sizeof("port"), (void *) &e
)) {
480 zval
*cpy
= php_http_ztyp(IS_LONG
, *e
);
481 url(buf
)->port
= (unsigned short) Z_LVAL_P(cpy
);
484 if (SUCCESS
== zend_hash_find(ht
, "path", sizeof("path"), (void *) &e
)) {
485 zval
*cpy
= php_http_ztyp(IS_STRING
, *e
);
486 url(buf
)->path
= &buf
.data
[buf
.used
];
487 url_append(&buf
, php_http_buffer_append(&buf
, Z_STRVAL_P(cpy
), Z_STRLEN_P(cpy
) + 1));
490 if (SUCCESS
== zend_hash_find(ht
, "query", sizeof("query"), (void *) &e
)) {
491 zval
*cpy
= php_http_ztyp(IS_STRING
, *e
);
492 url(buf
)->query
= &buf
.data
[buf
.used
];
493 url_append(&buf
, php_http_buffer_append(&buf
, Z_STRVAL_P(cpy
), Z_STRLEN_P(cpy
) + 1));
496 if (SUCCESS
== zend_hash_find(ht
, "fragment", sizeof("fragment"), (void *) &e
)) {
497 zval
*cpy
= php_http_ztyp(IS_STRING
, *e
);
498 url(buf
)->fragment
= &buf
.data
[buf
.used
];
499 url_append(&buf
, php_http_buffer_append(&buf
, Z_STRVAL_P(cpy
), Z_STRLEN_P(cpy
) + 1));
506 HashTable
*php_http_url_to_struct(const php_http_url_t
*url
, zval
*strct TSRMLS_DC
)
511 switch (Z_TYPE_P(strct
)) {
518 INIT_PZVAL_ARRAY((&arr
), HASH_OF(strct
));
528 add_assoc_string(&arr
, "scheme", url
->scheme
, 1);
531 add_assoc_string(&arr
, "user", url
->user
, 1);
534 add_assoc_string(&arr
, "pass", url
->pass
, 1);
537 add_assoc_string(&arr
, "host", url
->host
, 1);
540 add_assoc_long(&arr
, "port", (long) url
->port
);
543 add_assoc_string(&arr
, "path", url
->path
, 1);
546 add_assoc_string(&arr
, "query", url
->query
, 1);
549 add_assoc_string(&arr
, "fragment", url
->fragment
, 1);
553 return Z_ARRVAL(arr
);
556 ZEND_RESULT_CODE
php_http_url_encode_hash(HashTable
*hash
, const char *pre_encoded_str
, size_t pre_encoded_len
, char **encoded_str
, size_t *encoded_len TSRMLS_DC
)
558 const char *arg_sep_str
= "&";
559 size_t arg_sep_len
= 1;
560 php_http_buffer_t
*qstr
= php_http_buffer_new();
562 php_http_url_argsep(&arg_sep_str
, &arg_sep_len TSRMLS_CC
);
564 if (SUCCESS
!= php_http_url_encode_hash_ex(hash
, qstr
, arg_sep_str
, arg_sep_len
, "=", 1, pre_encoded_str
, pre_encoded_len TSRMLS_CC
)) {
565 php_http_buffer_free(&qstr
);
569 php_http_buffer_data(qstr
, encoded_str
, encoded_len
);
570 php_http_buffer_free(&qstr
);
575 ZEND_RESULT_CODE
php_http_url_encode_hash_ex(HashTable
*hash
, php_http_buffer_t
*qstr
, const char *arg_sep_str
, size_t arg_sep_len
, const char *val_sep_str
, size_t val_sep_len
, const char *pre_encoded_str
, size_t pre_encoded_len TSRMLS_DC
)
577 if (pre_encoded_len
&& pre_encoded_str
) {
578 php_http_buffer_append(qstr
, pre_encoded_str
, pre_encoded_len
);
581 if (!php_http_params_to_string(qstr
, hash
, arg_sep_str
, arg_sep_len
, "", 0, val_sep_str
, val_sep_len
, PHP_HTTP_PARAMS_QUERY TSRMLS_CC
)) {
598 char buffer
[1]; /* last member */
601 void php_http_url_free(php_http_url_t
**url
)
609 php_http_url_t
*php_http_url_copy(const php_http_url_t
*url
, zend_bool persistent
)
612 const char *end
= NULL
, *url_ptr
= (const char *) url
;
615 end
= MAX(url
->scheme
, end
);
616 end
= MAX(url
->pass
, end
);
617 end
= MAX(url
->user
, end
);
618 end
= MAX(url
->host
, end
);
619 end
= MAX(url
->path
, end
);
620 end
= MAX(url
->query
, end
);
621 end
= MAX(url
->fragment
, end
);
624 end
+= strlen(end
) + 1;
625 cpy_ptr
= pecalloc(1, end
- url_ptr
, persistent
);
626 cpy
= (php_http_url_t
*) cpy_ptr
;
628 memcpy(cpy_ptr
+ sizeof(*cpy
), url_ptr
+ sizeof(*url
), end
- url_ptr
- sizeof(*url
));
630 cpy
->scheme
= url
->scheme
? cpy_ptr
+ (url
->scheme
- url_ptr
) : NULL
;
631 cpy
->pass
= url
->pass
? cpy_ptr
+ (url
->pass
- url_ptr
) : NULL
;
632 cpy
->user
= url
->user
? cpy_ptr
+ (url
->user
- url_ptr
) : NULL
;
633 cpy
->host
= url
->host
? cpy_ptr
+ (url
->host
- url_ptr
) : NULL
;
634 cpy
->path
= url
->path
? cpy_ptr
+ (url
->path
- url_ptr
) : NULL
;
635 cpy
->query
= url
->query
? cpy_ptr
+ (url
->query
- url_ptr
) : NULL
;
636 cpy
->fragment
= url
->fragment
? cpy_ptr
+ (url
->fragment
- url_ptr
) : NULL
;
638 cpy
= ecalloc(1, sizeof(*url
));
641 cpy
->port
= url
->port
;
646 static size_t parse_mb_utf8(unsigned *wc
, const char *ptr
, const char *end
)
649 size_t consumed
= utf8towc(&wchar
, (const unsigned char *) ptr
, end
- ptr
);
651 if (!consumed
|| consumed
== (size_t) -1) {
661 #ifdef PHP_HTTP_HAVE_WCHAR
662 static size_t parse_mb_loc(unsigned *wc
, const char *ptr
, const char *end
)
666 #if defined(HAVE_MBRTOWC)
669 memset(&ps
, 0, sizeof(ps
));
670 consumed
= mbrtowc(&wchar
, ptr
, end
- ptr
, &ps
);
671 #elif defined(HAVE_MBTOWC)
672 consumed
= mbtowc(&wchar
, ptr
, end
- ptr
);
675 if (!consumed
|| consumed
== (size_t) -1) {
686 typedef enum parse_mb_what
{
695 static const char * const parse_what
[] = {
704 static const char parse_xdigits
[] = "0123456789ABCDEF";
706 static size_t parse_mb(struct parse_state
*state
, parse_mb_what_t what
, const char *ptr
, const char *end
, const char *begin
, zend_bool silent
)
711 if (state
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
712 consumed
= parse_mb_utf8(&wchar
, ptr
, end
);
714 #ifdef PHP_HTTP_HAVE_WCHAR
715 else if (state
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
716 consumed
= parse_mb_loc(&wchar
, ptr
, end
);
721 if (!(state
->flags
& PHP_HTTP_URL_PARSE_TOPCT
) || what
== PARSE_HOSTINFO
|| what
== PARSE_SCHEME
) {
722 if (what
== PARSE_HOSTINFO
&& (state
->flags
& PHP_HTTP_URL_PARSE_TOIDN
)) {
724 } else if (state
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
725 if (!isualnum(wchar
)) {
728 #ifdef PHP_HTTP_HAVE_WCHAR
729 } else if (state
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
730 if (!iswalnum(wchar
)) {
735 PHP_HTTP_DUFF(consumed
, state
->buffer
[state
->offset
++] = *ptr
++);
739 PHP_HTTP_DUFF(consumed
,
740 state
->buffer
[state
->offset
++] = '%';
741 state
->buffer
[state
->offset
++] = parse_xdigits
[((unsigned char) ptr
[i
]) >> 4];
742 state
->buffer
[state
->offset
++] = parse_xdigits
[((unsigned char) ptr
[i
]) & 0xf];
751 TSRMLS_FETCH_FROM_CTX(state
->ts
);
753 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
754 "Failed to parse %s; unexpected multibyte sequence 0x%x at pos %u in '%s'",
755 parse_what
[what
], wchar
, (unsigned) (ptr
- begin
), begin
);
757 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
758 "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
759 parse_what
[what
], (unsigned char) *ptr
, (unsigned) (ptr
- begin
), begin
);
766 static ZEND_RESULT_CODE
parse_userinfo(struct parse_state
*state
, const char *ptr
)
769 const char *password
= NULL
, *end
= state
->ptr
, *tmp
= ptr
;
770 TSRMLS_FETCH_FROM_CTX(state
->ts
);
772 state
->url
.user
= &state
->buffer
[state
->offset
];
778 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
779 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
780 "Failed to parse password; duplicate ':' at pos %u in '%s'",
781 (unsigned) (ptr
- tmp
), tmp
);
783 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
786 state
->buffer
[state
->offset
++] = *ptr
;
790 state
->buffer
[state
->offset
++] = 0;
791 state
->url
.pass
= &state
->buffer
[state
->offset
];
795 if (ptr
[1] != '%' && (end
- ptr
<= 2 || !isxdigit(*(ptr
+1)) || !isxdigit(*(ptr
+2)))) {
796 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
797 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
798 "Failed to parse userinfo; invalid percent encoding at pos %u in '%s'",
799 (unsigned) (ptr
- tmp
), tmp
);
801 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
804 state
->buffer
[state
->offset
++] = *ptr
++;
807 state
->buffer
[state
->offset
++] = *ptr
++;
808 state
->buffer
[state
->offset
++] = *ptr
++;
809 state
->buffer
[state
->offset
++] = *ptr
;
813 if ((mb
= parse_mb(state
, PARSE_USERINFO
, ptr
, end
, tmp
, state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
))) {
817 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
821 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
822 case '+': case ',': case ';': case '=': /* sub-delims */
823 case '-': case '.': case '_': case '~': /* unreserved */
824 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
825 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
826 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
827 case 'V': case 'W': case 'X': case 'Y': case 'Z':
828 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
829 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
830 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
831 case 'v': case 'w': case 'x': case 'y': case 'z':
832 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
833 case '7': case '8': case '9':
835 state
->buffer
[state
->offset
++] = *ptr
;
839 } while(++ptr
!= end
);
842 state
->buffer
[state
->offset
++] = 0;
847 #if defined(PHP_WIN32) || defined(HAVE_UIDNA_IDNTOASCII)
848 typedef size_t (*parse_mb_func
)(unsigned *wc
, const char *ptr
, const char *end
);
849 static ZEND_RESULT_CODE
to_utf16(parse_mb_func fn
, const char *u8
, uint16_t **u16
, size_t *len TSRMLS_DC
)
851 size_t offset
= 0, u8_len
= strlen(u8
);
853 *u16
= ecalloc(4 * sizeof(uint16_t), u8_len
+ 1);
856 while (offset
< u8_len
) {
858 uint16_t buf
[2], *ptr
= buf
;
859 size_t consumed
= fn(&wc
, &u8
[offset
], &u8
[u8_len
]);
863 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse UTF-8 at pos %zu of '%s'", offset
, u8
);
869 switch (wctoutf16(buf
, wc
)) {
871 (*u16
)[(*len
)++] = *ptr
++;
874 (*u16
)[(*len
)++] = *ptr
++;
879 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to convert UTF-32 'U+%X' to UTF-16", wc
);
888 #ifndef MAXHOSTNAMELEN
889 # define MAXHOSTNAMELEN 256
892 #if PHP_HTTP_HAVE_IDN2
893 static ZEND_RESULT_CODE
parse_idn2(struct parse_state
*state
, size_t prev_len
)
897 TSRMLS_FETCH_FROM_CTX(state
->ts
);
899 if (state
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
900 rv
= idn2_lookup_u8((const unsigned char *) state
->url
.host
, (unsigned char **) &idn
, IDN2_NFC_INPUT
);
902 # ifdef PHP_HTTP_HAVE_WCHAR
903 else if (state
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
904 rv
= idn2_lookup_ul(state
->url
.host
, &idn
, 0);
908 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
909 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN; %s", idn2_strerror(rv
));
911 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
915 size_t idnlen
= strlen(idn
);
916 memcpy(state
->url
.host
, idn
, idnlen
+ 1);
918 state
->offset
+= idnlen
- prev_len
;
922 #elif PHP_HTTP_HAVE_IDN
923 static ZEND_RESULT_CODE
parse_idn(struct parse_state
*state
, size_t prev_len
)
927 TSRMLS_FETCH_FROM_CTX(state
->ts
);
929 if (state
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
930 rv
= idna_to_ascii_8z(state
->url
.host
, &idn
, IDNA_ALLOW_UNASSIGNED
|IDNA_USE_STD3_ASCII_RULES
);
932 # ifdef PHP_HTTP_HAVE_WCHAR
933 else if (state
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
934 rv
= idna_to_ascii_lz(state
->url
.host
, &idn
, IDNA_ALLOW_UNASSIGNED
|IDNA_USE_STD3_ASCII_RULES
);
937 if (rv
!= IDNA_SUCCESS
) {
938 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
939 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN; %s", idna_strerror(rv
));
941 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
945 size_t idnlen
= strlen(idn
);
946 memcpy(state
->url
.host
, idn
, idnlen
+ 1);
948 state
->offset
+= idnlen
- prev_len
;
954 #ifdef HAVE_UIDNA_IDNTOASCII
955 # if HAVE_UNICODE_UIDNA_H
956 # include <unicode/uidna.h>
958 typedef uint16_t UChar
;
959 typedef enum { U_ZERO_ERROR
= 0 } UErrorCode
;
960 int32_t uidna_IDNToASCII(const UChar
*src
, int32_t srcLength
, UChar
*dest
, int32_t destCapacity
, int32_t options
, void *parseError
, UErrorCode
*status
);
962 static ZEND_RESULT_CODE
parse_uidn(struct parse_state
*state
)
965 uint16_t *uhost_str
, ahost_str
[MAXHOSTNAMELEN
], *ahost_ptr
;
966 size_t uhost_len
, ahost_len
;
967 UErrorCode error
= U_ZERO_ERROR
;
968 TSRMLS_FETCH_FROM_CTX(state
->ts
);
970 if (state
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
971 if (SUCCESS
!= to_utf16(parse_mb_utf8
, state
->url
.host
, &uhost_str
, &uhost_len TSRMLS_CC
)) {
974 #ifdef PHP_HTTP_HAVE_WCHAR
975 } else if (state
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
976 if (SUCCESS
!= to_utf16(parse_mb_loc
, state
->url
.host
, &uhost_str
, &uhost_len TSRMLS_CC
)) {
981 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN; codepage not specified");
985 ahost_len
= uidna_IDNToASCII(uhost_str
, uhost_len
, ahost_str
, MAXHOSTNAMELEN
, 3, NULL
, &error
);
988 if (error
!= U_ZERO_ERROR
) {
989 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN; ICU error %d", error
);
993 host_ptr
= state
->url
.host
;
994 ahost_ptr
= ahost_str
;
995 PHP_HTTP_DUFF(ahost_len
, *host_ptr
++ = *ahost_ptr
++);
998 state
->offset
+= host_ptr
- state
->url
.host
;
1004 #if 0 && defined(PHP_WIN32)
1005 static ZEND_RESULT_CODE
parse_widn(struct parse_state
*state
)
1008 uint16_t *uhost_str
, ahost_str
[MAXHOSTNAMELEN
], *ahost_ptr
;
1010 TSRMLS_FETCH_FROM_CTX(state
->ts
);
1012 if (state
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
1013 if (SUCCESS
!= to_utf16(parse_mb_utf8
, state
->url
.host
, &uhost_str
, &uhost_len
)) {
1014 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN");
1017 #ifdef PHP_HTTP_HAVE_WCHAR
1018 } else if (state
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
1019 if (SUCCESS
!= to_utf16(parse_mb_loc
, state
->url
.host
, &uhost_str
, &uhost_len
)) {
1020 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN");
1025 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN");
1029 if (!IdnToAscii(IDN_ALLOW_UNASSIGNED
|IDN_USE_STD3_ASCII_RULES
, uhost_str
, uhost_len
, ahost_str
, MAXHOSTNAMELEN
)) {
1031 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN");
1036 host_ptr
= state
->url
.host
;
1037 ahost_ptr
= ahost_str
;
1038 PHP_HTTP_DUFF(wcslen(ahost_str
), *host_ptr
++ = *ahost_ptr
++);
1042 state
->offset
+= host_ptr
- state
->url
.host
;
1048 #ifdef HAVE_INET_PTON
1049 static const char *parse_ip6(struct parse_state
*state
, const char *ptr
)
1052 const char *error
= NULL
, *end
= state
->ptr
, *tmp
= memchr(ptr
, ']', end
- ptr
);
1053 TSRMLS_FETCH_FROM_CTX(state
->ts
);
1056 size_t addrlen
= tmp
- ptr
+ 1;
1057 char buf
[16], *addr
= estrndup(ptr
+ 1, addrlen
- 2);
1058 int rv
= inet_pton(AF_INET6
, addr
, buf
);
1061 state
->buffer
[state
->offset
] = '[';
1062 state
->url
.host
= &state
->buffer
[state
->offset
];
1063 inet_ntop(AF_INET6
, buf
, state
->url
.host
+ 1, state
->maxlen
- state
->offset
);
1064 state
->offset
+= strlen(state
->url
.host
);
1065 state
->buffer
[state
->offset
++] = ']';
1066 state
->buffer
[state
->offset
++] = 0;
1068 } else if (rv
== -1) {
1070 error
= strerror(errno
);
1072 error
= "unexpected '['";
1076 pos
= tmp
? tmp
- ptr
: end
- ptr
;
1077 error
= "expected ']'";
1081 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1082 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse hostinfo; %s at pos %u in '%s'", error
, pos
, ptr
);
1091 static ZEND_RESULT_CODE
parse_hostinfo(struct parse_state
*state
, const char *ptr
)
1093 size_t mb
, len
= state
->offset
;
1094 const char *end
= state
->ptr
, *tmp
= ptr
, *port
= NULL
, *label
= NULL
;
1095 TSRMLS_FETCH_FROM_CTX(state
->ts
);
1097 #ifdef HAVE_INET_PTON
1098 if (*ptr
== '[' && !(ptr
= parse_ip6(state
, ptr
))) {
1099 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
1106 if (ptr
!= end
) do {
1110 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1111 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1112 "Failed to parse port; unexpected ':' at pos %u in '%s'",
1113 (unsigned) (ptr
- tmp
), tmp
);
1115 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
1123 if (ptr
[1] != '%' && (end
- ptr
<= 2 || !isxdigit(*(ptr
+1)) || !isxdigit(*(ptr
+2)))) {
1124 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1125 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1126 "Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'",
1127 (unsigned) (ptr
- tmp
), tmp
);
1129 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
1132 state
->buffer
[state
->offset
++] = *ptr
++;
1135 state
->buffer
[state
->offset
++] = *ptr
++;
1136 state
->buffer
[state
->offset
++] = *ptr
++;
1137 state
->buffer
[state
->offset
++] = *ptr
;
1141 if (port
|| !label
) {
1142 /* sort of a compromise, just ensure we don't end up
1143 * with a dot at the beginning or two consecutive dots
1145 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1146 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1147 "Failed to parse %s; unexpected '%c' at pos %u in '%s'",
1148 port
? "port" : "host",
1149 (unsigned char) *ptr
, (unsigned) (ptr
- tmp
), tmp
);
1151 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
1156 state
->buffer
[state
->offset
++] = *ptr
;
1162 /* sort of a compromise, just ensure we don't end up
1163 * with a hyphen at the beginning
1165 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1166 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1167 "Failed to parse %s; unexpected '%c' at pos %u in '%s'",
1168 port
? "port" : "host",
1169 (unsigned char) *ptr
, (unsigned) (ptr
- tmp
), tmp
);
1171 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
1177 case '_': case '~': /* unreserved */
1178 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
1179 case '+': case ',': case ';': case '=': /* sub-delims */
1180 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1181 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1182 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1183 case 'V': case 'W': case 'X': case 'Y': case 'Z':
1184 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1185 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1186 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1187 case 'v': case 'w': case 'x': case 'y': case 'z':
1189 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1190 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1191 "Failed to parse port; unexpected char '%c' at pos %u in '%s'",
1192 (unsigned char) *ptr
, (unsigned) (ptr
- tmp
), tmp
);
1194 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
1200 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
1201 case '7': case '8': case '9':
1204 state
->url
.port
*= 10;
1205 state
->url
.port
+= *ptr
- '0';
1208 state
->buffer
[state
->offset
++] = *ptr
;
1216 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1217 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1218 "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'",
1219 (unsigned char) *ptr
, (unsigned) (ptr
- tmp
), tmp
);
1221 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
1225 } else if (!(mb
= parse_mb(state
, PARSE_HOSTINFO
, ptr
, end
, tmp
, state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
))) {
1226 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
1234 } while (++ptr
!= end
);
1236 if (!state
->url
.host
) {
1237 len
= state
->offset
- len
;
1238 state
->url
.host
= &state
->buffer
[state
->offset
- len
];
1239 state
->buffer
[state
->offset
++] = 0;
1242 if (state
->flags
& PHP_HTTP_URL_PARSE_TOIDN
) {
1243 #if PHP_HTTP_HAVE_IDN2
1244 return parse_idn2(state
, len
);
1245 #elif PHP_HTTP_HAVE_IDN
1246 return parse_idn(state
, len
);
1248 #ifdef HAVE_UIDNA_IDNTOASCII
1249 return parse_uidn(state
);
1251 #if 0 && defined(PHP_WIN32)
1252 return parse_widn(state
);
/*
 * Scan the authority section beginning at state->ptr and split it into
 * userinfo and hostinfo.
 *
 * `tmp` marks where the component currently being collected starts; `host`
 * is set to the byte following the userinfo delimiter. A component is only
 * handed to parse_userinfo() / parse_hostinfo() when it is non-empty
 * (tmp != state->ptr). An unexpected '@' is reported as a warning unless
 * PHP_HTTP_URL_SILENT_ERRORS is set.
 */
1259 static const char *parse_authority(struct parse_state
*state
)
1261 const char *tmp
= state
->ptr
, *host
= NULL
;
1264 switch (*state
->ptr
) {
1266 /* userinfo delimiter */
1268 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1269 TSRMLS_FETCH_FROM_CTX(state
->ts
);
1270 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1271 "Failed to parse userinfo; unexpected '@'");
1273 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
/* remember where the host starts: right behind the delimiter */
1278 host
= state
->ptr
+ 1;
/* everything collected since tmp is the userinfo */
1279 if (tmp
!= state
->ptr
&& SUCCESS
!= parse_userinfo(state
, tmp
)) {
/* the next component starts behind the delimiter */
1282 tmp
= state
->ptr
+ 1;
1290 /* host delimiter */
1291 if (tmp
!= state
->ptr
&& SUCCESS
!= parse_hostinfo(state
, tmp
)) {
/*
 * The condition is <=, so the body also runs with state->ptr == state->end.
 * NOTE(review): this dereferences the byte at `end`; presumably the input is
 * NUL-terminated and that byte acts as the final host delimiter -- confirm.
 */
1296 } while (++state
->ptr
<= state
->end
);
/*
 * Copy the path component from state->ptr into state->buffer and point
 * state->url.path at it.
 *
 * Percent-encoded triplets and the characters listed in the case labels are
 * copied verbatim; any other byte is handed to parse_mb() with PARSE_PATH.
 * Invalid percent encodings raise a warning unless
 * PHP_HTTP_URL_SILENT_ERRORS is set. If no byte was consumed,
 * state->url.path is reset to NULL.
 */
1302 static const char *parse_path(struct parse_state
*state
)
1306 TSRMLS_FETCH_FROM_CTX(state
->ts
);
1308 /* is there actually a path to parse? */
1313 state
->url
.path
= &state
->buffer
[state
->offset
];
1316 switch (*state
->ptr
) {
/*
 * A '%' not followed by another '%' must be followed by two hex digits that
 * lie before state->end; otherwise it is an invalid percent encoding.
 * NOTE(review): isxdigit() receives a plain char here; casting to
 * unsigned char would avoid undefined behavior for bytes >= 0x80 on
 * platforms where char is signed.
 */
1322 if (state
->ptr
[1] != '%' && (state
->end
- state
->ptr
<= 2 || !isxdigit(*(state
->ptr
+1)) || !isxdigit(*(state
->ptr
+2)))) {
1323 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1324 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1325 "Failed to parse path; invalid percent encoding at pos %u in '%s'",
1326 (unsigned) (state
->ptr
- tmp
), tmp
);
1328 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
/* the offending '%' itself is copied unchanged */
1331 state
->buffer
[state
->offset
++] = *state
->ptr
;
/* accepted triplet: copy the '%' and the two bytes following it */
1334 state
->buffer
[state
->offset
++] = *state
->ptr
++;
1335 state
->buffer
[state
->offset
++] = *state
->ptr
++;
1336 state
->buffer
[state
->offset
++] = *state
->ptr
;
1339 case '/': /* yeah, well */
1340 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
1341 case '+': case ',': case ';': case '=': /* sub-delims */
1342 case '-': case '.': case '_': case '~': /* unreserved */
1343 case ':': case '@': /* pchar */
1344 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1345 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1346 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1347 case 'V': case 'W': case 'X': case 'Y': case 'Z':
1348 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1349 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1350 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1351 case 'v': case 'w': case 'x': case 'y': case 'z':
1352 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
1353 case '7': case '8': case '9':
/* listed characters are copied as they are */
1355 state
->buffer
[state
->offset
++] = *state
->ptr
;
/* anything else goes through the multibyte handler */
1359 if (!(mb
= parse_mb(state
, PARSE_PATH
, state
->ptr
, state
->end
, tmp
, state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
))) {
1360 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
/*
 * Skip mb - 1 bytes; the loop increment below accounts for one more.
 * Presumably mb is the byte length of the sequence parse_mb() handled.
 */
1365 state
->ptr
+= mb
- 1;
1367 } while (++state
->ptr
< state
->end
);
1370 /* did we have any path component ? */
1371 if (tmp
!= state
->ptr
) {
/* terminate the collected path inside the buffer */
1372 state
->buffer
[state
->offset
++] = 0;
/* nothing was consumed: report "no path" */
1374 state
->url
.path
= NULL
;
/*
 * Copy the query component into state->buffer and point state->url.query
 * at it. Nothing is parsed unless state->ptr points at '?'.
 *
 * Percent-encoded triplets and the characters listed in the "allowed" case
 * labels are copied verbatim. The RFC1738-unsafe characters are rewritten
 * as %XX when PHP_HTTP_URL_PARSE_TOPCT is set. Any other byte is handed to
 * parse_mb() with PARSE_QUERY. The collected query is NUL-terminated in the
 * buffer.
 */
1379 static const char *parse_query(struct parse_state
*state
)
/* tmp starts one byte behind state->ptr unless state->ptr is at a NUL byte */
1382 const char *tmp
= state
->ptr
+ !!*state
->ptr
;
1383 TSRMLS_FETCH_FROM_CTX(state
->ts
);
1385 /* is there actually a query to parse? */
1386 if (*state
->ptr
!= '?') {
1390 /* skip initial '?' */
1392 state
->url
.query
= &state
->buffer
[state
->offset
];
1394 while (state
->ptr
< state
->end
) {
1395 switch (*state
->ptr
) {
/*
 * A '%' not followed by another '%' must be followed by two hex digits that
 * lie before state->end; otherwise it is an invalid percent encoding.
 * NOTE(review): isxdigit() receives a plain char here; casting to
 * unsigned char would avoid undefined behavior for bytes >= 0x80 on
 * platforms where char is signed.
 */
1400 if (state
->ptr
[1] != '%' && (state
->end
- state
->ptr
<= 2 || !isxdigit(*(state
->ptr
+1)) || !isxdigit(*(state
->ptr
+2)))) {
1401 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1402 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1403 "Failed to parse query; invalid percent encoding at pos %u in '%s'",
1404 (unsigned) (state
->ptr
- tmp
), tmp
);
1406 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
1409 /* fallthrough, pct-encode the percent sign */
/* accepted triplet: copy the '%' and the two bytes following it */
1411 state
->buffer
[state
->offset
++] = *state
->ptr
++;
1412 state
->buffer
[state
->offset
++] = *state
->ptr
++;
1413 state
->buffer
[state
->offset
++] = *state
->ptr
;
1420 case '|': case '\\': case '^': case '`': case '"': case ' ':
1421 /* RFC1738 unsafe */
1422 if (state
->flags
& PHP_HTTP_URL_PARSE_TOPCT
) {
/* emit "%XX": high nibble first, then low nibble, via parse_xdigits */
1423 state
->buffer
[state
->offset
++] = '%';
1424 state
->buffer
[state
->offset
++] = parse_xdigits
[((unsigned char) *state
->ptr
) >> 4];
1425 state
->buffer
[state
->offset
++] = parse_xdigits
[((unsigned char) *state
->ptr
) & 0xf];
1430 case '?': case '/': /* yeah, well */
1431 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
1432 case '+': case ',': case ';': case '=': /* sub-delims */
1433 case '-': case '.': case '_': case '~': /* unreserved */
1434 case ':': case '@': /* pchar */
1435 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1436 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1437 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1438 case 'V': case 'W': case 'X': case 'Y': case 'Z':
1439 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1440 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1441 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1442 case 'v': case 'w': case 'x': case 'y': case 'z':
1443 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
1444 case '7': case '8': case '9':
/* listed characters are copied as they are */
1446 state
->buffer
[state
->offset
++] = *state
->ptr
;
/* anything else goes through the multibyte handler */
1450 if (!(mb
= parse_mb(state
, PARSE_QUERY
, state
->ptr
, state
->end
, tmp
, state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
))) {
1451 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
/*
 * Skip mb - 1 bytes of the sequence; presumably mb is the byte length
 * parse_mb() handled and the remaining byte is consumed by the loop.
 */
1456 state
->ptr
+= mb
- 1;
/* terminate the collected query inside the buffer */
1463 state
->buffer
[state
->offset
++] = 0;
/*
 * Copy the fragment component into state->buffer and point
 * state->url.fragment at it. Nothing is parsed unless state->ptr points
 * at '#'.
 *
 * Percent-encoded triplets and the characters listed in the "allowed" case
 * labels are copied verbatim. The RFC1738-unsafe characters are rewritten
 * as %XX when PHP_HTTP_URL_PARSE_TOPCT is set. Any other byte is handed to
 * parse_mb() with PARSE_FRAGMENT. The collected fragment is NUL-terminated
 * in the buffer.
 */
1467 static const char *parse_fragment(struct parse_state
*state
)
1471 TSRMLS_FETCH_FROM_CTX(state
->ts
);
1473 /* is there actually a fragment to parse? */
1474 if (*state
->ptr
!= '#') {
1478 /* skip initial '#' */
1480 state
->url
.fragment
= &state
->buffer
[state
->offset
];
1483 switch (*state
->ptr
) {
/* warn about an invalid fragment identifier unless errors are silenced */
1485 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1486 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1487 "Failed to parse fragment; invalid fragment identifier at pos %u in '%s'",
1488 (unsigned) (state
->ptr
- tmp
), tmp
);
1490 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
/* the offending byte is copied unchanged */
1493 state
->buffer
[state
->offset
++] = *state
->ptr
;
/*
 * A '%' not followed by another '%' must be followed by two hex digits that
 * lie before state->end; otherwise it is an invalid percent encoding.
 * NOTE(review): isxdigit() receives a plain char here; casting to
 * unsigned char would avoid undefined behavior for bytes >= 0x80 on
 * platforms where char is signed.
 */
1497 if (state
->ptr
[1] != '%' && (state
->end
- state
->ptr
<= 2 || !isxdigit(*(state
->ptr
+1)) || !isxdigit(*(state
->ptr
+2)))) {
1498 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1499 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1500 "Failed to parse fragment; invalid percent encoding at pos %u in '%s'",
1501 (unsigned) (state
->ptr
- tmp
), tmp
);
1503 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
/* accepted triplet: copy the '%' and the two bytes following it */
1508 state
->buffer
[state
->offset
++] = *state
->ptr
++;
1509 state
->buffer
[state
->offset
++] = *state
->ptr
++;
1510 state
->buffer
[state
->offset
++] = *state
->ptr
;
1518 case '|': case '\\': case '^': case '`': case '"': case ' ':
1519 /* RFC1738 unsafe */
1520 if (state
->flags
& PHP_HTTP_URL_PARSE_TOPCT
) {
/* emit "%XX": high nibble first, then low nibble, via parse_xdigits */
1521 state
->buffer
[state
->offset
++] = '%';
1522 state
->buffer
[state
->offset
++] = parse_xdigits
[((unsigned char) *state
->ptr
) >> 4];
1523 state
->buffer
[state
->offset
++] = parse_xdigits
[((unsigned char) *state
->ptr
) & 0xf];
1529 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
1530 case '+': case ',': case ';': case '=': /* sub-delims */
1531 case '-': case '.': case '_': case '~': /* unreserved */
1532 case ':': case '@': /* pchar */
1533 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1534 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1535 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1536 case 'V': case 'W': case 'X': case 'Y': case 'Z':
1537 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1538 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1539 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1540 case 'v': case 'w': case 'x': case 'y': case 'z':
1541 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
1542 case '7': case '8': case '9':
/* listed characters are copied as they are */
1544 state
->buffer
[state
->offset
++] = *state
->ptr
;
/* anything else goes through the multibyte handler */
1548 if (!(mb
= parse_mb(state
, PARSE_FRAGMENT
, state
->ptr
, state
->end
, tmp
, state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
))) {
1549 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
/*
 * Skip mb - 1 bytes; the loop increment below accounts for one more.
 * Presumably mb is the byte length of the sequence parse_mb() handled.
 */
1554 state
->ptr
+= mb
- 1;
1556 } while (++state
->ptr
< state
->end
);
/* terminate the collected fragment inside the buffer */
1558 state
->buffer
[state
->offset
++] = 0;
/*
 * Parse the hierarchical part of the URL: when the input at state->ptr
 * starts with "//" (and more than one byte remains), the authority is
 * parsed via parse_authority() and state->ptr is updated to its result.
 * The function finally returns whatever parse_path() returns.
 */
1562 static const char *parse_hier(struct parse_state
*state
)
1564 if (*state
->ptr
== '/') {
/* only look at the second byte if there is one */
1565 if (state
->end
- state
->ptr
> 1) {
1566 if (*(state
->ptr
+ 1) == '/') {
/* a NULL result from parse_authority() signals failure */
1568 if (!(state
->ptr
= parse_authority(state
))) {
1574 return parse_path(state
);
/*
 * Try to read a URL scheme from the start of the input into state->buffer.
 *
 * On reaching the scheme delimiter, state->url.scheme is pointed at the
 * start of the buffer, the scheme is NUL-terminated, and the position
 * behind the delimiter is returned. Letters are copied into the buffer;
 * digits, '+', '-' and '.' are additionally checked against the scheme's
 * first position (state->ptr == tmp). Other bytes go through parse_mb()
 * with PARSE_SCHEME. If the loop ends without finding a delimiter,
 * state->ptr is rewound to where scanning began and that position is
 * returned.
 */
1577 static const char *parse_scheme(struct parse_state
*state
)
/* tmp remembers the start so the scan can be rewound */
1580 const char *tmp
= state
->ptr
;
1583 switch (*state
->ptr
) {
1585 /* scheme delimiter */
1586 state
->url
.scheme
= &state
->buffer
[0];
1587 state
->buffer
[state
->offset
++] = 0;
1588 return ++state
->ptr
;
1590 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
1591 case '7': case '8': case '9':
1592 case '+': case '-': case '.':
/* these characters get special treatment at the very first position */
1593 if (state
->ptr
== tmp
) {
1597 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1598 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1599 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1600 case 'V': case 'W': case 'X': case 'Y': case 'Z':
1601 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1602 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1603 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1604 case 'v': case 'w': case 'x': case 'y': case 'z':
1606 state
->buffer
[state
->offset
++] = *state
->ptr
;
/* the last argument is a literal 1 here, not the SILENT_ERRORS flag */
1610 if (!(mb
= parse_mb(state
, PARSE_SCHEME
, state
->ptr
, state
->end
, tmp
, 1))) {
1613 state
->ptr
+= mb
- 1;
1615 } while (++state
->ptr
!= state
->end
);
/* no scheme delimiter seen: rewind to the start of the input */
1619 return state
->ptr
= tmp
;
/*
 * Parse the URL string `str` of length `len` according to `flags`.
 *
 * A zero-initialized parse state is allocated with `maxlen` extra bytes
 * (3 * len + 8), then parse_scheme(), parse_hier(), parse_query() and
 * parse_fragment() are run in that order. Failures of the scheme, query and
 * fragment steps emit an E_WARNING. On success the parse state itself is
 * returned, cast to php_http_url_t *.
 */
1622 php_http_url_t
*php_http_url_parse(const char *str
, size_t len
, unsigned flags TSRMLS_DC
)
1624 size_t maxlen
= 3 * len
+ 8 /* null bytes for all components */;
1625 struct parse_state
*state
= ecalloc(1, sizeof(*state
) + maxlen
);
1627 state
->end
= str
+ len
;
1629 state
->flags
= flags
;
1630 state
->maxlen
= maxlen
;
1631 TSRMLS_SET_CTX(state
->ts
);
1633 if (!parse_scheme(state
)) {
1634 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse URL scheme: '%s'", state
->ptr
);
1639 if (!parse_hier(state
)) {
1644 if (!parse_query(state
)) {
1645 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse URL query: '%s'", state
->ptr
);
1650 if (!parse_fragment(state
)) {
1651 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse URL fragment: '%s'", state
->ptr
);
/* the caller receives the state allocation as the URL */
1656 return (php_http_url_t
*) state
;
/*
 * Parse only an authority string (`str`, `len`) according to `flags`.
 *
 * A zero-initialized parse state is allocated with 3 * len extra bytes and
 * parse_authority() is run on it. If parsing stops before the end of the
 * input, a warning about an unexpected character is emitted unless
 * PHP_HTTP_URL_SILENT_ERRORS is set. On success the parse state itself is
 * returned, cast to php_http_url_t *.
 *
 * NOTE(review): unlike php_http_url_parse(), maxlen has no "+ 8" slack for
 * terminating NUL bytes -- confirm the buffer cannot be exceeded.
 */
1659 php_http_url_t
*php_http_url_parse_authority(const char *str
, size_t len
, unsigned flags TSRMLS_DC
)
1661 size_t maxlen
= 3 * len
;
1662 struct parse_state
*state
= ecalloc(1, sizeof(*state
) + maxlen
);
1664 state
->end
= str
+ len
;
1666 state
->flags
= flags
;
1667 state
->maxlen
= maxlen
;
1668 TSRMLS_SET_CTX(state
->ts
);
/* a NULL result from parse_authority() signals failure */
1670 if (!(state
->ptr
= parse_authority(state
))) {
/* trailing input behind the authority is an error */
1675 if (state
->ptr
!= state
->end
) {
1676 if (!(state
->flags
& PHP_HTTP_URL_SILENT_ERRORS
)) {
1677 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
1678 "Failed to parse URL authority, unexpected character at pos %u in '%s'",
1679 (unsigned) (state
->ptr
- str
), str
);
1681 if (!(state
->flags
& PHP_HTTP_URL_IGNORE_ERRORS
)) {
/* the caller receives the state allocation as the URL */
1687 return (php_http_url_t
*) state
;
/*
 * http\Url::__construct([mixed old_url[, mixed new_url[, int flags]]])
 *
 * All three arguments are optional; old_url and new_url may be NULL, and
 * flags defaults to PHP_HTTP_URL_FROM_ENV. Both URLs are converted with
 * php_http_url_from_zval(), combined through php_http_url_mod(), and the
 * result is written into $this via php_http_url_to_struct().
 *
 * Error handling depends on flags: PHP_HTTP_URL_SILENT_ERRORS installs
 * EH_SUPPRESS, PHP_HTTP_URL_IGNORE_ERRORS installs EH_NORMAL, and
 * EH_THROW with php_http_exception_bad_url_class_entry is also used.
 * The previous error handling is restored before leaving.
 */
1690 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct
, 0, 0, 0)
1691 ZEND_ARG_INFO(0, old_url
)
1692 ZEND_ARG_INFO(0, new_url
)
1693 ZEND_ARG_INFO(0, flags
)
1694 ZEND_END_ARG_INFO();
1695 PHP_METHOD(HttpUrl
, __construct
)
1697 zval
*new_url
= NULL
, *old_url
= NULL
;
1698 long flags
= PHP_HTTP_URL_FROM_ENV
;
1699 zend_error_handling zeh
;
/* "|z!z!l": two optional nullable zvals followed by an optional long */
1701 php_http_expect(SUCCESS
== zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC
, "|z!z!l", &old_url
, &new_url
, &flags
), invalid_arg
, return);
/* pick the error handling mode requested through flags */
1703 if (flags
& PHP_HTTP_URL_SILENT_ERRORS
) {
1704 zend_replace_error_handling(EH_SUPPRESS
, NULL
, &zeh TSRMLS_CC
);
1705 } else if (flags
& PHP_HTTP_URL_IGNORE_ERRORS
) {
1706 zend_replace_error_handling(EH_NORMAL
, NULL
, &zeh TSRMLS_CC
);
1708 zend_replace_error_handling(EH_THROW
, php_http_exception_bad_url_class_entry
, &zeh TSRMLS_CC
);
1711 php_http_url_t
*res_purl
, *new_purl
= NULL
, *old_purl
= NULL
;
1714 new_purl
= php_http_url_from_zval(new_url
, flags TSRMLS_CC
);
1716 zend_restore_error_handling(&zeh TSRMLS_CC
);
1721 old_purl
= php_http_url_from_zval(old_url
, flags TSRMLS_CC
);
/* new_purl is released here and the error handling restored right after */
1724 php_http_url_free(&new_purl
);
1726 zend_restore_error_handling(&zeh TSRMLS_CC
);
/* merge both URLs and store the result in $this */
1731 res_purl
= php_http_url_mod(old_purl
, new_purl
, flags TSRMLS_CC
);
1732 php_http_url_to_struct(res_purl
, getThis() TSRMLS_CC
);
1734 php_http_url_free(&res_purl
);
1736 php_http_url_free(&old_purl
);
1739 php_http_url_free(&new_purl
);
1742 zend_restore_error_handling(&zeh TSRMLS_CC
);
/*
 * http\Url::mod(mixed more_url_parts[, int flags])
 *
 * more_url_parts is required but may be NULL; flags defaults to
 * PHP_HTTP_URL_JOIN_PATH | PHP_HTTP_URL_JOIN_QUERY |
 * PHP_HTTP_URL_SANITIZE_PATH. The current object is read with
 * php_http_url_from_struct(), a clone of $this becomes the return value,
 * and the result of php_http_url_mod() is written into that clone.
 *
 * Error handling depends on flags in the same way as in the constructor:
 * EH_SUPPRESS for PHP_HTTP_URL_SILENT_ERRORS, EH_NORMAL for
 * PHP_HTTP_URL_IGNORE_ERRORS, and EH_THROW with
 * php_http_exception_bad_url_class_entry is also used.
 */
1745 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_mod
, 0, 0, 1)
1746 ZEND_ARG_INFO(0, more_url_parts
)
1747 ZEND_ARG_INFO(0, flags
)
1748 ZEND_END_ARG_INFO();
1749 PHP_METHOD(HttpUrl
, mod
)
1751 zval
*new_url
= NULL
;
1752 long flags
= PHP_HTTP_URL_JOIN_PATH
| PHP_HTTP_URL_JOIN_QUERY
| PHP_HTTP_URL_SANITIZE_PATH
;
1753 zend_error_handling zeh
;
/* "z!|l": one nullable zval followed by an optional long */
1755 php_http_expect(SUCCESS
== zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC
, "z!|l", &new_url
, &flags
), invalid_arg
, return);
/* pick the error handling mode requested through flags */
1757 if (flags
& PHP_HTTP_URL_SILENT_ERRORS
) {
1758 zend_replace_error_handling(EH_SUPPRESS
, NULL
, &zeh TSRMLS_CC
);
1759 } else if (flags
& PHP_HTTP_URL_IGNORE_ERRORS
) {
1760 zend_replace_error_handling(EH_NORMAL
, NULL
, &zeh TSRMLS_CC
);
1762 zend_replace_error_handling(EH_THROW
, php_http_exception_bad_url_class_entry
, &zeh TSRMLS_CC
);
1765 php_http_url_t
*new_purl
= NULL
, *old_purl
= NULL
;
1768 new_purl
= php_http_url_from_zval(new_url
, flags TSRMLS_CC
);
1770 zend_restore_error_handling(&zeh TSRMLS_CC
);
/* only proceed when the current object yields a URL */
1775 if ((old_purl
= php_http_url_from_struct(HASH_OF(getThis())))) {
1776 php_http_url_t
*res_purl
;
/* the return value is a clone of $this that receives the modified URL */
1778 ZVAL_OBJVAL(return_value
, zend_objects_clone_obj(getThis() TSRMLS_CC
), 0);
1780 res_purl
= php_http_url_mod(old_purl
, new_purl
, flags TSRMLS_CC
);
1781 php_http_url_to_struct(res_purl
, return_value TSRMLS_CC
);
1783 php_http_url_free(&res_purl
);
1784 php_http_url_free(&old_purl
);
1787 php_http_url_free(&new_purl
);
1790 zend_restore_error_handling(&zeh TSRMLS_CC
);
/*
 * http\Url::toString()
 *
 * Takes no arguments. When the object's properties yield a URL via
 * php_http_url_from_struct(), it is rendered with php_http_url_to_string()
 * and returned; an empty string is the fallback return value.
 */
1793 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toString
, 0, 0, 0)
1794 ZEND_END_ARG_INFO();
1795 PHP_METHOD(HttpUrl
, toString
)
1797 if (SUCCESS
== zend_parse_parameters_none()) {
1798 php_http_url_t
*purl
;
1800 if ((purl
= php_http_url_from_struct(HASH_OF(getThis())))) {
1804 php_http_url_to_string(purl
, &str
, &len
, 0);
1805 php_http_url_free(&purl
);
/* NOTE(review): the trailing 0 presumably hands str over without copying -- confirm */
1806 RETURN_STRINGL(str
, len
, 0);
1809 RETURN_EMPTY_STRING();
/*
 * http\Url::toArray()
 *
 * Takes no arguments. The object's properties are converted to a URL with
 * php_http_url_from_struct() and written into the return value with
 * php_http_url_to_struct(), which drops any non-URL properties.
 *
 * NOTE(review): unlike toString(), the result of php_http_url_from_struct()
 * is not checked before use here -- confirm that the callees accept NULL.
 */
1812 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toArray
, 0, 0, 0)
1813 ZEND_END_ARG_INFO();
1814 PHP_METHOD(HttpUrl
, toArray
)
1816 php_http_url_t
*purl
;
1818 if (SUCCESS
!= zend_parse_parameters_none()) {
1822 /* strip any non-URL properties */
1823 purl
= php_http_url_from_struct(HASH_OF(getThis()));
1824 php_http_url_to_struct(purl
, return_value TSRMLS_CC
);
1825 php_http_url_free(&purl
);
/*
 * Method table of the HttpUrl class: the public constructor, mod(),
 * toString(), toArray(), and __toString() registered as an alias of
 * toString() sharing its arginfo.
 */
1828 static zend_function_entry php_http_url_methods
[] = {
1829 PHP_ME(HttpUrl
, __construct
, ai_HttpUrl___construct
, ZEND_ACC_PUBLIC
|ZEND_ACC_CTOR
)
1830 PHP_ME(HttpUrl
, mod
, ai_HttpUrl_mod
, ZEND_ACC_PUBLIC
)
1831 PHP_ME(HttpUrl
, toString
, ai_HttpUrl_toString
, ZEND_ACC_PUBLIC
)
1832 ZEND_MALIAS(HttpUrl
, __toString
, toString
, ai_HttpUrl_toString
, ZEND_ACC_PUBLIC
)
1833 PHP_ME(HttpUrl
, toArray
, ai_HttpUrl_toArray
, ZEND_ACC_PUBLIC
)
1834 EMPTY_FUNCTION_ENTRY
/* Class entry of http\Url; assigned in PHP_MINIT_FUNCTION(http_url). */
1837 zend_class_entry
*php_http_url_class_entry
;
/*
 * Module startup for the URL component: registers the class "Url" in the
 * "http" namespace with php_http_url_methods, declares its eight public
 * properties (initialised to NULL), and exposes the PHP_HTTP_URL_* flags as
 * class constants.
 */
1839 PHP_MINIT_FUNCTION(http_url
)
1841 zend_class_entry ce
= {0};
1843 INIT_NS_CLASS_ENTRY(ce
, "http", "Url", php_http_url_methods
);
1844 php_http_url_class_entry
= zend_register_internal_class(&ce TSRMLS_CC
);
/* one public property per URL component */
1846 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("scheme"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1847 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("user"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1848 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("pass"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1849 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("host"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1850 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("port"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1851 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("path"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1852 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("query"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1853 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("fragment"), ZEND_ACC_PUBLIC TSRMLS_CC
);
/* flags exposed as class constants */
1855 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("REPLACE"), PHP_HTTP_URL_REPLACE TSRMLS_CC
);
1856 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("JOIN_PATH"), PHP_HTTP_URL_JOIN_PATH TSRMLS_CC
);
1857 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("JOIN_QUERY"), PHP_HTTP_URL_JOIN_QUERY TSRMLS_CC
);
1858 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_USER"), PHP_HTTP_URL_STRIP_USER TSRMLS_CC
);
1859 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_PASS"), PHP_HTTP_URL_STRIP_PASS TSRMLS_CC
);
1860 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_AUTH"), PHP_HTTP_URL_STRIP_AUTH TSRMLS_CC
);
1861 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_PORT"), PHP_HTTP_URL_STRIP_PORT TSRMLS_CC
);
1862 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_PATH"), PHP_HTTP_URL_STRIP_PATH TSRMLS_CC
);
1863 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_QUERY"), PHP_HTTP_URL_STRIP_QUERY TSRMLS_CC
);
1864 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_FRAGMENT"), PHP_HTTP_URL_STRIP_FRAGMENT TSRMLS_CC
);
1865 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_ALL"), PHP_HTTP_URL_STRIP_ALL TSRMLS_CC
);
1866 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("FROM_ENV"), PHP_HTTP_URL_FROM_ENV TSRMLS_CC
);
1867 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("SANITIZE_PATH"), PHP_HTTP_URL_SANITIZE_PATH TSRMLS_CC
);
/* PARSE_MBLOC is only declared when wide character support is available */
1869 #ifdef PHP_HTTP_HAVE_WCHAR
1870 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_MBLOC"), PHP_HTTP_URL_PARSE_MBLOC TSRMLS_CC
);
1872 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC
);
/* PARSE_TOIDN is only declared when one of the IDN backends is available */
1873 #if defined(PHP_HTTP_HAVE_IDN2) || defined(PHP_HTTP_HAVE_IDN) || defined(HAVE_UIDNA_IDNTOASCII)
1874 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_TOIDN"), PHP_HTTP_URL_PARSE_TOIDN TSRMLS_CC
);
1876 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_TOPCT"), PHP_HTTP_URL_PARSE_TOPCT TSRMLS_CC
);
1878 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("IGNORE_ERRORS"), PHP_HTTP_URL_IGNORE_ERRORS TSRMLS_CC
);
1879 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("SILENT_ERRORS"), PHP_HTTP_URL_SILENT_ERRORS TSRMLS_CC
);
1881 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STDFLAGS"), PHP_HTTP_URL_STDFLAGS TSRMLS_CC
);
1892 * vim600: noet sw=4 ts=4 fdm=marker
1893 * vim<600: noet sw=4 ts=4