2 +--------------------------------------------------------------------+
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
13 #include "php_http_api.h"
15 #ifdef PHP_HTTP_HAVE_IDN
19 #ifdef PHP_HTTP_HAVE_WCHAR
24 #ifdef HAVE_ARPA_INET_H
25 # include <arpa/inet.h>
28 #include "php_http_utf8.h"
/*
 * Returns a freshly allocated (estrdup/estrndup) name for the local host.
 *
 * The name is taken from gethostname(); when HAVE_GETDOMAINNAME is defined
 * the result of getdomainname() is appended after a '.'.  The literal
 * "(none)" is treated as "no name available".  The last visible statement
 * returns a copy of "localhost".
 */
30 static inline char *localhostname(void)
32 char hostname
[1024] = {0};
35 if (SUCCESS
== gethostname(hostname
, lenof(hostname
))) {
36 return estrdup(hostname
);
38 #elif defined(HAVE_GETHOSTNAME)
39 if (SUCCESS
== gethostname(hostname
, lenof(hostname
))) {
40 # if defined(HAVE_GETDOMAINNAME)
41 size_t hlen
= strlen(hostname
);
/* only append a domain if hostname[] still has room for at least "(none)" */
42 if (hlen
<= lenof(hostname
) - lenof("(none)")) {
43 hostname
[hlen
++] = '.';
44 if (SUCCESS
== getdomainname(&hostname
[hlen
], lenof(hostname
) - hlen
)) {
/* domain is "(none)": cut the string at the '.' separator written above */
45 if (!strcmp(&hostname
[hlen
], "(none)")) {
46 hostname
[hlen
- 1] = '\0';
48 return estrdup(hostname
);
52 if (strcmp(hostname
, "(none)")) {
53 return estrdup(hostname
);
57 return estrndup("localhost", lenof("localhost"));
/*
 * Fills a php_url from the current request environment.
 *
 *  - port:   SERVER_PORT is examined with is_numeric_string() and must
 *            yield IS_LONG.
 *  - scheme: "https" when the HTTPS server variable equals "ON"
 *            (case-insensitive compare); "http" is the other value assigned.
 *  - host:   the first of HTTP_HOST, SERVER_NAME, SERVER_ADDR that is set,
 *            provided it is non-empty, cut at the first character outside
 *            [0-9a-zA-Z.-]; localhostname() is the alternative source.
 *  - path:   SG(request_info).request_uri, either up to the '?' found by
 *            strchr() or copied whole.
 *  - query:  SG(request_info).query_string when non-empty.
 *
 * Every string stored in the url is an emalloc'ed copy.
 * NOTE(review): the statement guarding the ecalloc() of `url` and the return
 * statement are not part of the visible lines - confirm against the full file.
 */
60 static php_url
*php_http_url_from_env(php_url
*url TSRMLS_DC
)
62 zval
*https
, *zhost
, *zport
;
66 url
= ecalloc(1, sizeof(*url
));
70 zport
= php_http_env_get_server_var(ZEND_STRL("SERVER_PORT"), 1 TSRMLS_CC
);
71 if (zport
&& IS_LONG
== is_numeric_string(Z_STRVAL_P(zport
), Z_STRLEN_P(zport
), &port
, NULL
, 0)) {
76 https
= php_http_env_get_server_var(ZEND_STRL("HTTPS"), 1 TSRMLS_CC
);
77 if (https
&& !strcasecmp(Z_STRVAL_P(https
), "ON")) {
78 url
->scheme
= estrndup("https", lenof("https"));
80 url
->scheme
= estrndup("http", lenof("http"));
/* zhost ends up as the first server variable that exists; it must also be non-empty */
84 if ((((zhost
= php_http_env_get_server_var(ZEND_STRL("HTTP_HOST"), 1 TSRMLS_CC
)) ||
85 (zhost
= php_http_env_get_server_var(ZEND_STRL("SERVER_NAME"), 1 TSRMLS_CC
)) ||
86 (zhost
= php_http_env_get_server_var(ZEND_STRL("SERVER_ADDR"), 1 TSRMLS_CC
)))) && Z_STRLEN_P(zhost
)) {
/* keep only the leading run of letters, digits, '-' and '.' */
87 size_t stop_at
= strspn(Z_STRVAL_P(zhost
), "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-.");
89 url
->host
= estrndup(Z_STRVAL_P(zhost
), stop_at
);
91 url
->host
= localhostname();
95 if (SG(request_info
).request_uri
&& SG(request_info
).request_uri
[0]) {
96 const char *q
= strchr(SG(request_info
).request_uri
, '?');
99 url
->path
= estrndup(SG(request_info
).request_uri
, q
- SG(request_info
).request_uri
);
101 url
->path
= estrdup(SG(request_info
).request_uri
);
106 if (SG(request_info
).query_string
&& SG(request_info
).query_string
[0]) {
107 url
->query
= estrdup(SG(request_info
).query_string
);
/*
 * Builds a URL by combining old_url and new_url as directed by flags.
 *
 *  - PHP_HTTP_URL_FROM_ENV: a URL from php_http_url_from_env() is merged with
 *    old_url by a recursive call (flag cleared) into tmp_url; tmp_url is
 *    released again once copying is done.
 *  - Parts are copied with __URLCPY, which prefers new_url over old_url and
 *    duplicates the string with estrdup().  The port is taken from new_url
 *    when set there, else from old_url (0 without an old_url).
 *  - Each PHP_HTTP_URL_STRIP_* flag guards the handling of its part.
 *  - PHP_HTTP_URL_JOIN_PATH: when both paths are set and the new one does not
 *    start with '/', the old path (reduced with php_dirname() unless it ends
 *    in '/') and the new path are concatenated.
 *  - PHP_HTTP_URL_JOIN_QUERY: when both queries are set they are fed through
 *    php_http_querystring_update(), old first, then new, and the result is
 *    stored as the query.
 *  - Defaults assigned: scheme "http", host "localhost", path "/"; a path not
 *    starting with '/' is re-allocated with room for one extra leading byte.
 *  - PHP_HTTP_URL_SANITIZE_PATH: directory references in the path are
 *    collapsed in place with memmove().
 *  - Port 80 with "http" and port 443 with "https" are the default ports
 *    tested for at the end.
 *
 * The result is serialized via php_http_url_to_string(url, url_str, url_len).
 * NOTE(review): how url_ptr is filled, and the conditions around the default
 * assignments, are not part of the visible lines.
 */
113 void php_http_url(int flags
, const php_url
*old_url
, const php_url
*new_url
, php_url
**url_ptr
, char **url_str
, size_t *url_len TSRMLS_DC
)
115 php_url
*url
, *tmp_url
= NULL
;
117 /* set from env if requested */
118 if (flags
& PHP_HTTP_URL_FROM_ENV
) {
119 php_url
*env_url
= php_http_url_from_env(NULL TSRMLS_CC
);
121 php_http_url(flags
^ PHP_HTTP_URL_FROM_ENV
, env_url
, old_url
, &tmp_url
, NULL
, NULL TSRMLS_CC
);
123 php_url_free(env_url
);
127 url
= ecalloc(1, sizeof(*url
));
129 #define __URLSET(u,n) \
131 #define __URLCPY(n) \
132 url->n = __URLSET(new_url,n) ? estrdup(new_url->n) : (__URLSET(old_url,n) ? estrdup(old_url->n) : NULL)
134 if (!(flags
& PHP_HTTP_URL_STRIP_PORT
)) {
135 url
->port
= __URLSET(new_url
, port
) ? new_url
->port
: ((old_url
) ? old_url
->port
: 0);
137 if (!(flags
& PHP_HTTP_URL_STRIP_USER
)) {
140 if (!(flags
& PHP_HTTP_URL_STRIP_PASS
)) {
147 if (!(flags
& PHP_HTTP_URL_STRIP_PATH
)) {
/* join only when both paths exist and the new one is relative */
148 if ((flags
& PHP_HTTP_URL_JOIN_PATH
) && __URLSET(old_url
, path
) && __URLSET(new_url
, path
) && *new_url
->path
!= '/') {
149 size_t old_path_len
= strlen(old_url
->path
), new_path_len
= strlen(new_url
->path
);
/* room for both paths, one '/' separator and the terminating NUL */
151 url
->path
= ecalloc(1, old_path_len
+ new_path_len
+ 1 + 1);
153 strcat(url
->path
, old_url
->path
);
154 if (url
->path
[old_path_len
- 1] != '/') {
155 php_dirname(url
->path
, old_path_len
);
156 strcat(url
->path
, "/");
158 strcat(url
->path
, new_url
->path
);
163 if (!(flags
& PHP_HTTP_URL_STRIP_QUERY
)) {
164 if ((flags
& PHP_HTTP_URL_JOIN_QUERY
) && __URLSET(new_url
, query
) && __URLSET(old_url
, query
)) {
/* old query is applied first, the new one second */
171 ZVAL_STRING(&qstr
, old_url
->query
, 0);
172 php_http_querystring_update(&qarr
, &qstr
, NULL TSRMLS_CC
);
173 ZVAL_STRING(&qstr
, new_url
->query
, 0);
174 php_http_querystring_update(&qarr
, &qstr
, NULL TSRMLS_CC
);
177 php_http_querystring_update(&qarr
, NULL
, &qstr TSRMLS_CC
);
178 url
->query
= Z_STRVAL(qstr
);
184 if (!(flags
& PHP_HTTP_URL_STRIP_FRAGMENT
)) {
188 /* done with copy & combine & strip */
190 if (flags
& PHP_HTTP_URL_FROM_ENV
) {
191 /* free old_url we tainted above */
192 php_url_free(tmp_url
);
195 /* set some sane defaults */
198 url
->scheme
= estrndup("http", lenof("http"));
202 url
->host
= estrndup("localhost", lenof("localhost"));
206 url
->path
= estrndup("/", 1);
207 } else if (url
->path
[0] != '/') {
208 size_t plen
= strlen(url
->path
);
209 char *path
= emalloc(plen
+ 1 + 1);
/* old path, including its NUL, is copied behind the first byte of the new buffer */
212 memcpy(&path
[1], url
->path
, plen
+ 1);
213 STR_SET(url
->path
, path
);
215 /* replace directory references if path is not a single slash */
216 if ((flags
& PHP_HTTP_URL_SANITIZE_PATH
)
217 && url
->path
[0] && (url
->path
[0] != '/' || url
->path
[1])) {
/* end points one past the terminating NUL so memmove() also moves the NUL */
218 char *ptr
, *end
= url
->path
+ strlen(url
->path
) + 1;
220 for (ptr
= strchr(url
->path
, '/'); ptr
; ptr
= strchr(ptr
, '/')) {
223 memmove(&ptr
[1], &ptr
[2], end
- &ptr
[2]);
233 memmove(&ptr
[1], &ptr
[3], end
- &ptr
[3]);
239 while (ptr
!= url
->path
) {
244 memmove(&ptr
[1], pos
, end
- pos
);
246 } else if (!ptr
[3]) {
265 /* unset default ports */
267 if ( ((url
->port
== 80) && !strcmp(url
->scheme
, "http"))
268 || ((url
->port
==443) && !strcmp(url
->scheme
, "https"))
275 php_http_url_to_string(url
, url_str
, url_len TSRMLS_CC
);
/*
 * Encodes `hash` as a query string into a temporary php_http_buffer_t.
 *
 * The argument separator comes from php_http_url_argsep(); "=" (length 1) is
 * used as the value separator.  pre_encoded_str/pre_encoded_len are passed
 * through to php_http_url_encode_hash_ex().  If that call does not return
 * SUCCESS the buffer is freed; otherwise its contents are handed to
 * encoded_str/encoded_len through php_http_buffer_data() before the buffer is
 * freed.
 * NOTE(review): the return statements are not part of the visible lines.
 */
285 STATUS
php_http_url_encode_hash(HashTable
*hash
, const char *pre_encoded_str
, size_t pre_encoded_len
, char **encoded_str
, size_t *encoded_len TSRMLS_DC
)
287 const char *arg_sep_str
;
289 php_http_buffer_t
*qstr
= php_http_buffer_new();
291 php_http_url_argsep(&arg_sep_str
, &arg_sep_len TSRMLS_CC
);
293 if (SUCCESS
!= php_http_url_encode_hash_ex(hash
, qstr
, arg_sep_str
, arg_sep_len
, "=", 1, pre_encoded_str
, pre_encoded_len TSRMLS_CC
)) {
294 php_http_buffer_free(&qstr
);
298 php_http_buffer_data(qstr
, encoded_str
, encoded_len
);
299 php_http_buffer_free(&qstr
);
/*
 * Appends `hash`, encoded as query parameters, to the caller-supplied buffer
 * `qstr`.
 *
 * When pre_encoded_str is non-NULL and pre_encoded_len is non-zero, that
 * string is appended to qstr first.  The hash is then serialized with
 * php_http_params_to_string(), passing arg_sep_*, an empty string of length 0
 * and val_sep_* as separators together with PHP_HTTP_PARAMS_QUERY.
 * NOTE(review): the return statements are not part of the visible lines.
 */
304 STATUS
php_http_url_encode_hash_ex(HashTable
*hash
, php_http_buffer_t
*qstr
, const char *arg_sep_str
, size_t arg_sep_len
, const char *val_sep_str
, size_t val_sep_len
, const char *pre_encoded_str
, size_t pre_encoded_len TSRMLS_DC
)
306 if (pre_encoded_len
&& pre_encoded_str
) {
307 php_http_buffer_append(qstr
, pre_encoded_str
, pre_encoded_len
);
310 if (!php_http_params_to_string(qstr
, hash
, arg_sep_str
, arg_sep_len
, "", 0, val_sep_str
, val_sep_len
, PHP_HTTP_PARAMS_QUERY TSRMLS_CC
)) {
327 char buffer
[1]; /* last member */
330 void php_http_url_free(php_http_url_t
**url
)
/*
 * Decodes one UTF-8 sequence from [ptr, end) with utf8towc().
 *
 * A result of 0 or (size_t) -1 from utf8towc() is singled out by the check
 * below.
 * NOTE(review): what is returned in either case, and how *wc is written, is
 * not part of the visible lines.
 */
338 static size_t parse_mb_utf8(unsigned *wc
, const char *ptr
, const char *end
)
341 size_t consumed
= utf8towc(&wchar
, (const unsigned char *) ptr
, end
- ptr
);
343 if (!consumed
|| consumed
== (size_t) -1) {
353 #ifdef PHP_HTTP_HAVE_WCHAR
/*
 * Decodes one multibyte sequence from [ptr, end) using the C library:
 * mbrtowc() when HAVE_MBRTOWC is defined, otherwise mbtowc() when
 * HAVE_MBTOWC is defined.
 *
 * A result of 0 or (size_t) -1 is singled out by the check below.
 * NOTE(review): what is returned in either case, and how *wc is written, is
 * not part of the visible lines.
 */
354 static size_t parse_mb_loc(unsigned *wc
, const char *ptr
, const char *end
)
358 #if defined(HAVE_MBRTOWC)
361 consumed
= mbrtowc(&wchar
, ptr
, end
- ptr
, &ps
);
362 #elif defined(HAVE_MBTOWC)
363 consumed
= mbtowc(&wchar
, ptr
, end
- ptr
);
366 if (!consumed
|| consumed
== (size_t) -1) {
/* identifies the URL component being parsed; used as index into parse_what[] */
377 typedef enum parse_mb_what
{
/* component names inserted into parse_mb()'s "Failed to parse %s" warning */
386 static const char * const parse_what
[] = {
/* upper-case hex digits used by parse_mb() when writing a byte as %XX */
395 static const char parse_xdigits
[] = "0123456789ABCDEF";
/*
 * Handles a multibyte sequence found at `ptr` while parsing URL component
 * `what`, writing the result to state->buffer at state->offset.
 *
 * Decoding uses parse_mb_utf8() under PHP_HTTP_URL_PARSE_MBUTF8 or, when
 * PHP_HTTP_HAVE_WCHAR is defined, parse_mb_loc() under
 * PHP_HTTP_URL_PARSE_MBLOC.
 *
 * If PHP_HTTP_URL_PARSE_TOPCT is not set, or the component is PARSE_HOSTINFO
 * or PARSE_SCHEME, the decoded character is checked (isualnum()/iswalnum(),
 * skipped for host info with PHP_HTTP_URL_PARSE_TOIDN) and the `consumed`
 * source bytes are copied verbatim.  The other visible output path writes
 * each source byte as '%' followed by two digits from parse_xdigits.
 *
 * Callers treat a return value of 0 as failure and otherwise advance their
 * read pointer by the returned count.  The E_WARNING below reports the
 * offending byte and its position relative to `begin`.
 * NOTE(review): the use of `silent` is not part of the visible lines;
 * presumably it suppresses the warning - confirm.
 */
397 static size_t parse_mb(struct parse_state
*state
, parse_mb_what_t what
, const char *ptr
, const char *end
, const char *begin
, zend_bool silent
)
402 if (state
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
403 consumed
= parse_mb_utf8(&wchar
, ptr
, end
);
405 #ifdef PHP_HTTP_HAVE_WCHAR
406 else if (state
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
407 consumed
= parse_mb_loc(&wchar
, ptr
, end
);
412 if (!(state
->flags
& PHP_HTTP_URL_PARSE_TOPCT
) || what
== PARSE_HOSTINFO
|| what
== PARSE_SCHEME
) {
413 if (what
== PARSE_HOSTINFO
&& (state
->flags
& PHP_HTTP_URL_PARSE_TOIDN
)) {
415 } else if (state
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
416 if (!isualnum(wchar
)) {
419 #ifdef PHP_HTTP_HAVE_WCHAR
420 } else if (state
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
421 if (!iswalnum(wchar
)) {
426 PHP_HTTP_DUFF(consumed
, state
->buffer
[state
->offset
++] = *ptr
++);
430 PHP_HTTP_DUFF(consumed
,
431 state
->buffer
[state
->offset
++] = '%';
432 state
->buffer
[state
->offset
++] = parse_xdigits
[((unsigned char) ptr
[i
]) >> 4];
433 state
->buffer
[state
->offset
++] = parse_xdigits
[((unsigned char) ptr
[i
]) & 0xf];
442 TSRMLS_FETCH_FROM_CTX(state
->ts
);
443 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
444 "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
445 parse_what
[what
], (unsigned char) *ptr
, (unsigned) (ptr
- begin
), begin
);
/*
 * Parses the userinfo part spanning [ptr, state->ptr) into state->buffer.
 *
 * state->url.user is pointed at the current buffer offset before scanning.
 * At the user/password split the user is NUL-terminated and state->url.pass
 * is pointed at the following offset; a "duplicate ':'" warning exists for a
 * repeated separator.  A '%' must be followed by two hex digits (or by
 * another '%'), otherwise an "invalid percent encoding" warning is raised;
 * the three bytes of an escape are copied verbatim.  Sub-delims, unreserved
 * characters, letters and digits are copied as they are; the remaining
 * visible path hands the position to parse_mb() with PARSE_USERINFO.  The
 * buffer is NUL-terminated after the loop.
 *
 * NOTE(review): the return statements are not part of the visible lines.
 */
451 static STATUS
parse_userinfo(struct parse_state
*state
, const char *ptr
)
454 const char *password
= NULL
, *end
= state
->ptr
, *tmp
= ptr
;
455 TSRMLS_FETCH_FROM_CTX(state
->ts
);
457 state
->url
.user
= &state
->buffer
[state
->offset
];
463 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
464 "Failed to parse password; duplicate ':' at pos %u in '%s'",
465 (unsigned) (ptr
- tmp
), tmp
);
469 state
->buffer
[state
->offset
++] = 0;
470 state
->url
.pass
= &state
->buffer
[state
->offset
];
/* cast to unsigned char: passing a negative char value to isxdigit() is undefined */
474 if (ptr
[1] != '%' && (end
- ptr
<= 2 || !isxdigit((unsigned char) *(ptr
+1)) || !isxdigit((unsigned char) *(ptr
+2)))) {
475 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
476 "Failed to parse userinfo; invalid percent encoding at pos %u in '%s'",
477 (unsigned) (ptr
- tmp
), tmp
);
480 state
->buffer
[state
->offset
++] = *ptr
++;
481 state
->buffer
[state
->offset
++] = *ptr
++;
482 state
->buffer
[state
->offset
++] = *ptr
;
485 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
486 case '+': case ',': case ';': case '=': /* sub-delims */
487 case '-': case '.': case '_': case '~': /* unreserved */
488 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
489 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
490 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
491 case 'V': case 'W': case 'X': case 'Y': case 'Z':
492 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
493 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
494 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
495 case 'v': case 'w': case 'x': case 'y': case 'z':
496 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
497 case '7': case '8': case '9':
499 state
->buffer
[state
->offset
++] = *ptr
;
503 if (!(mb
= parse_mb(state
, PARSE_USERINFO
, ptr
, end
, tmp
, 0))) {
508 } while(++ptr
!= end
);
511 state
->buffer
[state
->offset
++] = 0;
/*
 * Parses the host[:port] part spanning [ptr, state->ptr) into state->buffer.
 *
 *  - With HAVE_INET_PTON, a bracketed literal is located with memchr(']'),
 *    validated with inet_pton(AF_INET6) and written back in the form produced
 *    by inet_ntop(), wrapped in '[' and ']' and NUL-terminated; failures are
 *    reported as "Failed to parse hostinfo; %s".
 *  - '%' must be followed by two hex digits (or another '%'), otherwise an
 *    "invalid percent encoding" warning is raised; the three bytes of an
 *    escape are copied verbatim.
 *  - Decimal digits are accumulated into state->url.port (port * 10 + digit)
 *    on one visible path and copied to the buffer on another; letters and
 *    other bytes have dedicated "Failed to parse port" warnings.
 *  - Remaining bytes are handed to parse_mb() with PARSE_HOSTINFO.
 *  - If no host was set by then, state->url.host is pointed `len` bytes back
 *    from the current offset and the buffer is NUL-terminated.
 *  - With PHP_HTTP_HAVE_IDN and PHP_HTTP_URL_PARSE_TOIDN, the host is
 *    converted with idna_to_ascii_8z() (MBUTF8) or idna_to_ascii_lz() (MBLOC)
 *    and copied over the host in place, adjusting state->offset.
 *
 * NOTE(review): inet_ntop() writes at host + 1 while the size passed is
 * maxlen - offset; confirm the allocation provides the spare byte.
 * NOTE(review): the return statements are not part of the visible lines.
 */
516 static STATUS
parse_hostinfo(struct parse_state
*state
, const char *ptr
)
519 const char *end
= state
->ptr
, *tmp
= ptr
, *port
= NULL
;
520 TSRMLS_FETCH_FROM_CTX(state
->ts
);
523 #ifdef HAVE_INET_PTON
525 char *error
= NULL
, *tmp
= memchr(ptr
, ']', end
- ptr
);
528 size_t addrlen
= tmp
- ptr
+ 1;
529 char buf
[16], *addr
= estrndup(ptr
+ 1, addrlen
- 2);
530 int rv
= inet_pton(AF_INET6
, addr
, buf
);
534 state
->buffer
[state
->offset
] = '[';
535 state
->url
.host
= &state
->buffer
[state
->offset
];
536 inet_ntop(AF_INET6
, buf
, state
->url
.host
+ 1, state
->maxlen
- state
->offset
);
537 state
->offset
+= strlen(state
->url
.host
);
538 state
->buffer
[state
->offset
++] = ']';
539 state
->buffer
[state
->offset
++] = 0;
541 } else if (rv
== -1) {
542 error
= strerror(errno
);
544 error
= "unexpected '['";
547 error
= "expected ']'";
551 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse hostinfo; %s", error
);
560 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
561 "Failed to parse port; unexpected ':' at pos %u in '%s'",
562 (unsigned) (ptr
- tmp
), tmp
);
/* cast to unsigned char: passing a negative char value to isxdigit() is undefined */
569 if (ptr
[1] != '%' && (end
- ptr
<= 2 || !isxdigit((unsigned char) *(ptr
+1)) || !isxdigit((unsigned char) *(ptr
+2)))) {
570 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
571 "Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'",
572 (unsigned) (ptr
- tmp
), tmp
);
575 state
->buffer
[state
->offset
++] = *ptr
++;
576 state
->buffer
[state
->offset
++] = *ptr
++;
577 state
->buffer
[state
->offset
++] = *ptr
;
580 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
581 case '+': case ',': case ';': case '=': /* sub-delims */
582 case '-': case '.': case '_': case '~': /* unreserved */
583 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
584 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
585 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
586 case 'V': case 'W': case 'X': case 'Y': case 'Z':
587 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
588 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
589 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
590 case 'v': case 'w': case 'x': case 'y': case 'z':
592 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
593 "Failed to parse port; unexpected char '%c' at pos %u in '%s'",
594 (unsigned char) *ptr
, (unsigned) (ptr
- tmp
), tmp
);
598 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
599 case '7': case '8': case '9':
602 state
->url
.port
*= 10;
603 state
->url
.port
+= *ptr
- '0';
605 state
->buffer
[state
->offset
++] = *ptr
;
611 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
612 "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'",
613 (unsigned char) *ptr
, (unsigned) (ptr
- tmp
), tmp
);
615 } else if (!(mb
= parse_mb(state
, PARSE_HOSTINFO
, ptr
, end
, tmp
, 0))) {
620 } while (++ptr
!= end
);
622 if (!state
->url
.host
) {
623 len
= (port
? port
- tmp
- 1 : end
- tmp
);
624 state
->url
.host
= &state
->buffer
[state
->offset
- len
];
625 state
->buffer
[state
->offset
++] = 0;
628 #ifdef PHP_HTTP_HAVE_IDN
629 if (state
->flags
& PHP_HTTP_URL_PARSE_TOIDN
) {
633 if (state
->flags
& PHP_HTTP_URL_PARSE_MBUTF8
) {
634 rv
= idna_to_ascii_8z(state
->url
.host
, &idn
, IDNA_ALLOW_UNASSIGNED
|IDNA_USE_STD3_ASCII_RULES
);
636 # ifdef PHP_HTTP_HAVE_WCHAR
637 else if (state
->flags
& PHP_HTTP_URL_PARSE_MBLOC
) {
638 rv
= idna_to_ascii_lz(state
->url
.host
, &idn
, IDNA_ALLOW_UNASSIGNED
|IDNA_USE_STD3_ASCII_RULES
);
641 if (rv
!= IDNA_SUCCESS
) {
642 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse IDN; %s", idna_strerror(rv
));
645 size_t idnlen
= strlen(idn
);
646 memcpy(state
->url
.host
, idn
, idnlen
+ 1);
648 state
->offset
+= idnlen
- len
;
/*
 * Scans the authority part starting at state->ptr, advancing state->ptr one
 * byte at a time while it is <= state->end.
 *
 * At the userinfo delimiter the bytes between `tmp` and state->ptr (if any)
 * are parsed with parse_userinfo(), and `host`/`tmp` are moved behind the
 * delimiter; an "unexpected '@'" warning exists for a misplaced delimiter.
 * On the other visible path the bytes between `tmp` and state->ptr (if any)
 * are parsed with parse_hostinfo().
 * NOTE(review): the return statements are not part of the visible lines.
 */
656 static const char *parse_authority(struct parse_state
*state
)
658 const char *tmp
= state
->ptr
, *host
= NULL
;
661 switch (*state
->ptr
) {
663 /* userinfo delimiter */
665 TSRMLS_FETCH_FROM_CTX(state
->ts
);
666 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
667 "Failed to parse userinfo; unexpected '@'");
670 host
= state
->ptr
+ 1;
671 if (tmp
!= state
->ptr
&& SUCCESS
!= parse_userinfo(state
, tmp
)) {
674 tmp
= state
->ptr
+ 1;
682 if (tmp
!= state
->ptr
&& SUCCESS
!= parse_hostinfo(state
, tmp
)) {
687 } while (++state
->ptr
<= state
->end
);
/*
 * Parses the path starting at state->ptr into state->buffer, advancing
 * state->ptr one byte at a time while it is <= state->end.
 *
 * state->url.path is pointed at the current buffer offset.  When the path
 * ends and at least one byte was consumed the buffer is NUL-terminated;
 * state->url.path = NULL is the other visible outcome.  A '%' must be
 * followed by two hex digits (or another '%'), otherwise an "invalid percent
 * encoding" warning is raised; the three bytes of an escape are copied
 * verbatim.  '/', sub-delims, unreserved characters, ':' and '@', letters
 * and digits are copied as they are; the remaining visible path hands the
 * position to parse_mb() with PARSE_PATH and skips the bytes it consumed.
 *
 * NOTE(review): the return statements are not part of the visible lines.
 */
692 static const char *parse_path(struct parse_state
*state
)
696 TSRMLS_FETCH_FROM_CTX(state
->ts
);
698 /* is there actually a path to parse? */
703 state
->url
.path
= &state
->buffer
[state
->offset
];
706 switch (*state
->ptr
) {
710 /* did we have any path component ? */
711 if (tmp
!= state
->ptr
) {
712 state
->buffer
[state
->offset
++] = 0;
714 state
->url
.path
= NULL
;
/* cast to unsigned char: passing a negative char value to isxdigit() is undefined */
719 if (state
->ptr
[1] != '%' && (state
->end
- state
->ptr
<= 2 || !isxdigit((unsigned char) *(state
->ptr
+1)) || !isxdigit((unsigned char) *(state
->ptr
+2)))) {
720 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
721 "Failed to parse path; invalid percent encoding at pos %u in '%s'",
722 (unsigned) (state
->ptr
- tmp
), tmp
);
725 state
->buffer
[state
->offset
++] = *state
->ptr
++;
726 state
->buffer
[state
->offset
++] = *state
->ptr
++;
727 state
->buffer
[state
->offset
++] = *state
->ptr
;
730 case '/': /* yeah, well */
731 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
732 case '+': case ',': case ';': case '=': /* sub-delims */
733 case '-': case '.': case '_': case '~': /* unreserved */
734 case ':': case '@': /* pchar */
735 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
736 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
737 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
738 case 'V': case 'W': case 'X': case 'Y': case 'Z':
739 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
740 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
741 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
742 case 'v': case 'w': case 'x': case 'y': case 'z':
743 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
744 case '7': case '8': case '9':
746 state
->buffer
[state
->offset
++] = *state
->ptr
;
750 if (!(mb
= parse_mb(state
, PARSE_PATH
, state
->ptr
, state
->end
, tmp
, 0))) {
/* the loop's ++state->ptr accounts for the last consumed byte */
753 state
->ptr
+= mb
- 1;
755 } while (++state
->ptr
<= state
->end
);
/*
 * Parses the query starting at state->ptr into state->buffer, advancing
 * state->ptr one byte at a time while it is <= state->end.
 *
 * Nothing is parsed unless *state->ptr is '?'.  state->url.query is pointed
 * at the current buffer offset; the buffer is NUL-terminated when the query
 * ends.  A '%' must be followed by two hex digits (or another '%'),
 * otherwise an "invalid percent encoding" warning is raised; the three bytes
 * of an escape are copied verbatim.  '?', '/', sub-delims, unreserved
 * characters, ':' and '@', letters and digits are copied as they are; the
 * remaining visible path hands the position to parse_mb() with PARSE_QUERY
 * and skips the bytes it consumed.
 *
 * NOTE(review): the return statements are not part of the visible lines.
 */
760 static const char *parse_query(struct parse_state
*state
)
/* tmp starts one byte past state->ptr unless state->ptr is at a NUL byte */
763 const char *tmp
= state
->ptr
+ !!*state
->ptr
;
764 TSRMLS_FETCH_FROM_CTX(state
->ts
);
766 /* is there actually a query to parse? */
767 if (*state
->ptr
!= '?') {
771 /* skip initial '?' */
773 state
->url
.query
= &state
->buffer
[state
->offset
];
776 switch (*state
->ptr
) {
779 state
->buffer
[state
->offset
++] = 0;
/* cast to unsigned char: passing a negative char value to isxdigit() is undefined */
783 if (state
->ptr
[1] != '%' && (state
->end
- state
->ptr
<= 2 || !isxdigit((unsigned char) *(state
->ptr
+1)) || !isxdigit((unsigned char) *(state
->ptr
+2)))) {
784 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
785 "Failed to parse query; invalid percent encoding at pos %u in '%s'",
786 (unsigned) (state
->ptr
- tmp
), tmp
);
789 state
->buffer
[state
->offset
++] = *state
->ptr
++;
790 state
->buffer
[state
->offset
++] = *state
->ptr
++;
791 state
->buffer
[state
->offset
++] = *state
->ptr
;
794 case '?': case '/': /* yeah, well */
795 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
796 case '+': case ',': case ';': case '=': /* sub-delims */
797 case '-': case '.': case '_': case '~': /* unreserved */
798 case ':': case '@': /* pchar */
799 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
800 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
801 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
802 case 'V': case 'W': case 'X': case 'Y': case 'Z':
803 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
804 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
805 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
806 case 'v': case 'w': case 'x': case 'y': case 'z':
807 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
808 case '7': case '8': case '9':
810 state
->buffer
[state
->offset
++] = *state
->ptr
;
814 if (!(mb
= parse_mb(state
, PARSE_QUERY
, state
->ptr
, state
->end
, tmp
, 0))) {
/* the loop's ++state->ptr accounts for the last consumed byte */
817 state
->ptr
+= mb
- 1;
819 } while (++state
->ptr
<= state
->end
);
/*
 * Parses the fragment starting at state->ptr into state->buffer, advancing
 * state->ptr one byte at a time while it is <= state->end.
 *
 * Nothing is parsed unless *state->ptr is '#'.  state->url.fragment is
 * pointed at the current buffer offset; the buffer is NUL-terminated when
 * the fragment ends.  A '%' must be followed by two hex digits (or another
 * '%'), otherwise an "invalid percent encoding" warning is raised; the three
 * bytes of an escape are copied verbatim.  Sub-delims, unreserved
 * characters, ':' and '@', letters and digits are copied as they are; the
 * remaining visible path hands the position to parse_mb() with
 * PARSE_FRAGMENT and skips the bytes it consumed.
 *
 * NOTE(review): the return statements are not part of the visible lines.
 */
824 static const char *parse_fragment(struct parse_state
*state
)
828 TSRMLS_FETCH_FROM_CTX(state
->ts
);
830 /* is there actually a fragment to parse? */
831 if (*state
->ptr
!= '#') {
835 /* skip initial '#' */
837 state
->url
.fragment
= &state
->buffer
[state
->offset
];
840 switch (*state
->ptr
) {
842 state
->buffer
[state
->offset
++] = 0;
/* cast to unsigned char: passing a negative char value to isxdigit() is undefined */
846 if (state
->ptr
[1] != '%' && (state
->end
- state
->ptr
<= 2 || !isxdigit((unsigned char) *(state
->ptr
+1)) || !isxdigit((unsigned char) *(state
->ptr
+2)))) {
847 php_error_docref(NULL TSRMLS_CC
, E_WARNING
,
848 "Failed to parse fragment; invalid percent encoding at pos %u in '%s'",
849 (unsigned) (state
->ptr
- tmp
), tmp
);
852 state
->buffer
[state
->offset
++] = *state
->ptr
++;
853 state
->buffer
[state
->offset
++] = *state
->ptr
++;
854 state
->buffer
[state
->offset
++] = *state
->ptr
;
858 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
859 case '+': case ',': case ';': case '=': /* sub-delims */
860 case '-': case '.': case '_': case '~': /* unreserved */
861 case ':': case '@': /* pchar */
862 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
863 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
864 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
865 case 'V': case 'W': case 'X': case 'Y': case 'Z':
866 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
867 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
868 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
869 case 'v': case 'w': case 'x': case 'y': case 'z':
870 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
871 case '7': case '8': case '9':
873 state
->buffer
[state
->offset
++] = *state
->ptr
;
877 if (!(mb
= parse_mb(state
, PARSE_FRAGMENT
, state
->ptr
, state
->end
, tmp
, 0))) {
/* the loop's ++state->ptr accounts for the last consumed byte */
880 state
->ptr
+= mb
- 1;
882 } while (++state
->ptr
<= state
->end
);
/*
 * Parses the hierarchical part of the URL.
 *
 * When the input at state->ptr starts with '/', has more than one byte left
 * and the second byte is '/' as well, parse_authority() is run and its
 * result stored in state->ptr (a NULL result is tested for).  The function
 * returns the result of parse_path().
 */
887 static const char *parse_hier(struct parse_state
*state
)
889 if (*state
->ptr
== '/') {
890 if (state
->end
- state
->ptr
> 1) {
891 if (*(state
->ptr
+ 1) == '/') {
893 if (!(state
->ptr
= parse_authority(state
))) {
899 return parse_path(state
);
/*
 * Parses the URL scheme starting at state->ptr into state->buffer, advancing
 * state->ptr one byte at a time until it reaches state->end.
 *
 * At the scheme delimiter state->url.scheme is pointed at the start of the
 * buffer and the buffer is NUL-terminated.  Digits, '+', '-' and '.' are
 * subject to a check on whether they are the first byte (state->ptr == tmp).
 * Letters are copied to the buffer.  The remaining visible path calls
 * parse_mb() with PARSE_SCHEME and silent = 1; a failure there is a soft
 * failure ("parse path next").
 * NOTE(review): the return statements are not part of the visible lines.
 */
902 static const char *parse_scheme(struct parse_state
*state
)
905 const char *tmp
= state
->ptr
;
908 switch (*state
->ptr
) {
910 /* scheme delimiter */
911 state
->url
.scheme
= &state
->buffer
[0];
912 state
->buffer
[state
->offset
++] = 0;
915 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
916 case '7': case '8': case '9':
917 case '+': case '-': case '.':
918 if (state
->ptr
== tmp
) {
922 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
923 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
924 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
925 case 'V': case 'W': case 'X': case 'Y': case 'Z':
926 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
927 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
928 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
929 case 'v': case 'w': case 'x': case 'y': case 'z':
931 state
->buffer
[state
->offset
++] = *state
->ptr
;
935 if (!(mb
= parse_mb(state
, PARSE_SCHEME
, state
->ptr
, state
->end
, tmp
, 1))) {
936 /* soft fail; parse path next */
/* the loop's ++state->ptr accounts for the last consumed byte */
939 state
->ptr
+= mb
- 1;
941 } while (++state
->ptr
!= state
->end
);
/*
 * Parses the URL in str[0..len) and returns the parse state cast to
 * php_http_url_t *.
 *
 * The state is allocated zeroed with room for maxlen = 3 * len buffer bytes,
 * which matches the three output bytes parse_mb() may write per input byte.
 * parse_scheme(), parse_hier(), parse_query() and parse_fragment() are run
 * in that order; a failing scheme, query or fragment parser raises an
 * E_WARNING naming the component.
 * NOTE(review): 3 * len is not checked for size_t overflow.
 * NOTE(review): what happens to `state` after a failed step is not part of
 * the visible lines.
 */
946 php_http_url_t
*php_http_url_parse(const char *str
, size_t len
, unsigned flags TSRMLS_DC
)
948 size_t maxlen
= 3 * len
;
949 struct parse_state
*state
= ecalloc(1, sizeof(*state
) + maxlen
);
951 state
->end
= str
+ len
;
953 state
->flags
= flags
;
954 state
->maxlen
= maxlen
;
955 TSRMLS_SET_CTX(state
->ts
);
957 if (!parse_scheme(state
)) {
958 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse URL scheme: '%s'", state
->ptr
);
963 if (!parse_hier(state
)) {
968 if (!parse_query(state
)) {
969 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse URL query: '%s'", state
->ptr
);
974 if (!parse_fragment(state
)) {
975 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "Failed to parse URL fragment: '%s'", state
->ptr
);
980 return (php_http_url_t
*) state
;
983 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct
, 0, 0, 0)
984 ZEND_ARG_INFO(0, old_url
)
985 ZEND_ARG_INFO(0, new_url
)
986 ZEND_ARG_INFO(0, flags
)
/*
 * http\Url::__construct([mixed $old_url[, mixed $new_url[, int $flags]]])
 *
 * flags defaults to PHP_HTTP_URL_FROM_ENV.  Each URL argument is turned into
 * a php_url either with php_http_url_from_struct() on its HASH_OF() table or,
 * on the other visible path, by converting it to a string with
 * php_http_ztyp() and running php_url_parse() on that copy.  Both are
 * combined with php_http_url() and the result is written to $this through
 * php_http_url_to_struct().  While this runs, errors are thrown as
 * php_http_exception_bad_url_class_entry exceptions (EH_THROW); the previous
 * error handling is restored on every visible exit path.
 */
988 PHP_METHOD(HttpUrl
, __construct
)
990 zval
*new_url
= NULL
, *old_url
= NULL
;
991 long flags
= PHP_HTTP_URL_FROM_ENV
;
992 zend_error_handling zeh
;
994 php_http_expect(SUCCESS
== zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC
, "|z!z!l", &old_url
, &new_url
, &flags
), invalid_arg
, return);
996 zend_replace_error_handling(EH_THROW
, php_http_exception_bad_url_class_entry
, &zeh TSRMLS_CC
);
998 php_url
*res_purl
, *new_purl
= NULL
, *old_purl
= NULL
;
1001 switch (Z_TYPE_P(new_url
)) {
1004 new_purl
= php_http_url_from_struct(NULL
, HASH_OF(new_url
) TSRMLS_CC
);
/* parse the string copy, then release it */
1007 zval
*cpy
= php_http_ztyp(IS_STRING
, new_url
);
1009 new_purl
= php_url_parse(Z_STRVAL_P(cpy
));
1010 zval_ptr_dtor(&cpy
);
1015 zend_restore_error_handling(&zeh TSRMLS_CC
);
1020 switch (Z_TYPE_P(old_url
)) {
1023 old_purl
= php_http_url_from_struct(NULL
, HASH_OF(old_url
) TSRMLS_CC
);
/* parse the string copy, then release it */
1026 zval
*cpy
= php_http_ztyp(IS_STRING
, old_url
);
1028 old_purl
= php_url_parse(Z_STRVAL_P(cpy
));
1029 zval_ptr_dtor(&cpy
);
1035 php_url_free(new_purl
);
1037 zend_restore_error_handling(&zeh TSRMLS_CC
);
1042 php_http_url(flags
, old_purl
, new_purl
, &res_purl
, NULL
, NULL TSRMLS_CC
);
1043 php_http_url_to_struct(res_purl
, getThis() TSRMLS_CC
);
1045 php_url_free(res_purl
);
1047 php_url_free(old_purl
);
1050 php_url_free(new_purl
);
1053 zend_restore_error_handling(&zeh TSRMLS_CC
);
1056 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_mod
, 0, 0, 1)
1057 ZEND_ARG_INFO(0, more_url_parts
)
1058 ZEND_ARG_INFO(0, flags
)
1059 ZEND_END_ARG_INFO();
/*
 * http\Url::mod(mixed $more_url_parts[, int $flags])
 *
 * flags defaults to PHP_HTTP_URL_JOIN_PATH | PHP_HTTP_URL_JOIN_QUERY.
 * more_url_parts is turned into a php_url either with
 * php_http_url_from_struct() on its HASH_OF() table or, on the other visible
 * path, by converting it to a string with php_http_ztyp() and running
 * php_url_parse() on that copy.  $this is read back with
 * php_http_url_from_struct(); the return value is a clone of $this
 * (zend_objects_clone_obj()) into which the result of php_http_url() is
 * written through php_http_url_to_struct().  While this runs, errors are
 * thrown as php_http_exception_bad_url_class_entry exceptions (EH_THROW);
 * the previous error handling is restored on every visible exit path.
 */
1060 PHP_METHOD(HttpUrl
, mod
)
1062 zval
*new_url
= NULL
;
1063 long flags
= PHP_HTTP_URL_JOIN_PATH
| PHP_HTTP_URL_JOIN_QUERY
;
1064 zend_error_handling zeh
;
1066 php_http_expect(SUCCESS
== zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC
, "z!|l", &new_url
, &flags
), invalid_arg
, return);
1068 zend_replace_error_handling(EH_THROW
, php_http_exception_bad_url_class_entry
, &zeh TSRMLS_CC
);
1070 php_url
*new_purl
= NULL
, *old_purl
= NULL
;
1073 switch (Z_TYPE_P(new_url
)) {
1076 new_purl
= php_http_url_from_struct(NULL
, HASH_OF(new_url
) TSRMLS_CC
);
1079 zval
*cpy
= php_http_ztyp(IS_STRING
, new_url
);
/* parse the string copy: new_url itself is not guaranteed to hold a string here */
1081 new_purl
= php_url_parse(Z_STRVAL_P(cpy
));
1082 zval_ptr_dtor(&cpy
);
1087 zend_restore_error_handling(&zeh TSRMLS_CC
);
1092 if ((old_purl
= php_http_url_from_struct(NULL
, HASH_OF(getThis()) TSRMLS_CC
))) {
1095 ZVAL_OBJVAL(return_value
, zend_objects_clone_obj(getThis() TSRMLS_CC
), 0);
1097 php_http_url(flags
, old_purl
, new_purl
, &res_purl
, NULL
, NULL TSRMLS_CC
);
1098 php_http_url_to_struct(res_purl
, return_value TSRMLS_CC
);
1100 php_url_free(res_purl
);
1101 php_url_free(old_purl
);
1104 php_url_free(new_purl
);
1107 zend_restore_error_handling(&zeh TSRMLS_CC
);
1110 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toString
, 0, 0, 0)
1111 ZEND_END_ARG_INFO();
/*
 * http\Url::toString() (takes no arguments)
 *
 * Reads $this into a php_url with php_http_url_from_struct(), serializes it
 * through php_http_url() with flags 0 and returns the resulting string
 * without duplicating it (RETURN_STRINGL(..., 0)).  An empty string is the
 * other visible return value.
 */
1112 PHP_METHOD(HttpUrl
, toString
)
1114 if (SUCCESS
== zend_parse_parameters_none()) {
1117 if ((purl
= php_http_url_from_struct(NULL
, HASH_OF(getThis()) TSRMLS_CC
))) {
1121 php_http_url(0, purl
, NULL
, NULL
, &str
, &len TSRMLS_CC
);
1123 RETURN_STRINGL(str
, len
, 0);
1126 RETURN_EMPTY_STRING();
1129 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toArray
, 0, 0, 0)
1130 ZEND_END_ARG_INFO();
/*
 * http\Url::toArray() (takes no arguments)
 *
 * Round-trips $this through php_http_url_from_struct() and
 * php_http_url_to_struct() so that return_value only carries URL parts.
 * NOTE(review): the release of purl is not part of the visible lines.
 */
1131 PHP_METHOD(HttpUrl
, toArray
)
1135 if (SUCCESS
!= zend_parse_parameters_none()) {
1139 /* strip any non-URL properties */
1140 purl
= php_http_url_from_struct(NULL
, HASH_OF(getThis()) TSRMLS_CC
);
1141 php_http_url_to_struct(purl
, return_value TSRMLS_CC
);
1145 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_parse
, 0, 0, 1)
1146 ZEND_ARG_INFO(0, url
)
1147 ZEND_ARG_INFO(0, flags
)
1148 ZEND_END_ARG_INFO();
/*
 * http\Url::parse(string $url[, int $flags]) (registered as a static method)
 *
 * Parses the string with php_http_url_parse().  On success return_value is
 * initialised as an http\Url object and the parsed scheme, user, pass, host,
 * port, path, query and fragment are stored as properties, after which the
 * parsed URL is released with php_http_url_free().  While this runs, errors
 * are thrown as php_http_exception_bad_url_class_entry exceptions
 * (EH_THROW); the previous error handling is restored at the end.
 * NOTE(review): only the guard for `fragment` is visible; the guards for the
 * other parts are not part of the visible lines.
 */
1149 PHP_METHOD(HttpUrl
, parse
)
1154 php_http_url_t
*url
;
1155 zend_error_handling zeh
;
1157 php_http_expect(SUCCESS
== zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC
, "s|l", &str
, &len
, &flags
), invalid_arg
, return);
1159 zend_replace_error_handling(EH_THROW
, php_http_exception_bad_url_class_entry
, &zeh TSRMLS_CC
);
1160 if ((url
= php_http_url_parse(str
, len
, flags TSRMLS_CC
))) {
1161 object_init_ex(return_value
, php_http_url_class_entry
);
1163 zend_update_property_string(php_http_url_class_entry
, return_value
,
1164 ZEND_STRL("scheme"), url
->scheme TSRMLS_CC
);
1167 zend_update_property_string(php_http_url_class_entry
, return_value
,
1168 ZEND_STRL("user"), url
->user TSRMLS_CC
);
1171 zend_update_property_string(php_http_url_class_entry
, return_value
,
1172 ZEND_STRL("pass"), url
->pass TSRMLS_CC
);
1175 zend_update_property_string(php_http_url_class_entry
, return_value
,
1176 ZEND_STRL("host"), url
->host TSRMLS_CC
);
1179 zend_update_property_long(php_http_url_class_entry
, return_value
,
1180 ZEND_STRL("port"), url
->port TSRMLS_CC
);
1183 zend_update_property_string(php_http_url_class_entry
, return_value
,
1184 ZEND_STRL("path"), url
->path TSRMLS_CC
);
1187 zend_update_property_string(php_http_url_class_entry
, return_value
,
1188 ZEND_STRL("query"), url
->query TSRMLS_CC
);
1190 if (url
->fragment
) {
1191 zend_update_property_string(php_http_url_class_entry
, return_value
,
1192 ZEND_STRL("fragment"), url
->fragment TSRMLS_CC
);
1194 php_http_url_free(&url
);
1196 zend_restore_error_handling(&zeh TSRMLS_CC
);
/*
 * Method table of the http\Url class: constructor, mod(), toString() with
 * __toString() as its alias, toArray() and the static parse().
 */
1199 static zend_function_entry php_http_url_methods
[] = {
1200 PHP_ME(HttpUrl
, __construct
, ai_HttpUrl___construct
, ZEND_ACC_PUBLIC
|ZEND_ACC_CTOR
)
1201 PHP_ME(HttpUrl
, mod
, ai_HttpUrl_mod
, ZEND_ACC_PUBLIC
)
1202 PHP_ME(HttpUrl
, toString
, ai_HttpUrl_toString
, ZEND_ACC_PUBLIC
)
1203 ZEND_MALIAS(HttpUrl
, __toString
, toString
, ai_HttpUrl_toString
, ZEND_ACC_PUBLIC
)
1204 PHP_ME(HttpUrl
, toArray
, ai_HttpUrl_toArray
, ZEND_ACC_PUBLIC
)
1205 PHP_ME(HttpUrl
, parse
, ai_HttpUrl_parse
, ZEND_ACC_PUBLIC
|ZEND_ACC_STATIC
)
1206 EMPTY_FUNCTION_ENTRY
/* class entry of http\Url, assigned in PHP_MINIT_FUNCTION(http_url) */
1209 zend_class_entry
*php_http_url_class_entry
;
/*
 * Module startup for the URL component: registers the internal class
 * http\Url with php_http_url_methods, declares its public properties
 * (scheme, user, pass, host, port, path, query, fragment; all NULL by
 * default) and its flag constants.  PARSE_MBLOC is only declared when
 * PHP_HTTP_HAVE_WCHAR is defined, PARSE_TOIDN only when PHP_HTTP_HAVE_IDN
 * is defined.
 */
1211 PHP_MINIT_FUNCTION(http_url
)
1213 zend_class_entry ce
= {0};
1215 INIT_NS_CLASS_ENTRY(ce
, "http", "Url", php_http_url_methods
);
1216 php_http_url_class_entry
= zend_register_internal_class(&ce TSRMLS_CC
);
1218 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("scheme"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1219 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("user"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1220 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("pass"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1221 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("host"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1222 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("port"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1223 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("path"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1224 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("query"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1225 zend_declare_property_null(php_http_url_class_entry
, ZEND_STRL("fragment"), ZEND_ACC_PUBLIC TSRMLS_CC
);
1227 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("REPLACE"), PHP_HTTP_URL_REPLACE TSRMLS_CC
);
1228 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("JOIN_PATH"), PHP_HTTP_URL_JOIN_PATH TSRMLS_CC
);
1229 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("JOIN_QUERY"), PHP_HTTP_URL_JOIN_QUERY TSRMLS_CC
);
1230 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_USER"), PHP_HTTP_URL_STRIP_USER TSRMLS_CC
);
1231 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_PASS"), PHP_HTTP_URL_STRIP_PASS TSRMLS_CC
);
1232 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_AUTH"), PHP_HTTP_URL_STRIP_AUTH TSRMLS_CC
);
1233 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_PORT"), PHP_HTTP_URL_STRIP_PORT TSRMLS_CC
);
1234 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_PATH"), PHP_HTTP_URL_STRIP_PATH TSRMLS_CC
);
1235 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_QUERY"), PHP_HTTP_URL_STRIP_QUERY TSRMLS_CC
);
1236 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_FRAGMENT"), PHP_HTTP_URL_STRIP_FRAGMENT TSRMLS_CC
);
1237 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("STRIP_ALL"), PHP_HTTP_URL_STRIP_ALL TSRMLS_CC
);
1238 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("FROM_ENV"), PHP_HTTP_URL_FROM_ENV TSRMLS_CC
);
1239 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("SANITIZE_PATH"), PHP_HTTP_URL_SANITIZE_PATH TSRMLS_CC
);
1241 #ifdef PHP_HTTP_HAVE_WCHAR
1242 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_MBLOC"), PHP_HTTP_URL_PARSE_MBLOC TSRMLS_CC
);
1244 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC
);
1245 #ifdef PHP_HTTP_HAVE_IDN
1246 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_TOIDN"), PHP_HTTP_URL_PARSE_TOIDN TSRMLS_CC
);
1248 zend_declare_class_constant_long(php_http_url_class_entry
, ZEND_STRL("PARSE_TOPCT"), PHP_HTTP_URL_PARSE_TOPCT TSRMLS_CC
);
1259 * vim600: noet sw=4 ts=4 fdm=marker
1260 * vim<600: noet sw=4 ts=4