5ace03d3432dc681c0b5f0194e98405c615db6c8
[m6w6/ext-http] / php_http_url.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 #include "php_http_api.h"
14
15 #ifdef PHP_HTTP_HAVE_IDN
16 # include <idna.h>
17 #endif
18
19 #ifdef PHP_HTTP_HAVE_WCHAR
20 # include <wchar.h>
21 # include <wctype.h>
22 #endif
23
24 #include "php_http_utf8.h"
25
/**
 * Determine a name for the local host.
 *
 * Uses gethostname() and, where getdomainname() is available, appends the
 * reported domain separated by a dot. A missing domain (empty string or the
 * literal "(none)") or a failing getdomainname() call leaves the bare host
 * name without a trailing dot. Falls back to "localhost".
 *
 * @return a string allocated with estrdup()/estrndup()
 */
static inline char *localhostname(void)
{
	char hostname[1024] = {0};

#ifdef PHP_WIN32
	if (SUCCESS == gethostname(hostname, lenof(hostname))) {
		return estrdup(hostname);
	}
#elif defined(HAVE_GETHOSTNAME)
	if (SUCCESS == gethostname(hostname, lenof(hostname))) {
#	if defined(HAVE_GETDOMAINNAME)
		size_t hlen = strlen(hostname);
		if (hlen <= lenof(hostname) - lenof("(none)")) {
			hostname[hlen++] = '.';
			if (SUCCESS == getdomainname(&hostname[hlen], lenof(hostname) - hlen)) {
				/* no usable domain: strip the separator again */
				if (!hostname[hlen] || !strcmp(&hostname[hlen], "(none)")) {
					hostname[hlen - 1] = '\0';
				}
				return estrdup(hostname);
			}
			/* getdomainname() failed: undo the separator appended above */
			hostname[hlen - 1] = '\0';
		}
#	endif
		if (strcmp(hostname, "(none)")) {
			return estrdup(hostname);
		}
	}
#endif
	return estrndup("localhost", lenof("localhost"));
}
55
56 static php_url *php_http_url_from_env(php_url *url TSRMLS_DC)
57 {
58 zval *https, *zhost, *zport;
59 long port;
60
61 if (!url) {
62 url = ecalloc(1, sizeof(*url));
63 }
64
65 /* port */
66 zport = php_http_env_get_server_var(ZEND_STRL("SERVER_PORT"), 1 TSRMLS_CC);
67 if (zport && IS_LONG == is_numeric_string(Z_STRVAL_P(zport), Z_STRLEN_P(zport), &port, NULL, 0)) {
68 url->port = port;
69 }
70
71 /* scheme */
72 https = php_http_env_get_server_var(ZEND_STRL("HTTPS"), 1 TSRMLS_CC);
73 if (https && !strcasecmp(Z_STRVAL_P(https), "ON")) {
74 url->scheme = estrndup("https", lenof("https"));
75 } else {
76 url->scheme = estrndup("http", lenof("http"));
77 }
78
79 /* host */
80 if ((((zhost = php_http_env_get_server_var(ZEND_STRL("HTTP_HOST"), 1 TSRMLS_CC)) ||
81 (zhost = php_http_env_get_server_var(ZEND_STRL("SERVER_NAME"), 1 TSRMLS_CC)) ||
82 (zhost = php_http_env_get_server_var(ZEND_STRL("SERVER_ADDR"), 1 TSRMLS_CC)))) && Z_STRLEN_P(zhost)) {
83 size_t stop_at = strspn(Z_STRVAL_P(zhost), "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-.");
84
85 url->host = estrndup(Z_STRVAL_P(zhost), stop_at);
86 } else {
87 url->host = localhostname();
88 }
89
90 /* path */
91 if (SG(request_info).request_uri && SG(request_info).request_uri[0]) {
92 const char *q = strchr(SG(request_info).request_uri, '?');
93
94 if (q) {
95 url->path = estrndup(SG(request_info).request_uri, q - SG(request_info).request_uri);
96 } else {
97 url->path = estrdup(SG(request_info).request_uri);
98 }
99 }
100
101 /* query */
102 if (SG(request_info).query_string && SG(request_info).query_string[0]) {
103 url->query = estrdup(SG(request_info).query_string);
104 }
105
106 return url;
107 }
108
109 void php_http_url(int flags, const php_url *old_url, const php_url *new_url, php_url **url_ptr, char **url_str, size_t *url_len TSRMLS_DC)
110 {
111 php_url *url, *tmp_url = NULL;
112
113 /* set from env if requested */
114 if (flags & PHP_HTTP_URL_FROM_ENV) {
115 php_url *env_url = php_http_url_from_env(NULL TSRMLS_CC);
116
117 php_http_url(flags ^ PHP_HTTP_URL_FROM_ENV, env_url, old_url, &tmp_url, NULL, NULL TSRMLS_CC);
118
119 php_url_free(env_url);
120 old_url = tmp_url;
121 }
122
123 url = ecalloc(1, sizeof(*url));
124
125 #define __URLSET(u,n) \
126 ((u)&&(u)->n)
127 #define __URLCPY(n) \
128 url->n = __URLSET(new_url,n) ? estrdup(new_url->n) : (__URLSET(old_url,n) ? estrdup(old_url->n) : NULL)
129
130 if (!(flags & PHP_HTTP_URL_STRIP_PORT)) {
131 url->port = __URLSET(new_url, port) ? new_url->port : ((old_url) ? old_url->port : 0);
132 }
133 if (!(flags & PHP_HTTP_URL_STRIP_USER)) {
134 __URLCPY(user);
135 }
136 if (!(flags & PHP_HTTP_URL_STRIP_PASS)) {
137 __URLCPY(pass);
138 }
139
140 __URLCPY(scheme);
141 __URLCPY(host);
142
143 if (!(flags & PHP_HTTP_URL_STRIP_PATH)) {
144 if ((flags & PHP_HTTP_URL_JOIN_PATH) && __URLSET(old_url, path) && __URLSET(new_url, path) && *new_url->path != '/') {
145 size_t old_path_len = strlen(old_url->path), new_path_len = strlen(new_url->path);
146
147 url->path = ecalloc(1, old_path_len + new_path_len + 1 + 1);
148
149 strcat(url->path, old_url->path);
150 if (url->path[old_path_len - 1] != '/') {
151 php_dirname(url->path, old_path_len);
152 strcat(url->path, "/");
153 }
154 strcat(url->path, new_url->path);
155 } else {
156 __URLCPY(path);
157 }
158 }
159 if (!(flags & PHP_HTTP_URL_STRIP_QUERY)) {
160 if ((flags & PHP_HTTP_URL_JOIN_QUERY) && __URLSET(new_url, query) && __URLSET(old_url, query)) {
161 zval qarr, qstr;
162
163 INIT_PZVAL(&qstr);
164 INIT_PZVAL(&qarr);
165 array_init(&qarr);
166
167 ZVAL_STRING(&qstr, old_url->query, 0);
168 php_http_querystring_update(&qarr, &qstr, NULL TSRMLS_CC);
169 ZVAL_STRING(&qstr, new_url->query, 0);
170 php_http_querystring_update(&qarr, &qstr, NULL TSRMLS_CC);
171
172 ZVAL_NULL(&qstr);
173 php_http_querystring_update(&qarr, NULL, &qstr TSRMLS_CC);
174 url->query = Z_STRVAL(qstr);
175 zval_dtor(&qarr);
176 } else {
177 __URLCPY(query);
178 }
179 }
180 if (!(flags & PHP_HTTP_URL_STRIP_FRAGMENT)) {
181 __URLCPY(fragment);
182 }
183
184 /* done with copy & combine & strip */
185
186 if (flags & PHP_HTTP_URL_FROM_ENV) {
187 /* free old_url we tainted above */
188 php_url_free(tmp_url);
189 }
190
191 /* set some sane defaults */
192
193 if (!url->scheme) {
194 url->scheme = estrndup("http", lenof("http"));
195 }
196
197 if (!url->host) {
198 url->host = estrndup("localhost", lenof("localhost"));
199 }
200
201 if (!url->path) {
202 url->path = estrndup("/", 1);
203 } else if (url->path[0] != '/') {
204 size_t plen = strlen(url->path);
205 char *path = emalloc(plen + 1 + 1);
206
207 path[0] = '/';
208 memcpy(&path[1], url->path, plen + 1);
209 STR_SET(url->path, path);
210 }
211 /* replace directory references if path is not a single slash */
212 if ((flags & PHP_HTTP_URL_SANITIZE_PATH)
213 && url->path[0] && (url->path[0] != '/' || url->path[1])) {
214 char *ptr, *end = url->path + strlen(url->path) + 1;
215
216 for (ptr = strchr(url->path, '/'); ptr; ptr = strchr(ptr, '/')) {
217 switch (ptr[1]) {
218 case '/':
219 memmove(&ptr[1], &ptr[2], end - &ptr[2]);
220 break;
221
222 case '.':
223 switch (ptr[2]) {
224 case '\0':
225 ptr[1] = '\0';
226 break;
227
228 case '/':
229 memmove(&ptr[1], &ptr[3], end - &ptr[3]);
230 break;
231
232 case '.':
233 if (ptr[3] == '/') {
234 char *pos = &ptr[4];
235 while (ptr != url->path) {
236 if (*--ptr == '/') {
237 break;
238 }
239 }
240 memmove(&ptr[1], pos, end - pos);
241 break;
242 } else if (!ptr[3]) {
243 /* .. at the end */
244 ptr[1] = '\0';
245 }
246 /* no break */
247
248 default:
249 /* something else */
250 ++ptr;
251 break;
252 }
253 break;
254
255 default:
256 ++ptr;
257 break;
258 }
259 }
260 }
261 /* unset default ports */
262 if (url->port) {
263 if ( ((url->port == 80) && !strcmp(url->scheme, "http"))
264 || ((url->port ==443) && !strcmp(url->scheme, "https"))
265 ) {
266 url->port = 0;
267 }
268 }
269
270 if (url_str) {
271 php_http_url_to_string(url, url_str, url_len TSRMLS_CC);
272 }
273
274 if (url_ptr) {
275 *url_ptr = url;
276 } else {
277 php_url_free(url);
278 }
279 }
280
281 STATUS php_http_url_encode_hash(HashTable *hash, const char *pre_encoded_str, size_t pre_encoded_len, char **encoded_str, size_t *encoded_len TSRMLS_DC)
282 {
283 const char *arg_sep_str;
284 size_t arg_sep_len;
285 php_http_buffer_t *qstr = php_http_buffer_new();
286
287 php_http_url_argsep(&arg_sep_str, &arg_sep_len TSRMLS_CC);
288
289 if (SUCCESS != php_http_url_encode_hash_ex(hash, qstr, arg_sep_str, arg_sep_len, "=", 1, pre_encoded_str, pre_encoded_len TSRMLS_CC)) {
290 php_http_buffer_free(&qstr);
291 return FAILURE;
292 }
293
294 php_http_buffer_data(qstr, encoded_str, encoded_len);
295 php_http_buffer_free(&qstr);
296
297 return SUCCESS;
298 }
299
300 STATUS php_http_url_encode_hash_ex(HashTable *hash, php_http_buffer_t *qstr, const char *arg_sep_str, size_t arg_sep_len, const char *val_sep_str, size_t val_sep_len, const char *pre_encoded_str, size_t pre_encoded_len TSRMLS_DC)
301 {
302 if (pre_encoded_len && pre_encoded_str) {
303 php_http_buffer_append(qstr, pre_encoded_str, pre_encoded_len);
304 }
305
306 if (!php_http_params_to_string(qstr, hash, arg_sep_str, arg_sep_len, "", 0, val_sep_str, val_sep_len, PHP_HTTP_PARAMS_QUERY TSRMLS_CC)) {
307 return FAILURE;
308 }
309
310 return SUCCESS;
311 }
312
313 void php_http_url_dtor(php_http_url_t *url)
314 {
315 STR_FREE(url->scheme.str);
316 STR_FREE(url->authority.userinfo.username.str);
317 STR_FREE(url->authority.userinfo.password.str);
318 STR_FREE(url->authority.host.str);
319 STR_FREE(url->path.str);
320 STR_FREE(url->query.str);
321 STR_FREE(url->fragment.str);
322 }
323
324 void php_http_url_free(php_http_url_t **url)
325 {
326 if (*url) {
327 php_http_url_dtor(*url);
328 efree(*url);
329 *url = NULL;
330 }
331 }
332
333 static size_t parse_mb_utf8(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn)
334 {
335 unsigned wchar;
336 size_t consumed = utf8towc(&wchar, (const unsigned char *) ptr, end - ptr);
337
338 if (!consumed || consumed == (size_t) -1) {
339 return 0;
340 }
341 if (!idn && !isualnum(wchar)) {
342 return 0;
343 }
344
345 return consumed;
346 }
347
#ifdef PHP_HTTP_HAVE_WCHAR
/**
 * Decode one multibyte character at ptr according to the current locale,
 * using mbrtowc() or mbtowc(), whichever is available.
 *
 * @param url unused
 * @param ptr start of the sequence
 * @param end end of the input
 * @param idn if set, any decodable character is accepted; otherwise the
 *            character must satisfy iswalnum()
 * @return the number of bytes of the character, or 0 if it was rejected
 */
static size_t parse_mb_loc(php_http_url_t *url, const char *ptr, const char *end, zend_bool idn)
{
	wchar_t wchar;
	size_t consumed = 0;
#if defined(HAVE_MBRTOWC)
	mbstate_t ps = {0};

	consumed = mbrtowc(&wchar, ptr, end - ptr, &ps);
#elif defined(HAVE_MBTOWC)
	consumed = mbtowc(&wchar, ptr, end - ptr);
#endif

	/*
	 * 0 means nothing (or a NUL) was decoded, (size_t) -1 an invalid
	 * sequence and (size_t) -2 (mbrtowc only) an incomplete one; none of
	 * them is a byte count the caller may advance by.
	 */
	if (!consumed || consumed == (size_t) -1 || consumed == (size_t) -2) {
		return 0;
	}
	if (!idn && !iswalnum(wchar)) {
		return 0;
	}

	return consumed;
}
#endif
371
/* The URL part being parsed when parse_mb() is called */
typedef enum parse_mb_what {
	PARSE_SCHEME,
	PARSE_USERINFO,
	PARSE_HOSTINFO,
	PARSE_PATH,
	PARSE_QUERY,
	PARSE_FRAGMENT
} parse_mb_what_t;

/* Names used in parse_mb() warnings; indexed by parse_mb_what_t, keep in the same order */
static const char * const parse_what[] = {
	"scheme",
	"userinfo",
	"hostinfo",
	"path",
	"query",
	"fragment"
};
389
390 static size_t parse_mb(php_http_url_t *url, parse_mb_what_t what, const char *ptr, const char *end, const char *begin, zend_bool silent)
391 {
392 size_t consumed = 0;
393 zend_bool idn = (what == PARSE_HOSTINFO) && (url->flags & PHP_HTTP_URL_PARSE_IDN);
394
395 if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
396 consumed = parse_mb_utf8(url, ptr, end, idn);
397 }
398 #ifdef PHP_HTTP_HAVE_WCHAR
399 else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) {
400 consumed = parse_mb_loc(url, ptr, end, idn);
401 }
402 #endif
403
404 if (!consumed && !silent) {
405 TSRMLS_FETCH_FROM_CTX(url->ts);
406 php_error_docref(NULL TSRMLS_CC, E_WARNING,
407 "Failed to parse %s; unexpected byte 0x%02x at pos %u in '%s'",
408 parse_what[what], (unsigned char) *ptr, (unsigned) (ptr - begin), begin);
409 }
410
411 return consumed;
412 }
413
414 static STATUS parse_userinfo(php_http_url_t *url, const char *ptr, const char *end)
415 {
416 size_t mb;
417 const char *password = NULL, *tmp = ptr;
418 TSRMLS_FETCH_FROM_CTX(url->ts);
419
420 do {
421 switch (*ptr) {
422 case ':':
423 if (password) {
424 php_error_docref(NULL TSRMLS_CC, E_WARNING,
425 "Failed to parse password; duplicate ':' at pos %u in '%s'",
426 (unsigned) (ptr - tmp), tmp);
427 return FAILURE;
428 }
429 password = ptr + 1;
430 break;
431
432 case '%':
433 if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) {
434 php_error_docref(NULL TSRMLS_CC, E_WARNING,
435 "Failed to parse userinfo; invalid percent encoding at pos %u in '%s'",
436 (unsigned) (ptr - tmp), tmp);
437 return FAILURE;
438 }
439 ptr += 2;
440 break;
441
442 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
443 case '+': case ',': case ';': case '=': /* sub-delims */
444 case '-': case '.': case '_': case '~': /* unreserved */
445 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
446 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
447 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
448 case 'V': case 'W': case 'X': case 'Y': case 'Z':
449 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
450 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
451 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
452 case 'v': case 'w': case 'x': case 'y': case 'z':
453 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
454 case '7': case '8': case '9':
455 /* allowed */
456 break;
457
458 default:
459 if (!(mb = parse_mb(url, PARSE_USERINFO, ptr, end, tmp, 0))) {
460 return FAILURE;
461 }
462 ptr += mb - 1;
463 }
464 } while(++ptr != end);
465
466 if (password) {
467 if ((url->authority.userinfo.username.len = password - tmp - 1)) {
468 url->authority.userinfo.username.str = estrndup(tmp,
469 url->authority.userinfo.username.len);
470 }
471 if ((url->authority.userinfo.password.len = end - password)) {
472 url->authority.userinfo.password.str = estrndup(password,
473 url->authority.userinfo.password.len);
474 }
475 } else {
476 if ((url->authority.userinfo.username.len = end - tmp)) {
477 url->authority.userinfo.username.str = estrndup(tmp,
478 url->authority.userinfo.username.len);
479 }
480 }
481
482 return SUCCESS;
483 }
484
485 static STATUS parse_hostinfo(php_http_url_t *url, const char *ptr, const char *end)
486 {
487 size_t mb;
488 const char *tmp = ptr, *port = NULL;
489 TSRMLS_FETCH_FROM_CTX(url->ts);
490
491 /* FIXME: IP(v6) addresses */
492 do {
493 switch (*ptr) {
494 case ':':
495 if (port) {
496 php_error_docref(NULL TSRMLS_CC, E_WARNING,
497 "Failed to parse port; duplicate ':' at pos %u in '%s'",
498 (unsigned) (ptr - tmp), tmp);
499 return FAILURE;
500 }
501 port = ptr + 1;
502 break;
503
504 case '%':
505 if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) {
506 php_error_docref(NULL TSRMLS_CC, E_WARNING,
507 "Failed to parse hostinfo; invalid percent encoding at pos %u in '%s'",
508 (unsigned) (ptr - tmp), tmp);
509 return FAILURE;
510 }
511 ptr += 2;
512 break;
513
514 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
515 case '+': case ',': case ';': case '=': /* sub-delims */
516 case '-': case '.': case '_': case '~': /* unreserved */
517 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
518 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
519 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
520 case 'V': case 'W': case 'X': case 'Y': case 'Z':
521 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
522 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
523 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
524 case 'v': case 'w': case 'x': case 'y': case 'z':
525 if (port) {
526 php_error_docref(NULL TSRMLS_CC, E_WARNING,
527 "Failed to parse port; unexpected char '%c' at pos %u in '%s'",
528 (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp);
529 return FAILURE;
530 }
531 /* no break */
532 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
533 case '7': case '8': case '9':
534 /* allowed */
535 if (port) {
536 url->authority.port *= 10;
537 url->authority.port += *ptr - '0';
538 }
539 break;
540
541 default:
542 if (port) {
543 php_error_docref(NULL TSRMLS_CC, E_WARNING,
544 "Failed to parse port; unexpected byte 0x%02x at pos %u in '%s'",
545 (unsigned char) *ptr, (unsigned) (ptr - tmp), tmp);
546 return FAILURE;
547 } else if (!(mb = parse_mb(url, PARSE_HOSTINFO, ptr, end, tmp, 0))) {
548 return FAILURE;
549 }
550 ptr += mb - 1;
551 }
552 } while (++ptr != end);
553
554 if (port) {
555 url->authority.host.len = port - tmp - 1;
556 } else {
557 url->authority.host.len = end - tmp;
558 }
559
560 url->authority.host.str = estrndup(tmp, url->authority.host.len);
561
562 #ifdef PHP_HTTP_HAVE_IDN
563 if (url->flags & PHP_HTTP_URL_PARSE_IDN) {
564 char *idn = NULL;
565 int rv = -1;
566
567 if (url->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
568 rv = idna_to_ascii_8z(url->authority.host.str, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
569 }
570 # ifdef PHP_HTTP_HAVE_WCHAR
571 else if (url->flags & PHP_HTTP_URL_PARSE_MBLOC) {
572 rv = idna_to_ascii_lz(url->authority.host.str, &idn, IDNA_ALLOW_UNASSIGNED|IDNA_USE_STD3_ASCII_RULES);
573 }
574 # endif
575 if (rv != IDNA_SUCCESS) {
576 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse IDN; %s", idna_strerror(rv));
577 return FAILURE;
578 } else {
579 STR_SET(url->authority.host.str, estrdup(idn));
580 url->authority.host.len = strlen(idn);
581 free(idn);
582 }
583 }
584 #endif
585
586 return SUCCESS;
587 }
588
589 static const char *parse_authority(php_http_url_t *url, const char *ptr, const char *end)
590 {
591 const char *tmp = ptr;
592
593 do {
594 switch (*ptr) {
595 case '@':
596 /* userinfo delimiter */
597 if (tmp != ptr && SUCCESS != parse_userinfo(url, tmp, ptr)) {
598 return NULL;
599 }
600 tmp = ptr + 1;
601 break;
602
603 case '/':
604 case '?':
605 case '#':
606 case '\0':
607 /* host delimiter */
608 if (tmp != ptr && SUCCESS != parse_hostinfo(url, tmp, ptr)) {
609 return NULL;
610 }
611 return ptr;
612 }
613 } while (++ptr <= end);
614
615 return NULL;
616 }
617
618 static const char *parse_path(php_http_url_t *url, const char *ptr, const char *end)
619 {
620 size_t mb;
621 const char *tmp = ptr;
622 TSRMLS_FETCH_FROM_CTX(url->ts);
623
624 do {
625 switch (*ptr) {
626 case '?':
627 case '\0':
628 if ((url->path.len = ptr - tmp)) {
629 url->path.str = estrndup(tmp, url->path.len);
630 }
631 return ptr;
632
633 case '%':
634 if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) {
635 php_error_docref(NULL TSRMLS_CC, E_WARNING,
636 "Failed to parse path; invalid percent encoding at pos %u in '%s'",
637 (unsigned) (ptr - tmp), tmp);
638 return NULL;
639 }
640 ptr += 2;
641 break;
642
643 case '/': /* yeah, well */
644 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
645 case '+': case ',': case ';': case '=': /* sub-delims */
646 case '-': case '.': case '_': case '~': /* unreserved */
647 case ':': case '@': /* pchar */
648 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
649 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
650 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
651 case 'V': case 'W': case 'X': case 'Y': case 'Z':
652 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
653 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
654 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
655 case 'v': case 'w': case 'x': case 'y': case 'z':
656 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
657 case '7': case '8': case '9':
658 /* allowed */
659 break;
660
661 default:
662 if (!(mb = parse_mb(url, PARSE_PATH, ptr, end, tmp, 0))) {
663 return NULL;
664 }
665 ptr += mb - 1;
666 }
667 } while (++ptr <= end);
668
669 return NULL;
670 }
671
672 static const char *parse_query(php_http_url_t *url, const char *ptr, const char *end)
673 {
674 size_t mb;
675 const char *tmp = ptr + !!*ptr;
676 TSRMLS_FETCH_FROM_CTX(url->ts);
677
678 do {
679 switch (*ptr) {
680 case '#':
681 case '\0':
682 if ((url->query.len = ptr - tmp)) {
683 url->query.str = estrndup(tmp, url->query.len);
684 }
685 return ptr;
686
687 case '%':
688 if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) {
689 php_error_docref(NULL TSRMLS_CC, E_WARNING,
690 "Failed to parse query; invalid percent encoding at pos %u in '%s'",
691 (unsigned) (ptr - tmp), tmp);
692 return NULL;
693 }
694 ptr += 2;
695 break;
696
697 case '?': case '/': /* yeah, well */
698 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
699 case '+': case ',': case ';': case '=': /* sub-delims */
700 case '-': case '.': case '_': case '~': /* unreserved */
701 case ':': case '@': /* pchar */
702 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
703 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
704 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
705 case 'V': case 'W': case 'X': case 'Y': case 'Z':
706 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
707 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
708 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
709 case 'v': case 'w': case 'x': case 'y': case 'z':
710 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
711 case '7': case '8': case '9':
712 /* allowed */
713 break;
714
715 default:
716 if (!(mb = parse_mb(url, PARSE_QUERY, ptr, end, tmp, 0))) {
717 return NULL;
718 }
719 ptr += mb - 1;
720 }
721 } while (++ptr <= end);
722
723 return NULL;
724 }
725
726 static const char *parse_fragment(php_http_url_t *url, const char *ptr, const char *end)
727 {
728 size_t mb;
729 const char *tmp = ptr + !!*ptr;
730 TSRMLS_FETCH_FROM_CTX(url->ts);
731
732 do {
733 switch (*ptr) {
734 case '\0':
735 if ((url->fragment.len = ptr - tmp)) {
736 url->fragment.str = estrndup(tmp, url->fragment.len);
737 }
738 return ptr;
739
740 case '%':
741 if (ptr[1] != '%' && (end - ptr <= 2 || !isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2)))) {
742 php_error_docref(NULL TSRMLS_CC, E_WARNING,
743 "Failed to parse query; invalid percent encoding at pos %u in '%s'",
744 (unsigned) (ptr - tmp), tmp);
745 return NULL;
746 }
747 ptr += 2;
748 break;
749
750 case '?': case '/': /* yeah, well */
751 case '!': case '$': case '&': case '\'': case '(': case ')': case '*':
752 case '+': case ',': case ';': case '=': /* sub-delims */
753 case '-': case '.': case '_': case '~': /* unreserved */
754 case ':': case '@': /* pchar */
755 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
756 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
757 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
758 case 'V': case 'W': case 'X': case 'Y': case 'Z':
759 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
760 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
761 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
762 case 'v': case 'w': case 'x': case 'y': case 'z':
763 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
764 case '7': case '8': case '9':
765 /* allowed */
766 break;
767
768 default:
769 if (!(mb = parse_mb(url, PARSE_FRAGMENT, ptr, end, tmp, 0))) {
770 return NULL;
771 }
772 ptr += mb - 1;
773 }
774 } while (++ptr <= end);
775
776 return NULL;
777 }
778
779 static const char *parse_hier(php_http_url_t *url, const char *ptr, const char *end)
780 {
781 if (*ptr == '/') {
782 if (end - ptr > 1) {
783 if (*(ptr + 1) == '/') {
784 if (!(ptr = parse_authority(url, ptr + 2, end))) {
785 return NULL;
786 }
787 }
788 }
789 }
790 return parse_path(url, ptr, end);
791 }
792
793 static const char *parse_scheme(php_http_url_t *url, const char *ptr, const char *end)
794 {
795 size_t mb;
796 const char *tmp = ptr;
797
798 do {
799 switch (*ptr) {
800 case ':':
801 /* scheme delimiter */
802 url->scheme.len = ptr - tmp;
803 url->scheme.str = estrndup(tmp, url->scheme.len);
804 return ++ptr;
805
806 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
807 case '7': case '8': case '9':
808 case '+': case '-': case '.':
809 if (ptr == tmp) {
810 return tmp;
811 }
812 /* no break */
813 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
814 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
815 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
816 case 'V': case 'W': case 'X': case 'Y': case 'Z':
817 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
818 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
819 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
820 case 'v': case 'w': case 'x': case 'y': case 'z':
821 /* scheme part */
822 break;
823
824 default:
825 if (!(mb = parse_mb(url, PARSE_SCHEME, ptr, end, tmp, 1))) {
826 /* soft fail; parse path next */
827 return tmp;
828 }
829 ptr += mb - 1;
830 }
831 } while (++ptr != end);
832
833 return tmp;
834 }
835
836 php_http_url_t *php_http_url_init(php_http_url_t *url, const char *str, size_t len, unsigned flags TSRMLS_DC)
837 {
838 const char *ptr, *end = str + len;
839 zend_bool free_url = !url;
840
841 if (url) {
842 memset(url, 0, sizeof(*url));
843 } else {
844 url = ecalloc(1, sizeof(*url));
845 }
846
847 url->flags = flags;
848 TSRMLS_SET_CTX(url->ts);
849
850 if ((ptr = str) && !(str = parse_scheme(url, ptr, end))) {
851 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL scheme: '%s'", ptr);
852 if (free_url) {
853 php_http_url_free(&url);
854 } else {
855 php_http_url_dtor(url);
856 }
857 return NULL;
858 }
859
860 if ((ptr = str) && !(str = parse_hier(url, ptr, end))) {
861 if (free_url) {
862 php_http_url_free(&url);
863 } else {
864 php_http_url_dtor(url);
865 }
866 return NULL;
867 }
868
869 if ((ptr = str) && !(str = parse_query(url, ptr, end))) {
870 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL query: '%s'", ptr);
871 if (free_url) {
872 php_http_url_free(&url);
873 } else {
874 php_http_url_dtor(url);
875 }
876 return NULL;
877 }
878
879 if ((ptr = str) && !(str = parse_fragment(url, ptr, end))) {
880 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to parse URL fragment: '%s'", ptr);
881 if (free_url) {
882 php_http_url_free(&url);
883 } else {
884 php_http_url_dtor(url);
885 }
886 return NULL;
887 }
888
889 return url;
890 }
891
892 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl___construct, 0, 0, 0)
893 ZEND_ARG_INFO(0, old_url)
894 ZEND_ARG_INFO(0, new_url)
895 ZEND_ARG_INFO(0, flags)
896 ZEND_END_ARG_INFO();
897 PHP_METHOD(HttpUrl, __construct)
898 {
899 zval *new_url = NULL, *old_url = NULL;
900 long flags = PHP_HTTP_URL_FROM_ENV;
901 zend_error_handling zeh;
902
903 php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|z!z!l", &old_url, &new_url, &flags), invalid_arg, return);
904
905 zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
906 {
907 php_url *res_purl, *new_purl = NULL, *old_purl = NULL;
908
909 if (new_url) {
910 switch (Z_TYPE_P(new_url)) {
911 case IS_OBJECT:
912 case IS_ARRAY:
913 new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC);
914 break;
915 default: {
916 zval *cpy = php_http_ztyp(IS_STRING, new_url);
917
918 new_purl = php_url_parse(Z_STRVAL_P(cpy));
919 zval_ptr_dtor(&cpy);
920 break;
921 }
922 }
923 if (!new_purl) {
924 zend_restore_error_handling(&zeh TSRMLS_CC);
925 return;
926 }
927 }
928 if (old_url) {
929 switch (Z_TYPE_P(old_url)) {
930 case IS_OBJECT:
931 case IS_ARRAY:
932 old_purl = php_http_url_from_struct(NULL, HASH_OF(old_url) TSRMLS_CC);
933 break;
934 default: {
935 zval *cpy = php_http_ztyp(IS_STRING, old_url);
936
937 old_purl = php_url_parse(Z_STRVAL_P(cpy));
938 zval_ptr_dtor(&cpy);
939 break;
940 }
941 }
942 if (!old_purl) {
943 if (new_purl) {
944 php_url_free(new_purl);
945 }
946 zend_restore_error_handling(&zeh TSRMLS_CC);
947 return;
948 }
949 }
950
951 php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC);
952 php_http_url_to_struct(res_purl, getThis() TSRMLS_CC);
953
954 php_url_free(res_purl);
955 if (old_purl) {
956 php_url_free(old_purl);
957 }
958 if (new_purl) {
959 php_url_free(new_purl);
960 }
961 }
962 zend_restore_error_handling(&zeh TSRMLS_CC);
963 }
964
965 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_mod, 0, 0, 1)
966 ZEND_ARG_INFO(0, more_url_parts)
967 ZEND_ARG_INFO(0, flags)
968 ZEND_END_ARG_INFO();
969 PHP_METHOD(HttpUrl, mod)
970 {
971 zval *new_url = NULL;
972 long flags = PHP_HTTP_URL_JOIN_PATH | PHP_HTTP_URL_JOIN_QUERY;
973 zend_error_handling zeh;
974
975 php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z!|l", &new_url, &flags), invalid_arg, return);
976
977 zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
978 {
979 php_url *new_purl = NULL, *old_purl = NULL;
980
981 if (new_url) {
982 switch (Z_TYPE_P(new_url)) {
983 case IS_OBJECT:
984 case IS_ARRAY:
985 new_purl = php_http_url_from_struct(NULL, HASH_OF(new_url) TSRMLS_CC);
986 break;
987 default: {
988 zval *cpy = php_http_ztyp(IS_STRING, new_url);
989
990 new_purl = php_url_parse(Z_STRVAL_P(new_url));
991 zval_ptr_dtor(&cpy);
992 break;
993 }
994 }
995 if (!new_purl) {
996 zend_restore_error_handling(&zeh TSRMLS_CC);
997 return;
998 }
999 }
1000
1001 if ((old_purl = php_http_url_from_struct(NULL, HASH_OF(getThis()) TSRMLS_CC))) {
1002 php_url *res_purl;
1003
1004 ZVAL_OBJVAL(return_value, zend_objects_clone_obj(getThis() TSRMLS_CC), 0);
1005
1006 php_http_url(flags, old_purl, new_purl, &res_purl, NULL, NULL TSRMLS_CC);
1007 php_http_url_to_struct(res_purl, return_value TSRMLS_CC);
1008
1009 php_url_free(res_purl);
1010 php_url_free(old_purl);
1011 }
1012 if (new_purl) {
1013 php_url_free(new_purl);
1014 }
1015 }
1016 zend_restore_error_handling(&zeh TSRMLS_CC);
1017 }
1018
1019 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toString, 0, 0, 0)
1020 ZEND_END_ARG_INFO();
1021 PHP_METHOD(HttpUrl, toString)
1022 {
1023 if (SUCCESS == zend_parse_parameters_none()) {
1024 php_url *purl;
1025
1026 if ((purl = php_http_url_from_struct(NULL, HASH_OF(getThis()) TSRMLS_CC))) {
1027 char *str;
1028 size_t len;
1029
1030 php_http_url(0, purl, NULL, NULL, &str, &len TSRMLS_CC);
1031 php_url_free(purl);
1032 RETURN_STRINGL(str, len, 0);
1033 }
1034 }
1035 RETURN_EMPTY_STRING();
1036 }
1037
1038 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_toArray, 0, 0, 0)
1039 ZEND_END_ARG_INFO();
1040 PHP_METHOD(HttpUrl, toArray)
1041 {
1042 php_url *purl;
1043
1044 if (SUCCESS != zend_parse_parameters_none()) {
1045 return;
1046 }
1047
1048 /* strip any non-URL properties */
1049 purl = php_http_url_from_struct(NULL, HASH_OF(getThis()) TSRMLS_CC);
1050 php_http_url_to_struct(purl, return_value TSRMLS_CC);
1051 php_url_free(purl);
1052 }
1053
1054 ZEND_BEGIN_ARG_INFO_EX(ai_HttpUrl_parse, 0, 0, 1)
1055 ZEND_ARG_INFO(0, url)
1056 ZEND_ARG_INFO(0, flags)
1057 ZEND_END_ARG_INFO();
1058 PHP_METHOD(HttpUrl, parse)
1059 {
1060 char *str;
1061 int len;
1062 long flags = 0;
1063 php_http_url_t url;
1064 zend_error_handling zeh;
1065
1066 php_http_expect(SUCCESS == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &len, &flags), invalid_arg, return);
1067
1068 zend_replace_error_handling(EH_THROW, php_http_exception_bad_url_class_entry, &zeh TSRMLS_CC);
1069 if (php_http_url_init(&url, str, len, flags TSRMLS_CC)) {
1070 object_init_ex(return_value, php_http_url_class_entry);
1071 if (url.scheme.len) {
1072 zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("scheme"),
1073 url.scheme.str, url.scheme.len TSRMLS_CC);
1074 }
1075 if (url.authority.userinfo.username.len) {
1076 zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("user"),
1077 url.authority.userinfo.username.str, url.authority.userinfo.username.len TSRMLS_CC);
1078 }
1079 if (url.authority.userinfo.password.len) {
1080 zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("pass"),
1081 url.authority.userinfo.password.str, url.authority.userinfo.password.len TSRMLS_CC);
1082 }
1083 if (url.authority.host.len) {
1084 zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("host"),
1085 url.authority.host.str, url.authority.host.len TSRMLS_CC);
1086 }
1087 if (url.authority.port) {
1088 zend_update_property_long(php_http_url_class_entry, return_value, ZEND_STRL("port"),
1089 url.authority.port TSRMLS_CC);
1090 }
1091 if (url.path.len) {
1092 zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("path"),
1093 url.path.str, url.path.len TSRMLS_CC);
1094 }
1095 if (url.query.len) {
1096 zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("query"),
1097 url.query.str, url.query.len TSRMLS_CC);
1098 }
1099 if (url.fragment.len) {
1100 zend_update_property_stringl(php_http_url_class_entry, return_value, ZEND_STRL("fragment"),
1101 url.fragment.str, url.fragment.len TSRMLS_CC);
1102 }
1103 php_http_url_dtor(&url);
1104 }
1105 zend_restore_error_handling(&zeh TSRMLS_CC);
1106 }
1107
1108 static zend_function_entry php_http_url_methods[] = {
1109 PHP_ME(HttpUrl, __construct, ai_HttpUrl___construct, ZEND_ACC_PUBLIC|ZEND_ACC_CTOR)
1110 PHP_ME(HttpUrl, mod, ai_HttpUrl_mod, ZEND_ACC_PUBLIC)
1111 PHP_ME(HttpUrl, toString, ai_HttpUrl_toString, ZEND_ACC_PUBLIC)
1112 ZEND_MALIAS(HttpUrl, __toString, toString, ai_HttpUrl_toString, ZEND_ACC_PUBLIC)
1113 PHP_ME(HttpUrl, toArray, ai_HttpUrl_toArray, ZEND_ACC_PUBLIC)
1114 PHP_ME(HttpUrl, parse, ai_HttpUrl_parse, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
1115 EMPTY_FUNCTION_ENTRY
1116 };
1117
1118 zend_class_entry *php_http_url_class_entry;
1119
1120 PHP_MINIT_FUNCTION(http_url)
1121 {
1122 zend_class_entry ce = {0};
1123
1124 INIT_NS_CLASS_ENTRY(ce, "http", "Url", php_http_url_methods);
1125 php_http_url_class_entry = zend_register_internal_class(&ce TSRMLS_CC);
1126
1127 zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("scheme"), ZEND_ACC_PUBLIC TSRMLS_CC);
1128 zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("user"), ZEND_ACC_PUBLIC TSRMLS_CC);
1129 zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("pass"), ZEND_ACC_PUBLIC TSRMLS_CC);
1130 zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("host"), ZEND_ACC_PUBLIC TSRMLS_CC);
1131 zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("port"), ZEND_ACC_PUBLIC TSRMLS_CC);
1132 zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("path"), ZEND_ACC_PUBLIC TSRMLS_CC);
1133 zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("query"), ZEND_ACC_PUBLIC TSRMLS_CC);
1134 zend_declare_property_null(php_http_url_class_entry, ZEND_STRL("fragment"), ZEND_ACC_PUBLIC TSRMLS_CC);
1135
1136 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("REPLACE"), PHP_HTTP_URL_REPLACE TSRMLS_CC);
1137 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("JOIN_PATH"), PHP_HTTP_URL_JOIN_PATH TSRMLS_CC);
1138 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("JOIN_QUERY"), PHP_HTTP_URL_JOIN_QUERY TSRMLS_CC);
1139 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("STRIP_USER"), PHP_HTTP_URL_STRIP_USER TSRMLS_CC);
1140 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("STRIP_PASS"), PHP_HTTP_URL_STRIP_PASS TSRMLS_CC);
1141 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("STRIP_AUTH"), PHP_HTTP_URL_STRIP_AUTH TSRMLS_CC);
1142 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("STRIP_PORT"), PHP_HTTP_URL_STRIP_PORT TSRMLS_CC);
1143 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("STRIP_PATH"), PHP_HTTP_URL_STRIP_PATH TSRMLS_CC);
1144 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("STRIP_QUERY"), PHP_HTTP_URL_STRIP_QUERY TSRMLS_CC);
1145 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("STRIP_FRAGMENT"), PHP_HTTP_URL_STRIP_FRAGMENT TSRMLS_CC);
1146 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("STRIP_ALL"), PHP_HTTP_URL_STRIP_ALL TSRMLS_CC);
1147 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("FROM_ENV"), PHP_HTTP_URL_FROM_ENV TSRMLS_CC);
1148 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("SANITIZE_PATH"), PHP_HTTP_URL_SANITIZE_PATH TSRMLS_CC);
1149
1150 #ifdef PHP_HTTP_HAVE_WCHAR
1151 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBLOC"), PHP_HTTP_URL_PARSE_MBLOC TSRMLS_CC);
1152 #endif
1153 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_MBUTF8"), PHP_HTTP_URL_PARSE_MBUTF8 TSRMLS_CC);
1154 #ifdef PHP_HTTP_HAVE_IDN
1155 zend_declare_class_constant_long(php_http_url_class_entry, ZEND_STRL("PARSE_IDN"), PHP_HTTP_URL_PARSE_IDN TSRMLS_CC);
1156 #endif
1157
1158 return SUCCESS;
1159 }
1160
1161
1162 /*
1163 * Local variables:
1164 * tab-width: 4
1165 * c-basic-offset: 4
1166 * End:
1167 * vim600: noet sw=4 ts=4 fdm=marker
1168 * vim<600: noet sw=4 ts=4
1169 */
1170