unfold url tests
[m6w6/ext-http] / php_http_message_parser.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 #include "php_http_api.h"
14
15 #ifndef DBG_PARSER
16 # define DBG_PARSER 0
17 #endif
18
19 typedef struct php_http_message_parser_state_spec {
20 php_http_message_parser_state_t state;
21 unsigned need_data:1;
22 } php_http_message_parser_state_spec_t;
23
24 static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = {
25 {PHP_HTTP_MESSAGE_PARSER_STATE_START, 1},
26 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 1},
27 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE, 0},
28 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY, 0},
29 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, 1},
30 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, 1},
31 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, 1},
32 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, 0},
33 {PHP_HTTP_MESSAGE_PARSER_STATE_DONE, 0}
34 };
35
#if DBG_PARSER
/*
 * Debug helper: map a parser state to a printable name.
 * Any state outside of the name table is reported as "FAILURE".
 */
const char *php_http_message_parser_state_name(php_http_message_parser_state_t state) {
	static const char *const names[] = {
		"START",
		"HEADER",
		"HEADER_DONE",
		"BODY",
		"BODY_DUMB",
		"BODY_LENGTH",
		"BODY_CHUNK",
		"BODY_DONE",
		"DONE"
	};
	const size_t count = sizeof(names) / sizeof(names[0]);

	if (state >= 0 && (size_t) state < count) {
		return names[state];
	}
	return "FAILURE";
}
#endif
46
47 php_http_message_parser_t *php_http_message_parser_init(php_http_message_parser_t *parser TSRMLS_DC)
48 {
49 if (!parser) {
50 parser = emalloc(sizeof(*parser));
51 }
52 memset(parser, 0, sizeof(*parser));
53
54 TSRMLS_SET_CTX(parser->ts);
55
56 php_http_header_parser_init(&parser->header TSRMLS_CC);
57 zend_stack_init(&parser->stack);
58
59 return parser;
60 }
61
62 php_http_message_parser_state_t php_http_message_parser_state_push(php_http_message_parser_t *parser, unsigned argc, ...)
63 {
64 php_http_message_parser_state_t state;
65 va_list va_args;
66 unsigned i;
67
68 va_start(va_args, argc);
69 for (i = 0; i < argc; ++i) {
70 state = va_arg(va_args, php_http_message_parser_state_t);
71 zend_stack_push(&parser->stack, &state, sizeof(state));
72 }
73 va_end(va_args);
74
75 return state;
76 }
77
78 php_http_message_parser_state_t php_http_message_parser_state_is(php_http_message_parser_t *parser)
79 {
80 php_http_message_parser_state_t *state;
81
82 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state)) {
83 return *state;
84 }
85 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
86 }
87
88 php_http_message_parser_state_t php_http_message_parser_state_pop(php_http_message_parser_t *parser)
89 {
90 php_http_message_parser_state_t state, *state_ptr;
91 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state_ptr)) {
92 state = *state_ptr;
93 zend_stack_del_top(&parser->stack);
94 return state;
95 }
96 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
97 }
98
99 void php_http_message_parser_dtor(php_http_message_parser_t *parser)
100 {
101 php_http_header_parser_dtor(&parser->header);
102 zend_stack_destroy(&parser->stack);
103 if (parser->dechunk) {
104 php_http_encoding_stream_free(&parser->dechunk);
105 }
106 if (parser->inflate) {
107 php_http_encoding_stream_free(&parser->inflate);
108 }
109 }
110
111 void php_http_message_parser_free(php_http_message_parser_t **parser)
112 {
113 if (*parser) {
114 php_http_message_parser_dtor(*parser);
115 efree(*parser);
116 *parser = NULL;
117 }
118 }
119
120 php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_stream *s, unsigned flags, php_http_message_t **message)
121 {
122 php_http_buffer_t buf;
123 php_http_message_parser_state_t state = PHP_HTTP_MESSAGE_PARSER_STATE_START;
124 TSRMLS_FETCH_FROM_CTX(parser->ts);
125
126 php_http_buffer_init_ex(&buf, 0x1000, PHP_HTTP_BUFFER_INIT_PREALLOC);
127
128 while (!php_stream_eof(s)) {
129 size_t len = 0;
130 #if DBG_PARSER
131 fprintf(stderr, "#SP: %s (f:%u)\n", php_http_message_parser_state_name(state), flags);
132 #endif
133 switch (state) {
134 case PHP_HTTP_MESSAGE_PARSER_STATE_START:
135 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
136 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
137 /* read line */
138 php_stream_get_line(s, buf.data + buf.used, buf.free, &len);
139 php_http_buffer_account(&buf, len);
140 break;
141
142 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
143 /* read all */
144 php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, buf.free));
145 break;
146
147 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
148 /* read body_length */
149 php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, MIN(buf.free, parser->body_length)));
150 break;
151
152 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
153 /* duh, this is very naive */
154 if (len) {
155 size_t read = php_stream_read(s, buf.data + buf.used, MIN(len, buf.free));
156
157 php_http_buffer_account(&buf, read);
158
159 len -= read;
160 } else {
161 php_http_buffer_resize(&buf, 24);
162 php_stream_get_line(s, buf.data, buf.free, &len);
163 php_http_buffer_account(&buf, len);
164
165 len = strtoul(buf.data + buf.used - len, NULL, 16);
166 }
167 break;
168
169 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
170 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
171 /* should not occur */
172 abort();
173 break;
174
175 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE:
176 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
177 php_http_buffer_dtor(&buf);
178 return php_http_message_parser_state_is(parser);
179 }
180
181 state = php_http_message_parser_parse(parser, &buf, flags, message);
182 }
183
184 php_http_buffer_dtor(&buf);
185 return PHP_HTTP_MESSAGE_PARSER_STATE_DONE;
186 }
187
188
189 php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, php_http_message_t **message)
190 {
191 char *str = NULL;
192 size_t len = 0;
193 size_t cut = 0;
194 TSRMLS_FETCH_FROM_CTX(parser->ts);
195
196 while (buffer->used || !php_http_message_parser_states[php_http_message_parser_state_is(parser)].need_data) {
197 #if DBG_PARSER
198 fprintf(stderr, "#MP: %s (f: %u, t:%d, l:%zu)\n",
199 php_http_message_parser_state_name(php_http_message_parser_state_is(parser)),
200 flags,
201 message && *message ? (*message)->type : -1,
202 buffer->used
203 );
204 _dpf(0, buffer->data, buffer->used);
205 #endif
206
207 switch (php_http_message_parser_state_pop(parser))
208 {
209 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
210 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
211
212 case PHP_HTTP_MESSAGE_PARSER_STATE_START:
213 {
214 char *ptr = buffer->data;
215
216 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
217 ++ptr;
218 }
219
220 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
221
222 if (buffer->used) {
223 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
224 }
225 break;
226 }
227
228 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
229 {
230 unsigned header_parser_flags = (flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP) ? PHP_HTTP_HEADER_PARSER_CLEANUP : 0;
231
232 switch (php_http_header_parser_parse(&parser->header, buffer, header_parser_flags, *message ? &(*message)->hdrs : NULL, (php_http_info_callback_t) php_http_message_info_callback, message)) {
233 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
234 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;
235
236 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
237 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
238 break;
239
240 default:
241 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
242 if (buffer->used) {
243 return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER;
244 }
245 }
246 break;
247 }
248
249 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
250 {
251 zval *h, *h_loc = NULL, *h_con = NULL, **h_cl = NULL, **h_cr = NULL, **h_te = NULL;
252
253 if ((h = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding"), 1))) {
254 zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), &h, sizeof(zval *), (void *) &h_te);
255 zend_hash_del(&(*message)->hdrs, "Transfer-Encoding", sizeof("Transfer-Encoding"));
256 }
257 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) {
258 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), &h, sizeof(zval *), (void *) &h_cl);
259 }
260 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Range"), 1))) {
261 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), &h, sizeof(zval *), (void *) &h_cr);
262 zend_hash_del(&(*message)->hdrs, "Content-Range", sizeof("Content-Range"));
263 }
264
265 /* default */
266 MAKE_STD_ZVAL(h);
267 ZVAL_LONG(h, 0);
268 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &h, sizeof(zval *), NULL);
269
270 /* so, if curl sees a 3xx code, a Location header and a Connection:close header
271 * it decides not to read the response body.
272 */
273 if ((flags & PHP_HTTP_MESSAGE_PARSER_EMPTY_REDIRECTS)
274 && (*message)->type == PHP_HTTP_RESPONSE
275 && (*message)->http.info.response.code/100 == 3
276 && (h_loc = php_http_message_header(*message, ZEND_STRL("Location"), 1))
277 && (h_con = php_http_message_header(*message, ZEND_STRL("Connection"), 1))
278 ) {
279 if (php_http_match(Z_STRVAL_P(h_con), "close", PHP_HTTP_MATCH_WORD)) {
280 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
281 zval_ptr_dtor(&h_loc);
282 zval_ptr_dtor(&h_con);
283 break;
284 }
285 }
286 if (h_loc) {
287 zval_ptr_dtor(&h_loc);
288 }
289 if (h_con) {
290 zval_ptr_dtor(&h_con);
291 }
292
293 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Encoding"), 1))) {
294 if (php_http_match(Z_STRVAL_P(h), "gzip", PHP_HTTP_MATCH_WORD)
295 || php_http_match(Z_STRVAL_P(h), "x-gzip", PHP_HTTP_MATCH_WORD)
296 || php_http_match(Z_STRVAL_P(h), "deflate", PHP_HTTP_MATCH_WORD)
297 ) {
298 if (parser->inflate) {
299 php_http_encoding_stream_reset(&parser->inflate);
300 } else {
301 parser->inflate = php_http_encoding_stream_init(NULL, php_http_encoding_stream_get_inflate_ops(), 0 TSRMLS_CC);
302 }
303 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Encoding", sizeof("X-Original-Content-Encoding"), &h, sizeof(zval *), NULL);
304 zend_hash_del(&(*message)->hdrs, "Content-Encoding", sizeof("Content-Encoding"));
305 } else {
306 zval_ptr_dtor(&h);
307 }
308 }
309
310 if ((flags & PHP_HTTP_MESSAGE_PARSER_DUMB_BODIES)) {
311 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
312 } else {
313 if (h_te) {
314 if (strstr(Z_STRVAL_PP(h_te), "chunked")) {
315 parser->dechunk = php_http_encoding_stream_init(parser->dechunk, php_http_encoding_stream_get_dechunk_ops(), 0 TSRMLS_CC);
316 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED);
317 break;
318 }
319 }
320
321 if (h_cl) {
322 char *stop;
323
324 if (Z_TYPE_PP(h_cl) == IS_STRING) {
325 parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10);
326
327 if (stop != Z_STRVAL_PP(h_cl)) {
328 php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
329 break;
330 }
331 } else if (Z_TYPE_PP(h_cl) == IS_LONG) {
332 parser->body_length = Z_LVAL_PP(h_cl);
333 php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
334 break;
335 }
336 }
337
338 if (h_cr) {
339 ulong total = 0, start = 0, end = 0;
340
341 if (!strncasecmp(Z_STRVAL_PP(h_cr), "bytes", lenof("bytes"))
342 && ( Z_STRVAL_P(h)[lenof("bytes")] == ':'
343 || Z_STRVAL_P(h)[lenof("bytes")] == ' '
344 || Z_STRVAL_P(h)[lenof("bytes")] == '='
345 )
346 ) {
347 char *total_at = NULL, *end_at = NULL;
348 char *start_at = Z_STRVAL_PP(h_cr) + sizeof("bytes");
349
350 start = strtoul(start_at, &end_at, 10);
351 if (end_at) {
352 end = strtoul(end_at + 1, &total_at, 10);
353 if (total_at && strncmp(total_at + 1, "*", 1)) {
354 total = strtoul(total_at + 1, NULL, 10);
355 }
356
357 if (end >= start && (!total || end < total)) {
358 parser->body_length = end + 1 - start;
359 php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
360 break;
361 }
362 }
363 }
364 }
365
366
367 if ((*message)->type == PHP_HTTP_REQUEST) {
368 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
369 } else {
370 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
371 }
372 }
373 break;
374 }
375
376 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
377 {
378 if (len) {
379 zval *zcl;
380
381 if (parser->inflate) {
382 char *dec_str = NULL;
383 size_t dec_len;
384
385 if (SUCCESS != php_http_encoding_stream_update(parser->inflate, str, len, &dec_str, &dec_len)) {
386 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
387 }
388
389 if (str != buffer->data) {
390 STR_FREE(str);
391 }
392 str = dec_str;
393 len = dec_len;
394 }
395
396 php_stream_write(php_http_message_body_stream((*message)->body), str, len);
397
398 /* keep track */
399 MAKE_STD_ZVAL(zcl);
400 ZVAL_LONG(zcl, php_http_message_body_size((*message)->body));
401 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL);
402 }
403
404 if (cut) {
405 php_http_buffer_cut(buffer, 0, cut);
406 }
407
408 if (str != buffer->data) {
409 STR_FREE(str);
410 }
411
412 str = NULL;
413 len = 0;
414 cut = 0;
415 break;
416 }
417
418 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
419 {
420 str = buffer->data;
421 len = buffer->used;
422 cut = len;
423
424 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
425 break;
426 }
427
428 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
429 {
430 len = MIN(parser->body_length, buffer->used);
431 str = buffer->data;
432 cut = len;
433
434 parser->body_length -= len;
435
436 php_http_message_parser_state_push(parser, 2, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
437 break;
438 }
439
440 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
441 {
442 /*
443 * - pass available data through the dechunk stream
444 * - pass decoded data along
445 * - if stream zeroed:
446 * Y: - cut processed string out of buffer, but leave length of unprocessed dechunk stream data untouched
447 * - body done
448 * N: - parse ahaed
449 */
450 char *dec_str = NULL;
451 size_t dec_len;
452
453 if (SUCCESS != php_http_encoding_stream_update(parser->dechunk, buffer->data, buffer->used, &dec_str, &dec_len)) {
454 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;
455 }
456
457 str = dec_str;
458 len = dec_len;
459
460 if (php_http_encoding_stream_done(parser->dechunk)) {
461 cut = buffer->used - PHP_HTTP_BUFFER(parser->dechunk->ctx)->used;
462 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
463 } else {
464 cut = buffer->used;
465 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
466 }
467 break;
468 }
469
470 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
471 {
472 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
473
474 if (parser->dechunk) {
475 char *dec_str = NULL;
476 size_t dec_len;
477
478 if (SUCCESS != php_http_encoding_stream_finish(parser->dechunk, &dec_str, &dec_len)) {
479 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
480 }
481 php_http_encoding_stream_dtor(parser->dechunk);
482
483 if (dec_str && dec_len) {
484 str = dec_str;
485 len = dec_len;
486 cut = 0;
487 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
488 }
489 }
490
491 break;
492 }
493
494 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: {
495 char *ptr = buffer->data;
496
497 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
498 ++ptr;
499 }
500
501 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
502
503 if (!(flags & PHP_HTTP_MESSAGE_PARSER_GREEDY)) {
504 return PHP_HTTP_MESSAGE_PARSER_STATE_DONE;
505 }
506 break;
507 }
508 }
509 }
510
511 return php_http_message_parser_state_is(parser);
512 }
513
514 /*
515 * Local variables:
516 * tab-width: 4
517 * c-basic-offset: 4
518 * End:
519 * vim600: noet sw=4 ts=4 fdm=marker
520 * vim<600: noet sw=4 ts=4
521 */
522