save funccall
[m6w6/ext-http] / php_http_message_parser.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 #include "php_http_api.h"
14
/* Parser debug tracing is off unless the build defines DBG_PARSER itself. */
#if !defined(DBG_PARSER)
#	define DBG_PARSER 0
#endif
18
19 typedef struct php_http_message_parser_state_spec {
20 php_http_message_parser_state_t state;
21 unsigned need_data:1;
22 } php_http_message_parser_state_spec_t;
23
24 static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = {
25 {PHP_HTTP_MESSAGE_PARSER_STATE_START, 1},
26 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 1},
27 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE, 0},
28 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY, 0},
29 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, 1},
30 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, 1},
31 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, 1},
32 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, 0},
33 {PHP_HTTP_MESSAGE_PARSER_STATE_DONE, 0}
34 };
35
#if DBG_PARSER
/*
 * Debug helper: map a parser state to a printable name.
 * Any value outside the name table (negative or too large) yields "FAILURE".
 */
const char *php_http_message_parser_state_name(php_http_message_parser_state_t state) {
	const char *names[] = {
		"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB",
		"BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "DONE"
	};

	if (state >= 0 && state <= (sizeof(names)/sizeof(char*))-1) {
		return names[state];
	}
	return "FAILURE";
}
#endif
46
47 php_http_message_parser_t *php_http_message_parser_init(php_http_message_parser_t *parser TSRMLS_DC)
48 {
49 if (!parser) {
50 parser = emalloc(sizeof(*parser));
51 }
52 memset(parser, 0, sizeof(*parser));
53
54 TSRMLS_SET_CTX(parser->ts);
55
56 php_http_header_parser_init(&parser->header TSRMLS_CC);
57
58 return parser;
59 }
60
61 php_http_message_parser_state_t php_http_message_parser_state_push(php_http_message_parser_t *parser, unsigned argc, ...)
62 {
63 php_http_message_parser_state_t state;
64 va_list va_args;
65 unsigned i;
66
67 /* short circuit */
68 ZEND_PTR_STACK_RESIZE_IF_NEEDED((&parser->stack), argc);
69
70 va_start(va_args, argc);
71 for (i = 0; i < argc; ++i) {
72 state = va_arg(va_args, php_http_message_parser_state_t);
73 zend_ptr_stack_push(&parser->stack, (void *) state);
74 }
75 va_end(va_args);
76
77 return state;
78 }
79
80 php_http_message_parser_state_t php_http_message_parser_state_is(php_http_message_parser_t *parser)
81 {
82 if (parser->stack.top) {
83 return (php_http_message_parser_state_t) zend_ptr_stack_top(&parser->stack);
84 }
85 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
86 }
87
88 php_http_message_parser_state_t php_http_message_parser_state_pop(php_http_message_parser_t *parser)
89 {
90 if (parser->stack.top) {
91 return (php_http_message_parser_state_t) zend_ptr_stack_pop(&parser->stack);
92 }
93 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
94 }
95
96 void php_http_message_parser_dtor(php_http_message_parser_t *parser)
97 {
98 php_http_header_parser_dtor(&parser->header);
99 zend_ptr_stack_destroy(&parser->stack);
100 if (parser->dechunk) {
101 php_http_encoding_stream_free(&parser->dechunk);
102 }
103 if (parser->inflate) {
104 php_http_encoding_stream_free(&parser->inflate);
105 }
106 }
107
108 void php_http_message_parser_free(php_http_message_parser_t **parser)
109 {
110 if (*parser) {
111 php_http_message_parser_dtor(*parser);
112 efree(*parser);
113 *parser = NULL;
114 }
115 }
116
117 php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_stream *s, unsigned flags, php_http_message_t **message)
118 {
119 php_http_buffer_t buf;
120 php_http_message_parser_state_t state = PHP_HTTP_MESSAGE_PARSER_STATE_START;
121 TSRMLS_FETCH_FROM_CTX(parser->ts);
122
123 php_http_buffer_init_ex(&buf, 0x1000, PHP_HTTP_BUFFER_INIT_PREALLOC);
124
125 while (!php_stream_eof(s)) {
126 size_t len = 0;
127 #if DBG_PARSER
128 fprintf(stderr, "#SP: %s (f:%u)\n", php_http_message_parser_state_name(state), flags);
129 #endif
130 switch (state) {
131 case PHP_HTTP_MESSAGE_PARSER_STATE_START:
132 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
133 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
134 /* read line */
135 php_stream_get_line(s, buf.data + buf.used, buf.free, &len);
136 php_http_buffer_account(&buf, len);
137 break;
138
139 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
140 /* read all */
141 php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, buf.free));
142 break;
143
144 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
145 /* read body_length */
146 php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, MIN(buf.free, parser->body_length)));
147 break;
148
149 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
150 /* duh, this is very naive */
151 if (len) {
152 size_t read = php_stream_read(s, buf.data + buf.used, MIN(len, buf.free));
153
154 php_http_buffer_account(&buf, read);
155
156 len -= read;
157 } else {
158 php_http_buffer_resize(&buf, 24);
159 php_stream_get_line(s, buf.data, buf.free, &len);
160 php_http_buffer_account(&buf, len);
161
162 len = strtoul(buf.data + buf.used - len, NULL, 16);
163 }
164 break;
165
166 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
167 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
168 /* should not occur */
169 abort();
170 break;
171
172 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE:
173 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
174 php_http_buffer_dtor(&buf);
175 return php_http_message_parser_state_is(parser);
176 }
177
178 state = php_http_message_parser_parse(parser, &buf, flags, message);
179 }
180
181 php_http_buffer_dtor(&buf);
182 return PHP_HTTP_MESSAGE_PARSER_STATE_DONE;
183 }
184
185
186 php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, php_http_message_t **message)
187 {
188 char *str = NULL;
189 size_t len = 0;
190 size_t cut = 0;
191 TSRMLS_FETCH_FROM_CTX(parser->ts);
192
193 while (buffer->used || !php_http_message_parser_states[php_http_message_parser_state_is(parser)].need_data) {
194 #if DBG_PARSER
195 fprintf(stderr, "#MP: %s (f: %u, t:%d, l:%zu)\n",
196 php_http_message_parser_state_name(php_http_message_parser_state_is(parser)),
197 flags,
198 message && *message ? (*message)->type : -1,
199 buffer->used
200 );
201 _dpf(0, buffer->data, buffer->used);
202 #endif
203
204 switch (php_http_message_parser_state_pop(parser))
205 {
206 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
207 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
208
209 case PHP_HTTP_MESSAGE_PARSER_STATE_START:
210 {
211 char *ptr = buffer->data;
212
213 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
214 ++ptr;
215 }
216
217 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
218
219 if (buffer->used) {
220 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
221 }
222 break;
223 }
224
225 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
226 {
227 unsigned header_parser_flags = (flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP) ? PHP_HTTP_HEADER_PARSER_CLEANUP : 0;
228
229 switch (php_http_header_parser_parse(&parser->header, buffer, header_parser_flags, *message ? &(*message)->hdrs : NULL, (php_http_info_callback_t) php_http_message_info_callback, message)) {
230 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
231 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;
232
233 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
234 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
235 break;
236
237 default:
238 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
239 if (buffer->used) {
240 return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER;
241 }
242 }
243 break;
244 }
245
246 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
247 {
248 zval *h, *h_loc = NULL, *h_con = NULL, **h_cl = NULL, **h_cr = NULL, **h_te = NULL;
249
250 if ((h = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding"), 1))) {
251 zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), &h, sizeof(zval *), (void *) &h_te);
252 zend_hash_del(&(*message)->hdrs, "Transfer-Encoding", sizeof("Transfer-Encoding"));
253 }
254 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) {
255 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), &h, sizeof(zval *), (void *) &h_cl);
256 }
257 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Range"), 1))) {
258 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), &h, sizeof(zval *), (void *) &h_cr);
259 zend_hash_del(&(*message)->hdrs, "Content-Range", sizeof("Content-Range"));
260 }
261
262 /* default */
263 MAKE_STD_ZVAL(h);
264 ZVAL_LONG(h, 0);
265 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &h, sizeof(zval *), NULL);
266
267 /* so, if curl sees a 3xx code, a Location header and a Connection:close header
268 * it decides not to read the response body.
269 */
270 if ((flags & PHP_HTTP_MESSAGE_PARSER_EMPTY_REDIRECTS)
271 && (*message)->type == PHP_HTTP_RESPONSE
272 && (*message)->http.info.response.code/100 == 3
273 && (h_loc = php_http_message_header(*message, ZEND_STRL("Location"), 1))
274 && (h_con = php_http_message_header(*message, ZEND_STRL("Connection"), 1))
275 ) {
276 if (php_http_match(Z_STRVAL_P(h_con), "close", PHP_HTTP_MATCH_WORD)) {
277 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
278 zval_ptr_dtor(&h_loc);
279 zval_ptr_dtor(&h_con);
280 break;
281 }
282 }
283 if (h_loc) {
284 zval_ptr_dtor(&h_loc);
285 }
286 if (h_con) {
287 zval_ptr_dtor(&h_con);
288 }
289
290 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Encoding"), 1))) {
291 if (php_http_match(Z_STRVAL_P(h), "gzip", PHP_HTTP_MATCH_WORD)
292 || php_http_match(Z_STRVAL_P(h), "x-gzip", PHP_HTTP_MATCH_WORD)
293 || php_http_match(Z_STRVAL_P(h), "deflate", PHP_HTTP_MATCH_WORD)
294 ) {
295 if (parser->inflate) {
296 php_http_encoding_stream_reset(&parser->inflate);
297 } else {
298 parser->inflate = php_http_encoding_stream_init(NULL, php_http_encoding_stream_get_inflate_ops(), 0 TSRMLS_CC);
299 }
300 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Encoding", sizeof("X-Original-Content-Encoding"), &h, sizeof(zval *), NULL);
301 zend_hash_del(&(*message)->hdrs, "Content-Encoding", sizeof("Content-Encoding"));
302 } else {
303 zval_ptr_dtor(&h);
304 }
305 }
306
307 if ((flags & PHP_HTTP_MESSAGE_PARSER_DUMB_BODIES)) {
308 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
309 } else {
310 if (h_te) {
311 if (strstr(Z_STRVAL_PP(h_te), "chunked")) {
312 parser->dechunk = php_http_encoding_stream_init(parser->dechunk, php_http_encoding_stream_get_dechunk_ops(), 0 TSRMLS_CC);
313 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED);
314 break;
315 }
316 }
317
318 if (h_cl) {
319 char *stop;
320
321 if (Z_TYPE_PP(h_cl) == IS_STRING) {
322 parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10);
323
324 if (stop != Z_STRVAL_PP(h_cl)) {
325 php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
326 break;
327 }
328 } else if (Z_TYPE_PP(h_cl) == IS_LONG) {
329 parser->body_length = Z_LVAL_PP(h_cl);
330 php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
331 break;
332 }
333 }
334
335 if (h_cr) {
336 ulong total = 0, start = 0, end = 0;
337
338 if (!strncasecmp(Z_STRVAL_PP(h_cr), "bytes", lenof("bytes"))
339 && ( Z_STRVAL_P(h)[lenof("bytes")] == ':'
340 || Z_STRVAL_P(h)[lenof("bytes")] == ' '
341 || Z_STRVAL_P(h)[lenof("bytes")] == '='
342 )
343 ) {
344 char *total_at = NULL, *end_at = NULL;
345 char *start_at = Z_STRVAL_PP(h_cr) + sizeof("bytes");
346
347 start = strtoul(start_at, &end_at, 10);
348 if (end_at) {
349 end = strtoul(end_at + 1, &total_at, 10);
350 if (total_at && strncmp(total_at + 1, "*", 1)) {
351 total = strtoul(total_at + 1, NULL, 10);
352 }
353
354 if (end >= start && (!total || end < total)) {
355 parser->body_length = end + 1 - start;
356 php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
357 break;
358 }
359 }
360 }
361 }
362
363
364 if ((*message)->type == PHP_HTTP_REQUEST) {
365 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
366 } else {
367 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
368 }
369 }
370 break;
371 }
372
373 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
374 {
375 if (len) {
376 zval *zcl;
377
378 if (parser->inflate) {
379 char *dec_str = NULL;
380 size_t dec_len;
381
382 if (SUCCESS != php_http_encoding_stream_update(parser->inflate, str, len, &dec_str, &dec_len)) {
383 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
384 }
385
386 if (str != buffer->data) {
387 STR_FREE(str);
388 }
389 str = dec_str;
390 len = dec_len;
391 }
392
393 php_stream_write(php_http_message_body_stream((*message)->body), str, len);
394
395 /* keep track */
396 MAKE_STD_ZVAL(zcl);
397 ZVAL_LONG(zcl, php_http_message_body_size((*message)->body));
398 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL);
399 }
400
401 if (cut) {
402 php_http_buffer_cut(buffer, 0, cut);
403 }
404
405 if (str != buffer->data) {
406 STR_FREE(str);
407 }
408
409 str = NULL;
410 len = 0;
411 cut = 0;
412 break;
413 }
414
415 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
416 {
417 str = buffer->data;
418 len = buffer->used;
419 cut = len;
420
421 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
422 break;
423 }
424
425 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
426 {
427 len = MIN(parser->body_length, buffer->used);
428 str = buffer->data;
429 cut = len;
430
431 parser->body_length -= len;
432
433 php_http_message_parser_state_push(parser, 2, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
434 break;
435 }
436
437 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
438 {
439 /*
440 * - pass available data through the dechunk stream
441 * - pass decoded data along
442 * - if stream zeroed:
443 * Y: - cut processed string out of buffer, but leave length of unprocessed dechunk stream data untouched
444 * - body done
445 * N: - parse ahaed
446 */
447 char *dec_str = NULL;
448 size_t dec_len;
449
450 if (SUCCESS != php_http_encoding_stream_update(parser->dechunk, buffer->data, buffer->used, &dec_str, &dec_len)) {
451 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;
452 }
453
454 str = dec_str;
455 len = dec_len;
456
457 if (php_http_encoding_stream_done(parser->dechunk)) {
458 cut = buffer->used - PHP_HTTP_BUFFER(parser->dechunk->ctx)->used;
459 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
460 } else {
461 cut = buffer->used;
462 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
463 }
464 break;
465 }
466
467 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
468 {
469 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
470
471 if (parser->dechunk) {
472 char *dec_str = NULL;
473 size_t dec_len;
474
475 if (SUCCESS != php_http_encoding_stream_finish(parser->dechunk, &dec_str, &dec_len)) {
476 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
477 }
478 php_http_encoding_stream_dtor(parser->dechunk);
479
480 if (dec_str && dec_len) {
481 str = dec_str;
482 len = dec_len;
483 cut = 0;
484 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
485 }
486 }
487
488 break;
489 }
490
491 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: {
492 char *ptr = buffer->data;
493
494 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
495 ++ptr;
496 }
497
498 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
499
500 if (!(flags & PHP_HTTP_MESSAGE_PARSER_GREEDY)) {
501 return PHP_HTTP_MESSAGE_PARSER_STATE_DONE;
502 }
503 break;
504 }
505 }
506 }
507
508 return php_http_message_parser_state_is(parser);
509 }
510
511 /*
512 * Local variables:
513 * tab-width: 4
514 * c-basic-offset: 4
515 * End:
516 * vim600: noet sw=4 ts=4 fdm=marker
517 * vim<600: noet sw=4 ts=4
518 */
519