2 +--------------------------------------------------------------------+
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2014, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
13 #include "php_http_api.h"
/*
 * Pairs a parser state with per-state metadata; used as the element type of
 * the php_http_message_parser_states lookup table.
 * NOTE(review): the table initializers supply two values per entry and the
 * parse loop reads a `need_data` member, but only the `state` member is
 * visible in this excerpt - the second member's declaration is missing here.
 */
19 typedef struct php_http_message_parser_state_spec
{
20 php_http_message_parser_state_t state
;
22 } php_http_message_parser_state_spec_t
;
/*
 * Static per-state lookup table with one {state, flag} entry per state from
 * START through DONE. The parse loop indexes this table with the parser's
 * current state and reads the entry's need_data member: while it is unset the
 * loop keeps running even if the buffer is empty.
 * NOTE(review): the second initializer value presumably fills need_data, and
 * because the table is indexed by the state value the entries presumably have
 * to stay in enum order - confirm against the enum definition.
 */
24 static const php_http_message_parser_state_spec_t php_http_message_parser_states
[] = {
25 {PHP_HTTP_MESSAGE_PARSER_STATE_START
, 1},
26 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER
, 1},
27 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE
, 0},
28 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY
, 0},
29 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB
, 1},
30 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH
, 1},
31 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED
, 1},
32 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE
, 0},
33 {PHP_HTTP_MESSAGE_PARSER_STATE_DONE
, 0}
/*
 * Maps a parser state to a printable name using a local table of string
 * literals ("START" through "DONE"). A bounds check guards against states
 * that are negative or beyond the last table entry; what is returned in that
 * case, and the regular return, are not visible in this excerpt.
 * NOTE(review): the name listed at the BODY_CHUNKED position is "BODY_CHUNK";
 * confirm that the shortened spelling is intended.
 */
37 const char *php_http_message_parser_state_name(php_http_message_parser_state_t state
) {
38 const char *states
[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "DONE"};
/* Reject anything outside the index range of the names table. */
40 if (state
< 0 || state
> (sizeof(states
)/sizeof(char*))-1) {
/*
 * Initializes a message parser: the structure is zero-filled, the TSRM
 * context is recorded in parser->ts and the embedded header parser is
 * initialized.
 * The emalloc() below assigns newly allocated storage to `parser`.
 * NOTE(review): presumably the allocation only happens when the caller passes
 * NULL, and the (possibly new) parser is returned - neither the guarding
 * condition nor the return statement is visible in this excerpt; confirm.
 */
47 php_http_message_parser_t
*php_http_message_parser_init(php_http_message_parser_t
*parser TSRMLS_DC
)
50 parser
= emalloc(sizeof(*parser
));
/* Start from an all-zero structure before the sub-components are set up. */
52 memset(parser
, 0, sizeof(*parser
));
54 TSRMLS_SET_CTX(parser
->ts
);
56 php_http_header_parser_init(&parser
->header TSRMLS_CC
);
/*
 * Pushes `argc` parser states, passed as variadic arguments, onto the
 * parser's state stack in argument order, so the last argument ends up on
 * top of the stack.
 * The stack is resized for `argc` additional elements up front; each state is
 * stored in a pointer slot by casting it to void *.
 * NOTE(review): callers return this function's result directly, so it
 * presumably returns the last state pushed - the return statement (as well as
 * the declarations of `va_args` and `i`) is not visible in this excerpt.
 */
61 php_http_message_parser_state_t
php_http_message_parser_state_push(php_http_message_parser_t
*parser
, unsigned argc
, ...)
63 php_http_message_parser_state_t state
;
/* Make room for all states before pushing any of them. */
68 ZEND_PTR_STACK_RESIZE_IF_NEEDED((&parser
->stack
), argc
);
70 va_start(va_args
, argc
);
71 for (i
= 0; i
< argc
; ++i
) {
72 state
= va_arg(va_args
, php_http_message_parser_state_t
);
73 zend_ptr_stack_push(&parser
->stack
, (void *) state
);
/*
 * Returns the state currently on top of the parser's state stack without
 * removing it. An empty stack is reported as
 * PHP_HTTP_MESSAGE_PARSER_STATE_START.
 */
80 php_http_message_parser_state_t
php_http_message_parser_state_is(php_http_message_parser_t
*parser
)
82 if (parser
->stack
.top
) {
/* Peek at the topmost element; the stored pointer value is the state. */
83 return (php_http_message_parser_state_t
) parser
->stack
.elements
[parser
->stack
.top
- 1];
85 return PHP_HTTP_MESSAGE_PARSER_STATE_START
;
/*
 * Removes the state on top of the parser's state stack and returns it.
 * An empty stack is reported as PHP_HTTP_MESSAGE_PARSER_STATE_START and is
 * left untouched.
 */
88 php_http_message_parser_state_t
php_http_message_parser_state_pop(php_http_message_parser_t
*parser
)
90 if (parser
->stack
.top
) {
91 return (php_http_message_parser_state_t
) zend_ptr_stack_pop(&parser
->stack
);
93 return PHP_HTTP_MESSAGE_PARSER_STATE_START
;
/*
 * Releases the resources held by a message parser: the embedded header
 * parser, the state stack and - when set - the dechunk and inflate encoding
 * streams.
 */
96 void php_http_message_parser_dtor(php_http_message_parser_t
*parser
)
98 php_http_header_parser_dtor(&parser
->header
);
99 zend_ptr_stack_destroy(&parser
->stack
);
/* The encoding streams are optional; free them only if they were created. */
100 if (parser
->dechunk
) {
101 php_http_encoding_stream_free(&parser
->dechunk
);
103 if (parser
->inflate
) {
104 php_http_encoding_stream_free(&parser
->inflate
);
/*
 * Tears down the parser referenced through `parser` by running the
 * destructor on *parser.
 * NOTE(review): presumably this also releases the structure itself and resets
 * *parser - only the destructor call is visible in this excerpt; confirm.
 */
108 void php_http_message_parser_free(php_http_message_parser_t
**parser
)
111 php_http_message_parser_dtor(*parser
);
/*
 * Drives the message parser from a PHP stream. While the stream does not
 * report EOF, a piece of input - selected by the state returned from the
 * previous parse step - is read into a local buffer (initialized with 0x1000
 * and PHP_HTTP_BUFFER_INIT_PREALLOC) and the buffer is handed to
 * php_http_message_parser_parse():
 *
 *  - START / HEADER / HEADER_DONE: read one line
 *  - BODY_DUMB:    read up to the buffer's free space
 *  - BODY_LENGTH:  read up to MIN(free space, parser->body_length)
 *  - BODY_CHUNKED: read chunk data, or read and decode the next chunk-size line
 *  - DONE / FAILURE: release the buffer and return the parser's current state
 *
 * The final statements release the buffer and return
 * PHP_HTTP_MESSAGE_PARSER_STATE_DONE.
 */
117 php_http_message_parser_state_t
php_http_message_parser_parse_stream(php_http_message_parser_t
*parser
, php_stream
*s
, unsigned flags
, php_http_message_t
**message
)
119 php_http_buffer_t buf
;
120 php_http_message_parser_state_t state
= PHP_HTTP_MESSAGE_PARSER_STATE_START
;
121 TSRMLS_FETCH_FROM_CTX(parser
->ts
);
123 php_http_buffer_init_ex(&buf
, 0x1000, PHP_HTTP_BUFFER_INIT_PREALLOC
);
125 while (!php_stream_eof(s
)) {
/* Debug trace of the current state and flags, written to stderr. */
128 fprintf(stderr
, "#SP: %s (f:%u)\n", php_http_message_parser_state_name(state
), flags
);
131 case PHP_HTTP_MESSAGE_PARSER_STATE_START
:
132 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER
:
133 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE
:
/*
 * Append one line to the buffer and account for its length.
 * NOTE(review): the result of php_stream_get_line() is discarded, so `len`
 * is only meaningful if the call stored a length - confirm what happens on
 * a failed read.
 */
135 php_stream_get_line(s
, buf
.data
+ buf
.used
, buf
.free
, &len
);
136 php_http_buffer_account(&buf
, len
);
139 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB
:
141 php_http_buffer_account(&buf
, php_stream_read(s
, buf
.data
+ buf
.used
, buf
.free
));
144 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH
:
145 /* read body_length */
146 php_http_buffer_account(&buf
, php_stream_read(s
, buf
.data
+ buf
.used
, MIN(buf
.free
, parser
->body_length
)));
149 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED
:
150 /* duh, this is very naive */
/* `len` carries the number of chunk bytes still to be read here. */
152 size_t read
= php_stream_read(s
, buf
.data
+ buf
.used
, MIN(len
, buf
.free
));
154 php_http_buffer_account(&buf
, read
);
/*
 * Read the next chunk-size line and decode it as a hexadecimal number.
 * NOTE(review): this line is read to buf.data, whereas every other read
 * in this function appends at buf.data + buf.used, and the strtoul()
 * below parses from buf.data + buf.used - len. The two only agree when
 * buf.used is 0 before the read - confirm, this looks like it should
 * append at buf.data + buf.used.
 */
158 php_http_buffer_resize(&buf
, 24);
159 php_stream_get_line(s
, buf
.data
, buf
.free
, &len
);
160 php_http_buffer_account(&buf
, len
);
162 len
= strtoul(buf
.data
+ buf
.used
- len
, NULL
, 16);
166 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY
:
167 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE
:
168 /* should not occur */
172 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE
:
173 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE
:
/* Terminal states: drop the read buffer and report the parser's state. */
174 php_http_buffer_dtor(&buf
);
175 return php_http_message_parser_state_is(parser
);
/* Feed whatever was read to the state machine; its result picks the next read. */
178 state
= php_http_message_parser_parse(parser
, &buf
, flags
, message
);
181 php_http_buffer_dtor(&buf
);
182 return PHP_HTTP_MESSAGE_PARSER_STATE_DONE
;
/*
 * Core state machine of the message parser: consumes `buffer` and fills
 * `*message`.
 *
 * The loop keeps running while the buffer holds data or while the table entry
 * for the current state has need_data unset. Each iteration pops the current
 * state off the parser's stack and the matching case pushes the follow-up
 * state(s); when two states are pushed, the second argument ends up on top
 * of the stack and is therefore handled first.
 *
 *  - FAILURE:      pushes FAILURE again and returns the push result.
 *  - START:        skips leading whitespace in the buffer, cuts it off and
 *                  pushes HEADER.
 *  - HEADER:       runs the header parser (the CLEANUP flag is forwarded);
 *                  pushes HEADER_DONE once the header parser reports DONE.
 *  - HEADER_DONE:  stores Transfer-Encoding, Content-Length and Content-Range
 *                  under X-Original-* names (Transfer-Encoding and
 *                  Content-Range are deleted from the header table), handles
 *                  the EMPTY_REDIRECTS shortcut for 3xx responses carrying
 *                  Location and "Connection: close", sets up the inflate
 *                  stream for gzip / x-gzip / deflate Content-Encoding, and
 *                  finally pushes the state that reads the body: BODY_DUMB
 *                  (DUMB_BODIES flag), BODY_CHUNKED, BODY_LENGTH or BODY_DONE
 *                  (from Content-Length or Content-Range), DONE for requests,
 *                  or BODY_DUMB. The exact branch nesting is not fully
 *                  visible in this excerpt.
 *  - BODY:         passes the data through the inflate stream when one is
 *                  set, writes it to the message body stream, updates the
 *                  Content-Length header from the body size and cuts the
 *                  processed bytes out of the buffer.
 *  - BODY_DUMB:    pushes BODY_DONE and BODY.
 *  - BODY_LENGTH:  takes MIN(parser->body_length, buffer->used) bytes and
 *                  decrements body_length by that amount; pushes BODY_DONE
 *                  when nothing remains, BODY_LENGTH otherwise, plus BODY.
 *  - BODY_CHUNKED: feeds the buffer through the dechunk stream; pushes
 *                  BODY_DONE when the stream reports done, BODY_CHUNKED
 *                  otherwise, plus BODY.
 *  - BODY_DONE:    pushes DONE; finishes and destroys the dechunk stream if
 *                  one is set, and pushes BODY when decoded data is left.
 *  - DONE:         skips leading whitespace in the buffer; returns DONE unless
 *                  the GREEDY flag is set.
 *
 * The final return yields the state on top of the parser's stack.
 *
 * NOTE(review), to be confirmed:
 *  1. Content-Range handling (lines tagged 338-341): the "bytes" prefix is
 *     compared on h_cr, but the separator character is read through `h`,
 *     whose last visible assignment is the Content-Encoding lookup (line
 *     tagged 290). This looks like it should read from *h_cr.
 *  2. The HEADER failure path (line tagged 231) and the BODY_CHUNKED failure
 *     path (line tagged 451) return FAILURE directly, while the other failure
 *     paths return through php_http_message_parser_state_push(). As the
 *     current state has already been popped, FAILURE is not recorded on the
 *     stack in those two cases.
 *  3. "chunked" is detected with a strstr() substring search, whereas the
 *     other header values are tested with php_http_match(...,
 *     PHP_HTTP_MATCH_WORD).
 *  4. The strtoul() conversions of Content-Length and Content-Range are not
 *     checked for overflow, and `end_at + 1` is read without verifying that a
 *     separator follows the start offset.
 *  5. The comment opened at the line tagged 267 has no visible terminator in
 *     this excerpt.
 */
186 php_http_message_parser_state_t
php_http_message_parser_parse(php_http_message_parser_t
*parser
, php_http_buffer_t
*buffer
, unsigned flags
, php_http_message_t
**message
)
191 TSRMLS_FETCH_FROM_CTX(parser
->ts
);
193 while (buffer
->used
|| !php_http_message_parser_states
[php_http_message_parser_state_is(parser
)].need_data
) {
195 fprintf(stderr
, "#MP: %s (f: %u, t:%d, l:%zu)\n",
196 php_http_message_parser_state_name(php_http_message_parser_state_is(parser
)),
198 message
&& *message
? (*message
)->type
: -1,
201 _dpf(0, buffer
->data
, buffer
->used
);
/* Pop the current state; the matching case pushes whatever comes next. */
204 switch (php_http_message_parser_state_pop(parser
))
206 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE
:
207 return php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE
);
209 case PHP_HTTP_MESSAGE_PARSER_STATE_START
:
211 char *ptr
= buffer
->data
;
213 while (ptr
- buffer
->data
< buffer
->used
&& PHP_HTTP_IS_CTYPE(space
, *ptr
)) {
217 php_http_buffer_cut(buffer
, 0, ptr
- buffer
->data
);
220 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER
);
225 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER
:
227 unsigned header_parser_flags
= (flags
& PHP_HTTP_MESSAGE_PARSER_CLEANUP
) ? PHP_HTTP_HEADER_PARSER_CLEANUP
: 0;
229 switch (php_http_header_parser_parse(&parser
->header
, buffer
, header_parser_flags
, *message
? &(*message
)->hdrs
: NULL
, (php_http_info_callback_t
) php_http_message_info_callback
, message
)) {
230 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE
:
231 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE
;
233 case PHP_HTTP_HEADER_PARSER_STATE_DONE
:
234 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE
);
238 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER
);
240 return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER
;
246 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE
:
248 zval
*h
, *h_loc
= NULL
, *h_con
= NULL
, **h_cl
= NULL
, **h_cr
= NULL
, **h_te
= NULL
;
/* Keep the transport-related headers under X-Original-* names. */
250 if ((h
= php_http_message_header(*message
, ZEND_STRL("Transfer-Encoding"), 1))) {
251 zend_hash_update(&(*message
)->hdrs
, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), &h
, sizeof(zval
*), (void *) &h_te
);
252 zend_hash_del(&(*message
)->hdrs
, "Transfer-Encoding", sizeof("Transfer-Encoding"));
254 if ((h
= php_http_message_header(*message
, ZEND_STRL("Content-Length"), 1))) {
255 zend_hash_update(&(*message
)->hdrs
, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), &h
, sizeof(zval
*), (void *) &h_cl
);
257 if ((h
= php_http_message_header(*message
, ZEND_STRL("Content-Range"), 1))) {
258 zend_hash_update(&(*message
)->hdrs
, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), &h
, sizeof(zval
*), (void *) &h_cr
);
259 zend_hash_del(&(*message
)->hdrs
, "Content-Range", sizeof("Content-Range"));
265 zend_hash_update(&(*message
)->hdrs
, "Content-Length", sizeof("Content-Length"), &h
, sizeof(zval
*), NULL
);
267 /* so, if curl sees a 3xx code, a Location header and a Connection:close header
268 * it decides not to read the response body.
270 if ((flags
& PHP_HTTP_MESSAGE_PARSER_EMPTY_REDIRECTS
)
271 && (*message
)->type
== PHP_HTTP_RESPONSE
272 && (*message
)->http
.info
.response
.code
/100 == 3
273 && (h_loc
= php_http_message_header(*message
, ZEND_STRL("Location"), 1))
274 && (h_con
= php_http_message_header(*message
, ZEND_STRL("Connection"), 1))
276 if (php_http_match(Z_STRVAL_P(h_con
), "close", PHP_HTTP_MATCH_WORD
)) {
277 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE
);
278 zval_ptr_dtor(&h_loc
);
279 zval_ptr_dtor(&h_con
);
284 zval_ptr_dtor(&h_loc
);
287 zval_ptr_dtor(&h_con
);
290 if ((h
= php_http_message_header(*message
, ZEND_STRL("Content-Encoding"), 1))) {
291 if (php_http_match(Z_STRVAL_P(h
), "gzip", PHP_HTTP_MATCH_WORD
)
292 || php_http_match(Z_STRVAL_P(h
), "x-gzip", PHP_HTTP_MATCH_WORD
)
293 || php_http_match(Z_STRVAL_P(h
), "deflate", PHP_HTTP_MATCH_WORD
)
295 if (parser
->inflate
) {
296 php_http_encoding_stream_reset(&parser
->inflate
);
298 parser
->inflate
= php_http_encoding_stream_init(NULL
, php_http_encoding_stream_get_inflate_ops(), 0 TSRMLS_CC
);
300 zend_hash_update(&(*message
)->hdrs
, "X-Original-Content-Encoding", sizeof("X-Original-Content-Encoding"), &h
, sizeof(zval
*), NULL
);
301 zend_hash_del(&(*message
)->hdrs
, "Content-Encoding", sizeof("Content-Encoding"));
307 if ((flags
& PHP_HTTP_MESSAGE_PARSER_DUMB_BODIES
)) {
308 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB
);
311 if (strstr(Z_STRVAL_PP(h_te
), "chunked")) {
312 parser
->dechunk
= php_http_encoding_stream_init(parser
->dechunk
, php_http_encoding_stream_get_dechunk_ops(), 0 TSRMLS_CC
);
313 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED
);
321 if (Z_TYPE_PP(h_cl
) == IS_STRING
) {
322 parser
->body_length
= strtoul(Z_STRVAL_PP(h_cl
), &stop
, 10);
324 if (stop
!= Z_STRVAL_PP(h_cl
)) {
325 php_http_message_parser_state_push(parser
, 1, !parser
->body_length
?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE
:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH
);
328 } else if (Z_TYPE_PP(h_cl
) == IS_LONG
) {
329 parser
->body_length
= Z_LVAL_PP(h_cl
);
330 php_http_message_parser_state_push(parser
, 1, !parser
->body_length
?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE
:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH
);
336 ulong total
= 0, start
= 0, end
= 0;
338 if (!strncasecmp(Z_STRVAL_PP(h_cr
), "bytes", lenof("bytes"))
339 && ( Z_STRVAL_P(h
)[lenof("bytes")] == ':'
340 || Z_STRVAL_P(h
)[lenof("bytes")] == ' '
341 || Z_STRVAL_P(h
)[lenof("bytes")] == '='
344 char *total_at
= NULL
, *end_at
= NULL
;
345 char *start_at
= Z_STRVAL_PP(h_cr
) + sizeof("bytes");
347 start
= strtoul(start_at
, &end_at
, 10);
349 end
= strtoul(end_at
+ 1, &total_at
, 10);
350 if (total_at
&& strncmp(total_at
+ 1, "*", 1)) {
351 total
= strtoul(total_at
+ 1, NULL
, 10);
354 if (end
>= start
&& (!total
|| end
< total
)) {
355 parser
->body_length
= end
+ 1 - start
;
356 php_http_message_parser_state_push(parser
, 1, !parser
->body_length
?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE
:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH
);
364 if ((*message
)->type
== PHP_HTTP_REQUEST
) {
365 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE
);
367 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB
);
373 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY
:
378 if (parser
->inflate
) {
379 char *dec_str
= NULL
;
382 if (SUCCESS
!= php_http_encoding_stream_update(parser
->inflate
, str
, len
, &dec_str
, &dec_len
)) {
383 return php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE
);
386 if (str
!= buffer
->data
) {
393 php_stream_write(php_http_message_body_stream((*message
)->body
), str
, len
);
397 ZVAL_LONG(zcl
, php_http_message_body_size((*message
)->body
));
398 zend_hash_update(&(*message
)->hdrs
, "Content-Length", sizeof("Content-Length"), &zcl
, sizeof(zval
*), NULL
);
402 php_http_buffer_cut(buffer
, 0, cut
);
405 if (str
!= buffer
->data
) {
415 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB
:
421 php_http_message_parser_state_push(parser
, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE
, PHP_HTTP_MESSAGE_PARSER_STATE_BODY
);
425 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH
:
427 len
= MIN(parser
->body_length
, buffer
->used
);
431 parser
->body_length
-= len
;
433 php_http_message_parser_state_push(parser
, 2, !parser
->body_length
?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE
:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH
, PHP_HTTP_MESSAGE_PARSER_STATE_BODY
);
437 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED
:
440 * - pass available data through the dechunk stream
441 * - pass decoded data along
442 * - if stream zeroed:
443 * Y: - cut processed string out of buffer, but leave length of unprocessed dechunk stream data untouched
447 char *dec_str
= NULL
;
450 if (SUCCESS
!= php_http_encoding_stream_update(parser
->dechunk
, buffer
->data
, buffer
->used
, &dec_str
, &dec_len
)) {
451 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE
;
457 if (php_http_encoding_stream_done(parser
->dechunk
)) {
458 cut
= buffer
->used
- PHP_HTTP_BUFFER(parser
->dechunk
->ctx
)->used
;
459 php_http_message_parser_state_push(parser
, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE
, PHP_HTTP_MESSAGE_PARSER_STATE_BODY
);
462 php_http_message_parser_state_push(parser
, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED
, PHP_HTTP_MESSAGE_PARSER_STATE_BODY
);
467 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE
:
469 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE
);
471 if (parser
->dechunk
) {
472 char *dec_str
= NULL
;
475 if (SUCCESS
!= php_http_encoding_stream_finish(parser
->dechunk
, &dec_str
, &dec_len
)) {
476 return php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE
);
478 php_http_encoding_stream_dtor(parser
->dechunk
);
480 if (dec_str
&& dec_len
) {
484 php_http_message_parser_state_push(parser
, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY
);
491 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE
: {
492 char *ptr
= buffer
->data
;
494 while (ptr
- buffer
->data
< buffer
->used
&& PHP_HTTP_IS_CTYPE(space
, *ptr
)) {
498 php_http_buffer_cut(buffer
, 0, ptr
- buffer
->data
);
500 if (!(flags
& PHP_HTTP_MESSAGE_PARSER_GREEDY
)) {
501 return PHP_HTTP_MESSAGE_PARSER_STATE_DONE
;
508 return php_http_message_parser_state_is(parser
);
516 * vim600: noet sw=4 ts=4 fdm=marker
517 * vim<600: noet sw=4 ts=4