fae3df3f46fc9021e5549bfd5ebfa8890984e2dd
[m6w6/ext-http] / php_http_message_parser.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2011, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 #include "php_http_api.h"
14
15 typedef struct php_http_message_parser_state_spec {
16 php_http_message_parser_state_t state;
17 unsigned need_data:1;
18 } php_http_message_parser_state_spec_t;
19
20 static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = {
21 {PHP_HTTP_MESSAGE_PARSER_STATE_START, 1},
22 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 1},
23 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE, 0},
24 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY, 0},
25 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, 1},
26 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, 1},
27 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, 1},
28 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, 0},
29 {PHP_HTTP_MESSAGE_PARSER_STATE_DONE, 0}
30 };
31
32 PHP_HTTP_API php_http_message_parser_t *php_http_message_parser_init(php_http_message_parser_t *parser TSRMLS_DC)
33 {
34 if (!parser) {
35 parser = emalloc(sizeof(*parser));
36 }
37 memset(parser, 0, sizeof(*parser));
38
39 TSRMLS_SET_CTX(parser->ts);
40
41 php_http_header_parser_init(&parser->header TSRMLS_CC);
42 zend_stack_init(&parser->stack);
43
44 return parser;
45 }
46
47 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_push(php_http_message_parser_t *parser, unsigned argc, ...)
48 {
49 php_http_message_parser_state_t state;
50 va_list va_args;
51 unsigned i;
52
53 va_start(va_args, argc);
54 for (i = 0; i < argc; ++i) {
55 state = va_arg(va_args, php_http_message_parser_state_t);
56 zend_stack_push(&parser->stack, &state, sizeof(state));
57 }
58 va_end(va_args);
59
60 return state;
61 }
62
63 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_is(php_http_message_parser_t *parser)
64 {
65 php_http_message_parser_state_t *state;
66
67 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state)) {
68 return *state;
69 }
70 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
71 }
72
73 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_pop(php_http_message_parser_t *parser)
74 {
75 php_http_message_parser_state_t state, *state_ptr;
76 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state_ptr)) {
77 state = *state_ptr;
78 zend_stack_del_top(&parser->stack);
79 return state;
80 }
81 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
82 }
83
84 PHP_HTTP_API void php_http_message_parser_dtor(php_http_message_parser_t *parser)
85 {
86 php_http_header_parser_dtor(&parser->header);
87 zend_stack_destroy(&parser->stack);
88 if (parser->dechunk) {
89 php_http_encoding_stream_free(&parser->dechunk);
90 }
91 if (parser->inflate) {
92 php_http_encoding_stream_free(&parser->inflate);
93 }
94 }
95
96 PHP_HTTP_API void php_http_message_parser_free(php_http_message_parser_t **parser)
97 {
98 if (*parser) {
99 php_http_message_parser_dtor(*parser);
100 efree(*parser);
101 *parser = NULL;
102 }
103 }
104
105 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_parse_stream(php_http_message_parser_t *parser, php_stream *s, php_http_message_t **message)
106 {
107 php_http_buffer_t buf;
108 TSRMLS_FETCH_FROM_CTX(parser->ts);
109
110 php_http_buffer_init_ex(&buf, 0x1000, PHP_HTTP_BUFFER_INIT_PREALLOC);
111
112 while (!php_stream_eof(s)) {
113 size_t len = 0;
114
115 switch (php_http_message_parser_state_is(parser)) {
116 case PHP_HTTP_MESSAGE_PARSER_STATE_START:
117 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
118 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
119 /* read line */
120 php_stream_get_line(s, buf.data + buf.used, buf.free, &len);
121 php_http_buffer_account(&buf, len);
122 break;
123
124 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
125 /* read all */
126 php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, buf.free));
127 break;
128
129 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
130 /* read body_length */
131 php_http_buffer_account(&buf, php_stream_read(s, buf.data + buf.used, MIN(buf.free, parser->body_length)));
132 break;
133
134 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
135 /* duh, this is very naive */
136 if (len) {
137 size_t read = php_stream_read(s, buf.data + buf.used, MIN(len, buf.free));
138
139 php_http_buffer_account(&buf, read);
140
141 len -= read;
142 } else {
143 php_http_buffer_resize(&buf, 24);
144 php_stream_get_line(s, buf.data, buf.free, &len);
145 php_http_buffer_account(&buf, len);
146
147 len = strtoul(buf.data + buf.used - len, NULL, 16);
148 }
149 break;
150
151 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
152 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
153 /* should not occur */
154 abort();
155 break;
156
157 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE:
158 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
159 php_http_buffer_dtor(&buf);
160 return php_http_message_parser_state_is(parser);
161 }
162
163 php_http_message_parser_parse(parser, &buf, 0, message);
164 }
165
166 php_http_buffer_dtor(&buf);
167 return PHP_HTTP_MESSAGE_PARSER_STATE_DONE;
168 }
169
170
171 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, php_http_message_t **message)
172 {
173 char *str = NULL;
174 size_t len = 0;
175 size_t cut = 0;
176 TSRMLS_FETCH_FROM_CTX(parser->ts);
177
178 while (buffer->used || !php_http_message_parser_states[php_http_message_parser_state_is(parser)].need_data) {
179 #if 0
180 const char *state[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "DONE"};
181 fprintf(stderr, "#MP: %s (%d)\n", php_http_message_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_message_parser_state_is(parser)], message && *message ? (*message)->type : -1);
182 _dpf(0, buffer->data, buffer->used);
183 #endif
184
185 switch (php_http_message_parser_state_pop(parser))
186 {
187 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
188 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
189
190 case PHP_HTTP_MESSAGE_PARSER_STATE_START:
191 {
192 char *ptr = buffer->data;
193
194 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
195 ++ptr;
196 }
197
198 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
199
200 if (buffer->used) {
201 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
202 }
203 break;
204 }
205
206 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
207 {
208 unsigned header_parser_flags = (flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP) ? PHP_HTTP_HEADER_PARSER_CLEANUP : 0;
209
210 switch (php_http_header_parser_parse(&parser->header, buffer, header_parser_flags, *message ? &(*message)->hdrs : NULL, (php_http_info_callback_t) php_http_message_info_callback, message)) {
211 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
212 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;
213
214 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
215 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
216 break;
217
218 default:
219 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
220 if (buffer->used) {
221 return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER;
222 }
223 }
224 break;
225 }
226
227 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
228 {
229 zval *h, *h_loc = NULL, *h_con = NULL, **h_cl = NULL, **h_cr = NULL, **h_te = NULL;
230
231 if ((h = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding"), 1))) {
232 zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), &h, sizeof(zval *), (void *) &h_te);
233 zend_hash_del(&(*message)->hdrs, "Transfer-Encoding", sizeof("Transfer-Encoding"));
234 }
235 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) {
236 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), &h, sizeof(zval *), (void *) &h_cl);
237 }
238 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Range"), 1))) {
239 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), &h, sizeof(zval *), (void *) &h_cr);
240 zend_hash_del(&(*message)->hdrs, "Content-Range", sizeof("Content-Range"));
241 }
242
243 /* default */
244 MAKE_STD_ZVAL(h);
245 ZVAL_LONG(h, 0);
246 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &h, sizeof(zval *), NULL);
247
248 /* so, if curl sees a 3xx code, a Location header and a Connection:close header
249 * it decides not to read the response body.
250 */
251 if ((flags & PHP_HTTP_MESSAGE_PARSER_EMPTY_REDIRECTS)
252 && (*message)->type == PHP_HTTP_RESPONSE
253 && (*message)->http.info.response.code/100 == 3
254 && (h_loc = php_http_message_header(*message, ZEND_STRL("Location"), 1))
255 && (h_con = php_http_message_header(*message, ZEND_STRL("Connection"), 1))
256 ) {
257 if (php_http_match(Z_STRVAL_P(h_con), "close", PHP_HTTP_MATCH_WORD)) {
258 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
259 zval_ptr_dtor(&h_loc);
260 zval_ptr_dtor(&h_con);
261 break;
262 }
263 }
264 if (h_loc) {
265 zval_ptr_dtor(&h_loc);
266 }
267 if (h_con) {
268 zval_ptr_dtor(&h_con);
269 }
270
271 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Encoding"), 1))) {
272 if (php_http_match(Z_STRVAL_P(h), "gzip", PHP_HTTP_MATCH_WORD)
273 || php_http_match(Z_STRVAL_P(h), "x-gzip", PHP_HTTP_MATCH_WORD)
274 || php_http_match(Z_STRVAL_P(h), "deflate", PHP_HTTP_MATCH_WORD)
275 ) {
276 if (parser->inflate) {
277 php_http_encoding_stream_reset(&parser->inflate);
278 } else {
279 parser->inflate = php_http_encoding_stream_init(NULL, php_http_encoding_stream_get_inflate_ops(), 0 TSRMLS_CC);
280 }
281 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Encoding", sizeof("X-Original-Content-Encoding"), &h, sizeof(zval *), NULL);
282 zend_hash_del(&(*message)->hdrs, "Content-Encoding", sizeof("Content-Encoding"));
283 } else {
284 zval_ptr_dtor(&h);
285 }
286 }
287
288 if ((flags & PHP_HTTP_MESSAGE_PARSER_DUMB_BODIES)) {
289 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
290 } else {
291 if (h_te) {
292 if (strstr(Z_STRVAL_PP(h_te), "chunked")) {
293 parser->dechunk = php_http_encoding_stream_init(parser->dechunk, php_http_encoding_stream_get_dechunk_ops(), 0 TSRMLS_CC);
294 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED);
295 break;
296 }
297 }
298
299 if (h_cl) {
300 char *stop;
301
302 if (Z_TYPE_PP(h_cl) == IS_STRING) {
303 parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10);
304
305 if (stop != Z_STRVAL_PP(h_cl)) {
306 php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
307 break;
308 }
309 } else if (Z_TYPE_PP(h_cl) == IS_LONG) {
310 parser->body_length = Z_LVAL_PP(h_cl);
311 php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
312 break;
313 }
314 }
315
316 if (h_cr) {
317 ulong total = 0, start = 0, end = 0;
318
319 if (!strncasecmp(Z_STRVAL_PP(h_cr), "bytes", lenof("bytes"))
320 && ( Z_STRVAL_P(h)[lenof("bytes")] == ':'
321 || Z_STRVAL_P(h)[lenof("bytes")] == ' '
322 || Z_STRVAL_P(h)[lenof("bytes")] == '='
323 )
324 ) {
325 char *total_at = NULL, *end_at = NULL;
326 char *start_at = Z_STRVAL_PP(h_cr) + sizeof("bytes");
327
328 start = strtoul(start_at, &end_at, 10);
329 if (end_at) {
330 end = strtoul(end_at + 1, &total_at, 10);
331 if (total_at && strncmp(total_at + 1, "*", 1)) {
332 total = strtoul(total_at + 1, NULL, 10);
333 }
334
335 if (end >= start && (!total || end < total)) {
336 parser->body_length = end + 1 - start;
337 php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
338 break;
339 }
340 }
341 }
342 }
343
344
345 if ((*message)->type == PHP_HTTP_REQUEST) {
346 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
347 } else {
348 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
349 }
350 }
351 break;
352 }
353
354 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
355 {
356 if (len) {
357 zval *zcl;
358
359 if (parser->inflate) {
360 char *dec_str = NULL;
361 size_t dec_len;
362
363 if (SUCCESS != php_http_encoding_stream_update(parser->inflate, str, len, &dec_str, &dec_len)) {
364 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
365 }
366
367 if (str != buffer->data) {
368 STR_FREE(str);
369 }
370 str = dec_str;
371 len = dec_len;
372 }
373
374 php_stream_write(php_http_message_body_stream(&(*message)->body), str, len);
375
376 /* keep track */
377 MAKE_STD_ZVAL(zcl);
378 ZVAL_LONG(zcl, php_http_message_body_size(&(*message)->body));
379 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL);
380 }
381
382 if (cut) {
383 php_http_buffer_cut(buffer, 0, cut);
384 }
385
386 if (str != buffer->data) {
387 STR_FREE(str);
388 }
389
390 str = NULL;
391 len = 0;
392 cut = 0;
393 break;
394 }
395
396 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
397 {
398 str = buffer->data;
399 len = buffer->used;
400 cut = len;
401
402 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
403 break;
404 }
405
406 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
407 {
408 len = MIN(parser->body_length, buffer->used);
409 str = buffer->data;
410 cut = len;
411
412 parser->body_length -= len;
413
414 php_http_message_parser_state_push(parser, 2, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
415 break;
416 }
417
418 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
419 {
420 /*
421 * - pass available data through the dechunk stream
422 * - pass decoded data along
423 * - if stream zeroed:
424 * Y: - cut processed string out of buffer, but leave length of unprocessed dechunk stream data untouched
425 * - body done
426 * N: - parse ahaed
427 */
428 char *dec_str = NULL;
429 size_t dec_len;
430
431 if (SUCCESS != php_http_encoding_stream_update(parser->dechunk, buffer->data, buffer->used, &dec_str, &dec_len)) {
432 return FAILURE;
433 }
434
435 str = dec_str;
436 len = dec_len;
437
438 if (php_http_encoding_stream_done(parser->dechunk)) {
439 cut = buffer->used - PHP_HTTP_BUFFER_LEN(parser->dechunk->ctx);
440 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
441 } else {
442 cut = buffer->used;
443 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
444 }
445 break;
446 }
447
448 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
449 {
450 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
451
452 if (parser->dechunk) {
453 char *dec_str = NULL;
454 size_t dec_len;
455
456 if (SUCCESS != php_http_encoding_stream_finish(parser->dechunk, &dec_str, &dec_len)) {
457 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
458 }
459 php_http_encoding_stream_dtor(parser->dechunk);
460
461 if (dec_str && dec_len) {
462 str = dec_str;
463 len = dec_len;
464 cut = 0;
465 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
466 }
467 }
468
469 break;
470 }
471
472 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: {
473 char *ptr = buffer->data;
474
475 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
476 ++ptr;
477 }
478
479 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
480 break;
481 }
482 }
483 }
484
485 return php_http_message_parser_state_is(parser);
486 }
487
488 /*
489 * Local variables:
490 * tab-width: 4
491 * c-basic-offset: 4
492 * End:
493 * vim600: noet sw=4 ts=4 fdm=marker
494 * vim<600: noet sw=4 ts=4
495 */
496