use the new params parser
[m6w6/ext-http] / php_http_message_parser.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2011, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 #include "php_http.h"
14
15 typedef struct php_http_message_parser_state_spec {
16 php_http_message_parser_state_t state;
17 unsigned need_data:1;
18 } php_http_message_parser_state_spec_t;
19
20 static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = {
21 {PHP_HTTP_MESSAGE_PARSER_STATE_START, 1},
22 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 1},
23 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE, 0},
24 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY, 0},
25 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, 1},
26 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, 1},
27 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, 1},
28 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, 0},
29 {PHP_HTTP_MESSAGE_PARSER_STATE_DONE, 0}
30 };
31
32 PHP_HTTP_API php_http_message_parser_t *php_http_message_parser_init(php_http_message_parser_t *parser TSRMLS_DC)
33 {
34 if (!parser) {
35 parser = emalloc(sizeof(*parser));
36 }
37 memset(parser, 0, sizeof(*parser));
38
39 TSRMLS_SET_CTX(parser->ts);
40
41 php_http_header_parser_init(&parser->header TSRMLS_CC);
42 zend_stack_init(&parser->stack);
43
44 return parser;
45 }
46
47 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_push(php_http_message_parser_t *parser, unsigned argc, ...)
48 {
49 va_list va_args;
50 unsigned i;
51 va_start(va_args, argc);
52 php_http_message_parser_state_t state;
53
54 for (i = 0; i < argc; ++i) {
55 state = va_arg(va_args, php_http_message_parser_state_t);
56 zend_stack_push(&parser->stack, &state, sizeof(state));
57 }
58 va_end(va_args);
59
60 return state;
61 }
62
63 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_is(php_http_message_parser_t *parser)
64 {
65 php_http_message_parser_state_t *state;
66
67 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state)) {
68 return *state;
69 }
70 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
71 }
72
73 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_pop(php_http_message_parser_t *parser)
74 {
75 php_http_message_parser_state_t state, *state_ptr;
76 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state_ptr)) {
77 state = *state_ptr;
78 zend_stack_del_top(&parser->stack);
79 return state;
80 }
81 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
82 }
83
84 PHP_HTTP_API void php_http_message_parser_dtor(php_http_message_parser_t *parser)
85 {
86 php_http_header_parser_dtor(&parser->header);
87 zend_stack_destroy(&parser->stack);
88 if (parser->dechunk) {
89 php_http_encoding_stream_free(&parser->dechunk);
90 }
91 if (parser->inflate) {
92 php_http_encoding_stream_free(&parser->inflate);
93 }
94 }
95
96 PHP_HTTP_API void php_http_message_parser_free(php_http_message_parser_t **parser)
97 {
98 if (*parser) {
99 php_http_message_parser_dtor(*parser);
100 efree(*parser);
101 *parser = NULL;
102 }
103 }
104
105
106 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, php_http_message_t **message)
107 {
108 TSRMLS_FETCH_FROM_CTX(parser->ts);
109 char *str = NULL;
110 size_t len = 0;
111 size_t cut = 0;
112
113 while (buffer->used || !php_http_message_parser_states[php_http_message_parser_state_is(parser)].need_data) {
114 #if 0
115 const char *state[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "DONE"};
116 fprintf(stderr, "#MP: %s (%d)\n", php_http_message_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_message_parser_state_is(parser)], (*message)->type);
117 _dpf(0, buffer->data, buffer->used);
118 #endif
119
120 switch (php_http_message_parser_state_pop(parser))
121 {
122 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
123 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
124
125 case PHP_HTTP_MESSAGE_PARSER_STATE_START:
126 {
127 char *ptr = buffer->data;
128
129 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
130 ++ptr;
131 }
132
133 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
134
135 if (buffer->used) {
136 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
137 }
138 break;
139 }
140
141 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
142 {
143 unsigned header_parser_flags = (flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP) ? PHP_HTTP_HEADER_PARSER_CLEANUP : 0;
144
145 switch (php_http_header_parser_parse(&parser->header, buffer, header_parser_flags, &(*message)->hdrs, (php_http_info_callback_t) php_http_message_info_callback, message)) {
146 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
147 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;
148
149 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
150 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
151 break;
152
153 default:
154 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
155 if (buffer->used) {
156 return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER;
157 }
158 }
159 break;
160 }
161
162 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
163 {
164 zval *h, *h_loc = NULL, *h_con = NULL, **h_cl = NULL, **h_cr = NULL, **h_te = NULL;
165
166 if ((h = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding"), 1))) {
167 zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), &h, sizeof(zval *), (void *) &h_te);
168 zend_hash_del(&(*message)->hdrs, "Transfer-Encoding", sizeof("Transfer-Encoding"));
169 }
170 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) {
171 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), &h, sizeof(zval *), (void *) &h_cl);
172 }
173 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Range"), 1))) {
174 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), &h, sizeof(zval *), (void *) &h_cr);
175 zend_hash_del(&(*message)->hdrs, "Content-Range", sizeof("Content-Range"));
176 }
177
178 /* default */
179 MAKE_STD_ZVAL(h);
180 ZVAL_LONG(h, 0);
181 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &h, sizeof(zval *), NULL);
182
183 /* so, if curl sees a 3xx code, a Location header and a Connection:close header
184 * it decides not to read the response body.
185 */
186 if ((flags & PHP_HTTP_MESSAGE_PARSER_EMPTY_REDIRECTS)
187 && (*message)->type == PHP_HTTP_RESPONSE
188 && (*message)->http.info.response.code/100 == 3
189 && (h_loc = php_http_message_header(*message, ZEND_STRL("Location"), 1))
190 && (h_con = php_http_message_header(*message, ZEND_STRL("Connection"), 1))
191 ) {
192 if (php_http_match(Z_STRVAL_P(h_con), "close", PHP_HTTP_MATCH_WORD)) {
193 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
194 zval_ptr_dtor(&h_loc);
195 zval_ptr_dtor(&h_con);
196 break;
197 }
198 }
199 if (h_loc) {
200 zval_ptr_dtor(&h_loc);
201 }
202 if (h_con) {
203 zval_ptr_dtor(&h_con);
204 }
205
206 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Encoding"), 1))) {
207 if (php_http_match(Z_STRVAL_P(h), "gzip", PHP_HTTP_MATCH_WORD)
208 || php_http_match(Z_STRVAL_P(h), "x-gzip", PHP_HTTP_MATCH_WORD)
209 || php_http_match(Z_STRVAL_P(h), "deflate", PHP_HTTP_MATCH_WORD)
210 ) {
211 if (parser->inflate) {
212 php_http_encoding_stream_reset(&parser->inflate);
213 } else {
214 parser->inflate = php_http_encoding_stream_init(NULL, php_http_encoding_stream_get_inflate_ops(), 0 TSRMLS_CC);
215 }
216 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Encoding", sizeof("X-Original-Content-Encoding"), &h, sizeof(zval *), NULL);
217 zend_hash_del(&(*message)->hdrs, "Content-Encoding", sizeof("Content-Encoding"));
218 } else {
219 zval_ptr_dtor(&h);
220 }
221 }
222
223 if ((flags & PHP_HTTP_MESSAGE_PARSER_DUMB_BODIES)) {
224 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
225 } else {
226 if (h_te) {
227 if (strstr(Z_STRVAL_PP(h_te), "chunked")) {
228 parser->dechunk = php_http_encoding_stream_init(parser->dechunk, php_http_encoding_stream_get_dechunk_ops(), 0 TSRMLS_CC);
229 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED);
230 break;
231 }
232 }
233
234 if (h_cl) {
235 char *stop;
236
237 parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10);
238
239 if (stop != Z_STRVAL_PP(h_cl)) {
240 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
241 break;
242 }
243 }
244
245 if (h_cr) {
246 ulong total = 0, start = 0, end = 0;
247
248 if (!strncasecmp(Z_STRVAL_PP(h_cr), "bytes", lenof("bytes"))
249 && ( Z_STRVAL_P(h)[lenof("bytes")] == ':'
250 || Z_STRVAL_P(h)[lenof("bytes")] == ' '
251 || Z_STRVAL_P(h)[lenof("bytes")] == '='
252 )
253 ) {
254 char *total_at = NULL, *end_at = NULL;
255 char *start_at = Z_STRVAL_PP(h_cr) + sizeof("bytes");
256
257 start = strtoul(start_at, &end_at, 10);
258 if (end_at) {
259 end = strtoul(end_at + 1, &total_at, 10);
260 if (total_at && strncmp(total_at + 1, "*", 1)) {
261 total = strtoul(total_at + 1, NULL, 10);
262 }
263
264 if (end >= start && (!total || end < total)) {
265 parser->body_length = end + 1 - start;
266 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
267 break;
268 }
269 }
270 }
271 }
272
273
274 if ((*message)->type == PHP_HTTP_REQUEST) {
275 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
276 } else {
277 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
278 }
279 }
280 break;
281 }
282
283 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
284 {
285 if (len) {
286 zval *zcl;
287
288 if (parser->inflate) {
289 char *dec_str = NULL;
290 size_t dec_len;
291
292 if (SUCCESS != php_http_encoding_stream_update(parser->inflate, str, len, &dec_str, &dec_len)) {
293 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
294 }
295
296 if (str != buffer->data) {
297 STR_FREE(str);
298 }
299 str = dec_str;
300 len = dec_len;
301 }
302
303 php_stream_write(php_http_message_body_stream(&(*message)->body), str, len);
304
305 /* keep track */
306 MAKE_STD_ZVAL(zcl);
307 ZVAL_LONG(zcl, php_http_message_body_size(&(*message)->body));
308 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL);
309 }
310
311 if (cut) {
312 php_http_buffer_cut(buffer, 0, cut);
313 }
314
315 if (str != buffer->data) {
316 STR_FREE(str);
317 }
318
319 str = NULL;
320 len = 0;
321 cut = 0;
322 break;
323 }
324
325 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
326 {
327 str = buffer->data;
328 len = buffer->used;
329 cut = len;
330
331 php_http_message_parser_state_push(parser, 2, !buffer->used?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
332 break;
333 }
334
335 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
336 {
337 len = MIN(parser->body_length, buffer->used);
338 str = buffer->data;
339 cut = len;
340
341 parser->body_length -= len;
342
343 php_http_message_parser_state_push(parser, 2, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
344 break;
345 }
346
347 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
348 {
349 /*
350 * - pass available data through the dechunk stream
351 * - pass decoded data along
352 * - if stream zeroed:
353 * Y: - cut processed string out of buffer, but leave length of unprocessed dechunk stream data untouched
354 * - body done
355 * N: - parse ahaed
356 */
357 char *dec_str = NULL;
358 size_t dec_len;
359
360 if (SUCCESS != php_http_encoding_stream_update(parser->dechunk, buffer->data, buffer->used, &dec_str, &dec_len)) {
361 return FAILURE;
362 }
363
364 str = dec_str;
365 len = dec_len;
366
367 if (php_http_encoding_stream_done(parser->dechunk)) {
368 cut = buffer->used - PHP_HTTP_BUFFER_LEN(parser->dechunk->ctx);
369 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
370 } else {
371 cut = buffer->used;
372 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
373 }
374 break;
375 }
376
377 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
378 {
379 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
380
381 if (parser->dechunk) {
382 char *dec_str = NULL;
383 size_t dec_len;
384
385 if (SUCCESS != php_http_encoding_stream_finish(parser->dechunk, &dec_str, &dec_len)) {
386 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
387 }
388 php_http_encoding_stream_dtor(parser->dechunk);
389
390 if (dec_str && dec_len) {
391 str = dec_str;
392 len = dec_len;
393 cut = 0;
394 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
395 }
396 }
397
398 break;
399 }
400
401 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: {
402 char *ptr = buffer->data;
403
404 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
405 ++ptr;
406 }
407
408 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
409 break;
410 }
411 }
412 }
413
414 return php_http_message_parser_state_is(parser);
415 }
416
417 /*
418 * Local variables:
419 * tab-width: 4
420 * c-basic-offset: 4
421 * End:
422 * vim600: noet sw=4 ts=4 fdm=marker
423 * vim<600: noet sw=4 ts=4
424 */
425