push a load of changes before holidays
[m6w6/ext-http] / php_http_message_parser.c
1 #include "php_http.h"
2
3 typedef struct php_http_message_parser_state_spec {
4 php_http_message_parser_state_t state;
5 unsigned need_data:1;
6 } php_http_message_parser_state_spec_t;
7
8 static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = {
9 {PHP_HTTP_MESSAGE_PARSER_STATE_START, 1},
10 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 1},
11 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE, 0},
12 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY, 0},
13 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, 1},
14 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, 1},
15 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, 1},
16 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, 0},
17 {PHP_HTTP_MESSAGE_PARSER_STATE_DONE, 0}
18 };
19
20 PHP_HTTP_API php_http_message_parser_t *php_http_message_parser_init(php_http_message_parser_t *parser TSRMLS_DC)
21 {
22 if (!parser) {
23 parser = emalloc(sizeof(*parser));
24 }
25 memset(parser, 0, sizeof(*parser));
26
27 TSRMLS_SET_CTX(parser->ts);
28
29 php_http_header_parser_init(&parser->header TSRMLS_CC);
30 zend_stack_init(&parser->stack);
31
32 return parser;
33 }
34
35 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_push(php_http_message_parser_t *parser, unsigned argc, ...)
36 {
37 va_list va_args;
38 unsigned i;
39 va_start(va_args, argc);
40 php_http_message_parser_state_t state;
41
42 for (i = 0; i < argc; ++i) {
43 state = va_arg(va_args, php_http_message_parser_state_t);
44 zend_stack_push(&parser->stack, &state, sizeof(state));
45 }
46 va_end(va_args);
47
48 return state;
49 }
50
51 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_is(php_http_message_parser_t *parser)
52 {
53 php_http_message_parser_state_t *state;
54
55 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state)) {
56 return *state;
57 }
58 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
59 }
60
61 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_pop(php_http_message_parser_t *parser)
62 {
63 php_http_message_parser_state_t state, *state_ptr;
64 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state_ptr)) {
65 state = *state_ptr;
66 zend_stack_del_top(&parser->stack);
67 return state;
68 }
69 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
70 }
71
72 PHP_HTTP_API void php_http_message_parser_dtor(php_http_message_parser_t *parser)
73 {
74 php_http_header_parser_dtor(&parser->header);
75 zend_stack_destroy(&parser->stack);
76 if (parser->dechunk) {
77 php_http_encoding_stream_free(&parser->dechunk TSRMLS_CC);
78 }
79 if (parser->inflate) {
80 php_http_encoding_stream_free(&parser->inflate TSRMLS_CC);
81 }
82 }
83
84 PHP_HTTP_API void php_http_message_parser_free(php_http_message_parser_t **parser)
85 {
86 if (*parser) {
87 php_http_message_parser_dtor(*parser);
88 efree(*parser);
89 *parser = NULL;
90 }
91 }
92
93
94 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, php_http_message_t **message)
95 {
96 TSRMLS_FETCH_FROM_CTX(parser->ts);
97 char *str = NULL;
98 size_t len = 0;
99 size_t cut = 0;
100
101 while (buffer->used || !php_http_message_parser_states[php_http_message_parser_state_is(parser)].need_data) {
102 #if 0
103 const char *state[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "DONE"};
104 fprintf(stderr, "#MP: %s (%d)\n", php_http_message_parser_state_is(parser) < 0 ? "FAILURE" : state[php_http_message_parser_state_is(parser)], (*message)->type);
105 _dpf(0, buffer->data, buffer->used);
106 #endif
107
108 switch (php_http_message_parser_state_pop(parser))
109 {
110 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
111 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
112
113 case PHP_HTTP_MESSAGE_PARSER_STATE_START:
114 {
115 char *ptr = buffer->data;
116
117 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
118 ++ptr;
119 }
120
121 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
122
123 if (buffer->used) {
124 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
125 }
126 break;
127 }
128
129 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
130 {
131 unsigned header_parser_flags = (flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP) ? PHP_HTTP_HEADER_PARSER_CLEANUP : 0;
132
133 switch (php_http_header_parser_parse(&parser->header, buffer, header_parser_flags, &(*message)->hdrs, (php_http_info_callback_t) php_http_message_info_callback, message)) {
134 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
135 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;
136
137 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
138 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
139 break;
140
141 default:
142 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
143 if (buffer->used) {
144 return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER;
145 }
146 }
147 break;
148 }
149
150 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
151 {
152 zval *h, *h_loc = NULL, *h_con = NULL, **h_cl = NULL, **h_cr = NULL, **h_te = NULL;
153
154 if ((h = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding"), 1))) {
155 zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), &h, sizeof(zval *), (void *) &h_te);
156 zend_hash_del(&(*message)->hdrs, "Transfer-Encoding", sizeof("Transfer-Encoding"));
157 }
158 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) {
159 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), &h, sizeof(zval *), (void *) &h_cl);
160 }
161 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Range"), 1))) {
162 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), &h, sizeof(zval *), (void *) &h_cr);
163 zend_hash_del(&(*message)->hdrs, "Content-Range", sizeof("Content-Range"));
164 }
165
166 /* default */
167 MAKE_STD_ZVAL(h);
168 ZVAL_LONG(h, 0);
169 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &h, sizeof(zval *), NULL);
170
171 /* so, if curl sees a 3xx code, a Location header and a Connection:close header
172 * it decides not to read the response body.
173 */
174 if ((flags & PHP_HTTP_MESSAGE_PARSER_EMPTY_REDIRECTS)
175 && (*message)->type == PHP_HTTP_RESPONSE
176 && (*message)->http.info.response.code/100 == 3
177 && (h_loc = php_http_message_header(*message, ZEND_STRL("Location"), 1))
178 && (h_con = php_http_message_header(*message, ZEND_STRL("Connection"), 1))
179 ) {
180 if (php_http_match(Z_STRVAL_P(h_con), "close", PHP_HTTP_MATCH_WORD)) {
181 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
182 zval_ptr_dtor(&h_loc);
183 zval_ptr_dtor(&h_con);
184 break;
185 }
186 }
187 if (h_loc) {
188 zval_ptr_dtor(&h_loc);
189 }
190 if (h_con) {
191 zval_ptr_dtor(&h_con);
192 }
193
194 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Encoding"), 1))) {
195 if (php_http_match(Z_STRVAL_P(h), "gzip", PHP_HTTP_MATCH_WORD)
196 || php_http_match(Z_STRVAL_P(h), "x-gzip", PHP_HTTP_MATCH_WORD)
197 || php_http_match(Z_STRVAL_P(h), "deflate", PHP_HTTP_MATCH_WORD)
198 ) {
199 if (parser->inflate) {
200 php_http_encoding_stream_reset(&parser->inflate);
201 } else {
202 parser->inflate = php_http_encoding_stream_init(NULL, php_http_encoding_stream_get_inflate_ops(), 0 TSRMLS_CC);
203 }
204 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Encoding", sizeof("X-Original-Content-Encoding"), &h, sizeof(zval *), NULL);
205 zend_hash_del(&(*message)->hdrs, "Content-Encoding", sizeof("Content-Encoding"));
206 } else {
207 zval_ptr_dtor(&h);
208 }
209 }
210
211 if ((flags & PHP_HTTP_MESSAGE_PARSER_DUMB_BODIES)) {
212 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
213 } else {
214 if (h_te) {
215 if (strstr(Z_STRVAL_PP(h_te), "chunked")) {
216 parser->dechunk = php_http_encoding_stream_init(parser->dechunk, php_http_encoding_stream_get_dechunk_ops(), 0 TSRMLS_CC);
217 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED);
218 break;
219 }
220 }
221
222 if (h_cl) {
223 char *stop;
224
225 parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10);
226
227 if (stop != Z_STRVAL_PP(h_cl)) {
228 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
229 break;
230 }
231 }
232
233 if (h_cr) {
234 ulong total = 0, start = 0, end = 0;
235
236 if (!strncasecmp(Z_STRVAL_PP(h_cr), "bytes", lenof("bytes"))
237 && ( Z_STRVAL_P(h)[lenof("bytes")] == ':'
238 || Z_STRVAL_P(h)[lenof("bytes")] == ' '
239 || Z_STRVAL_P(h)[lenof("bytes")] == '='
240 )
241 ) {
242 char *total_at = NULL, *end_at = NULL;
243 char *start_at = Z_STRVAL_PP(h_cr) + sizeof("bytes");
244
245 start = strtoul(start_at, &end_at, 10);
246 if (end_at) {
247 end = strtoul(end_at + 1, &total_at, 10);
248 if (total_at && strncmp(total_at + 1, "*", 1)) {
249 total = strtoul(total_at + 1, NULL, 10);
250 }
251
252 if (end >= start && (!total || end < total)) {
253 parser->body_length = end + 1 - start;
254 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
255 break;
256 }
257 }
258 }
259 }
260
261
262 if ((*message)->type == PHP_HTTP_REQUEST) {
263 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
264 } else {
265 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
266 }
267 }
268 break;
269 }
270
271 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
272 {
273 if (len) {
274 zval *zcl;
275
276 if (parser->inflate) {
277 char *dec_str = NULL;
278 size_t dec_len;
279
280 if (SUCCESS != php_http_encoding_stream_update(parser->inflate, str, len, &dec_str, &dec_len TSRMLS_CC)) {
281 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
282 }
283
284 if (str != buffer->data) {
285 STR_FREE(str);
286 }
287 str = dec_str;
288 len = dec_len;
289 }
290
291 php_stream_write(php_http_message_body_stream(&(*message)->body), str, len);
292
293 /* keep track */
294 MAKE_STD_ZVAL(zcl);
295 ZVAL_LONG(zcl, php_http_message_body_size(&(*message)->body));
296 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL);
297 }
298
299 if (cut) {
300 php_http_buffer_cut(buffer, 0, cut);
301 }
302
303 if (str != buffer->data) {
304 STR_FREE(str);
305 }
306
307 str = NULL;
308 len = 0;
309 cut = 0;
310 break;
311 }
312
313 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
314 {
315 str = buffer->data;
316 len = buffer->used;
317 cut = len;
318
319 php_http_message_parser_state_push(parser, 2, !buffer->used?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
320 break;
321 }
322
323 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
324 {
325 len = MIN(parser->body_length, buffer->used);
326 str = buffer->data;
327 cut = len;
328
329 parser->body_length -= len;
330
331 php_http_message_parser_state_push(parser, 2, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
332 break;
333 }
334
335 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
336 {
337 /*
338 * - pass available data through the dechunk stream
339 * - pass decoded data along
340 * - if stream zeroed:
341 * Y: - cut processed string out of buffer, but leave length of unprocessed dechunk stream data untouched
342 * - body done
343 * N: - parse ahaed
344 */
345 char *dec_str = NULL;
346 size_t dec_len;
347
348 if (SUCCESS != php_http_encoding_stream_update(parser->dechunk, buffer->data, buffer->used, &dec_str, &dec_len TSRMLS_CC)) {
349 return FAILURE;
350 }
351
352 str = dec_str;
353 len = dec_len;
354
355 if (php_http_encoding_stream_done(parser->dechunk)) {
356 cut = buffer->used - PHP_HTTP_BUFFER_LEN(parser->dechunk->ctx);
357 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
358 } else {
359 cut = buffer->used;
360 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
361 }
362 break;
363 }
364
365 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
366 {
367 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
368
369 if (parser->dechunk) {
370 char *dec_str = NULL;
371 size_t dec_len;
372
373 if (SUCCESS != php_http_encoding_stream_finish(parser->dechunk, &dec_str, &dec_len TSRMLS_CC)) {
374 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
375 }
376 php_http_encoding_stream_dtor(parser->dechunk);
377
378 if (dec_str && dec_len) {
379 str = dec_str;
380 len = dec_len;
381 cut = 0;
382 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
383 }
384 }
385
386 break;
387 }
388
389 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: {
390 char *ptr = buffer->data;
391
392 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
393 ++ptr;
394 }
395
396 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
397 break;
398 }
399 }
400 }
401
402 return php_http_message_parser_state_is(parser);
403 }