* if curl sees a 3xx code, a Location header and a Connection:close header it decides...
[m6w6/ext-http] / php_http_message_parser.c
1 #include "php_http.h"
2
3 typedef struct php_http_message_parser_state_spec {
4 php_http_message_parser_state_t state;
5 unsigned need_data:1;
6 } php_http_message_parser_state_spec_t;
7
8 static const php_http_message_parser_state_spec_t php_http_message_parser_states[] = {
9 {PHP_HTTP_MESSAGE_PARSER_STATE_START, 1},
10 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER, 1},
11 {PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE, 0},
12 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY, 0},
13 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, 1},
14 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, 1},
15 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, 1},
16 {PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, 0},
17 {PHP_HTTP_MESSAGE_PARSER_STATE_DONE, 0}
18 };
19
20 PHP_HTTP_API php_http_message_parser_t *php_http_message_parser_init(php_http_message_parser_t *parser TSRMLS_DC)
21 {
22 if (!parser) {
23 parser = emalloc(sizeof(*parser));
24 }
25 memset(parser, 0, sizeof(*parser));
26
27 TSRMLS_SET_CTX(parser->ts);
28
29 php_http_header_parser_init(&parser->header TSRMLS_CC);
30 zend_stack_init(&parser->stack);
31
32 return parser;
33 }
34
35 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_push(php_http_message_parser_t *parser, unsigned argc, ...)
36 {
37 va_list va_args;
38 unsigned i;
39 va_start(va_args, argc);
40 php_http_message_parser_state_t state;
41
42 for (i = 0; i < argc; ++i) {
43 state = va_arg(va_args, php_http_message_parser_state_t);
44 zend_stack_push(&parser->stack, &state, sizeof(state));
45 }
46 va_end(va_args);
47
48 return state;
49 }
50
51 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_is(php_http_message_parser_t *parser)
52 {
53 php_http_message_parser_state_t *state;
54
55 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state)) {
56 return *state;
57 }
58 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
59 }
60
61 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_state_pop(php_http_message_parser_t *parser)
62 {
63 php_http_message_parser_state_t state, *state_ptr;
64 if (SUCCESS == zend_stack_top(&parser->stack, (void *) &state_ptr)) {
65 state = *state_ptr;
66 zend_stack_del_top(&parser->stack);
67 return state;
68 }
69 return PHP_HTTP_MESSAGE_PARSER_STATE_START;
70 }
71
72 PHP_HTTP_API void php_http_message_parser_dtor(php_http_message_parser_t *parser)
73 {
74 php_http_header_parser_dtor(&parser->header);
75 zend_stack_destroy(&parser->stack);
76 if (parser->dechunk) {
77 php_http_encoding_stream_free(&parser->dechunk TSRMLS_CC);
78 }
79 if (parser->inflate) {
80 php_http_encoding_stream_free(&parser->inflate TSRMLS_CC);
81 }
82 }
83
84 PHP_HTTP_API void php_http_message_parser_free(php_http_message_parser_t **parser)
85 {
86 if (*parser) {
87 php_http_message_parser_dtor(*parser);
88 efree(*parser);
89 *parser = NULL;
90 }
91 }
92
93
94 PHP_HTTP_API php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, php_http_message_t **message)
95 {
96 TSRMLS_FETCH_FROM_CTX(parser->ts);
97 char *str = NULL;
98 size_t len = 0;
99 size_t cut = 0;
100
101 while (buffer->used || !php_http_message_parser_states[php_http_message_parser_state_is(parser)].need_data) {
102 #if 0
103 const char *state[] = {"START", "HEADER", "HEADER_DONE", "BODY", "BODY_DUMB", "BODY_LENGTH", "BODY_CHUNK", "BODY_DONE", "DONE"};
104 fprintf(stderr, "#MP: %s (%d) %.*s…\n",
105 state[php_http_message_parser_state_is(parser)], (*message)->type, MIN(32, buffer->used), buffer->data);
106 #endif
107
108 switch (php_http_message_parser_state_pop(parser))
109 {
110 case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
111 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
112
113 case PHP_HTTP_MESSAGE_PARSER_STATE_START:
114 {
115 char *ptr = buffer->data;
116
117 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
118 ++ptr;
119 }
120
121 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
122
123 if (buffer->used) {
124 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
125 }
126 break;
127 }
128
129 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
130 {
131 unsigned header_parser_flags = (flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP) ? PHP_HTTP_HEADER_PARSER_CLEANUP : 0;
132
133 switch (php_http_header_parser_parse(&parser->header, buffer, header_parser_flags, &(*message)->hdrs, (php_http_info_callback_t) php_http_message_info_callback, message)) {
134 case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
135 return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;
136
137 case PHP_HTTP_HEADER_PARSER_STATE_DONE:
138 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
139 break;
140
141 default:
142 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
143 if (buffer->used) {
144 return PHP_HTTP_MESSAGE_PARSER_STATE_HEADER;
145 }
146 }
147 break;
148 }
149
150 case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
151 {
152 zval *h, *h_loc = NULL, *h_con = NULL, **h_cl = NULL, **h_cr = NULL, **h_te = NULL;
153
154 if ((h = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding"), 1))) {
155 zend_hash_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", sizeof("X-Original-Transfer-Encoding"), &h, sizeof(zval *), (void *) &h_te);
156 zend_hash_del(&(*message)->hdrs, "Transfer-Encoding", sizeof("Transfer-Encoding"));
157 }
158 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Length"), 1))) {
159 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Length", sizeof("X-Original-Content-Length"), &h, sizeof(zval *), (void *) &h_cl);
160 }
161 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Range"), 1))) {
162 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Range", sizeof("X-Original-Content-Range"), &h, sizeof(zval *), (void *) &h_cr);
163 zend_hash_del(&(*message)->hdrs, "Content-Range", sizeof("Content-Range"));
164 }
165
166 if ((h = php_http_message_header(*message, ZEND_STRL("Content-Encoding"), 1))) {
167 if (strstr(Z_STRVAL_P(h), "gzip") || strstr(Z_STRVAL_P(h), "x-gzip") || strstr(Z_STRVAL_P(h), "deflate")) {
168 if (parser->inflate) {
169 php_http_encoding_stream_reset(&parser->inflate);
170 } else {
171 parser->inflate = php_http_encoding_stream_init(NULL, php_http_encoding_stream_get_inflate_ops(), 0 TSRMLS_CC);
172 }
173 zend_hash_update(&(*message)->hdrs, "X-Original-Content-Encoding", sizeof("X-Original-Content-Encoding"), &h, sizeof(zval *), NULL);
174 zend_hash_del(&(*message)->hdrs, "Content-Encoding", sizeof("Content-Encoding"));
175 } else {
176 zval_ptr_dtor(&h);
177 }
178 }
179
180 /* default */
181 MAKE_STD_ZVAL(h);
182 ZVAL_LONG(h, 0);
183 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &h, sizeof(zval *), NULL);
184
185 /* so, if curl sees a 3xx code, a Location header and a Connection:close header
186 * it decides not to read the response body.
187 */
188 if ((flags & PHP_HTTP_MESSAGE_PARSER_EMPTY_REDIRECTS)
189 && (*message)->type == PHP_HTTP_RESPONSE
190 && (*message)->http.info.response.code/100 == 3
191 && (h_loc = php_http_message_header(*message, ZEND_STRL("Location"), 1))
192 && (h_con = php_http_message_header(*message, ZEND_STRL("Connection"), 1))
193 ) {
194 if (php_http_match(Z_STRVAL_P(h_con), "close", PHP_HTTP_MATCH_WORD)) {
195 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
196 zval_ptr_dtor(&h_loc);
197 zval_ptr_dtor(&h_con);
198 break;
199 }
200 }
201 if (h_loc) {
202 zval_ptr_dtor(&h_loc);
203 }
204 if (h_con) {
205 zval_ptr_dtor(&h_con);
206 }
207
208 if (h_te) {
209 if (strstr(Z_STRVAL_PP(h_te), "chunked")) {
210 parser->dechunk = php_http_encoding_stream_init(parser->dechunk, php_http_encoding_stream_get_dechunk_ops(), 0 TSRMLS_CC);
211 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED);
212 break;
213 }
214 }
215
216 if (h_cl) {
217 char *stop;
218
219 parser->body_length = strtoul(Z_STRVAL_PP(h_cl), &stop, 10);
220
221 if (stop != Z_STRVAL_PP(h_cl)) {
222 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
223 break;
224 }
225 }
226
227 if (h_cr) {
228 ulong total = 0, start = 0, end = 0;
229
230 if (!strncasecmp(Z_STRVAL_PP(h_cr), "bytes", lenof("bytes"))
231 && ( Z_STRVAL_P(h)[lenof("bytes")] == ':'
232 || Z_STRVAL_P(h)[lenof("bytes")] == ' '
233 || Z_STRVAL_P(h)[lenof("bytes")] == '='
234 )
235 ) {
236 char *total_at = NULL, *end_at = NULL;
237 char *start_at = Z_STRVAL_PP(h_cr) + sizeof("bytes");
238
239 start = strtoul(start_at, &end_at, 10);
240 if (end_at) {
241 end = strtoul(end_at + 1, &total_at, 10);
242 if (total_at && strncmp(total_at + 1, "*", 1)) {
243 total = strtoul(total_at + 1, NULL, 10);
244 }
245
246 if (end >= start && (!total || end < total)) {
247 parser->body_length = end + 1 - start;
248 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
249 break;
250 }
251 }
252 }
253 }
254
255
256 if ((*message)->type == PHP_HTTP_REQUEST) {
257 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
258 } else {
259 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
260 }
261 break;
262 }
263
264 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
265 {
266 if (len) {
267 zval *zcl;
268
269 if (parser->inflate) {
270 char *dec_str = NULL;
271 size_t dec_len;
272
273 if (SUCCESS != php_http_encoding_stream_update(parser->inflate, str, len, &dec_str, &dec_len TSRMLS_CC)) {
274 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
275 }
276
277 if (str != buffer->data) {
278 STR_FREE(str);
279 }
280 str = dec_str;
281 len = dec_len;
282 }
283
284 php_stream_write(php_http_message_body_stream(&(*message)->body), str, len);
285
286 /* keep track */
287 MAKE_STD_ZVAL(zcl);
288 ZVAL_LONG(zcl, php_http_message_body_size(&(*message)->body));
289 zend_hash_update(&(*message)->hdrs, "Content-Length", sizeof("Content-Length"), &zcl, sizeof(zval *), NULL);
290 }
291
292 if (cut) {
293 php_http_buffer_cut(buffer, 0, cut);
294 }
295
296 if (str != buffer->data) {
297 STR_FREE(str);
298 }
299
300 str = NULL;
301 len = 0;
302 cut = 0;
303 break;
304 }
305
306 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
307 {
308 str = buffer->data;
309 len = buffer->used;
310 cut = len;
311
312 php_http_message_parser_state_push(parser, 2, !buffer->used?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
313 break;
314 }
315
316 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
317 {
318 len = MIN(parser->body_length, buffer->used);
319 str = buffer->data;
320 cut = len;
321
322 parser->body_length -= len;
323
324 php_http_message_parser_state_push(parser, 2, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
325 break;
326 }
327
328 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
329 {
330 /*
331 * - pass available data through the dechunk stream
332 * - pass decoded data along
333 * - if stream zeroed:
334 * Y: - cut processed string out of buffer, but leave length of unprocessed dechunk stream data untouched
335 * - body done
336 * N: - parse ahaed
337 */
338 char *dec_str = NULL;
339 size_t dec_len;
340
341 if (SUCCESS != php_http_encoding_stream_update(parser->dechunk, buffer->data, buffer->used, &dec_str, &dec_len TSRMLS_CC)) {
342 return FAILURE;
343 }
344
345 str = dec_str;
346 len = dec_len;
347
348 if (php_http_encoding_stream_done(parser->dechunk)) {
349 cut = buffer->used - PHP_HTTP_BUFFER_LEN(parser->dechunk->ctx);
350 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
351 } else {
352 cut = buffer->used;
353 php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
354 }
355 break;
356 }
357
358 case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
359 {
360 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
361
362 if (parser->dechunk) {
363 char *dec_str = NULL;
364 size_t dec_len;
365
366 if (SUCCESS != php_http_encoding_stream_finish(parser->dechunk, &dec_str, &dec_len TSRMLS_CC)) {
367 return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
368 }
369 php_http_encoding_stream_dtor(parser->dechunk);
370
371 if (dec_str && dec_len) {
372 str = dec_str;
373 len = dec_len;
374 cut = 0;
375 php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
376 }
377 }
378
379 break;
380 }
381
382 case PHP_HTTP_MESSAGE_PARSER_STATE_DONE: {
383 char *ptr = buffer->data;
384
385 while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
386 ++ptr;
387 }
388
389 php_http_buffer_cut(buffer, 0, ptr - buffer->data);
390 break;
391 }
392 }
393 }
394
395 return php_http_message_parser_state_is(parser);
396 }