5d1a908b5bfae52bab857d5b3c67e33695cfae5d
[m6w6/ext-http] / http_encoding_api.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2005, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 /* $Id$ */
14
15 #ifdef HAVE_CONFIG_H
16 # include "config.h"
17 #endif
18 #include "php_http.h"
19
20 #include "php_http_api.h"
21 #include "php_http_encoding_api.h"
22 #include "php_http_send_api.h"
23 #include "php_http_headers_api.h"
24
25 ZEND_EXTERN_MODULE_GLOBALS(http);
26
/*
 * Tests whether *line starts with nothing but optional spaces (0x20) in
 * front of the position http_locate_eol() reports for it.
 *
 * On a match *line is moved to that position and 1 is returned; otherwise
 * *line is left untouched and 0 is returned.  eol_len is simply handed
 * through to http_locate_eol() (presumably it receives the length of the
 * line terminator -- confirm against http_locate_eol()).
 */
static inline int eol_match(char **line, int *eol_len)
{
	char *cursor;

	/* step over any run of SP characters */
	for (cursor = *line; 0x20 == *cursor; ++cursor);

	if (http_locate_eol(*line, eol_len) != cursor) {
		return 0;
	}

	*line = cursor;
	return 1;
}
40
41 /* {{{ char *http_encoding_dechunk(char *, size_t, char **, size_t *) */
42 PHP_HTTP_API const char *_http_encoding_dechunk(const char *encoded, size_t encoded_len, char **decoded, size_t *decoded_len TSRMLS_DC)
43 {
44 int eol_len = 0;
45 char *n_ptr = NULL;
46 const char *e_ptr = encoded;
47
48 *decoded_len = 0;
49 *decoded = ecalloc(1, encoded_len);
50
51 while ((encoded + encoded_len - e_ptr) > 0) {
52 ulong chunk_len = 0, rest;
53
54 chunk_len = strtoul(e_ptr, &n_ptr, 16);
55
56 /* we could not read in chunk size */
57 if (n_ptr == e_ptr) {
58 /*
59 * if this is the first turn and there doesn't seem to be a chunk
60 * size at the begining of the body, do not fail on apparently
61 * not encoded data and return a copy
62 */
63 if (e_ptr == encoded) {
64 http_error(HE_NOTICE, HTTP_E_ENCODING, "Data does not seem to be chunked encoded");
65 memcpy(*decoded, encoded, encoded_len);
66 *decoded_len = encoded_len;
67 return encoded + encoded_len;
68 } else {
69 efree(*decoded);
70 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected chunk size at pos %tu of %zu but got trash", n_ptr - encoded, encoded_len);
71 return NULL;
72 }
73 }
74
75 /* reached the end */
76 if (!chunk_len) {
77 /* move over '0' chunked encoding terminator */
78 while (*e_ptr == '0') ++e_ptr;
79 break;
80 }
81
82 /* there should be CRLF after the chunk size, but we'll ignore SP+ too */
83 if (*n_ptr && !eol_match(&n_ptr, &eol_len)) {
84 if (eol_len == 2) {
85 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected CRLF at pos %tu of %zu but got 0x%02X 0x%02X", n_ptr - encoded, encoded_len, *n_ptr, *(n_ptr + 1));
86 } else {
87 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected LF at pos %tu of %zu but got 0x%02X", n_ptr - encoded, encoded_len, *n_ptr);
88 }
89 }
90 n_ptr += eol_len;
91
92 /* chunk size pretends more data than we actually got, so it's probably a truncated message */
93 if (chunk_len > (rest = encoded + encoded_len - n_ptr)) {
94 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Truncated message: chunk size %lu exceeds remaining data size %lu at pos %tu of %zu", chunk_len, rest, n_ptr - encoded, encoded_len);
95 chunk_len = rest;
96 }
97
98 /* copy the chunk */
99 memcpy(*decoded + *decoded_len, n_ptr, chunk_len);
100 *decoded_len += chunk_len;
101
102 if (chunk_len == rest) {
103 e_ptr = n_ptr + chunk_len;
104 break;
105 } else {
106 /* advance to next chunk */
107 e_ptr = n_ptr + chunk_len + eol_len;
108 }
109 }
110
111 return e_ptr;
112 }
113 /* }}} */
114
115 #ifdef HTTP_HAVE_ZLIB
116
/*
 * Template of the fixed, 10 byte long GZIP member header (see RFC 1952).
 * It is copied verbatim in front of the deflated data; only the MTIME
 * bytes (index 4 to 7) are patched afterwards if a modification time is
 * supplied.
 */
static const char http_encoding_gzip_header[] = {
	(const char) 0x1f,			/* ID1, fixed magic value */
	(const char) 0x8b,			/* ID2, fixed magic value */
	(const char) Z_DEFLATED,	/* compression method */
	(const char) 0,				/* flags: none set, i.e. no optional header fields follow */
	(const char) 0,				/* MTIME, least significant byte first */
	(const char) 0,				/* =*= */
	(const char) 0,				/* =*= */
	(const char) 0,				/* =*= */
	(const char) 0,				/* extra flags (XFL in RFC 1952), left at 0 */
#ifdef PHP_WIN32
	(const char) 0x0b			/* OS_CODE used for Windows builds */
#else
	(const char) 0x03			/* OS_CODE used for all other builds */
#endif
};
133
134 PHP_HTTP_API STATUS _http_encoding_gzencode(int level, int mtime, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
135 {
136 z_stream Z;
137 STATUS status = Z_OK;
138
139 if (!(data && data_len)) {
140 return FAILURE;
141 }
142
143 *encoded = NULL;
144 *encoded_len = 0;
145 memset(&Z, 0, sizeof(z_stream));
146
147 Z.next_in = (Bytef *) data;
148 Z.avail_in = data_len;
149 Z.avail_out = HTTP_ENCODING_BUFLEN(data_len) + HTTP_ENCODING_SAFPAD - 1;
150
151 *encoded = emalloc(HTTP_ENCODING_BUFLEN(data_len) + sizeof(http_encoding_gzip_header) + HTTP_ENCODING_SAFPAD);
152 memcpy(*encoded, http_encoding_gzip_header, sizeof(http_encoding_gzip_header));
153
154 if (mtime) {
155 (*encoded)[4] = (char) (mtime & 0xFF);
156 (*encoded)[5] = (char) ((mtime >> 8) & 0xFF);
157 (*encoded)[6] = (char) ((mtime >> 16) & 0xFF);
158 (*encoded)[7] = (char) ((mtime >> 24) & 0xFF);
159 }
160
161 Z.next_out = (Bytef *) *encoded + sizeof(http_encoding_gzip_header);
162
163 if (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
164 status = deflate(&Z, Z_FINISH);
165 deflateEnd(&Z);
166
167 if (Z_STREAM_END == status) {
168 ulong crc;
169 char *trailer;
170
171 crc = crc32(0L, Z_NULL, 0);
172 crc = crc32(crc, (const Bytef *) data, data_len);
173
174 trailer = *encoded + sizeof(http_encoding_gzip_header) + Z.total_out;
175
176 /* LSB */
177 trailer[0] = (char) (crc & 0xFF);
178 trailer[1] = (char) ((crc >> 8) & 0xFF);
179 trailer[2] = (char) ((crc >> 16) & 0xFF);
180 trailer[3] = (char) ((crc >> 24) & 0xFF);
181 trailer[4] = (char) ((Z.total_in) & 0xFF);
182 trailer[5] = (char) ((Z.total_in >> 8) & 0xFF);
183 trailer[6] = (char) ((Z.total_in >> 16) & 0xFF);
184 trailer[7] = (char) ((Z.total_in >> 24) & 0xFF);
185
186 *encoded_len = Z.total_out + sizeof(http_encoding_gzip_header) + 8;
187 (*encoded)[*encoded_len] = '\0';
188 return SUCCESS;
189 }
190 }
191
192 STR_SET(*encoded, NULL);
193 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not gzencode data: %s", zError(status));
194 return FAILURE;
195 }
196
197 PHP_HTTP_API STATUS _http_encoding_gzdecode(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
198 {
199 const char *encoded;
200 size_t encoded_len;
201
202 if ( (data && data_len) &&
203 (SUCCESS == http_encoding_gzencode_verify(data, data_len, &encoded, &encoded_len)) &&
204 (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len))) {
205 http_encoding_gzdecode_verify(data, data_len, *decoded, *decoded_len);
206 return SUCCESS;
207 }
208
209 return FAILURE;
210 }
211
212 PHP_HTTP_API STATUS _http_encoding_deflate(int level, int zhdr, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
213 {
214 z_stream Z;
215 STATUS status = Z_OK;
216
217 *encoded = NULL;
218 *encoded_len = 0;
219 memset(&Z, 0, sizeof(z_stream));
220
221 Z.data_type = Z_UNKNOWN;
222 Z.next_in = (Bytef *) data;
223 Z.avail_in = data_len;
224 Z.avail_out = HTTP_ENCODING_BUFLEN(data_len) - 1;
225 Z.next_out = emalloc(HTTP_ENCODING_BUFLEN(data_len));
226
227 *encoded = (char *) Z.next_out;
228
229 if (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, zhdr ? MAX_WBITS : -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
230 status = deflate(&Z, Z_FINISH);
231 deflateEnd(&Z);
232
233 if (Z_STREAM_END == status) {
234 (*encoded)[*encoded_len = Z.total_out] = '\0';
235 return SUCCESS;
236 }
237 }
238
239 STR_SET(*encoded, NULL);
240 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not deflate data: %s", zError(status));
241 return FAILURE;
242 }
243
244 PHP_HTTP_API STATUS _http_encoding_inflate(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
245 {
246 int max = 0, wbits = -MAX_WBITS;
247 STATUS status;
248 z_stream Z;
249
250 *decoded = NULL;
251 *decoded_len = 0;
252 memset(&Z, 0, sizeof(z_stream));
253
254 do {
255 if (!max) {
256 *decoded_len = data_len * 2;
257 *decoded = emalloc(*decoded_len + 1);
258 } else {
259 size_t new_len = *decoded_len << 2;
260 char *new_ptr = erealloc_recoverable(*decoded, new_len + 1);
261
262 if (new_ptr) {
263 *decoded = new_ptr;
264 *decoded_len = new_len;
265 } else {
266 max = INT_MAX-1; /* avoid integer overflow on increment op */
267 }
268 }
269
270 retry_inflate:
271 Z.next_in = (Bytef *) data;
272 Z.avail_in = data_len;
273 Z.next_out = (Bytef *) *decoded;
274 Z.avail_out = *decoded_len;
275
276 if (Z_OK == (status = inflateInit2(&Z, wbits))) {
277 status = inflate(&Z, Z_FINISH);
278 inflateEnd(&Z);
279
280 /* retry if it looks like we've got a zlib header */
281 if (wbits == -MAX_WBITS && status == Z_DATA_ERROR) {
282 wbits = MAX_WBITS;
283 goto retry_inflate;
284 }
285
286 if (Z_STREAM_END == status) {
287 (*decoded)[*decoded_len = Z.total_out] = '\0';
288 return SUCCESS;
289 }
290 }
291 } while (status == Z_BUF_ERROR && ++max < HTTP_ENCODING_MAXTRY);
292
293 STR_SET(*decoded, NULL);
294 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not inflate data: %s", zError(status));
295 return FAILURE;
296 }
297
298 PHP_HTTP_API STATUS _http_encoding_gzencode_verify(const char *data, size_t data_len, const char **encoded, size_t *encoded_len, int error_level TSRMLS_DC)
299 {
300 size_t offset = sizeof(http_encoding_gzip_header);
301
302 if (data_len < offset) {
303 goto really_bad_gzip_header;
304 }
305
306 if (data[0] != (const char) 0x1F || data[1] != (const char) 0x8B) {
307 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized GZIP header start: 0x%02X 0x%02X", (int) data[0], (int) (data[1] & 0xFF));
308 return FAILURE;
309 }
310
311 if (data[2] != (const char) Z_DEFLATED) {
312 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized compression format (%d)", (int) (data[2] & 0xFF));
313 /* still try to decode */
314 }
315 if ((data[3] & 0x4) == 0x4) {
316 if (data_len < offset + 2) {
317 goto really_bad_gzip_header;
318 }
319 /* there are extra fields, the length follows the common header as 2 bytes LSB */
320 offset += (unsigned) ((data[offset] & 0xFF));
321 offset += 1;
322 offset += (unsigned) ((data[offset] & 0xFF) << 8);
323 offset += 1;
324 }
325 if ((data[3] & 0x8) == 0x8) {
326 if (data_len <= offset) {
327 goto really_bad_gzip_header;
328 }
329 /* there's a file name */
330 offset += strlen(&data[offset]) + 1 /*NUL*/;
331 }
332 if ((data[3] & 0x10) == 0x10) {
333 if (data_len <= offset) {
334 goto really_bad_gzip_header;
335 }
336 /* there's a comment */
337 offset += strlen(&data[offset]) + 1 /* NUL */;
338 }
339 if ((data[3] & 0x2) == 0x2) {
340 /* there's a CRC16 of the header */
341 offset += 2;
342 if (data_len <= offset) {
343 goto really_bad_gzip_header;
344 } else {
345 ulong crc, cmp;
346
347 cmp = (unsigned) ((data[offset-2] & 0xFF));
348 cmp += (unsigned) ((data[offset-1] & 0xFF) << 8);
349
350 crc = crc32(0L, Z_NULL, 0);
351 crc = crc32(crc, (const Bytef *) data, sizeof(http_encoding_gzip_header));
352
353 if (cmp != (crc & 0xFFFF)) {
354 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "GZIP headers CRC checksums so not match (%lu, %lu)", cmp, crc & 0xFFFF);
355 return FAILURE;
356 }
357 }
358 }
359
360 if (data_len < offset + 8) {
361 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP footer");
362 return FAILURE;
363 }
364
365 if (encoded) {
366 *encoded = data + offset;
367 }
368 if (encoded_len) {
369 *encoded_len = data_len - offset - 8 /* size of the assumed GZIP footer */;
370 }
371
372 return SUCCESS;
373
374 really_bad_gzip_header:
375 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP header");
376 return FAILURE;
377 }
378
379 PHP_HTTP_API STATUS _http_encoding_gzdecode_verify(const char *data, size_t data_len, const char *decoded, size_t decoded_len, int error_level TSRMLS_DC)
380 {
381 STATUS status = SUCCESS;
382 ulong len, cmp, crc;
383
384 crc = crc32(0L, Z_NULL, 0);
385 crc = crc32(crc, (const Bytef *) decoded, decoded_len);
386
387 cmp = (unsigned) ((data[data_len-8] & 0xFF));
388 cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8);
389 cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16);
390 cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24);
391 len = (unsigned) ((data[data_len-4] & 0xFF));
392 len += (unsigned) ((data[data_len-3] & 0xFF) << 8);
393 len += (unsigned) ((data[data_len-2] & 0xFF) << 16);
394 len += (unsigned) ((data[data_len-1] & 0xFF) << 24);
395
396 if (cmp != crc) {
397 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc);
398 status = FAILURE;
399 }
400 if (len != decoded_len) {
401 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, decoded_len);
402 status = FAILURE;
403 }
404 return status;
405 }
406
/*
 * Bails out of the calling function: frees `tofree` if it is not NULL,
 * raises a warning containing zlib's message for `status` and makes the
 * caller return FAILURE.
 *
 * Wrapped in do { } while (0) so that the macro followed by a semicolon
 * behaves like a single statement, even in an unbraced if/else.
 */
#define HTTP_ENCODING_STREAM_ERROR(status, tofree) \
	do { \
		if (tofree) efree(tofree); \
		http_error_ex(HE_WARNING, HTTP_E_ENCODING, "GZIP stream error: %s", zError(status)); \
		return FAILURE; \
	} while (0)
413
414 PHP_HTTP_API STATUS _http_encoding_stream_init(http_encoding_stream *s, int gzip, int level, char **encoded, size_t *encoded_len TSRMLS_DC)
415 {
416 STATUS status;
417
418 memset(s, 0, sizeof(http_encoding_stream));
419 if (Z_OK != (status = deflateInit2(&s->Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
420 HTTP_ENCODING_STREAM_ERROR(status, NULL);
421 }
422
423 if ((s->gzip = gzip)) {
424 s->crc = crc32(0L, Z_NULL, 0);
425 *encoded_len = sizeof(http_encoding_gzip_header);
426 *encoded = emalloc(*encoded_len);
427 memcpy(*encoded, http_encoding_gzip_header, *encoded_len);
428 } else {
429 *encoded_len = 0;
430 *encoded = NULL;
431 }
432
433 return SUCCESS;
434 }
435
436 PHP_HTTP_API STATUS _http_encoding_stream_update(http_encoding_stream *s, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
437 {
438 STATUS status;
439
440 *encoded_len = HTTP_ENCODING_BUFLEN(data_len);
441 *encoded = emalloc(*encoded_len);
442
443 s->Z.next_in = (Bytef *) data;
444 s->Z.avail_in = data_len;
445 s->Z.next_out = (Bytef *) *encoded;
446 s->Z.avail_out = *encoded_len;
447
448 status = deflate(&s->Z, Z_SYNC_FLUSH);
449
450 if (Z_OK != status && Z_STREAM_END != status) {
451 HTTP_ENCODING_STREAM_ERROR(status, *encoded);
452 }
453 *encoded_len -= s->Z.avail_out;
454
455 if (s->gzip) {
456 s->crc = crc32(s->crc, (const Bytef *) data, data_len);
457 }
458
459 return SUCCESS;
460 }
461
462 PHP_HTTP_API STATUS _http_encoding_stream_finish(http_encoding_stream *s, char **encoded, size_t *encoded_len TSRMLS_DC)
463 {
464 STATUS status;
465
466 *encoded_len = 1024;
467 *encoded = emalloc(*encoded_len);
468
469 s->Z.next_out = (Bytef *) *encoded;
470 s->Z.avail_out = *encoded_len;
471
472 if (Z_STREAM_END != (status = deflate(&s->Z, Z_FINISH)) || Z_OK != (status = deflateEnd(&s->Z))) {
473 HTTP_ENCODING_STREAM_ERROR(status, *encoded);
474 }
475
476 *encoded_len -= s->Z.avail_out;
477 if (s->gzip) {
478 if (s->Z.avail_out < 8) {
479 *encoded = erealloc(*encoded, *encoded_len + 8);
480 }
481 (*encoded)[(*encoded_len)++] = (char) (s->crc & 0xFF);
482 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 8) & 0xFF);
483 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 16) & 0xFF);
484 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 24) & 0xFF);
485 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in) & 0xFF);
486 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 8) & 0xFF);
487 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 16) & 0xFF);
488 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 24) & 0xFF);
489 }
490
491 return SUCCESS;
492 }
493
494 #endif /* HTTP_HAVE_ZLIB */
495
496 PHP_HTTP_API zend_bool _http_encoding_response_start(size_t content_length TSRMLS_DC)
497 {
498 if ( php_ob_handler_used("ob_gzhandler" TSRMLS_CC) ||
499 php_ob_handler_used("zlib output compression" TSRMLS_CC)) {
500 HTTP_G(send).gzip_encoding = 0;
501 } else {
502 if (!HTTP_G(send).gzip_encoding) {
503 /* emit a content-length header */
504 if (content_length) {
505 char cl_header_str[128];
506 size_t cl_header_len;
507 cl_header_len = snprintf(cl_header_str, lenof(cl_header_str), "Content-Length: %zu", content_length);
508 http_send_header_string_ex(cl_header_str, cl_header_len, 1);
509 }
510 } else {
511 #ifndef HTTP_HAVE_ZLIB
512 HTTP_G(send).gzip_encoding = 0;
513 php_start_ob_buffer_named("ob_gzhandler", 0, 0 TSRMLS_CC);
514 #else
515 HashTable *selected;
516 zval zsupported;
517
518 INIT_PZVAL(&zsupported);
519 array_init(&zsupported);
520 add_next_index_stringl(&zsupported, "gzip", lenof("gzip"), 1);
521 add_next_index_stringl(&zsupported, "x-gzip", lenof("x-gzip"), 1);
522 add_next_index_stringl(&zsupported, "deflate", lenof("deflate"), 1);
523
524 HTTP_G(send).gzip_encoding = 0;
525
526 if ((selected = http_negotiate_encoding(&zsupported))) {
527 STATUS hs = FAILURE;
528 char *encoding = NULL;
529 ulong idx;
530
531 if (HASH_KEY_IS_STRING == zend_hash_get_current_key(selected, &encoding, &idx, 0) && encoding) {
532 if (!strcmp(encoding, "gzip") || !strcmp(encoding, "x-gzip")) {
533 if (SUCCESS == (hs = http_send_header_string("Content-Encoding: gzip"))) {
534 HTTP_G(send).gzip_encoding = HTTP_ENCODING_GZIP;
535 }
536 } else if (!strcmp(encoding, "deflate")) {
537 if (SUCCESS == (hs = http_send_header_string("Content-Encoding: deflate"))) {
538 HTTP_G(send).gzip_encoding = HTTP_ENCODING_DEFLATE;
539 }
540 }
541 if (SUCCESS == hs) {
542 http_send_header_string("Vary: Accept-Encoding");
543 }
544 }
545
546 zend_hash_destroy(selected);
547 FREE_HASHTABLE(selected);
548 }
549
550 zval_dtor(&zsupported);
551 return HTTP_G(send).gzip_encoding;
552 #endif
553 }
554 }
555 return 0;
556 }
557
558 /*
559 * Local variables:
560 * tab-width: 4
561 * c-basic-offset: 4
562 * End:
563 * vim600: noet sw=4 ts=4 fdm=marker
564 * vim<600: noet sw=4 ts=4
565 */
566