- remove http_compress() and http_uncompress() (deflate/inflate ambiguity)
[m6w6/ext-http] / http_encoding_api.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2005, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 /* $Id$ */
14
15 #ifdef HAVE_CONFIG_H
16 # include "config.h"
17 #endif
18 #include "php_http.h"
19
20 #include "php_http_api.h"
21 #include "php_http_encoding_api.h"
22 #include "php_http_send_api.h"
23 #include "php_http_headers_api.h"
24
25 ZEND_EXTERN_MODULE_GLOBALS(http);
26
/*
 * Check whether only spaces (0x20) separate *line from the end-of-line
 * sequence reported by http_locate_eol().
 *
 * On a match *line is advanced to the start of the EOL sequence and 1 is
 * returned; otherwise *line is left untouched and 0 is returned.
 * eol_len is handed through to http_locate_eol(), which fills it in.
 */
static inline int eol_match(char **line, int *eol_len)
{
	char *cursor;

	/* skip over any run of spaces */
	for (cursor = *line; 0x20 == *cursor; ++cursor) {
		/* nothing */
	}

	if (http_locate_eol(*line, eol_len) != cursor) {
		return 0;
	}

	*line = cursor;
	return 1;
}
40
41 /* {{{ char *http_encoding_dechunk(char *, size_t, char **, size_t *) */
42 PHP_HTTP_API const char *_http_encoding_dechunk(const char *encoded, size_t encoded_len, char **decoded, size_t *decoded_len TSRMLS_DC)
43 {
44 int eol_len = 0;
45 char *n_ptr = NULL;
46 const char *e_ptr = encoded;
47
48 *decoded_len = 0;
49 *decoded = ecalloc(1, encoded_len);
50
51 while ((encoded + encoded_len - e_ptr) > 0) {
52 ulong chunk_len = 0, rest;
53
54 chunk_len = strtoul(e_ptr, &n_ptr, 16);
55
56 /* we could not read in chunk size */
57 if (n_ptr == e_ptr) {
58 /*
59 * if this is the first turn and there doesn't seem to be a chunk
60 * size at the begining of the body, do not fail on apparently
61 * not encoded data and return a copy
62 */
63 if (e_ptr == encoded) {
64 http_error(HE_NOTICE, HTTP_E_ENCODING, "Data does not seem to be chunked encoded");
65 memcpy(*decoded, encoded, encoded_len);
66 *decoded_len = encoded_len;
67 return encoded + encoded_len;
68 } else {
69 efree(*decoded);
70 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected chunk size at pos %tu of %zu but got trash", n_ptr - encoded, encoded_len);
71 return NULL;
72 }
73 }
74
75 /* reached the end */
76 if (!chunk_len) {
77 /* move over '0' chunked encoding terminator */
78 while (*e_ptr == '0') ++e_ptr;
79 break;
80 }
81
82 /* there should be CRLF after the chunk size, but we'll ignore SP+ too */
83 if (*n_ptr && !eol_match(&n_ptr, &eol_len)) {
84 if (eol_len == 2) {
85 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected CRLF at pos %tu of %zu but got 0x%02X 0x%02X", n_ptr - encoded, encoded_len, *n_ptr, *(n_ptr + 1));
86 } else {
87 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected LF at pos %tu of %zu but got 0x%02X", n_ptr - encoded, encoded_len, *n_ptr);
88 }
89 }
90 n_ptr += eol_len;
91
92 /* chunk size pretends more data than we actually got, so it's probably a truncated message */
93 if (chunk_len > (rest = encoded + encoded_len - n_ptr)) {
94 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Truncated message: chunk size %lu exceeds remaining data size %lu at pos %tu of %zu", chunk_len, rest, n_ptr - encoded, encoded_len);
95 chunk_len = rest;
96 }
97
98 /* copy the chunk */
99 memcpy(*decoded + *decoded_len, n_ptr, chunk_len);
100 *decoded_len += chunk_len;
101
102 if (chunk_len == rest) {
103 e_ptr = n_ptr + chunk_len;
104 break;
105 } else {
106 /* advance to next chunk */
107 e_ptr = n_ptr + chunk_len + eol_len;
108 }
109 }
110
111 return e_ptr;
112 }
113 /* }}} */
114
115 #ifdef HTTP_HAVE_ZLIB
116
117 static const char http_encoding_gzip_header[] = {
118 (const char) 0x1f, // fixed value
119 (const char) 0x8b, // fixed value
120 (const char) Z_DEFLATED, // compression algorithm
121 (const char) 0, // none of the possible flags defined by the GZIP "RFC"
122 (const char) 0, // MTIME
123 (const char) 0, // =*=
124 (const char) 0, // =*=
125 (const char) 0, // =*=
126 (const char) 0, // two possible flag values for 9 compression levels? o_O
127 #ifdef PHP_WIN32
128 (const char) 0x0b // OS_CODE
129 #else
130 (const char) 0x03 // OS_CODE
131 #endif
132 };
133
134 PHP_HTTP_API STATUS _http_encoding_gzencode(int level, int mtime, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
135 {
136 z_stream Z;
137 STATUS status = Z_OK;
138
139 if (!(data && data_len)) {
140 return FAILURE;
141 }
142
143 Z.zalloc = Z_NULL;
144 Z.zfree = Z_NULL;
145 Z.opaque = Z_NULL;
146 Z.next_in = (Bytef *) data;
147 Z.avail_in = data_len;
148 Z.avail_out = HTTP_ENCODING_BUFLEN(data_len) + HTTP_ENCODING_SAFPAD - 1;
149
150 *encoded = emalloc(HTTP_ENCODING_BUFLEN(data_len) + sizeof(http_encoding_gzip_header) + HTTP_ENCODING_SAFPAD);
151 memcpy(*encoded, http_encoding_gzip_header, sizeof(http_encoding_gzip_header));
152
153 if (mtime) {
154 (*encoded)[4] = (char) (mtime & 0xFF);
155 (*encoded)[5] = (char) ((mtime >> 8) & 0xFF);
156 (*encoded)[6] = (char) ((mtime >> 16) & 0xFF);
157 (*encoded)[7] = (char) ((mtime >> 24) & 0xFF);
158 }
159
160 Z.next_out = (Bytef *) *encoded + sizeof(http_encoding_gzip_header);
161
162 if (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
163 status = deflate(&Z, Z_FINISH);
164 deflateEnd(&Z);
165
166 if (Z_STREAM_END == status) {
167 ulong crc;
168 char *trailer;
169
170 crc = crc32(0L, Z_NULL, 0);
171 crc = crc32(crc, (const Bytef *) data, data_len);
172
173 trailer = *encoded + sizeof(http_encoding_gzip_header) + Z.total_out;
174
175 /* LSB */
176 trailer[0] = (char) (crc & 0xFF);
177 trailer[1] = (char) ((crc >> 8) & 0xFF);
178 trailer[2] = (char) ((crc >> 16) & 0xFF);
179 trailer[3] = (char) ((crc >> 24) & 0xFF);
180 trailer[4] = (char) ((Z.total_in) & 0xFF);
181 trailer[5] = (char) ((Z.total_in >> 8) & 0xFF);
182 trailer[6] = (char) ((Z.total_in >> 16) & 0xFF);
183 trailer[7] = (char) ((Z.total_in >> 24) & 0xFF);
184
185 *encoded_len = Z.total_out + sizeof(http_encoding_gzip_header) + 8;
186 (*encoded)[*encoded_len] = '\0';
187 return SUCCESS;
188 }
189 }
190
191 efree(*encoded);
192 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not gzencode data: %s", zError(status));
193 return FAILURE;
194 }
195
196 PHP_HTTP_API STATUS _http_encoding_gzdecode(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
197 {
198 const char *encoded;
199 size_t encoded_len;
200
201 if ( (data && data_len) &&
202 (SUCCESS == http_encoding_gzencode_verify(data, data_len, &encoded, &encoded_len)) &&
203 (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len))) {
204 http_encoding_gzdecode_verify(data, data_len, *decoded, *decoded_len);
205 return SUCCESS;
206 }
207
208 return FAILURE;
209 }
210
211 PHP_HTTP_API STATUS _http_encoding_deflate(int level, int zhdr, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
212 {
213 z_stream Z;
214 STATUS status = Z_OK;
215
216 Z.zalloc = Z_NULL;
217 Z.zfree = Z_NULL;
218 Z.opaque = Z_NULL;
219 Z.data_type = Z_UNKNOWN;
220 Z.next_in = (Bytef *) data;
221 Z.avail_in = data_len;
222 Z.avail_out = HTTP_ENCODING_BUFLEN(data_len) - 1;
223 Z.next_out = emalloc(HTTP_ENCODING_BUFLEN(data_len));
224
225 *encoded = (char *) Z.next_out;
226
227 if (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, zhdr ? MAX_WBITS : -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
228 status = deflate(&Z, Z_FINISH);
229 deflateEnd(&Z);
230
231 if (Z_STREAM_END == status) {
232 (*encoded)[*encoded_len = Z.total_out] = '\0';
233 return SUCCESS;
234 }
235 }
236
237 efree(encoded);
238 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not deflate data: %s", zError(status));
239 return FAILURE;
240 }
241
242 PHP_HTTP_API STATUS _http_encoding_inflate(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
243 {
244 int max = 0, wbits = -MAX_WBITS;
245 STATUS status;
246 z_stream Z;
247
248 *decoded = NULL;
249 *decoded_len = 0;
250
251 retry_inflate:
252 do {
253 Z.zalloc = Z_NULL;
254 Z.zfree = Z_NULL;
255
256 if (!max) {
257 if (!*decoded) {
258 *decoded_len = data_len * 2;
259 *decoded = emalloc(*decoded_len + 1);
260 }
261 } else {
262 size_t new_len = *decoded_len << 2;
263 char *new_ptr = erealloc_recoverable(*decoded, new_len + 1);
264
265 if (new_ptr) {
266 *decoded = new_ptr;
267 *decoded_len = new_len;
268 } else {
269 max = INT_MAX-1; /* avoid integer overflow on increment op */
270 }
271 }
272
273 Z.next_in = (Bytef *) data;
274 Z.avail_in = data_len;
275 Z.next_out = (Bytef *) *decoded;
276 Z.avail_out = *decoded_len;
277
278 if (Z_OK == (status = inflateInit2(&Z, wbits))) {
279 status = inflate(&Z, Z_FINISH);
280 inflateEnd(&Z);
281
282 /* retry if it looks like we've got a zlib header */
283 if (wbits == -MAX_WBITS && status == Z_DATA_ERROR) {
284 wbits = MAX_WBITS;
285 goto retry_inflate;
286 }
287
288 if (Z_STREAM_END == status) {
289 (*decoded)[*decoded_len = Z.total_out] = '\0';
290 return SUCCESS;
291 }
292 }
293 } while (status == Z_BUF_ERROR && ++max < HTTP_ENCODING_MAXTRY);
294
295 efree(*decoded);
296 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not inflate data: %s", zError(status));
297 return FAILURE;
298 }
299
300 PHP_HTTP_API STATUS _http_encoding_gzencode_verify(const char *data, size_t data_len, const char **encoded, size_t *encoded_len, int error_level TSRMLS_DC)
301 {
302 size_t offset = sizeof(http_encoding_gzip_header);
303
304 if (data_len < offset) {
305 goto really_bad_gzip_header;
306 }
307
308 if (data[0] != (const char) 0x1F || data[1] != (const char) 0x8B) {
309 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized GZIP header start: 0x%02X 0x%02X", (int) data[0], (int) (data[1] & 0xFF));
310 return FAILURE;
311 }
312
313 if (data[2] != (const char) Z_DEFLATED) {
314 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized compression format (%d)", (int) (data[2] & 0xFF));
315 /* still try to decode */
316 }
317 if ((data[3] & 0x4) == 0x4) {
318 if (data_len < offset + 2) {
319 goto really_bad_gzip_header;
320 }
321 /* there are extra fields, the length follows the common header as 2 bytes LSB */
322 offset += (unsigned) ((data[offset] & 0xFF));
323 offset += 1;
324 offset += (unsigned) ((data[offset] & 0xFF) << 8);
325 offset += 1;
326 }
327 if ((data[3] & 0x8) == 0x8) {
328 if (data_len <= offset) {
329 goto really_bad_gzip_header;
330 }
331 /* there's a file name */
332 offset += strlen(&data[offset]) + 1 /*NUL*/;
333 }
334 if ((data[3] & 0x10) == 0x10) {
335 if (data_len <= offset) {
336 goto really_bad_gzip_header;
337 }
338 /* there's a comment */
339 offset += strlen(&data[offset]) + 1 /* NUL */;
340 }
341 if ((data[3] & 0x2) == 0x2) {
342 /* there's a CRC16 of the header */
343 offset += 2;
344 if (data_len <= offset) {
345 goto really_bad_gzip_header;
346 } else {
347 ulong crc, cmp;
348
349 cmp = (unsigned) ((data[offset-2] & 0xFF));
350 cmp += (unsigned) ((data[offset-1] & 0xFF) << 8);
351
352 crc = crc32(0L, Z_NULL, 0);
353 crc = crc32(crc, (const Bytef *) data, sizeof(http_encoding_gzip_header));
354
355 if (cmp != (crc & 0xFFFF)) {
356 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "GZIP headers CRC checksums so not match (%lu, %lu)", cmp, crc & 0xFFFF);
357 return FAILURE;
358 }
359 }
360 }
361
362 if (data_len < offset + 8) {
363 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP footer");
364 return FAILURE;
365 }
366
367 if (encoded) {
368 *encoded = data + offset;
369 }
370 if (encoded_len) {
371 *encoded_len = data_len - offset - 8 /* size of the assumed GZIP footer */;
372 }
373
374 return SUCCESS;
375
376 really_bad_gzip_header:
377 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP header");
378 return FAILURE;
379 }
380
381 PHP_HTTP_API STATUS _http_encoding_gzdecode_verify(const char *data, size_t data_len, const char *decoded, size_t decoded_len, int error_level TSRMLS_DC)
382 {
383 STATUS status = SUCCESS;
384 ulong len, cmp, crc;
385
386 crc = crc32(0L, Z_NULL, 0);
387 crc = crc32(crc, (const Bytef *) decoded, decoded_len);
388
389 cmp = (unsigned) ((data[data_len-8] & 0xFF));
390 cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8);
391 cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16);
392 cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24);
393 len = (unsigned) ((data[data_len-4] & 0xFF));
394 len += (unsigned) ((data[data_len-3] & 0xFF) << 8);
395 len += (unsigned) ((data[data_len-2] & 0xFF) << 16);
396 len += (unsigned) ((data[data_len-1] & 0xFF) << 24);
397
398 if (cmp != crc) {
399 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc);
400 status = FAILURE;
401 }
402 if (len != decoded_len) {
403 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, decoded_len);
404 status = FAILURE;
405 }
406 return status;
407 }
408
/*
 * Bail out of a stream function: free `tofree` unless it is NULL, raise a
 * warning carrying zlib's message for `status` and return FAILURE from the
 * enclosing function. Use as a statement, followed by a semicolon.
 */
#define HTTP_ENCODING_STREAM_ERROR(status, tofree) \
	do { \
		if ((tofree)) efree((tofree)); \
		http_error_ex(HE_WARNING, HTTP_E_ENCODING, "GZIP stream error: %s", zError((status))); \
		return FAILURE; \
	} while (0)
415
416 PHP_HTTP_API STATUS _http_encoding_stream_init(http_encoding_stream *s, int gzip, int level, char **encoded, size_t *encoded_len TSRMLS_DC)
417 {
418 STATUS status;
419
420 memset(s, 0, sizeof(http_encoding_stream));
421 if (Z_OK != (status = deflateInit2(&s->Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
422 HTTP_ENCODING_STREAM_ERROR(status, NULL);
423 }
424
425 if ((s->gzip = gzip)) {
426 s->crc = crc32(0L, Z_NULL, 0);
427 *encoded_len = sizeof(http_encoding_gzip_header);
428 *encoded = emalloc(*encoded_len);
429 memcpy(*encoded, http_encoding_gzip_header, *encoded_len);
430 } else {
431 *encoded_len = 0;
432 *encoded = NULL;
433 }
434
435 return SUCCESS;
436 }
437
438 PHP_HTTP_API STATUS _http_encoding_stream_update(http_encoding_stream *s, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
439 {
440 STATUS status;
441
442 *encoded_len = HTTP_ENCODING_BUFLEN(data_len);
443 *encoded = emalloc(*encoded_len);
444
445 s->Z.next_in = (Bytef *) data;
446 s->Z.avail_in = data_len;
447 s->Z.next_out = (Bytef *) *encoded;
448 s->Z.avail_out = *encoded_len;
449
450 status = deflate(&s->Z, Z_SYNC_FLUSH);
451
452 if (Z_OK != status && Z_STREAM_END != status) {
453 HTTP_ENCODING_STREAM_ERROR(status, *encoded);
454 }
455 *encoded_len -= s->Z.avail_out;
456
457 if (s->gzip) {
458 s->crc = crc32(s->crc, (const Bytef *) data, data_len);
459 }
460
461 return SUCCESS;
462 }
463
464 PHP_HTTP_API STATUS _http_encoding_stream_finish(http_encoding_stream *s, char **encoded, size_t *encoded_len TSRMLS_DC)
465 {
466 STATUS status;
467
468 *encoded_len = 1024;
469 *encoded = emalloc(*encoded_len);
470
471 s->Z.next_out = (Bytef *) *encoded;
472 s->Z.avail_out = *encoded_len;
473
474 if (Z_STREAM_END != (status = deflate(&s->Z, Z_FINISH)) || Z_OK != (status = deflateEnd(&s->Z))) {
475 HTTP_ENCODING_STREAM_ERROR(status, *encoded);
476 }
477
478 *encoded_len -= s->Z.avail_out;
479 if (s->gzip) {
480 if (s->Z.avail_out < 8) {
481 *encoded = erealloc(*encoded, *encoded_len + 8);
482 }
483 (*encoded)[(*encoded_len)++] = (char) (s->crc & 0xFF);
484 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 8) & 0xFF);
485 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 16) & 0xFF);
486 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 24) & 0xFF);
487 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in) & 0xFF);
488 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 8) & 0xFF);
489 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 16) & 0xFF);
490 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 24) & 0xFF);
491 }
492
493 return SUCCESS;
494 }
495
496 #endif /* HTTP_HAVE_ZLIB */
497
498 PHP_HTTP_API zend_bool _http_encoding_response_start(size_t content_length TSRMLS_DC)
499 {
500 if ( php_ob_handler_used("ob_gzhandler" TSRMLS_CC) ||
501 php_ob_handler_used("zlib output compression" TSRMLS_CC)) {
502 HTTP_G(send).gzip_encoding = 0;
503 } else {
504 if (!HTTP_G(send).gzip_encoding) {
505 /* emit a content-length header */
506 if (content_length) {
507 char cl_header_str[128];
508 size_t cl_header_len;
509 cl_header_len = snprintf(cl_header_str, lenof(cl_header_str), "Content-Length: %zu", content_length);
510 http_send_header_string_ex(cl_header_str, cl_header_len, 1);
511 }
512 } else {
513 #ifndef HTTP_HAVE_ZLIB
514 HTTP_G(send).gzip_encoding = 0;
515 php_start_ob_buffer_named("ob_gzhandler", 0, 0 TSRMLS_CC);
516 #else
517 HashTable *selected;
518 zval zsupported;
519
520 INIT_PZVAL(&zsupported);
521 array_init(&zsupported);
522 add_next_index_stringl(&zsupported, "gzip", lenof("gzip"), 1);
523 add_next_index_stringl(&zsupported, "deflate", lenof("deflate"), 1);
524
525 HTTP_G(send).gzip_encoding = 0;
526
527 if ((selected = http_negotiate_encoding(&zsupported))) {
528 STATUS hs = FAILURE;
529 char *encoding = NULL;
530 ulong idx;
531
532 if (HASH_KEY_IS_STRING == zend_hash_get_current_key(selected, &encoding, &idx, 0) && encoding) {
533 if (!strcmp(encoding, "gzip")) {
534 if (SUCCESS == (hs = http_send_header_string("Content-Encoding: gzip"))) {
535 HTTP_G(send).gzip_encoding = HTTP_ENCODING_GZIP;
536 }
537 } else if (!strcmp(encoding, "deflate")) {
538 if (SUCCESS == (hs = http_send_header_string("Content-Encoding: deflate"))) {
539 HTTP_G(send).gzip_encoding = HTTP_ENCODING_DEFLATE;
540 }
541 }
542 if (SUCCESS == hs) {
543 http_send_header_string("Vary: Accept-Encoding");
544 }
545 }
546
547 zend_hash_destroy(selected);
548 FREE_HASHTABLE(selected);
549 }
550
551 zval_dtor(&zsupported);
552 return HTTP_G(send).gzip_encoding;
553 #endif
554 }
555 }
556 return 0;
557 }
558
559 /*
560 * Local variables:
561 * tab-width: 4
562 * c-basic-offset: 4
563 * End:
564 * vim600: noet sw=4 ts=4 fdm=marker
565 * vim<600: noet sw=4 ts=4
566 */
567