- fix inclusion of zlib.h
[m6w6/ext-http] / http_encoding_api.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2005, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 /* $Id$ */
14
15 #ifdef HAVE_CONFIG_H
16 # include "config.h"
17 #endif
18
19 #define HTTP_WANT_ZLIB
20 #include "php_http.h"
21
22 #include "php_http_api.h"
23 #include "php_http_encoding_api.h"
24 #include "php_http_send_api.h"
25 #include "php_http_headers_api.h"
26
27 ZEND_EXTERN_MODULE_GLOBALS(http);
28
/*
 * Check whether an end-of-line follows *line, tolerating leading spaces.
 *
 * Skips any run of 0x20 characters starting at *line and then compares the
 * resulting position with what http_locate_eol() reports for *line.
 * On a match *line is moved to that position and 1 is returned; otherwise
 * *line is left untouched and 0 is returned.
 *
 * eol_len is handed through to http_locate_eol(); presumably it receives the
 * length of the located EOL sequence -- confirm against http_locate_eol().
 */
static inline int eol_match(char **line, int *eol_len)
{
	char *cursor;

	for (cursor = *line; 0x20 == *cursor; ++cursor) {
		/* skip SP */
	}

	if (http_locate_eol(*line, eol_len) != cursor) {
		return 0;
	}

	*line = cursor;
	return 1;
}
42
43 /* {{{ char *http_encoding_dechunk(char *, size_t, char **, size_t *) */
44 PHP_HTTP_API const char *_http_encoding_dechunk(const char *encoded, size_t encoded_len, char **decoded, size_t *decoded_len TSRMLS_DC)
45 {
46 int eol_len = 0;
47 char *n_ptr = NULL;
48 const char *e_ptr = encoded;
49
50 *decoded_len = 0;
51 *decoded = ecalloc(1, encoded_len);
52
53 while ((encoded + encoded_len - e_ptr) > 0) {
54 ulong chunk_len = 0, rest;
55
56 chunk_len = strtoul(e_ptr, &n_ptr, 16);
57
58 /* we could not read in chunk size */
59 if (n_ptr == e_ptr) {
60 /*
61 * if this is the first turn and there doesn't seem to be a chunk
62 * size at the begining of the body, do not fail on apparently
63 * not encoded data and return a copy
64 */
65 if (e_ptr == encoded) {
66 http_error(HE_NOTICE, HTTP_E_ENCODING, "Data does not seem to be chunked encoded");
67 memcpy(*decoded, encoded, encoded_len);
68 *decoded_len = encoded_len;
69 return encoded + encoded_len;
70 } else {
71 efree(*decoded);
72 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected chunk size at pos %tu of %zu but got trash", n_ptr - encoded, encoded_len);
73 return NULL;
74 }
75 }
76
77 /* reached the end */
78 if (!chunk_len) {
79 /* move over '0' chunked encoding terminator */
80 while (*e_ptr == '0') ++e_ptr;
81 break;
82 }
83
84 /* there should be CRLF after the chunk size, but we'll ignore SP+ too */
85 if (*n_ptr && !eol_match(&n_ptr, &eol_len)) {
86 if (eol_len == 2) {
87 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected CRLF at pos %tu of %zu but got 0x%02X 0x%02X", n_ptr - encoded, encoded_len, *n_ptr, *(n_ptr + 1));
88 } else {
89 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected LF at pos %tu of %zu but got 0x%02X", n_ptr - encoded, encoded_len, *n_ptr);
90 }
91 }
92 n_ptr += eol_len;
93
94 /* chunk size pretends more data than we actually got, so it's probably a truncated message */
95 if (chunk_len > (rest = encoded + encoded_len - n_ptr)) {
96 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Truncated message: chunk size %lu exceeds remaining data size %lu at pos %tu of %zu", chunk_len, rest, n_ptr - encoded, encoded_len);
97 chunk_len = rest;
98 }
99
100 /* copy the chunk */
101 memcpy(*decoded + *decoded_len, n_ptr, chunk_len);
102 *decoded_len += chunk_len;
103
104 if (chunk_len == rest) {
105 e_ptr = n_ptr + chunk_len;
106 break;
107 } else {
108 /* advance to next chunk */
109 e_ptr = n_ptr + chunk_len + eol_len;
110 }
111 }
112
113 return e_ptr;
114 }
115 /* }}} */
116
117 #ifdef HTTP_HAVE_ZLIB
118
119 static const char http_encoding_gzip_header[] = {
120 (const char) 0x1f, // fixed value
121 (const char) 0x8b, // fixed value
122 (const char) Z_DEFLATED, // compression algorithm
123 (const char) 0, // none of the possible flags defined by the GZIP "RFC"
124 (const char) 0, // MTIME
125 (const char) 0, // =*=
126 (const char) 0, // =*=
127 (const char) 0, // =*=
128 (const char) 0, // two possible flag values for 9 compression levels? o_O
129 #ifdef PHP_WIN32
130 (const char) 0x0b // OS_CODE
131 #else
132 (const char) 0x03 // OS_CODE
133 #endif
134 };
135
136 PHP_HTTP_API STATUS _http_encoding_gzencode(int level, int mtime, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
137 {
138 z_stream Z;
139 STATUS status = Z_OK;
140
141 if (!(data && data_len)) {
142 return FAILURE;
143 }
144
145 *encoded = NULL;
146 *encoded_len = 0;
147 memset(&Z, 0, sizeof(z_stream));
148
149 Z.next_in = (Bytef *) data;
150 Z.avail_in = data_len;
151 Z.avail_out = HTTP_ENCODING_BUFLEN(data_len) + HTTP_ENCODING_SAFPAD - 1;
152
153 *encoded = emalloc(HTTP_ENCODING_BUFLEN(data_len) + sizeof(http_encoding_gzip_header) + HTTP_ENCODING_SAFPAD);
154 memcpy(*encoded, http_encoding_gzip_header, sizeof(http_encoding_gzip_header));
155
156 if (mtime) {
157 (*encoded)[4] = (char) (mtime & 0xFF);
158 (*encoded)[5] = (char) ((mtime >> 8) & 0xFF);
159 (*encoded)[6] = (char) ((mtime >> 16) & 0xFF);
160 (*encoded)[7] = (char) ((mtime >> 24) & 0xFF);
161 }
162
163 Z.next_out = (Bytef *) *encoded + sizeof(http_encoding_gzip_header);
164
165 if (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
166 status = deflate(&Z, Z_FINISH);
167 deflateEnd(&Z);
168
169 if (Z_STREAM_END == status) {
170 ulong crc;
171 char *trailer;
172
173 crc = crc32(0L, Z_NULL, 0);
174 crc = crc32(crc, (const Bytef *) data, data_len);
175
176 trailer = *encoded + sizeof(http_encoding_gzip_header) + Z.total_out;
177
178 /* LSB */
179 trailer[0] = (char) (crc & 0xFF);
180 trailer[1] = (char) ((crc >> 8) & 0xFF);
181 trailer[2] = (char) ((crc >> 16) & 0xFF);
182 trailer[3] = (char) ((crc >> 24) & 0xFF);
183 trailer[4] = (char) ((Z.total_in) & 0xFF);
184 trailer[5] = (char) ((Z.total_in >> 8) & 0xFF);
185 trailer[6] = (char) ((Z.total_in >> 16) & 0xFF);
186 trailer[7] = (char) ((Z.total_in >> 24) & 0xFF);
187
188 *encoded_len = Z.total_out + sizeof(http_encoding_gzip_header) + 8;
189 (*encoded)[*encoded_len] = '\0';
190 return SUCCESS;
191 }
192 }
193
194 STR_SET(*encoded, NULL);
195 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not gzencode data: %s", zError(status));
196 return FAILURE;
197 }
198
199 PHP_HTTP_API STATUS _http_encoding_gzdecode(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
200 {
201 const char *encoded;
202 size_t encoded_len;
203
204 if ( (data && data_len) &&
205 (SUCCESS == http_encoding_gzencode_verify(data, data_len, &encoded, &encoded_len)) &&
206 (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len))) {
207 http_encoding_gzdecode_verify(data, data_len, *decoded, *decoded_len);
208 return SUCCESS;
209 }
210
211 return FAILURE;
212 }
213
214 PHP_HTTP_API STATUS _http_encoding_deflate(int level, int zhdr, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
215 {
216 z_stream Z;
217 STATUS status = Z_OK;
218
219 *encoded = NULL;
220 *encoded_len = 0;
221 memset(&Z, 0, sizeof(z_stream));
222
223 Z.data_type = Z_UNKNOWN;
224 Z.next_in = (Bytef *) data;
225 Z.avail_in = data_len;
226 Z.avail_out = HTTP_ENCODING_BUFLEN(data_len) - 1;
227 Z.next_out = emalloc(HTTP_ENCODING_BUFLEN(data_len));
228
229 *encoded = (char *) Z.next_out;
230
231 if (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, zhdr ? MAX_WBITS : -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
232 status = deflate(&Z, Z_FINISH);
233 deflateEnd(&Z);
234
235 if (Z_STREAM_END == status) {
236 (*encoded)[*encoded_len = Z.total_out] = '\0';
237 return SUCCESS;
238 }
239 }
240
241 STR_SET(*encoded, NULL);
242 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not deflate data: %s", zError(status));
243 return FAILURE;
244 }
245
246 PHP_HTTP_API STATUS _http_encoding_inflate(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
247 {
248 int max = 0, wbits = -MAX_WBITS;
249 STATUS status;
250 z_stream Z;
251
252 *decoded = NULL;
253 *decoded_len = 0;
254 memset(&Z, 0, sizeof(z_stream));
255
256 do {
257 if (!max) {
258 *decoded_len = data_len * 2;
259 *decoded = emalloc(*decoded_len + 1);
260 } else {
261 size_t new_len = *decoded_len << 2;
262 char *new_ptr = erealloc_recoverable(*decoded, new_len + 1);
263
264 if (new_ptr) {
265 *decoded = new_ptr;
266 *decoded_len = new_len;
267 } else {
268 max = INT_MAX-1; /* avoid integer overflow on increment op */
269 }
270 }
271
272 retry_inflate:
273 Z.next_in = (Bytef *) data;
274 Z.avail_in = data_len;
275 Z.next_out = (Bytef *) *decoded;
276 Z.avail_out = *decoded_len;
277
278 if (Z_OK == (status = inflateInit2(&Z, wbits))) {
279 status = inflate(&Z, Z_FINISH);
280 inflateEnd(&Z);
281
282 /* retry if it looks like we've got a zlib header */
283 if (wbits == -MAX_WBITS && status == Z_DATA_ERROR) {
284 wbits = MAX_WBITS;
285 goto retry_inflate;
286 }
287
288 if (Z_STREAM_END == status) {
289 (*decoded)[*decoded_len = Z.total_out] = '\0';
290 return SUCCESS;
291 }
292 }
293 } while (status == Z_BUF_ERROR && ++max < HTTP_ENCODING_MAXTRY);
294
295 STR_SET(*decoded, NULL);
296 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not inflate data: %s", zError(status));
297 return FAILURE;
298 }
299
300 PHP_HTTP_API STATUS _http_encoding_gzencode_verify(const char *data, size_t data_len, const char **encoded, size_t *encoded_len, int error_level TSRMLS_DC)
301 {
302 size_t offset = sizeof(http_encoding_gzip_header);
303
304 if (data_len < offset) {
305 goto really_bad_gzip_header;
306 }
307
308 if (data[0] != (const char) 0x1F || data[1] != (const char) 0x8B) {
309 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized GZIP header start: 0x%02X 0x%02X", (int) data[0], (int) (data[1] & 0xFF));
310 return FAILURE;
311 }
312
313 if (data[2] != (const char) Z_DEFLATED) {
314 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized compression format (%d)", (int) (data[2] & 0xFF));
315 /* still try to decode */
316 }
317 if ((data[3] & 0x4) == 0x4) {
318 if (data_len < offset + 2) {
319 goto really_bad_gzip_header;
320 }
321 /* there are extra fields, the length follows the common header as 2 bytes LSB */
322 offset += (unsigned) ((data[offset] & 0xFF));
323 offset += 1;
324 offset += (unsigned) ((data[offset] & 0xFF) << 8);
325 offset += 1;
326 }
327 if ((data[3] & 0x8) == 0x8) {
328 if (data_len <= offset) {
329 goto really_bad_gzip_header;
330 }
331 /* there's a file name */
332 offset += strlen(&data[offset]) + 1 /*NUL*/;
333 }
334 if ((data[3] & 0x10) == 0x10) {
335 if (data_len <= offset) {
336 goto really_bad_gzip_header;
337 }
338 /* there's a comment */
339 offset += strlen(&data[offset]) + 1 /* NUL */;
340 }
341 if ((data[3] & 0x2) == 0x2) {
342 /* there's a CRC16 of the header */
343 offset += 2;
344 if (data_len <= offset) {
345 goto really_bad_gzip_header;
346 } else {
347 ulong crc, cmp;
348
349 cmp = (unsigned) ((data[offset-2] & 0xFF));
350 cmp += (unsigned) ((data[offset-1] & 0xFF) << 8);
351
352 crc = crc32(0L, Z_NULL, 0);
353 crc = crc32(crc, (const Bytef *) data, sizeof(http_encoding_gzip_header));
354
355 if (cmp != (crc & 0xFFFF)) {
356 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "GZIP headers CRC checksums so not match (%lu, %lu)", cmp, crc & 0xFFFF);
357 return FAILURE;
358 }
359 }
360 }
361
362 if (data_len < offset + 8) {
363 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP footer");
364 return FAILURE;
365 }
366
367 if (encoded) {
368 *encoded = data + offset;
369 }
370 if (encoded_len) {
371 *encoded_len = data_len - offset - 8 /* size of the assumed GZIP footer */;
372 }
373
374 return SUCCESS;
375
376 really_bad_gzip_header:
377 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP header");
378 return FAILURE;
379 }
380
381 PHP_HTTP_API STATUS _http_encoding_gzdecode_verify(const char *data, size_t data_len, const char *decoded, size_t decoded_len, int error_level TSRMLS_DC)
382 {
383 STATUS status = SUCCESS;
384 ulong len, cmp, crc;
385
386 crc = crc32(0L, Z_NULL, 0);
387 crc = crc32(crc, (const Bytef *) decoded, decoded_len);
388
389 cmp = (unsigned) ((data[data_len-8] & 0xFF));
390 cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8);
391 cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16);
392 cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24);
393 len = (unsigned) ((data[data_len-4] & 0xFF));
394 len += (unsigned) ((data[data_len-3] & 0xFF) << 8);
395 len += (unsigned) ((data[data_len-2] & 0xFF) << 16);
396 len += (unsigned) ((data[data_len-1] & 0xFF) << 24);
397
398 if (cmp != crc) {
399 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc);
400 status = FAILURE;
401 }
402 if (len != decoded_len) {
403 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, decoded_len);
404 status = FAILURE;
405 }
406 return status;
407 }
408
/*
 * Bail out of the calling function: free `tofree` (if not NULL), raise a
 * warning describing the zlib `status` and return FAILURE.
 *
 * `tofree` is released with efree(), so only pass memory that may be freed
 * that way, or NULL.  The arguments are evaluated more than once.
 * Wrapped in do { } while (0) so that the macro behaves like a single
 * statement; it must be followed by a semicolon.
 */
#define HTTP_ENCODING_STREAM_ERROR(status, tofree) \
	do { \
		if ((tofree)) efree((tofree)); \
		http_error_ex(HE_WARNING, HTTP_E_ENCODING, "GZIP stream error: %s", zError((status))); \
		return FAILURE; \
	} while (0)
415
416 PHP_HTTP_API STATUS _http_encoding_stream_init(http_encoding_stream *s, int flags, int level, char **encoded, size_t *encoded_len TSRMLS_DC)
417 {
418 STATUS status;
419 int wbits = (flags & HTTP_ENCODING_STREAM_ZLIB_HEADER) ? MAX_WBITS : -MAX_WBITS;
420
421 memset(s, 0, sizeof(http_encoding_stream));
422 if (Z_OK != (status = deflateInit2(&s->Z, level, Z_DEFLATED, wbits, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
423 HTTP_ENCODING_STREAM_ERROR(status, NULL);
424 }
425
426 s->persistent = (flags & HTTP_ENCODING_STREAM_PERSISTENT);
427 if ((s->gzip = (flags & HTTP_ENCODING_STREAM_GZIP_HEADER))) {
428 s->crc = crc32(0L, Z_NULL, 0);
429 *encoded_len = sizeof(http_encoding_gzip_header);
430 *encoded = pemalloc(*encoded_len, s->persistent);
431 memcpy(*encoded, http_encoding_gzip_header, *encoded_len);
432 } else {
433 *encoded_len = 0;
434 *encoded = NULL;
435 }
436
437 return SUCCESS;
438 }
439
440 PHP_HTTP_API STATUS _http_encoding_stream_update(http_encoding_stream *s, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
441 {
442 STATUS status;
443
444 *encoded_len = HTTP_ENCODING_BUFLEN(data_len);
445 *encoded = pemalloc(*encoded_len, s->persistent);
446
447 s->Z.next_in = (Bytef *) data;
448 s->Z.avail_in = data_len;
449 s->Z.next_out = (Bytef *) *encoded;
450 s->Z.avail_out = *encoded_len;
451
452 status = deflate(&s->Z, Z_SYNC_FLUSH);
453
454 if (Z_OK != status && Z_STREAM_END != status) {
455 HTTP_ENCODING_STREAM_ERROR(status, *encoded);
456 }
457 *encoded_len -= s->Z.avail_out;
458
459 if (s->gzip) {
460 s->crc = crc32(s->crc, (const Bytef *) data, data_len);
461 }
462
463 return SUCCESS;
464 }
465
466 PHP_HTTP_API STATUS _http_encoding_stream_finish(http_encoding_stream *s, char **encoded, size_t *encoded_len TSRMLS_DC)
467 {
468 STATUS status;
469
470 *encoded_len = 1024;
471 *encoded = pemalloc(*encoded_len, s->persistent);
472
473 s->Z.next_out = (Bytef *) *encoded;
474 s->Z.avail_out = *encoded_len;
475
476 if (Z_STREAM_END != (status = deflate(&s->Z, Z_FINISH)) || Z_OK != (status = deflateEnd(&s->Z))) {
477 HTTP_ENCODING_STREAM_ERROR(status, *encoded);
478 }
479
480 *encoded_len -= s->Z.avail_out;
481 if (s->gzip) {
482 if (s->Z.avail_out < 8) {
483 *encoded = perealloc(*encoded, *encoded_len + 8, s->persistent);
484 }
485 (*encoded)[(*encoded_len)++] = (char) (s->crc & 0xFF);
486 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 8) & 0xFF);
487 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 16) & 0xFF);
488 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 24) & 0xFF);
489 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in) & 0xFF);
490 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 8) & 0xFF);
491 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 16) & 0xFF);
492 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 24) & 0xFF);
493 }
494
495 return SUCCESS;
496 }
497
498 #endif /* HTTP_HAVE_ZLIB */
499
500 PHP_HTTP_API zend_bool _http_encoding_response_start(size_t content_length TSRMLS_DC)
501 {
502 if ( php_ob_handler_used("ob_gzhandler" TSRMLS_CC) ||
503 php_ob_handler_used("zlib output compression" TSRMLS_CC)) {
504 HTTP_G(send).gzip_encoding = 0;
505 } else {
506 if (!HTTP_G(send).gzip_encoding) {
507 /* emit a content-length header */
508 if (content_length) {
509 char cl_header_str[128];
510 size_t cl_header_len;
511 cl_header_len = snprintf(cl_header_str, lenof(cl_header_str), "Content-Length: %zu", content_length);
512 http_send_header_string_ex(cl_header_str, cl_header_len, 1);
513 }
514 } else {
515 #ifndef HTTP_HAVE_ZLIB
516 HTTP_G(send).gzip_encoding = 0;
517 php_start_ob_buffer_named("ob_gzhandler", 0, 0 TSRMLS_CC);
518 #else
519 HashTable *selected;
520 zval zsupported;
521
522 INIT_PZVAL(&zsupported);
523 array_init(&zsupported);
524 add_next_index_stringl(&zsupported, "gzip", lenof("gzip"), 1);
525 add_next_index_stringl(&zsupported, "x-gzip", lenof("x-gzip"), 1);
526 add_next_index_stringl(&zsupported, "deflate", lenof("deflate"), 1);
527
528 HTTP_G(send).gzip_encoding = 0;
529
530 if ((selected = http_negotiate_encoding(&zsupported))) {
531 STATUS hs = FAILURE;
532 char *encoding = NULL;
533 ulong idx;
534
535 if (HASH_KEY_IS_STRING == zend_hash_get_current_key(selected, &encoding, &idx, 0) && encoding) {
536 if (!strcmp(encoding, "gzip") || !strcmp(encoding, "x-gzip")) {
537 if (SUCCESS == (hs = http_send_header_string("Content-Encoding: gzip"))) {
538 HTTP_G(send).gzip_encoding = HTTP_ENCODING_GZIP;
539 }
540 } else if (!strcmp(encoding, "deflate")) {
541 if (SUCCESS == (hs = http_send_header_string("Content-Encoding: deflate"))) {
542 HTTP_G(send).gzip_encoding = HTTP_ENCODING_DEFLATE;
543 }
544 }
545 if (SUCCESS == hs) {
546 http_send_header_string("Vary: Accept-Encoding");
547 }
548 }
549
550 zend_hash_destroy(selected);
551 FREE_HASHTABLE(selected);
552 }
553
554 zval_dtor(&zsupported);
555 return HTTP_G(send).gzip_encoding;
556 #endif
557 }
558 }
559 return 0;
560 }
561
562 /*
563 * Local variables:
564 * tab-width: 4
565 * c-basic-offset: 4
566 * End:
567 * vim600: noet sw=4 ts=4 fdm=marker
568 * vim<600: noet sw=4 ts=4
569 */
570