- clean up
[m6w6/ext-http] / http_encoding_api.c
1 /*
2 +--------------------------------------------------------------------+
3 | PECL :: http |
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2005, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
11 */
12
13 /* $Id$ */
14
15 #ifdef HAVE_CONFIG_H
16 # include "config.h"
17 #endif
18 #include "php_http.h"
19
20 #include "php_http_api.h"
21 #include "php_http_encoding_api.h"
22 #include "php_http_send_api.h"
23 #include "php_http_headers_api.h"
24
25 ZEND_EXTERN_MODULE_GLOBALS(http);
26
/*
 * Skip any run of SP (0x20) characters starting at *line and check whether
 * the position reached is exactly the one http_locate_eol() reports for
 * *line (eol_len is handed through to http_locate_eol()).
 *
 * Returns 1 and moves *line to that position on a match; returns 0 and
 * leaves *line untouched otherwise.
 */
static inline int eol_match(char **line, int *eol_len)
{
	char *cursor;

	for (cursor = *line; 0x20 == *cursor; ++cursor) {
		/* nothing to do, just step over the blank */
	}

	if (http_locate_eol(*line, eol_len) != cursor) {
		return 0;
	}

	*line = cursor;
	return 1;
}
40
41 /* {{{ char *http_encoding_dechunk(char *, size_t, char **, size_t *) */
42 PHP_HTTP_API const char *_http_encoding_dechunk(const char *encoded, size_t encoded_len, char **decoded, size_t *decoded_len TSRMLS_DC)
43 {
44 int eol_len = 0;
45 char *n_ptr = NULL;
46 const char *e_ptr = encoded;
47
48 *decoded_len = 0;
49 *decoded = ecalloc(1, encoded_len);
50
51 while ((encoded + encoded_len - e_ptr) > 0) {
52 ulong chunk_len = 0, rest;
53
54 chunk_len = strtoul(e_ptr, &n_ptr, 16);
55
56 /* we could not read in chunk size */
57 if (n_ptr == e_ptr) {
58 /*
59 * if this is the first turn and there doesn't seem to be a chunk
60 * size at the begining of the body, do not fail on apparently
61 * not encoded data and return a copy
62 */
63 if (e_ptr == encoded) {
64 http_error(HE_NOTICE, HTTP_E_ENCODING, "Data does not seem to be chunked encoded");
65 memcpy(*decoded, encoded, encoded_len);
66 *decoded_len = encoded_len;
67 return encoded + encoded_len;
68 } else {
69 efree(*decoded);
70 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected chunk size at pos %tu of %zu but got trash", n_ptr - encoded, encoded_len);
71 return NULL;
72 }
73 }
74
75 /* reached the end */
76 if (!chunk_len) {
77 /* move over '0' chunked encoding terminator */
78 while (*e_ptr == '0') ++e_ptr;
79 break;
80 }
81
82 /* there should be CRLF after the chunk size, but we'll ignore SP+ too */
83 if (*n_ptr && !eol_match(&n_ptr, &eol_len)) {
84 if (eol_len == 2) {
85 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected CRLF at pos %tu of %zu but got 0x%02X 0x%02X", n_ptr - encoded, encoded_len, *n_ptr, *(n_ptr + 1));
86 } else {
87 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected LF at pos %tu of %zu but got 0x%02X", n_ptr - encoded, encoded_len, *n_ptr);
88 }
89 }
90 n_ptr += eol_len;
91
92 /* chunk size pretends more data than we actually got, so it's probably a truncated message */
93 if (chunk_len > (rest = encoded + encoded_len - n_ptr)) {
94 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Truncated message: chunk size %lu exceeds remaining data size %lu at pos %tu of %zu", chunk_len, rest, n_ptr - encoded, encoded_len);
95 chunk_len = rest;
96 }
97
98 /* copy the chunk */
99 memcpy(*decoded + *decoded_len, n_ptr, chunk_len);
100 *decoded_len += chunk_len;
101
102 if (chunk_len == rest) {
103 e_ptr = n_ptr + chunk_len;
104 break;
105 } else {
106 /* advance to next chunk */
107 e_ptr = n_ptr + chunk_len + eol_len;
108 }
109 }
110
111 return e_ptr;
112 }
113 /* }}} */
114
115 #ifdef HTTP_HAVE_ZLIB
116
117 static const char http_encoding_gzip_header[] = {
118 (const char) 0x1f, // fixed value
119 (const char) 0x8b, // fixed value
120 (const char) Z_DEFLATED, // compression algorithm
121 (const char) 0, // none of the possible flags defined by the GZIP "RFC"
122 (const char) 0, // MTIME
123 (const char) 0, // =*=
124 (const char) 0, // =*=
125 (const char) 0, // =*=
126 (const char) 0, // two possible flag values for 9 compression levels? o_O
127 #ifdef PHP_WIN32
128 (const char) 0x0b // OS_CODE
129 #else
130 (const char) 0x03 // OS_CODE
131 #endif
132 };
133
134 PHP_HTTP_API STATUS _http_encoding_gzencode(int level, int mtime, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
135 {
136 z_stream Z;
137 STATUS status = Z_OK;
138
139 if (!(data && data_len)) {
140 return FAILURE;
141 }
142
143 Z.zalloc = Z_NULL;
144 Z.zfree = Z_NULL;
145 Z.opaque = Z_NULL;
146 Z.next_in = (Bytef *) data;
147 Z.avail_in = data_len;
148 Z.avail_out = HTTP_ENCODING_BUFLEN(data_len) + HTTP_ENCODING_SAFPAD - 1;
149
150 *encoded = emalloc(HTTP_ENCODING_BUFLEN(data_len) + sizeof(http_encoding_gzip_header) + HTTP_ENCODING_SAFPAD);
151 memcpy(*encoded, http_encoding_gzip_header, sizeof(http_encoding_gzip_header));
152
153 if (mtime) {
154 (*encoded)[4] = (char) (mtime & 0xFF);
155 (*encoded)[5] = (char) ((mtime >> 8) & 0xFF);
156 (*encoded)[6] = (char) ((mtime >> 16) & 0xFF);
157 (*encoded)[7] = (char) ((mtime >> 24) & 0xFF);
158 }
159
160 Z.next_out = (Bytef *) *encoded + sizeof(http_encoding_gzip_header);
161
162 if (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
163 status = deflate(&Z, Z_FINISH);
164 deflateEnd(&Z);
165
166 if (Z_STREAM_END == status) {
167 ulong crc;
168 char *trailer;
169
170 crc = crc32(0L, Z_NULL, 0);
171 crc = crc32(crc, (const Bytef *) data, data_len);
172
173 trailer = *encoded + sizeof(http_encoding_gzip_header) + Z.total_out;
174
175 /* LSB */
176 trailer[0] = (char) (crc & 0xFF);
177 trailer[1] = (char) ((crc >> 8) & 0xFF);
178 trailer[2] = (char) ((crc >> 16) & 0xFF);
179 trailer[3] = (char) ((crc >> 24) & 0xFF);
180 trailer[4] = (char) ((Z.total_in) & 0xFF);
181 trailer[5] = (char) ((Z.total_in >> 8) & 0xFF);
182 trailer[6] = (char) ((Z.total_in >> 16) & 0xFF);
183 trailer[7] = (char) ((Z.total_in >> 24) & 0xFF);
184
185 *encoded_len = Z.total_out + sizeof(http_encoding_gzip_header) + 8;
186 (*encoded)[*encoded_len] = '\0';
187 return SUCCESS;
188 }
189 }
190
191 efree(*encoded);
192 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not gzencode data: %s", zError(status));
193 return FAILURE;
194 }
195
196 PHP_HTTP_API STATUS _http_encoding_gzdecode(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
197 {
198 const char *encoded;
199 size_t encoded_len;
200
201 if ( (data && data_len) &&
202 (SUCCESS == http_encoding_gzencode_verify(data, data_len, &encoded, &encoded_len)) &&
203 (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len))) {
204 http_encoding_gzdecode_verify(data, data_len, *decoded, *decoded_len);
205 return SUCCESS;
206 }
207
208 return FAILURE;
209 }
210
211 PHP_HTTP_API STATUS _http_encoding_deflate(int level, int zhdr, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
212 {
213 z_stream Z;
214 STATUS status = Z_OK;
215
216 Z.zalloc = Z_NULL;
217 Z.zfree = Z_NULL;
218 Z.opaque = Z_NULL;
219 Z.data_type = Z_UNKNOWN;
220 Z.next_in = (Bytef *) data;
221 Z.avail_in = data_len;
222 Z.avail_out = HTTP_ENCODING_BUFLEN(data_len) - 1;
223 Z.next_out = emalloc(HTTP_ENCODING_BUFLEN(data_len));
224
225 *encoded = (char *) Z.next_out;
226
227 if (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, zhdr ? MAX_WBITS : -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
228 status = deflate(&Z, Z_FINISH);
229 deflateEnd(&Z);
230
231 if (Z_STREAM_END == status) {
232 (*encoded)[*encoded_len = Z.total_out] = '\0';
233 return SUCCESS;
234 }
235 }
236
237 efree(encoded);
238 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not deflate data: %s", zError(status));
239 return FAILURE;
240 }
241
242 PHP_HTTP_API STATUS _http_encoding_inflate(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
243 {
244 int max = 0, wbits = -MAX_WBITS;
245 STATUS status;
246 z_stream Z;
247
248 do {
249 Z.zalloc = Z_NULL;
250 Z.zfree = Z_NULL;
251
252 if (!max) {
253 *decoded_len = data_len * 2;
254 *decoded = emalloc(*decoded_len + 1);
255 } else {
256 size_t new_len = *decoded_len << 2;
257 char *new_ptr = erealloc_recoverable(*decoded, new_len + 1);
258
259 if (new_ptr) {
260 *decoded = new_ptr;
261 *decoded_len = new_len;
262 } else {
263 max = INT_MAX-1; /* avoid integer overflow on increment op */
264 }
265 }
266
267 retry_inflate:
268 Z.next_in = (Bytef *) data;
269 Z.avail_in = data_len;
270 Z.next_out = (Bytef *) *decoded;
271 Z.avail_out = *decoded_len;
272
273 if (Z_OK == (status = inflateInit2(&Z, wbits))) {
274 status = inflate(&Z, Z_FINISH);
275 inflateEnd(&Z);
276
277 /* retry if it looks like we've got a zlib header */
278 if (wbits == -MAX_WBITS && status == Z_DATA_ERROR) {
279 wbits = MAX_WBITS;
280 goto retry_inflate;
281 }
282
283 if (Z_STREAM_END == status) {
284 (*decoded)[*decoded_len = Z.total_out] = '\0';
285 return SUCCESS;
286 }
287 }
288 } while (status == Z_BUF_ERROR && ++max < HTTP_ENCODING_MAXTRY);
289
290 efree(*decoded);
291 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not inflate data: %s", zError(status));
292 return FAILURE;
293 }
294
295 PHP_HTTP_API STATUS _http_encoding_gzencode_verify(const char *data, size_t data_len, const char **encoded, size_t *encoded_len, int error_level TSRMLS_DC)
296 {
297 size_t offset = sizeof(http_encoding_gzip_header);
298
299 if (data_len < offset) {
300 goto really_bad_gzip_header;
301 }
302
303 if (data[0] != (const char) 0x1F || data[1] != (const char) 0x8B) {
304 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized GZIP header start: 0x%02X 0x%02X", (int) data[0], (int) (data[1] & 0xFF));
305 return FAILURE;
306 }
307
308 if (data[2] != (const char) Z_DEFLATED) {
309 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized compression format (%d)", (int) (data[2] & 0xFF));
310 /* still try to decode */
311 }
312 if ((data[3] & 0x4) == 0x4) {
313 if (data_len < offset + 2) {
314 goto really_bad_gzip_header;
315 }
316 /* there are extra fields, the length follows the common header as 2 bytes LSB */
317 offset += (unsigned) ((data[offset] & 0xFF));
318 offset += 1;
319 offset += (unsigned) ((data[offset] & 0xFF) << 8);
320 offset += 1;
321 }
322 if ((data[3] & 0x8) == 0x8) {
323 if (data_len <= offset) {
324 goto really_bad_gzip_header;
325 }
326 /* there's a file name */
327 offset += strlen(&data[offset]) + 1 /*NUL*/;
328 }
329 if ((data[3] & 0x10) == 0x10) {
330 if (data_len <= offset) {
331 goto really_bad_gzip_header;
332 }
333 /* there's a comment */
334 offset += strlen(&data[offset]) + 1 /* NUL */;
335 }
336 if ((data[3] & 0x2) == 0x2) {
337 /* there's a CRC16 of the header */
338 offset += 2;
339 if (data_len <= offset) {
340 goto really_bad_gzip_header;
341 } else {
342 ulong crc, cmp;
343
344 cmp = (unsigned) ((data[offset-2] & 0xFF));
345 cmp += (unsigned) ((data[offset-1] & 0xFF) << 8);
346
347 crc = crc32(0L, Z_NULL, 0);
348 crc = crc32(crc, (const Bytef *) data, sizeof(http_encoding_gzip_header));
349
350 if (cmp != (crc & 0xFFFF)) {
351 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "GZIP headers CRC checksums so not match (%lu, %lu)", cmp, crc & 0xFFFF);
352 return FAILURE;
353 }
354 }
355 }
356
357 if (data_len < offset + 8) {
358 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP footer");
359 return FAILURE;
360 }
361
362 if (encoded) {
363 *encoded = data + offset;
364 }
365 if (encoded_len) {
366 *encoded_len = data_len - offset - 8 /* size of the assumed GZIP footer */;
367 }
368
369 return SUCCESS;
370
371 really_bad_gzip_header:
372 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP header");
373 return FAILURE;
374 }
375
376 PHP_HTTP_API STATUS _http_encoding_gzdecode_verify(const char *data, size_t data_len, const char *decoded, size_t decoded_len, int error_level TSRMLS_DC)
377 {
378 STATUS status = SUCCESS;
379 ulong len, cmp, crc;
380
381 crc = crc32(0L, Z_NULL, 0);
382 crc = crc32(crc, (const Bytef *) decoded, decoded_len);
383
384 cmp = (unsigned) ((data[data_len-8] & 0xFF));
385 cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8);
386 cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16);
387 cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24);
388 len = (unsigned) ((data[data_len-4] & 0xFF));
389 len += (unsigned) ((data[data_len-3] & 0xFF) << 8);
390 len += (unsigned) ((data[data_len-2] & 0xFF) << 16);
391 len += (unsigned) ((data[data_len-1] & 0xFF) << 24);
392
393 if (cmp != crc) {
394 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc);
395 status = FAILURE;
396 }
397 if (len != decoded_len) {
398 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, decoded_len);
399 status = FAILURE;
400 }
401 return status;
402 }
403
/*
 * Error exit for the stream functions: frees `tofree` unless it is NULL,
 * raises a warning describing the zlib `status` and makes the calling
 * function return FAILURE.  Both arguments may be evaluated more than once.
 * Wrapped in do { } while (0) so that it behaves like a single statement and
 * has to be terminated with a semicolon.
 */
#define HTTP_ENCODING_STREAM_ERROR(status, tofree) \
	do { \
		if (tofree) efree(tofree); \
		http_error_ex(HE_WARNING, HTTP_E_ENCODING, "GZIP stream error: %s", zError(status)); \
		return FAILURE; \
	} while (0)
410
411 PHP_HTTP_API STATUS _http_encoding_stream_init(http_encoding_stream *s, int gzip, int level, char **encoded, size_t *encoded_len TSRMLS_DC)
412 {
413 STATUS status;
414
415 memset(s, 0, sizeof(http_encoding_stream));
416 if (Z_OK != (status = deflateInit2(&s->Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) {
417 HTTP_ENCODING_STREAM_ERROR(status, NULL);
418 }
419
420 if ((s->gzip = gzip)) {
421 s->crc = crc32(0L, Z_NULL, 0);
422 *encoded_len = sizeof(http_encoding_gzip_header);
423 *encoded = emalloc(*encoded_len);
424 memcpy(*encoded, http_encoding_gzip_header, *encoded_len);
425 } else {
426 *encoded_len = 0;
427 *encoded = NULL;
428 }
429
430 return SUCCESS;
431 }
432
433 PHP_HTTP_API STATUS _http_encoding_stream_update(http_encoding_stream *s, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
434 {
435 STATUS status;
436
437 *encoded_len = HTTP_ENCODING_BUFLEN(data_len);
438 *encoded = emalloc(*encoded_len);
439
440 s->Z.next_in = (Bytef *) data;
441 s->Z.avail_in = data_len;
442 s->Z.next_out = (Bytef *) *encoded;
443 s->Z.avail_out = *encoded_len;
444
445 status = deflate(&s->Z, Z_SYNC_FLUSH);
446
447 if (Z_OK != status && Z_STREAM_END != status) {
448 HTTP_ENCODING_STREAM_ERROR(status, *encoded);
449 }
450 *encoded_len -= s->Z.avail_out;
451
452 if (s->gzip) {
453 s->crc = crc32(s->crc, (const Bytef *) data, data_len);
454 }
455
456 return SUCCESS;
457 }
458
459 PHP_HTTP_API STATUS _http_encoding_stream_finish(http_encoding_stream *s, char **encoded, size_t *encoded_len TSRMLS_DC)
460 {
461 STATUS status;
462
463 *encoded_len = 1024;
464 *encoded = emalloc(*encoded_len);
465
466 s->Z.next_out = (Bytef *) *encoded;
467 s->Z.avail_out = *encoded_len;
468
469 if (Z_STREAM_END != (status = deflate(&s->Z, Z_FINISH)) || Z_OK != (status = deflateEnd(&s->Z))) {
470 HTTP_ENCODING_STREAM_ERROR(status, *encoded);
471 }
472
473 *encoded_len -= s->Z.avail_out;
474 if (s->gzip) {
475 if (s->Z.avail_out < 8) {
476 *encoded = erealloc(*encoded, *encoded_len + 8);
477 }
478 (*encoded)[(*encoded_len)++] = (char) (s->crc & 0xFF);
479 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 8) & 0xFF);
480 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 16) & 0xFF);
481 (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 24) & 0xFF);
482 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in) & 0xFF);
483 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 8) & 0xFF);
484 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 16) & 0xFF);
485 (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 24) & 0xFF);
486 }
487
488 return SUCCESS;
489 }
490
491 #endif /* HTTP_HAVE_ZLIB */
492
493 PHP_HTTP_API zend_bool _http_encoding_response_start(size_t content_length TSRMLS_DC)
494 {
495 if ( php_ob_handler_used("ob_gzhandler" TSRMLS_CC) ||
496 php_ob_handler_used("zlib output compression" TSRMLS_CC)) {
497 HTTP_G(send).gzip_encoding = 0;
498 } else {
499 if (!HTTP_G(send).gzip_encoding) {
500 /* emit a content-length header */
501 if (content_length) {
502 char cl_header_str[128];
503 size_t cl_header_len;
504 cl_header_len = snprintf(cl_header_str, lenof(cl_header_str), "Content-Length: %zu", content_length);
505 http_send_header_string_ex(cl_header_str, cl_header_len, 1);
506 }
507 } else {
508 #ifndef HTTP_HAVE_ZLIB
509 HTTP_G(send).gzip_encoding = 0;
510 php_start_ob_buffer_named("ob_gzhandler", 0, 0 TSRMLS_CC);
511 #else
512 HashTable *selected;
513 zval zsupported;
514
515 INIT_PZVAL(&zsupported);
516 array_init(&zsupported);
517 add_next_index_stringl(&zsupported, "gzip", lenof("gzip"), 1);
518 add_next_index_stringl(&zsupported, "deflate", lenof("deflate"), 1);
519
520 HTTP_G(send).gzip_encoding = 0;
521
522 if ((selected = http_negotiate_encoding(&zsupported))) {
523 STATUS hs = FAILURE;
524 char *encoding = NULL;
525 ulong idx;
526
527 if (HASH_KEY_IS_STRING == zend_hash_get_current_key(selected, &encoding, &idx, 0) && encoding) {
528 if (!strcmp(encoding, "gzip")) {
529 if (SUCCESS == (hs = http_send_header_string("Content-Encoding: gzip"))) {
530 HTTP_G(send).gzip_encoding = HTTP_ENCODING_GZIP;
531 }
532 } else if (!strcmp(encoding, "deflate")) {
533 if (SUCCESS == (hs = http_send_header_string("Content-Encoding: deflate"))) {
534 HTTP_G(send).gzip_encoding = HTTP_ENCODING_DEFLATE;
535 }
536 }
537 if (SUCCESS == hs) {
538 http_send_header_string("Vary: Accept-Encoding");
539 }
540 }
541
542 zend_hash_destroy(selected);
543 FREE_HASHTABLE(selected);
544 }
545
546 zval_dtor(&zsupported);
547 return HTTP_G(send).gzip_encoding;
548 #endif
549 }
550 }
551 return 0;
552 }
553
554 /*
555 * Local variables:
556 * tab-width: 4
557 * c-basic-offset: 4
558 * End:
559 * vim600: noet sw=4 ts=4 fdm=marker
560 * vim<600: noet sw=4 ts=4
561 */
562