- allow negative time offsets
[m6w6/ext-http] / http_encoding_api.c
1 /*
2 +----------------------------------------------------------------------+
3 | PECL :: http |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.0 of the PHP license, that |
6 | is bundled with this package in the file LICENSE, and is available |
7 | through the world-wide-web at http://www.php.net/license/3_0.txt. |
8 | If you did not receive a copy of the PHP license and are unable to |
9 | obtain it through the world-wide-web, please send a note to |
10 | license@php.net so we can mail you a copy immediately. |
11 +----------------------------------------------------------------------+
12 | Copyright (c) 2004-2005 Michael Wallner <mike@php.net> |
13 +----------------------------------------------------------------------+
14 */
15
16 /* $Id$ */
17
18 #ifdef HAVE_CONFIG_H
19 # include "config.h"
20 #endif
21 #include "php.h"
22
23 #include "php_http_encoding_api.h"
24 #include "php_http.h"
25 #include "php_http_api.h"
26
27 ZEND_EXTERN_MODULE_GLOBALS(http);
28
29 /* {{{ char *http_encoding_dechunk(char *, size_t, char **, size_t *) */
30 PHP_HTTP_API const char *_http_encoding_dechunk(const char *encoded, size_t encoded_len, char **decoded, size_t *decoded_len TSRMLS_DC)
31 {
32 const char *e_ptr;
33 char *d_ptr;
34 long rest;
35
36 *decoded_len = 0;
37 *decoded = ecalloc(1, encoded_len);
38 d_ptr = *decoded;
39 e_ptr = encoded;
40
41 while ((rest = encoded + encoded_len - e_ptr) > 0) {
42 long chunk_len = 0;
43 int EOL_len = 0, eol_mismatch = 0;
44 char *n_ptr;
45
46 chunk_len = strtol(e_ptr, &n_ptr, 16);
47
48 /* check if:
49 * - we could not read in chunk size
50 * - we got a negative chunk size
51 * - chunk size is greater then remaining size
52 * - chunk size is not followed by (CR)LF|NUL
53 */
54 if ( (n_ptr == e_ptr) || (chunk_len < 0) || (chunk_len > rest) ||
55 (*n_ptr && (eol_mismatch = (n_ptr != http_locate_eol(e_ptr, &EOL_len))))) {
56 /* don't fail on apperently not encoded data */
57 if (e_ptr == encoded) {
58 memcpy(*decoded, encoded, encoded_len);
59 *decoded_len = encoded_len;
60 return encoded + encoded_len;
61 } else {
62 efree(*decoded);
63 if (eol_mismatch) {
64 if (EOL_len == 2) {
65 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Invalid character (expected 0x0D 0x0A; got: 0x%X 0x%X)", *n_ptr, *(n_ptr + 1));
66 } else {
67 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Invalid character (expected 0x0A; got: 0x%X)", *n_ptr);
68 }
69 } else {
70 char *error = estrndup(n_ptr, strcspn(n_ptr, "\r\n "));
71 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Invalid chunk size: '%s' at pos %d", error, n_ptr - encoded);
72 efree(error);
73 }
74 return NULL;
75 }
76 } else {
77 e_ptr = n_ptr;
78 }
79
80 /* reached the end */
81 if (!chunk_len) {
82 break;
83 }
84
85 memcpy(d_ptr, e_ptr += EOL_len, chunk_len);
86 d_ptr += chunk_len;
87 e_ptr += chunk_len + EOL_len;
88 *decoded_len += chunk_len;
89 }
90
91 return e_ptr;
92 }
93 /* }}} */
94
95 #ifdef HTTP_HAVE_ZLIB
96 #include <zlib.h>
97
/* max count of uncompress trials, alloc_size <<= 2 for each try */
#define HTTP_GZMAXTRY 10
/* safe padding */
#define HTTP_GZSAFPAD 10
/*
 * add 1% extra space in case we need to encode widely differing (binary) data;
 * the argument is parenthesized so that expressions like (a + b) expand
 * correctly, but note that it is still evaluated twice
 */
#define HTTP_GZBUFLEN(l) ((l) + ((l) / 100) + HTTP_GZSAFPAD)
104
105 static const char http_gzencode_header[] = {
106 (const char) 0x1f, // fixed value
107 (const char) 0x8b, // fixed value
108 (const char) Z_DEFLATED, // compression algorithm
109 (const char) 0, // none of the possible flags defined by the GZIP "RFC"
110 (const char) 0, // no MTIME available (4 bytes)
111 (const char) 0, // =*=
112 (const char) 0, // =*=
113 (const char) 0, // =*=
114 (const char) 0, // two possible flag values for 9 compression levels? o_O
115 (const char) 0x03 // assume *nix OS
116 };
117
118 inline void http_init_gzencode_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr)
119 {
120 Z->zalloc = Z_NULL;
121 Z->zfree = Z_NULL;
122 Z->opaque = Z_NULL;
123
124 Z->next_in = (Bytef *) data;
125 Z->avail_in = data_len;
126 Z->avail_out = HTTP_GZBUFLEN(data_len) + HTTP_GZSAFPAD - 1;
127
128 *buf_ptr = emalloc(HTTP_GZBUFLEN(data_len) + sizeof(http_gzencode_header) + HTTP_GZSAFPAD);
129 memcpy(*buf_ptr, http_gzencode_header, sizeof(http_gzencode_header));
130
131 Z->next_out = *buf_ptr + sizeof(http_gzencode_header);
132 }
133
134 inline void http_init_deflate_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr)
135 {
136 Z->zalloc = Z_NULL;
137 Z->zfree = Z_NULL;
138 Z->opaque = Z_NULL;
139
140 Z->data_type = Z_UNKNOWN;
141 Z->next_in = (Bytef *) data;
142 Z->avail_in = data_len;
143 Z->avail_out = HTTP_GZBUFLEN(data_len) - 1;
144 Z->next_out = emalloc(HTTP_GZBUFLEN(data_len));
145
146 *buf_ptr = Z->next_out;
147 }
148
/*
 * Allocate (first iteration) or enlarge (any later iteration) the buffer
 * used for decoding.
 *
 * The first buffer is twice as large as the encoded data; each further
 * iteration quadruples *buf_len. One byte more than *buf_len is allocated
 * to leave room for a terminating NUL.
 *
 * Declared static because a plain "inline" definition does not provide an
 * external definition in C99 and later, which breaks linking whenever a call
 * is not actually inlined.
 */
static inline void http_init_uncompress_buffer(size_t data_len, char **buf_ptr, size_t *buf_len, int iteration)
{
	if (!iteration) {
		*buf_len = data_len * 2;
		*buf_ptr = emalloc(*buf_len + 1);
	} else {
		*buf_len <<= 2;
		*buf_ptr = erealloc(*buf_ptr, *buf_len + 1);
	}
}
159
160 inline void http_init_inflate_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr, size_t *buf_len, int iteration)
161 {
162 Z->zalloc = Z_NULL;
163 Z->zfree = Z_NULL;
164
165 http_init_uncompress_buffer(data_len, buf_ptr, buf_len, iteration);
166
167 Z->next_in = (Bytef *) data;
168 Z->avail_in = data_len;
169 Z->avail_out = *buf_len;
170 Z->next_out = *buf_ptr;
171 }
172
/*
 * NUL terminate the buffer at offset buf_len and return buf_len.
 * The buffer has to provide room for at least buf_len + 1 bytes.
 *
 * Declared static because a plain "inline" definition does not provide an
 * external definition in C99 and later, which breaks linking whenever a call
 * is not actually inlined.
 */
static inline size_t http_finish_buffer(size_t buf_len, char **buf_ptr)
{
	(*buf_ptr)[buf_len] = '\0';
	return buf_len;
}
178
179 inline size_t http_finish_gzencode_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr)
180 {
181 unsigned long crc;
182 char *trailer;
183
184 crc = crc32(0L, Z_NULL, 0);
185 crc = crc32(crc, (const Bytef *) data, data_len);
186
187 trailer = *buf_ptr + sizeof(http_gzencode_header) + Z->total_out;
188
189 /* LSB */
190 trailer[0] = (char) (crc & 0xFF);
191 trailer[1] = (char) ((crc >> 8) & 0xFF);
192 trailer[2] = (char) ((crc >> 16) & 0xFF);
193 trailer[3] = (char) ((crc >> 24) & 0xFF);
194 trailer[4] = (char) ((Z->total_in) & 0xFF);
195 trailer[5] = (char) ((Z->total_in >> 8) & 0xFF);
196 trailer[6] = (char) ((Z->total_in >> 16) & 0xFF);
197 trailer[7] = (char) ((Z->total_in >> 24) & 0xFF);
198
199 return http_finish_buffer(Z->total_out + sizeof(http_gzencode_header) + 8, buf_ptr);
200 }
201
202 inline STATUS http_verify_gzencode_buffer(const char *data, size_t data_len, const char **encoded, size_t *encoded_len, int error_level TSRMLS_DC)
203 {
204 size_t offset = sizeof(http_gzencode_header);
205
206 if (data_len < offset) {
207 goto really_bad_gzip_header;
208 }
209
210 if (data[0] != (const char) 0x1F || data[1] != (const char) 0x8B) {
211 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized GZIP header start: 0x%02X 0x%02X", (int) data[0], (int) (data[1] & 0xFF));
212 return FAILURE;
213 }
214
215 if (data[2] != (const char) Z_DEFLATED) {
216 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized compression format (%d)", (int) (data[2] & 0xFF));
217 /* still try to decode */
218 }
219 if ((data[3] & 0x4) == 0x4) {
220 if (data_len < offset + 2) {
221 goto really_bad_gzip_header;
222 }
223 /* there are extra fields, the length follows the common header as 2 bytes LSB */
224 offset += (unsigned) ((data[offset] & 0xFF));
225 offset += 1;
226 offset += (unsigned) ((data[offset] & 0xFF) << 8);
227 offset += 1;
228 }
229 if ((data[3] & 0x8) == 0x8) {
230 if (data_len <= offset) {
231 goto really_bad_gzip_header;
232 }
233 /* there's a file name */
234 offset += strlen(&data[offset]) + 1 /*NUL*/;
235 }
236 if ((data[3] & 0x10) == 0x10) {
237 if (data_len <= offset) {
238 goto really_bad_gzip_header;
239 }
240 /* there's a comment */
241 offset += strlen(&data[offset]) + 1 /* NUL */;
242 }
243 if ((data[3] & 0x2) == 0x2) {
244 /* there's a CRC16 of the header */
245 offset += 2;
246 if (data_len <= offset) {
247 goto really_bad_gzip_header;
248 } else {
249 unsigned long crc, cmp;
250
251 cmp = (unsigned) ((data[offset-2] & 0xFF));
252 cmp += (unsigned) ((data[offset-1] & 0xFF) << 8);
253
254 crc = crc32(0L, Z_NULL, 0);
255 crc = crc32(crc, data, sizeof(http_gzencode_header));
256
257 if (cmp != (crc & 0xFFFF)) {
258 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "GZIP headers CRC checksums so not match (%lu, %lu)", cmp, crc & 0xFFFF);
259 return FAILURE;
260 }
261 }
262 }
263
264 if (data_len < offset + 8) {
265 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP footer");
266 return FAILURE;
267 }
268
269 if (encoded) {
270 *encoded = data + offset;
271 }
272 if (encoded_len) {
273 *encoded_len = data_len - offset - 8 /* size of the assumed GZIP footer */;
274 }
275
276 return SUCCESS;
277
278 really_bad_gzip_header:
279 http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP header");
280 return FAILURE;
281 }
282
283 inline STATUS http_verify_gzdecode_buffer(const char *data, size_t data_len, const char *decoded, size_t decoded_len, int error_level TSRMLS_DC)
284 {
285 STATUS status = SUCCESS;
286 unsigned long len, cmp, crc;
287
288 crc = crc32(0L, Z_NULL, 0);
289 crc = crc32(crc, (const Bytef *) decoded, decoded_len);
290
291 cmp = (unsigned) ((data[data_len-8] & 0xFF));
292 cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8);
293 cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16);
294 cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24);
295 len = (unsigned) ((data[data_len-4] & 0xFF));
296 len += (unsigned) ((data[data_len-3] & 0xFF) << 8);
297 len += (unsigned) ((data[data_len-2] & 0xFF) << 16);
298 len += (unsigned) ((data[data_len-1] & 0xFF) << 24);
299
300 if (cmp != crc) {
301 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc);
302 status = FAILURE;
303 }
304 if (len != decoded_len) {
305 http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, decoded_len);
306 status = FAILURE;
307 }
308 return status;
309 }
310
311 PHP_HTTP_API STATUS _http_encode(http_encoding_type type, int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
312 {
313 STATUS status = SUCCESS;
314
315 switch (type)
316 {
317 case HTTP_ENCODING_ANY:
318 case HTTP_ENCODING_GZIP:
319 status = http_encoding_gzencode(level, data, data_len, encoded, encoded_len);
320 break;
321
322 case HTTP_ENCODING_DEFLATE:
323 status = http_encoding_deflate(level, data, data_len, encoded, encoded_len);
324 break;
325
326 case HTTP_ENCODING_COMPRESS:
327 status = http_encoding_compress(level, data, data_len, encoded, encoded_len);
328 break;
329
330 case HTTP_ENCODING_NONE:
331 default:
332 *encoded = estrndup(data, data_len);
333 *encoded_len = data_len;
334 break;
335 }
336
337 return status;
338 }
339
340 PHP_HTTP_API STATUS _http_decode(http_encoding_type type, const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
341 {
342 STATUS status = SUCCESS;
343
344 switch (type)
345 {
346 case HTTP_ENCODING_ANY:
347 if ( SUCCESS != http_encoding_gzdecode(data, data_len, decoded, decoded_len) &&
348 SUCCESS != http_encoding_inflate(data, data_len, decoded, decoded_len) &&
349 SUCCESS != http_encoding_uncompress(data, data_len, decoded, decoded_len)) {
350 status = FAILURE;
351 }
352 break;
353
354 case HTTP_ENCODING_GZIP:
355 status = http_encoding_gzdecode(data, data_len, decoded, decoded_len);
356 break;
357
358 case HTTP_ENCODING_DEFLATE:
359 status = http_encoding_inflate(data, data_len, decoded, decoded_len);
360 break;
361
362 case HTTP_ENCODING_COMPRESS:
363 status = http_encoding_uncompress(data, data_len, decoded, decoded_len);
364 break;
365
366 case HTTP_ENCODING_NONE:
367 default:
368 *decoded = estrndup(data, data_len);
369 *decoded_len = data_len;
370 break;
371 }
372
373 return status;
374 }
375
376 PHP_HTTP_API STATUS _http_encoding_gzencode(int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
377 {
378 z_stream Z;
379 STATUS status = Z_OK;
380
381 http_init_gzencode_buffer(&Z, data, data_len, encoded);
382
383 if ( (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) &&
384 (Z_STREAM_END == (status = deflate(&Z, Z_FINISH))) &&
385 (Z_OK == (status = deflateEnd(&Z)))) {
386 *encoded_len = http_finish_gzencode_buffer(&Z, data, data_len, encoded);
387 return SUCCESS;
388 }
389
390 efree(*encoded);
391 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not gzencode data: %s", zError(status));
392 return FAILURE;
393 }
394
395 PHP_HTTP_API STATUS _http_encoding_deflate(int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
396 {
397 z_stream Z;
398 STATUS status = Z_OK;
399
400 http_init_deflate_buffer(&Z, data, data_len, encoded);
401
402 if ( (Z_OK == (status = deflateInit2(&Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) &&
403 (Z_STREAM_END == (status = deflate(&Z, Z_FINISH))) &&
404 (Z_OK == (status = deflateEnd(&Z)))) {
405 *encoded_len = http_finish_buffer(Z.total_out, encoded);
406 return SUCCESS;
407 }
408
409 efree(encoded);
410 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not deflate data: %s", zError(status));
411 return FAILURE;
412 }
413
414 PHP_HTTP_API STATUS _http_encoding_compress(int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
415 {
416 STATUS status;
417
418 *encoded = emalloc(*encoded_len = HTTP_GZBUFLEN(data_len));
419
420 if (Z_OK == (status = compress2(*encoded, encoded_len, data, data_len, level))) {
421 http_finish_buffer(*encoded_len, encoded);
422 return SUCCESS;
423 }
424
425 efree(encoded);
426 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not compress data: %s", zError(status));
427 return FAILURE;
428 }
429
430 PHP_HTTP_API STATUS _http_encoding_gzdecode(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
431 {
432 const char *encoded;
433 size_t encoded_len;
434
435 if ( (SUCCESS == http_verify_gzencode_buffer(data, data_len, &encoded, &encoded_len, HE_NOTICE)) &&
436 (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len))) {
437 http_verify_gzdecode_buffer(data, data_len, *decoded, *decoded_len, HE_NOTICE);
438 return SUCCESS;
439 }
440
441 return FAILURE;
442 }
443
444 PHP_HTTP_API STATUS _http_encoding_inflate(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
445 {
446 int max = 0;
447 STATUS status;
448 z_stream Z;
449
450 do {
451 http_init_inflate_buffer(&Z, data, data_len, decoded, decoded_len, max++);
452 if (Z_OK == (status = inflateInit2(&Z, -MAX_WBITS))) {
453 if (Z_STREAM_END == (status = inflate(&Z, Z_FINISH))) {
454 if (Z_OK == (status = inflateEnd(&Z))) {
455 *decoded_len = http_finish_buffer(Z.total_out, decoded);
456 return SUCCESS;
457 }
458 }
459 }
460 } while (max < HTTP_GZMAXTRY && status == Z_BUF_ERROR);
461
462 efree(*decoded);
463 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not inflate data: %s", zError(status));
464 return FAILURE;
465 }
466
467 PHP_HTTP_API STATUS _http_encoding_uncompress(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
468 {
469 int max = 0;
470 STATUS status;
471
472 do {
473 http_init_uncompress_buffer(data_len, decoded, decoded_len, max++);
474 if (Z_OK == (status = uncompress(*decoded, decoded_len, data, data_len))) {
475 http_finish_buffer(*decoded_len, decoded);
476 return SUCCESS;
477 }
478 } while (max < HTTP_GZMAXTRY && status == Z_BUF_ERROR);
479
480 efree(*decoded);
481 http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not uncompress data: %s", zError(status));
482 return FAILURE;
483 }
484
485 #endif /* HTTP_HAVE_ZLIB */
486
487 /*
488 * Local variables:
489 * tab-width: 4
490 * c-basic-offset: 4
491 * End:
492 * vim600: noet sw=4 ts=4 fdm=marker
493 * vim<600: noet sw=4 ts=4
494 */
495