2 +--------------------------------------------------------------------+
4 +--------------------------------------------------------------------+
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the conditions mentioned |
7 | in the accompanying LICENSE file are met. |
8 +--------------------------------------------------------------------+
9 | Copyright (c) 2004-2005, Michael Wallner <mike@php.net> |
10 +--------------------------------------------------------------------+
20 #include "php_http_encoding_api.h"
22 #include "php_http_api.h"
25 # include "php_http_send_api.h"
26 # include "php_http_headers_api.h"
30 ZEND_EXTERN_MODULE_GLOBALS(http
);
/*
 * Check whether the text at *line consists of optional blanks (0x20)
 * immediately followed by the position http_locate_eol() reports for *line.
 *
 * On a match *line is advanced past the blanks and 1 is returned; otherwise
 * *line is left untouched and 0 is returned.  EOL_len is handed through to
 * http_locate_eol() (presumably it receives the terminator length -- confirm
 * against that helper).
 */
static inline int eol_match(char **line, int *EOL_len)
{
	char *cursor = *line;

	while (*cursor == 0x20) {
		++cursor;
	}

	if (cursor != http_locate_eol(*line, EOL_len)) {
		return 0;
	}

	*line = cursor;
	return 1;
}
46 /* {{{ char *http_encoding_dechunk(char *, size_t, char **, size_t *) */
47 PHP_HTTP_API
const char *_http_encoding_dechunk(const char *encoded
, size_t encoded_len
, char **decoded
, size_t *decoded_len TSRMLS_DC
)
54 *decoded
= ecalloc(1, encoded_len
);
58 while ((rest
= encoded
+ encoded_len
- e_ptr
) > 0) {
60 int EOL_len
= 0, eol_mismatch
= 0;
63 chunk_len
= strtol(e_ptr
, &n_ptr
, 16);
66 * - we could not read in chunk size
67 * - we got a negative chunk size
68 * - chunk size is greater then remaining size
69 * - chunk size is not followed by (CR)LF|NUL
71 if ( (n_ptr
== e_ptr
) || (chunk_len
< 0) || (chunk_len
> rest
) ||
72 (*n_ptr
&& (eol_mismatch
= !eol_match(&n_ptr
, &EOL_len
)))) {
73 /* don't fail on apperently not encoded data */
74 if (e_ptr
== encoded
) {
75 memcpy(*decoded
, encoded
, encoded_len
);
76 *decoded_len
= encoded_len
;
77 return encoded
+ encoded_len
;
82 http_error_ex(HE_WARNING
, HTTP_E_ENCODING
, "Invalid character (expected 0x0D 0x0A; got: 0x%02X 0x%02X)", *n_ptr
, *(n_ptr
+ 1));
84 http_error_ex(HE_WARNING
, HTTP_E_ENCODING
, "Invalid character (expected 0x0A; got: 0x%02X)", *n_ptr
);
87 char *error
= estrndup(e_ptr
, strcspn(n_ptr
, "\r\n "));
88 http_error_ex(HE_WARNING
, HTTP_E_ENCODING
, "Invalid chunk size: '%s' at pos %ld of %lu", error
, (long) (n_ptr
- encoded
), (unsigned long) encoded_len
);
102 memcpy(d_ptr
, e_ptr
+= EOL_len
, chunk_len
);
104 e_ptr
+= chunk_len
+ EOL_len
;
105 *decoded_len
+= chunk_len
;
112 #ifdef HTTP_HAVE_ZLIB
114 static const char http_encoding_gzip_header
[] = {
115 (const char) 0x1f, // fixed value
116 (const char) 0x8b, // fixed value
117 (const char) Z_DEFLATED
, // compression algorithm
118 (const char) 0, // none of the possible flags defined by the GZIP "RFC"
119 (const char) 0, // no MTIME available (4 bytes)
120 (const char) 0, // =*=
121 (const char) 0, // =*=
122 (const char) 0, // =*=
123 (const char) 0, // two possible flag values for 9 compression levels? o_O
124 (const char) 0x03 // assume *nix OS
127 inline void http_init_gzencode_buffer(z_stream
*Z
, const char *data
, size_t data_len
, char **buf_ptr
)
133 Z
->next_in
= (Bytef
*) data
;
134 Z
->avail_in
= data_len
;
135 Z
->avail_out
= HTTP_ENCODING_BUFLEN(data_len
) + HTTP_ENCODING_SAFPAD
- 1;
137 *buf_ptr
= emalloc(HTTP_ENCODING_BUFLEN(data_len
) + sizeof(http_encoding_gzip_header
) + HTTP_ENCODING_SAFPAD
);
138 memcpy(*buf_ptr
, http_encoding_gzip_header
, sizeof(http_encoding_gzip_header
));
140 Z
->next_out
= (Bytef
*) *buf_ptr
+ sizeof(http_encoding_gzip_header
);
143 inline void http_init_deflate_buffer(z_stream
*Z
, const char *data
, size_t data_len
, char **buf_ptr
)
149 Z
->data_type
= Z_UNKNOWN
;
150 Z
->next_in
= (Bytef
*) data
;
151 Z
->avail_in
= data_len
;
152 Z
->avail_out
= HTTP_ENCODING_BUFLEN(data_len
) - 1;
153 Z
->next_out
= emalloc(HTTP_ENCODING_BUFLEN(data_len
));
155 *buf_ptr
= (char *) Z
->next_out
;
/*
 * (Re)allocate the output buffer used by the decompression retry loops.
 *
 * On the first attempt (*iteration == 0) a buffer of data_len * 2 (+1) bytes
 * is allocated.  On later attempts the current buffer is grown to four times
 * its size (+1); if that reallocation fails the old buffer is kept and
 * *iteration is pushed to INT_MAX-1 so the caller's `++iteration < limit`
 * test ends the loop.
 *
 * Declared `static inline` because a plain `inline` definition provides no
 * external definition under C99/C11.
 */
static inline void http_init_uncompress_buffer(size_t data_len, char **buf_ptr, size_t *buf_len, int *iteration)
{
	size_t new_len;
	char *new_ptr;

	if (!*iteration) {
		*buf_len = data_len * 2;
		*buf_ptr = emalloc(*buf_len + 1);
		return;
	}

	new_len = *buf_len << 2;
	new_ptr = erealloc_recoverable(*buf_ptr, new_len + 1);

	if (new_ptr) {
		*buf_ptr = new_ptr;
		*buf_len = new_len;
	} else {
		*iteration = INT_MAX-1; /* avoid integer overflow on increment op */
	}
}
176 inline void http_init_inflate_buffer(z_stream
*Z
, const char *data
, size_t data_len
, char **buf_ptr
, size_t *buf_len
, int *iteration
)
181 http_init_uncompress_buffer(data_len
, buf_ptr
, buf_len
, iteration
);
183 Z
->next_in
= (Bytef
*) data
;
184 Z
->avail_in
= data_len
;
185 Z
->avail_out
= *buf_len
;
186 Z
->next_out
= (Bytef
*) *buf_ptr
;
/*
 * NUL-terminate the buffer *buf_ptr at index buf_len and return buf_len.
 * The caller must have allocated at least buf_len + 1 bytes.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition under C99/C11, so a call that is not inlined would not link.
 */
static inline size_t http_finish_buffer(size_t buf_len, char **buf_ptr)
{
	(*buf_ptr)[buf_len] = '\0';
	return buf_len;
}
195 inline size_t http_finish_gzencode_buffer(z_stream
*Z
, const char *data
, size_t data_len
, char **buf_ptr
)
200 crc
= crc32(0L, Z_NULL
, 0);
201 crc
= crc32(crc
, (const Bytef
*) data
, data_len
);
203 trailer
= *buf_ptr
+ sizeof(http_encoding_gzip_header
) + Z
->total_out
;
206 trailer
[0] = (char) (crc
& 0xFF);
207 trailer
[1] = (char) ((crc
>> 8) & 0xFF);
208 trailer
[2] = (char) ((crc
>> 16) & 0xFF);
209 trailer
[3] = (char) ((crc
>> 24) & 0xFF);
210 trailer
[4] = (char) ((Z
->total_in
) & 0xFF);
211 trailer
[5] = (char) ((Z
->total_in
>> 8) & 0xFF);
212 trailer
[6] = (char) ((Z
->total_in
>> 16) & 0xFF);
213 trailer
[7] = (char) ((Z
->total_in
>> 24) & 0xFF);
215 return http_finish_buffer(Z
->total_out
+ sizeof(http_encoding_gzip_header
) + 8, buf_ptr
);
218 inline STATUS
http_verify_gzencode_buffer(const char *data
, size_t data_len
, const char **encoded
, size_t *encoded_len
, int error_level TSRMLS_DC
)
220 size_t offset
= sizeof(http_encoding_gzip_header
);
222 if (data_len
< offset
) {
223 goto really_bad_gzip_header
;
226 if (data
[0] != (const char) 0x1F || data
[1] != (const char) 0x8B) {
227 http_error_ex(error_level TSRMLS_CC
, HTTP_E_ENCODING
, "Unrecognized GZIP header start: 0x%02X 0x%02X", (int) data
[0], (int) (data
[1] & 0xFF));
231 if (data
[2] != (const char) Z_DEFLATED
) {
232 http_error_ex(error_level TSRMLS_CC
, HTTP_E_ENCODING
, "Unrecognized compression format (%d)", (int) (data
[2] & 0xFF));
233 /* still try to decode */
235 if ((data
[3] & 0x4) == 0x4) {
236 if (data_len
< offset
+ 2) {
237 goto really_bad_gzip_header
;
239 /* there are extra fields, the length follows the common header as 2 bytes LSB */
240 offset
+= (unsigned) ((data
[offset
] & 0xFF));
242 offset
+= (unsigned) ((data
[offset
] & 0xFF) << 8);
245 if ((data
[3] & 0x8) == 0x8) {
246 if (data_len
<= offset
) {
247 goto really_bad_gzip_header
;
249 /* there's a file name */
250 offset
+= strlen(&data
[offset
]) + 1 /*NUL*/;
252 if ((data
[3] & 0x10) == 0x10) {
253 if (data_len
<= offset
) {
254 goto really_bad_gzip_header
;
256 /* there's a comment */
257 offset
+= strlen(&data
[offset
]) + 1 /* NUL */;
259 if ((data
[3] & 0x2) == 0x2) {
260 /* there's a CRC16 of the header */
262 if (data_len
<= offset
) {
263 goto really_bad_gzip_header
;
265 unsigned long crc
, cmp
;
267 cmp
= (unsigned) ((data
[offset
-2] & 0xFF));
268 cmp
+= (unsigned) ((data
[offset
-1] & 0xFF) << 8);
270 crc
= crc32(0L, Z_NULL
, 0);
271 crc
= crc32(crc
, (const Bytef
*) data
, sizeof(http_encoding_gzip_header
));
273 if (cmp
!= (crc
& 0xFFFF)) {
274 http_error_ex(error_level TSRMLS_CC
, HTTP_E_ENCODING
, "GZIP headers CRC checksums so not match (%lu, %lu)", cmp
, crc
& 0xFFFF);
280 if (data_len
< offset
+ 8) {
281 http_error(error_level TSRMLS_CC
, HTTP_E_ENCODING
, "Missing or truncated GZIP footer");
286 *encoded
= data
+ offset
;
289 *encoded_len
= data_len
- offset
- 8 /* size of the assumed GZIP footer */;
294 really_bad_gzip_header
:
295 http_error(error_level TSRMLS_CC
, HTTP_E_ENCODING
, "Missing or truncated GZIP header");
299 inline STATUS
http_verify_gzdecode_buffer(const char *data
, size_t data_len
, const char *decoded
, size_t decoded_len
, int error_level TSRMLS_DC
)
301 STATUS status
= SUCCESS
;
302 unsigned long len
, cmp
, crc
;
304 crc
= crc32(0L, Z_NULL
, 0);
305 crc
= crc32(crc
, (const Bytef
*) decoded
, decoded_len
);
307 cmp
= (unsigned) ((data
[data_len
-8] & 0xFF));
308 cmp
+= (unsigned) ((data
[data_len
-7] & 0xFF) << 8);
309 cmp
+= (unsigned) ((data
[data_len
-6] & 0xFF) << 16);
310 cmp
+= (unsigned) ((data
[data_len
-5] & 0xFF) << 24);
311 len
= (unsigned) ((data
[data_len
-4] & 0xFF));
312 len
+= (unsigned) ((data
[data_len
-3] & 0xFF) << 8);
313 len
+= (unsigned) ((data
[data_len
-2] & 0xFF) << 16);
314 len
+= (unsigned) ((data
[data_len
-1] & 0xFF) << 24);
317 http_error_ex(error_level TSRMLS_CC
, HTTP_E_ENCODING
, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp
, crc
);
320 if (len
!= decoded_len
) {
321 http_error_ex(error_level TSRMLS_CC
, HTTP_E_ENCODING
, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len
, decoded_len
);
327 PHP_HTTP_API STATUS
_http_encode(http_encoding_type type
, int level
, const char *data
, size_t data_len
, char **encoded
, size_t *encoded_len TSRMLS_DC
)
329 STATUS status
= SUCCESS
;
333 case HTTP_ENCODING_ANY
:
334 case HTTP_ENCODING_GZIP
:
335 status
= http_encoding_gzencode(level
, data
, data_len
, encoded
, encoded_len
);
338 case HTTP_ENCODING_DEFLATE
:
339 status
= http_encoding_deflate(level
, data
, data_len
, encoded
, encoded_len
);
342 case HTTP_ENCODING_COMPRESS
:
343 status
= http_encoding_compress(level
, data
, data_len
, encoded
, encoded_len
);
346 case HTTP_ENCODING_NONE
:
348 *encoded
= estrndup(data
, data_len
);
349 *encoded_len
= data_len
;
356 PHP_HTTP_API STATUS
_http_decode(http_encoding_type type
, const char *data
, size_t data_len
, char **decoded
, size_t *decoded_len TSRMLS_DC
)
358 STATUS status
= SUCCESS
;
362 case HTTP_ENCODING_ANY
:
363 if ( SUCCESS
!= http_encoding_gzdecode(data
, data_len
, decoded
, decoded_len
) &&
364 SUCCESS
!= http_encoding_inflate(data
, data_len
, decoded
, decoded_len
) &&
365 SUCCESS
!= http_encoding_uncompress(data
, data_len
, decoded
, decoded_len
)) {
370 case HTTP_ENCODING_GZIP
:
371 status
= http_encoding_gzdecode(data
, data_len
, decoded
, decoded_len
);
374 case HTTP_ENCODING_DEFLATE
:
375 status
= http_encoding_inflate(data
, data_len
, decoded
, decoded_len
);
378 case HTTP_ENCODING_COMPRESS
:
379 status
= http_encoding_uncompress(data
, data_len
, decoded
, decoded_len
);
382 case HTTP_ENCODING_NONE
:
384 *decoded
= estrndup(data
, data_len
);
385 *decoded_len
= data_len
;
392 PHP_HTTP_API STATUS
_http_encoding_gzencode(int level
, const char *data
, size_t data_len
, char **encoded
, size_t *encoded_len TSRMLS_DC
)
395 STATUS status
= Z_OK
;
397 http_init_gzencode_buffer(&Z
, data
, data_len
, encoded
);
399 if ( (Z_OK
== (status
= deflateInit2(&Z
, level
, Z_DEFLATED
, -MAX_WBITS
, MAX_MEM_LEVEL
, Z_DEFAULT_STRATEGY
))) &&
400 (Z_STREAM_END
== (status
= deflate(&Z
, Z_FINISH
))) &&
401 (Z_OK
== (status
= deflateEnd(&Z
)))) {
402 *encoded_len
= http_finish_gzencode_buffer(&Z
, data
, data_len
, encoded
);
407 http_error_ex(HE_WARNING
, HTTP_E_ENCODING
, "Could not gzencode data: %s", zError(status
));
411 PHP_HTTP_API STATUS
_http_encoding_deflate(int level
, const char *data
, size_t data_len
, char **encoded
, size_t *encoded_len TSRMLS_DC
)
414 STATUS status
= Z_OK
;
416 http_init_deflate_buffer(&Z
, data
, data_len
, encoded
);
418 if ( (Z_OK
== (status
= deflateInit2(&Z
, level
, Z_DEFLATED
, -MAX_WBITS
, MAX_MEM_LEVEL
, Z_DEFAULT_STRATEGY
))) &&
419 (Z_STREAM_END
== (status
= deflate(&Z
, Z_FINISH
))) &&
420 (Z_OK
== (status
= deflateEnd(&Z
)))) {
421 *encoded_len
= http_finish_buffer(Z
.total_out
, encoded
);
426 http_error_ex(HE_WARNING
, HTTP_E_ENCODING
, "Could not deflate data: %s", zError(status
));
430 PHP_HTTP_API STATUS
_http_encoding_compress(int level
, const char *data
, size_t data_len
, char **encoded
, size_t *encoded_len TSRMLS_DC
)
434 *encoded
= emalloc(*encoded_len
= HTTP_ENCODING_BUFLEN(data_len
));
436 if (Z_OK
== (status
= compress2((Bytef
*) *encoded
, (uLongf
*) encoded_len
, (const Bytef
*) data
, data_len
, level
))) {
437 http_finish_buffer(*encoded_len
, encoded
);
442 http_error_ex(HE_WARNING
, HTTP_E_ENCODING
, "Could not compress data: %s", zError(status
));
446 PHP_HTTP_API STATUS
_http_encoding_gzdecode(const char *data
, size_t data_len
, char **decoded
, size_t *decoded_len TSRMLS_DC
)
451 if ( (SUCCESS
== http_verify_gzencode_buffer(data
, data_len
, &encoded
, &encoded_len
, HE_NOTICE
)) &&
452 (SUCCESS
== http_encoding_inflate(encoded
, encoded_len
, decoded
, decoded_len
))) {
453 http_verify_gzdecode_buffer(data
, data_len
, *decoded
, *decoded_len
, HE_NOTICE
);
460 PHP_HTTP_API STATUS
_http_encoding_inflate(const char *data
, size_t data_len
, char **decoded
, size_t *decoded_len TSRMLS_DC
)
467 http_init_inflate_buffer(&Z
, data
, data_len
, decoded
, decoded_len
, &max
);
468 if (Z_OK
== (status
= inflateInit2(&Z
, -MAX_WBITS
))) {
469 if (Z_STREAM_END
== (status
= inflate(&Z
, Z_FINISH
))) {
470 if (Z_OK
== (status
= inflateEnd(&Z
))) {
471 *decoded_len
= http_finish_buffer(Z
.total_out
, decoded
);
476 } while (++max
< HTTP_ENCODING_MAXTRY
&& status
== Z_BUF_ERROR
);
479 http_error_ex(HE_WARNING
, HTTP_E_ENCODING
, "Could not inflate data: %s", zError(status
));
483 PHP_HTTP_API STATUS
_http_encoding_uncompress(const char *data
, size_t data_len
, char **decoded
, size_t *decoded_len TSRMLS_DC
)
489 http_init_uncompress_buffer(data_len
, decoded
, decoded_len
, &max
);
490 if (Z_OK
== (status
= uncompress((Bytef
*) *decoded
, (uLongf
*) decoded_len
, (const Bytef
*) data
, data_len
))) {
491 http_finish_buffer(*decoded_len
, decoded
);
494 } while (++max
< HTTP_ENCODING_MAXTRY
&& status
== Z_BUF_ERROR
);
497 http_error_ex(HE_WARNING
, HTTP_E_ENCODING
, "Could not uncompress data: %s", zError(status
));
/*
 * Bail out of a stream function: free `tofree` if it is non-NULL, raise a
 * warning that describes the zlib `status` and return FAILURE from the
 * *calling* function.
 *
 * Wrapped in do { } while (0) so that the invocation plus its trailing
 * semicolon forms exactly one statement, and with parenthesized arguments.
 */
#define HTTP_ENCODING_STREAM_ERROR(status, tofree) \
	do { \
		if ((tofree)) efree((tofree)); \
		http_error_ex(HE_WARNING, HTTP_E_ENCODING, "GZIP stream error: %s", zError((status))); \
		return FAILURE; \
	} while (0)
508 PHP_HTTP_API STATUS
_http_encoding_stream_init(http_encoding_stream
*s
, int gzip
, int level
, char **encoded
, size_t *encoded_len TSRMLS_DC
)
512 memset(s
, 0, sizeof(http_encoding_stream
));
513 if (Z_OK
!= (status
= deflateInit2(&s
->Z
, level
, Z_DEFLATED
, -MAX_WBITS
, MAX_MEM_LEVEL
, Z_DEFAULT_STRATEGY
))) {
514 HTTP_ENCODING_STREAM_ERROR(status
, NULL
);
517 if (s
->gzip
= gzip
) {
518 s
->crc
= crc32(0L, Z_NULL
, 0);
519 *encoded_len
= sizeof(http_encoding_gzip_header
);
520 *encoded
= emalloc(*encoded_len
);
521 memcpy(*encoded
, http_encoding_gzip_header
, *encoded_len
);
530 PHP_HTTP_API STATUS
_http_encoding_stream_update(http_encoding_stream
*s
, const char *data
, size_t data_len
, char **encoded
, size_t *encoded_len TSRMLS_DC
)
534 *encoded_len
= HTTP_ENCODING_BUFLEN(data_len
);
535 *encoded
= emalloc(*encoded_len
);
537 s
->Z
.next_in
= (Bytef
*) data
;
538 s
->Z
.avail_in
= data_len
;
539 s
->Z
.next_out
= (Bytef
*) *encoded
;
540 s
->Z
.avail_out
= *encoded_len
;
542 status
= deflate(&s
->Z
, Z_SYNC_FLUSH
);
544 if (Z_OK
!= status
&& Z_STREAM_END
!= status
) {
545 HTTP_ENCODING_STREAM_ERROR(status
, *encoded
);
547 *encoded_len
-= s
->Z
.avail_out
;
550 s
->crc
= crc32(s
->crc
, (const Bytef
*) data
, data_len
);
556 PHP_HTTP_API STATUS
_http_encoding_stream_finish(http_encoding_stream
*s
, char **encoded
, size_t *encoded_len TSRMLS_DC
)
561 *encoded
= emalloc(*encoded_len
);
563 s
->Z
.next_out
= (Bytef
*) *encoded
;
564 s
->Z
.avail_out
= *encoded_len
;
566 if (Z_STREAM_END
!= (status
= deflate(&s
->Z
, Z_FINISH
)) || Z_OK
!= (status
= deflateEnd(&s
->Z
))) {
567 HTTP_ENCODING_STREAM_ERROR(status
, *encoded
);
570 *encoded_len
-= s
->Z
.avail_out
;
572 if (s
->Z
.avail_out
< 8) {
573 *encoded
= erealloc(*encoded
, *encoded_len
+ 8);
575 (*encoded
)[(*encoded_len
)++] = (char) (s
->crc
& 0xFF);
576 (*encoded
)[(*encoded_len
)++] = (char) ((s
->crc
>> 8) & 0xFF);
577 (*encoded
)[(*encoded_len
)++] = (char) ((s
->crc
>> 16) & 0xFF);
578 (*encoded
)[(*encoded_len
)++] = (char) ((s
->crc
>> 24) & 0xFF);
579 (*encoded
)[(*encoded_len
)++] = (char) ((s
->Z
.total_in
) & 0xFF);
580 (*encoded
)[(*encoded_len
)++] = (char) ((s
->Z
.total_in
>> 8) & 0xFF);
581 (*encoded
)[(*encoded_len
)++] = (char) ((s
->Z
.total_in
>> 16) & 0xFF);
582 (*encoded
)[(*encoded_len
)++] = (char) ((s
->Z
.total_in
>> 24) & 0xFF);
588 #endif /* HTTP_HAVE_ZLIB */
590 PHP_HTTP_API zend_bool
_http_encoding_response_start(size_t content_length TSRMLS_DC
)
592 if ( php_ob_handler_used("ob_gzhandler" TSRMLS_CC
) ||
593 php_ob_handler_used("zlib output compression" TSRMLS_CC
)) {
594 HTTP_G(send
).gzip_encoding
= 0;
596 if (!HTTP_G(send
).gzip_encoding
) {
597 /* emit a content-length header */
598 if (content_length
) {
599 char cl_header_str
[128];
600 size_t cl_header_len
;
601 cl_header_len
= snprintf(cl_header_str
, lenof(cl_header_str
), "Content-Length: %lu", (unsigned long) content_length
);
602 http_send_header_string_ex(cl_header_str
, cl_header_len
, 1);
605 #ifndef HTTP_HAVE_ZLIB
606 HTTP_G(send
).gzip_encoding
= 0;
607 php_start_ob_buffer_named("ob_gzhandler", 0, 0 TSRMLS_CC
);
612 INIT_PZVAL(&zsupported
);
613 array_init(&zsupported
);
614 add_next_index_stringl(&zsupported
, "gzip", lenof("gzip"), 1);
615 add_next_index_stringl(&zsupported
, "deflate", lenof("deflate"), 1);
617 HTTP_G(send
).gzip_encoding
= 0;
619 if (selected
= http_negotiate_encoding(&zsupported
)) {
621 char *encoding
= NULL
;
624 if (HASH_KEY_IS_STRING
== zend_hash_get_current_key(selected
, &encoding
, &idx
, 0) && encoding
) {
625 if (!strcmp(encoding
, "gzip")) {
626 if (SUCCESS
== (hs
= http_send_header_string("Content-Encoding: gzip"))) {
627 HTTP_G(send
).gzip_encoding
= HTTP_ENCODING_GZIP
;
629 } else if (!strcmp(encoding
, "deflate")) {
630 if (SUCCESS
== (hs
= http_send_header_string("Content-Encoding: deflate"))) {
631 HTTP_G(send
).gzip_encoding
= HTTP_ENCODING_DEFLATE
;
635 http_send_header_string("Vary: Accept-Encoding");
639 zend_hash_destroy(selected
);
640 FREE_HASHTABLE(selected
);
643 zval_dtor(&zsupported
);
644 return HTTP_G(send
).gzip_encoding
;
656 * vim600: noet sw=4 ts=4 fdm=marker
657 * vim<600: noet sw=4 ts=4