X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=blobdiff_plain;f=http_encoding_api.c;h=cba2d2547a595721790007a3d6cfc47610debff6;hp=f0328cdabfd4832978b26d0afd83967ba8c561f0;hb=5c5ddf9042732a05100245844fe2fb70bfe6d495;hpb=6541b0935ad5edc8e45b4a99c5e3f67812489bca diff --git a/http_encoding_api.c b/http_encoding_api.c index f0328cd..cba2d25 100644 --- a/http_encoding_api.c +++ b/http_encoding_api.c @@ -24,6 +24,12 @@ #include "php_http.h" #include "php_http_api.h" +#ifdef HTTP_HAVE_ZLIB +# include "php_http_send_api.h" +# include "php_http_headers_api.h" +# include +#endif + ZEND_EXTERN_MODULE_GLOBALS(http); /* {{{ char *http_encoding_dechunk(char *, size_t, char **, size_t *) */ @@ -93,21 +99,18 @@ PHP_HTTP_API const char *_http_encoding_dechunk(const char *encoded, size_t enco /* }}} */ #ifdef HTTP_HAVE_ZLIB -#include - -/* max count of uncompress trials, alloc_size <<= 2 for each try */ -#define HTTP_GZMAXTRY 10 -/* safe padding */ -#define HTTP_GZSAFPAD 10 -/* add 1% extra space in case we need to encode widely differing (binary) data */ -#define HTTP_GZBUFLEN(l) (l + (l / 100) + HTTP_GZSAFPAD) - -static const char http_gzencode_header[] = { - (const char) 0x1f, - (const char) 0x8b, - (const char) Z_DEFLATED, - 0, 0, 0, 0, 0, 0, - (const char) 0x03 + +static const char http_encoding_gzip_header[] = { + (const char) 0x1f, // fixed value + (const char) 0x8b, // fixed value + (const char) Z_DEFLATED, // compression algorithm + (const char) 0, // none of the possible flags defined by the GZIP "RFC" + (const char) 0, // no MTIME available (4 bytes) + (const char) 0, // =*= + (const char) 0, // =*= + (const char) 0, // =*= + (const char) 0, // two possible flag values for 9 compression levels? o_O + (const char) 0x03 // assume *nix OS }; inline void http_init_gzencode_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr) @@ -118,12 +121,12 @@ inline void http_init_gzencode_buffer(z_stream *Z, const char *data, size_t data Z->next_in = (Bytef *) data; Z->avail_in = data_len; - Z->avail_out = HTTP_GZBUFLEN(data_len) + HTTP_GZSAFPAD - 1; + Z->avail_out = HTTP_ENCODING_BUFLEN(data_len) + HTTP_ENCODING_SAFPAD - 1; - *buf_ptr = emalloc(HTTP_GZBUFLEN(data_len) + sizeof(http_gzencode_header)); - memcpy(*buf_ptr, http_gzencode_header, sizeof(http_gzencode_header)); + *buf_ptr = emalloc(HTTP_ENCODING_BUFLEN(data_len) + sizeof(http_encoding_gzip_header) + HTTP_ENCODING_SAFPAD); + memcpy(*buf_ptr, http_encoding_gzip_header, sizeof(http_encoding_gzip_header)); - Z->next_out = *buf_ptr + sizeof(http_gzencode_header); + Z->next_out = *buf_ptr + sizeof(http_encoding_gzip_header); } inline void http_init_deflate_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr) @@ -135,24 +138,36 @@ inline void http_init_deflate_buffer(z_stream *Z, const char *data, size_t data_ Z->data_type = Z_UNKNOWN; Z->next_in = (Bytef *) data; Z->avail_in = data_len; - Z->avail_out = HTTP_GZBUFLEN(data_len) - 1; - Z->next_out = emalloc(HTTP_GZBUFLEN(data_len)); + Z->avail_out = HTTP_ENCODING_BUFLEN(data_len) - 1; + Z->next_out = emalloc(HTTP_ENCODING_BUFLEN(data_len)); *buf_ptr = Z->next_out; } -inline void http_init_inflate_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr, size_t *buf_len, int iteration) +inline void http_init_uncompress_buffer(size_t data_len, char **buf_ptr, size_t *buf_len, int *iteration) { - Z->zalloc = Z_NULL; - Z->zfree = Z_NULL; - - if (!iteration) { + if (!*iteration) { *buf_len = data_len * 2; *buf_ptr = emalloc(*buf_len + 1); } else { - *buf_len <<= 2; - *buf_ptr = erealloc(*buf_ptr, *buf_len + 1); + size_t new_len = *buf_len << 2; + char *new_ptr = erealloc(*buf_ptr, new_len + 1); + + if (new_ptr) { + *buf_ptr = new_ptr; + *buf_len = new_len; + } else { + *iteration = INT_MAX; + } } +} + +inline void http_init_inflate_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr, size_t *buf_len, int *iteration) +{ + Z->zalloc = Z_NULL; + Z->zfree = Z_NULL; + + http_init_uncompress_buffer(data_len, buf_ptr, buf_len, iteration); Z->next_in = (Bytef *) data; Z->avail_in = data_len; @@ -174,7 +189,7 @@ inline size_t http_finish_gzencode_buffer(z_stream *Z, const char *data, size_t crc = crc32(0L, Z_NULL, 0); crc = crc32(crc, (const Bytef *) data, data_len); - trailer = *buf_ptr + sizeof(http_gzencode_header) + Z->total_out; + trailer = *buf_ptr + sizeof(http_encoding_gzip_header) + Z->total_out; /* LSB */ trailer[0] = (char) (crc & 0xFF); @@ -186,9 +201,182 @@ inline size_t http_finish_gzencode_buffer(z_stream *Z, const char *data, size_t trailer[6] = (char) ((Z->total_in >> 16) & 0xFF); trailer[7] = (char) ((Z->total_in >> 24) & 0xFF); - return http_finish_buffer(Z->total_out + sizeof(http_gzencode_header) + 8, buf_ptr); + return http_finish_buffer(Z->total_out + sizeof(http_encoding_gzip_header) + 8, buf_ptr); +} + +inline STATUS http_verify_gzencode_buffer(const char *data, size_t data_len, const char **encoded, size_t *encoded_len, int error_level TSRMLS_DC) +{ + size_t offset = sizeof(http_encoding_gzip_header); + + if (data_len < offset) { + goto really_bad_gzip_header; + } + + if (data[0] != (const char) 0x1F || data[1] != (const char) 0x8B) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized GZIP header start: 0x%02X 0x%02X", (int) data[0], (int) (data[1] & 0xFF)); + return FAILURE; + } + + if (data[2] != (const char) Z_DEFLATED) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized compression format (%d)", (int) (data[2] & 0xFF)); + /* still try to decode */ + } + if ((data[3] & 0x4) == 0x4) { + if (data_len < offset + 2) { + goto really_bad_gzip_header; + } + /* there are extra fields, the length follows the common header as 2 bytes LSB */ + offset += (unsigned) ((data[offset] & 0xFF)); + offset += 1; + offset += (unsigned) ((data[offset] & 0xFF) << 8); + offset += 1; + } + if ((data[3] & 0x8) == 0x8) { + if (data_len <= offset) { + goto really_bad_gzip_header; + } + /* there's a file name */ + offset += strlen(&data[offset]) + 1 /*NUL*/; + } + if ((data[3] & 0x10) == 0x10) { + if (data_len <= offset) { + goto really_bad_gzip_header; + } + /* there's a comment */ + offset += strlen(&data[offset]) + 1 /* NUL */; + } + if ((data[3] & 0x2) == 0x2) { + /* there's a CRC16 of the header */ + offset += 2; + if (data_len <= offset) { + goto really_bad_gzip_header; + } else { + unsigned long crc, cmp; + + cmp = (unsigned) ((data[offset-2] & 0xFF)); + cmp += (unsigned) ((data[offset-1] & 0xFF) << 8); + + crc = crc32(0L, Z_NULL, 0); + crc = crc32(crc, data, sizeof(http_encoding_gzip_header)); + + if (cmp != (crc & 0xFFFF)) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "GZIP headers CRC checksums so not match (%lu, %lu)", cmp, crc & 0xFFFF); + return FAILURE; + } + } + } + + if (data_len < offset + 8) { + http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP footer"); + return FAILURE; + } + + if (encoded) { + *encoded = data + offset; + } + if (encoded_len) { + *encoded_len = data_len - offset - 8 /* size of the assumed GZIP footer */; + } + + return SUCCESS; + +really_bad_gzip_header: + http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP header"); + return FAILURE; } +inline STATUS http_verify_gzdecode_buffer(const char *data, size_t data_len, const char *decoded, size_t decoded_len, int error_level TSRMLS_DC) +{ + STATUS status = SUCCESS; + unsigned long len, cmp, crc; + + crc = crc32(0L, Z_NULL, 0); + crc = crc32(crc, (const Bytef *) decoded, decoded_len); + + cmp = (unsigned) ((data[data_len-8] & 0xFF)); + cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8); + cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16); + cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24); + len = (unsigned) ((data[data_len-4] & 0xFF)); + len += (unsigned) ((data[data_len-3] & 0xFF) << 8); + len += (unsigned) ((data[data_len-2] & 0xFF) << 16); + len += (unsigned) ((data[data_len-1] & 0xFF) << 24); + + if (cmp != crc) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc); + status = FAILURE; + } + if (len != decoded_len) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, decoded_len); + status = FAILURE; + } + return status; +} + +PHP_HTTP_API STATUS _http_encode(http_encoding_type type, int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC) +{ + STATUS status = SUCCESS; + + switch (type) + { + case HTTP_ENCODING_ANY: + case HTTP_ENCODING_GZIP: + status = http_encoding_gzencode(level, data, data_len, encoded, encoded_len); + break; + + case HTTP_ENCODING_DEFLATE: + status = http_encoding_deflate(level, data, data_len, encoded, encoded_len); + break; + + case HTTP_ENCODING_COMPRESS: + status = http_encoding_compress(level, data, data_len, encoded, encoded_len); + break; + + case HTTP_ENCODING_NONE: + default: + *encoded = estrndup(data, data_len); + *encoded_len = data_len; + break; + } + + return status; +} + +PHP_HTTP_API STATUS _http_decode(http_encoding_type type, const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC) +{ + STATUS status = SUCCESS; + + switch (type) + { + case HTTP_ENCODING_ANY: + if ( SUCCESS != http_encoding_gzdecode(data, data_len, decoded, decoded_len) && + SUCCESS != http_encoding_inflate(data, data_len, decoded, decoded_len) && + SUCCESS != http_encoding_uncompress(data, data_len, decoded, decoded_len)) { + status = FAILURE; + } + break; + + case HTTP_ENCODING_GZIP: + status = http_encoding_gzdecode(data, data_len, decoded, decoded_len); + break; + + case HTTP_ENCODING_DEFLATE: + status = http_encoding_inflate(data, data_len, decoded, decoded_len); + break; + + case HTTP_ENCODING_COMPRESS: + status = http_encoding_uncompress(data, data_len, decoded, decoded_len); + break; + + case HTTP_ENCODING_NONE: + default: + *decoded = estrndup(data, data_len); + *decoded_len = data_len; + break; + } + + return status; +} PHP_HTTP_API STATUS _http_encoding_gzencode(int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC) { @@ -232,7 +420,7 @@ PHP_HTTP_API STATUS _http_encoding_compress(int level, const char *data, size_t { STATUS status; - *encoded = emalloc(*encoded_len = HTTP_GZBUFLEN(data_len)); + *encoded = emalloc(*encoded_len = HTTP_ENCODING_BUFLEN(data_len)); if (Z_OK == (status = compress2(*encoded, encoded_len, data, data_len, level))) { http_finish_buffer(*encoded_len, encoded); @@ -246,38 +434,15 @@ PHP_HTTP_API STATUS _http_encoding_compress(int level, const char *data, size_t PHP_HTTP_API STATUS _http_encoding_gzdecode(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC) { - const char *encoded = data + sizeof(http_gzencode_header); + const char *encoded; size_t encoded_len; - if (data_len <= sizeof(http_gzencode_header) + 8) { - http_error(HE_WARNING, HTTP_E_ENCODING, "Could not gzdecode data: too short data length"); - } else { - encoded_len = data_len - sizeof(http_gzencode_header) - 8; - - if (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len)) { - unsigned long len = 0, cmp = 0, crc = crc32(0L, Z_NULL, 0); - - crc = crc32(crc, (const Bytef *) *decoded, *decoded_len); - - cmp = (unsigned) ((data[data_len-8] & 0xFF)); - cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8); - cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16); - cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24); - len = (unsigned) ((data[data_len-4] & 0xFF)); - len += (unsigned) ((data[data_len-3] & 0xFF) << 8); - len += (unsigned) ((data[data_len-2] & 0xFF) << 16); - len += (unsigned) ((data[data_len-1] & 0xFF) << 24); - - if (cmp != crc) { - http_error_ex(HE_NOTICE, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc); - } - if (len != *decoded_len) { - http_error_ex(HE_NOTICE, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, *decoded_len); - } - - return SUCCESS; - } + if ( (SUCCESS == http_verify_gzencode_buffer(data, data_len, &encoded, &encoded_len, HE_NOTICE)) && + (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len))) { + http_verify_gzdecode_buffer(data, data_len, *decoded, *decoded_len, HE_NOTICE); + return SUCCESS; } + return FAILURE; } @@ -288,7 +453,7 @@ PHP_HTTP_API STATUS _http_encoding_inflate(const char *data, size_t data_len, ch z_stream Z; do { - http_init_inflate_buffer(&Z, data, data_len, decoded, decoded_len, max++); + http_init_inflate_buffer(&Z, data, data_len, decoded, decoded_len, &max); if (Z_OK == (status = inflateInit2(&Z, -MAX_WBITS))) { if (Z_STREAM_END == (status = inflate(&Z, Z_FINISH))) { if (Z_OK == (status = inflateEnd(&Z))) { @@ -297,8 +462,9 @@ PHP_HTTP_API STATUS _http_encoding_inflate(const char *data, size_t data_len, ch } } } - } while (max < HTTP_GZMAXTRY); + } while (++max < HTTP_ENCODING_MAXTRY && status == Z_BUF_ERROR); + efree(*decoded); http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not inflate data: %s", zError(status)); return FAILURE; } @@ -307,29 +473,170 @@ PHP_HTTP_API STATUS _http_encoding_uncompress(const char *data, size_t data_len, { int max = 0; STATUS status; - size_t want = data_len * 2; - - *decoded = emalloc(want + 1); - if (Z_BUF_ERROR == (status = uncompress(*decoded, &want, data, data_len))) do { - /* this is a lot faster with large data than gzuncompress(), - but could be a problem with a low memory limit */ - want <<= 2; - *decoded = erealloc(*decoded, want + 1); - status = uncompress(*decoded, &want, data, data_len); - } while (++max < HTTP_GZMAXTRY && status == Z_BUF_ERROR); - - if (Z_OK == status) { - *decoded_len = http_finish_buffer(want, decoded); - return SUCCESS; - } + + do { + http_init_uncompress_buffer(data_len, decoded, decoded_len, &max); + if (Z_OK == (status = uncompress(*decoded, decoded_len, data, data_len))) { + http_finish_buffer(*decoded_len, decoded); + return SUCCESS; + } + } while (++max < HTTP_ENCODING_MAXTRY && status == Z_BUF_ERROR); efree(*decoded); http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not uncompress data: %s", zError(status)); return FAILURE; } +#define HTTP_ENCODING_STREAM_ERROR(status, tofree) \ + { \ + if (tofree) efree(tofree); \ + http_error_ex(HE_WARNING, HTTP_E_ENCODING, "GZIP stream error: %s", zError(status)); \ + return FAILURE; \ + } + +PHP_HTTP_API STATUS _http_encoding_stream_init(http_encoding_stream *s, int gzip, int level, char **encoded, size_t *encoded_len TSRMLS_DC) +{ + STATUS status; + + memset(s, 0, sizeof(http_encoding_stream)); + if (Z_OK != (status = deflateInit2(&s->Z, level, Z_DEFLATED, -MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY))) { + HTTP_ENCODING_STREAM_ERROR(status, NULL); + } + + if (s->gzip = gzip) { + s->crc = crc32(0L, Z_NULL, 0); + *encoded_len = sizeof(http_encoding_gzip_header); + *encoded = emalloc(*encoded_len); + memcpy(*encoded, http_encoding_gzip_header, *encoded_len); + } else { + *encoded_len = 0; + *encoded = NULL; + } + + return SUCCESS; +} + +PHP_HTTP_API STATUS _http_encoding_stream_update(http_encoding_stream *s, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC) +{ + STATUS status; + + *encoded_len = HTTP_ENCODING_BUFLEN(data_len); + *encoded = emalloc(*encoded_len); + + s->Z.next_in = (Bytef *) data; + s->Z.avail_in = data_len; + s->Z.next_out = *encoded; + s->Z.avail_out = *encoded_len; + + status = deflate(&s->Z, Z_SYNC_FLUSH); + + if (Z_OK != status && Z_STREAM_END != status) { + HTTP_GZSTREAM_ERROR(status, *encoded); + } + *encoded_len -= s->Z.avail_out; + + if (s->gzip) { + s->crc = crc32(s->crc, (const Bytef *) data, data_len); + } + + return SUCCESS; +} + +PHP_HTTP_API STATUS _http_encoding_stream_finish(http_encoding_stream *s, char **encoded, size_t *encoded_len TSRMLS_DC) +{ + STATUS status; + + *encoded_len = 1024; + *encoded = emalloc(*encoded_len); + + s->Z.next_out = *encoded; + s->Z.avail_out = *encoded_len; + + if (Z_STREAM_END != (status = deflate(&s->Z, Z_FINISH)) || Z_OK != (status = deflateEnd(&s->Z))) { + HTTP_ENCODING_STREAM_ERROR(status, *encoded); + } + + fprintf(stderr, "Needed %d bytes\n", *encoded_len - s->Z.avail_out); + + *encoded_len -= s->Z.avail_out; + if (s->gzip) { + if (s->Z.avail_out < 8) { + *encoded = erealloc(*encoded, *encoded_len + 8); + } + (*encoded)[(*encoded_len)++] = (char) (s->crc & 0xFF); + (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 8) & 0xFF); + (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 16) & 0xFF); + (*encoded)[(*encoded_len)++] = (char) ((s->crc >> 24) & 0xFF); + (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in) & 0xFF); + (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 8) & 0xFF); + (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 16) & 0xFF); + (*encoded)[(*encoded_len)++] = (char) ((s->Z.total_in >> 24) & 0xFF); + } + + return SUCCESS; +} + #endif /* HTTP_HAVE_ZLIB */ +PHP_HTTP_API zend_bool _http_encoding_response_start(size_t content_length TSRMLS_DC) +{ + if (php_ob_handler_used("ob_gzhandler" TSRMLS_DC)||php_ob_handler_used("zlib output compression" TSRMLS_DC)) { + HTTP_G(send).gzip_encoding = 0; + } else { + if (!HTTP_G(send).gzip_encoding) { + /* emit a content-length header */ + if (content_length) { + char *cl; + spprintf(&cl, 0, "Content-Length: %lu", (unsigned long) content_length); + http_send_header_string(cl); + efree(cl); + } + } else { +#ifndef HTTP_HAVE_ZLIB + php_start_ob_buffer_named("ob_gzhandler", 0, 0 TSRMLS_CC); +#else + HashTable *selected; + zval zsupported; + + INIT_PZVAL(&zsupported); + array_init(&zsupported); + add_next_index_stringl(&zsupported, "gzip", lenof("gzip"), 1); + add_next_index_stringl(&zsupported, "deflate", lenof("deflate"), 1); + + if (selected = http_negotiate_encoding(&zsupported)) { + STATUS hs = FAILURE; + char *encoding = NULL; + ulong idx; + + if (HASH_KEY_IS_STRING == zend_hash_get_current_key(selected, &encoding, &idx, 0) && encoding) { + if (!strcmp(encoding, "gzip")) { + if (SUCCESS == (hs = http_send_header_string("Content-Encoding: gzip"))) { + HTTP_G(send).gzip_encoding = HTTP_ENCODING_GZIP; + } + } else if (!strcmp(encoding, "deflate")) { + if (SUCCESS == (hs = http_send_header_string("Content-Encoding: deflate"))) { + HTTP_G(send).gzip_encoding = HTTP_ENCODING_DEFLATE; + } + } + if (SUCCESS == hs) { + http_send_header_string("Vary: Accept-Encoding"); + } else { + HTTP_G(send).gzip_encoding = 0; + } + } + + zend_hash_destroy(selected); + FREE_HASHTABLE(selected); + } + + zval_dtor(&zsupported); + return 1; +#endif + } + } + return 0; +} + /* * Local variables: * tab-width: 4