X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=blobdiff_plain;f=http_encoding_api.c;h=28ea4232d2627dcf2d5d5e61e51f6ec1ac0eafe1;hp=a1893df44395e0b2de4472ce6a3e05f4ed030406;hb=61e3ea78efcc501cf7ec2df4749aa92935c0964e;hpb=a6b4fe496b44ab45fbc84d0b491ce7322e7532f7 diff --git a/http_encoding_api.c b/http_encoding_api.c index a1893df..28ea423 100644 --- a/http_encoding_api.c +++ b/http_encoding_api.c @@ -95,15 +95,24 @@ PHP_HTTP_API const char *_http_encoding_dechunk(const char *encoded, size_t enco #ifdef HTTP_HAVE_ZLIB #include +/* max count of uncompress trials, alloc_size <<= 2 for each try */ #define HTTP_GZMAXTRY 10 -#define HTTP_GZBUFLEN(l) (l + (l / 1000) + 16 + 1) +/* safe padding */ +#define HTTP_GZSAFPAD 10 +/* add 1% extra space in case we need to encode widely differing (binary) data */ +#define HTTP_GZBUFLEN(l) (l + (l / 100) + HTTP_GZSAFPAD) static const char http_gzencode_header[] = { - (const char) 0x1f, - (const char) 0x8b, - (const char) Z_DEFLATED, - 0, 0, 0, 0, 0, 0, - (const char) 0x03 + (const char) 0x1f, // fixed value + (const char) 0x8b, // fixed value + (const char) Z_DEFLATED, // compression algorithm + (const char) 0, // none of the possible flags defined by the GZIP "RFC" + (const char) 0, // no MTIME available (4 bytes) + (const char) 0, // =*= + (const char) 0, // =*= + (const char) 0, // =*= + (const char) 0, // two possible flag values for 9 compression levels? o_O + (const char) 0x03 // assume *nix OS }; inline void http_init_gzencode_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr) @@ -114,9 +123,9 @@ inline void http_init_gzencode_buffer(z_stream *Z, const char *data, size_t data Z->next_in = (Bytef *) data; Z->avail_in = data_len; - Z->avail_out = HTTP_GZBUFLEN(data_len) - 1; + Z->avail_out = HTTP_GZBUFLEN(data_len) + HTTP_GZSAFPAD - 1; - *buf_ptr = emalloc(Z->avail_out + sizeof(http_gzencode_header)); + *buf_ptr = emalloc(HTTP_GZBUFLEN(data_len) + sizeof(http_gzencode_header) + HTTP_GZSAFPAD); memcpy(*buf_ptr, http_gzencode_header, sizeof(http_gzencode_header)); Z->next_out = *buf_ptr + sizeof(http_gzencode_header); @@ -128,20 +137,17 @@ inline void http_init_deflate_buffer(z_stream *Z, const char *data, size_t data_ Z->zfree = Z_NULL; Z->opaque = Z_NULL; - Z->data_type = Z_ASCII; + Z->data_type = Z_UNKNOWN; Z->next_in = (Bytef *) data; Z->avail_in = data_len; Z->avail_out = HTTP_GZBUFLEN(data_len) - 1; - Z->next_out = emalloc(Z->avail_out); + Z->next_out = emalloc(HTTP_GZBUFLEN(data_len)); *buf_ptr = Z->next_out; } -inline void http_init_inflate_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr, size_t *buf_len, int iteration) +inline void http_init_uncompress_buffer(size_t data_len, char **buf_ptr, size_t *buf_len, int iteration) { - Z->zalloc = Z_NULL; - Z->zfree = Z_NULL; - if (!iteration) { *buf_len = data_len * 2; *buf_ptr = emalloc(*buf_len + 1); @@ -149,6 +155,14 @@ inline void http_init_inflate_buffer(z_stream *Z, const char *data, size_t data_ *buf_len <<= 2; *buf_ptr = erealloc(*buf_ptr, *buf_len + 1); } +} + +inline void http_init_inflate_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr, size_t *buf_len, int iteration) +{ + Z->zalloc = Z_NULL; + Z->zfree = Z_NULL; + + http_init_uncompress_buffer(data_len, buf_ptr, buf_len, iteration); Z->next_in = (Bytef *) data; Z->avail_in = data_len; @@ -185,6 +199,179 @@ inline size_t http_finish_gzencode_buffer(z_stream *Z, const char *data, size_t return http_finish_buffer(Z->total_out + sizeof(http_gzencode_header) + 8, buf_ptr); } +inline STATUS http_verify_gzencode_buffer(const char *data, size_t data_len, const char **encoded, size_t *encoded_len, int error_level TSRMLS_DC) +{ + size_t offset = sizeof(http_gzencode_header); + + if (data_len < offset) { + goto really_bad_gzip_header; + } + + if (data[0] != (const char) 0x1F || data[1] != (const char) 0x8B) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized GZIP header start: 0x%02X 0x%02X", (int) data[0], (int) (data[1] & 0xFF)); + return FAILURE; + } + + if (data[2] != (const char) Z_DEFLATED) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized compression format (%d)", (int) (data[2] & 0xFF)); + /* still try to decode */ + } + if ((data[3] & 0x4) == 0x4) { + if (data_len < offset + 2) { + goto really_bad_gzip_header; + } + /* there are extra fields, the length follows the common header as 2 bytes LSB */ + offset += (unsigned) ((data[offset] & 0xFF)); + offset += 1; + offset += (unsigned) ((data[offset] & 0xFF) << 8); + offset += 1; + } + if ((data[3] & 0x8) == 0x8) { + if (data_len <= offset) { + goto really_bad_gzip_header; + } + /* there's a file name */ + offset += strlen(&data[offset]) + 1 /*NUL*/; + } + if ((data[3] & 0x10) == 0x10) { + if (data_len <= offset) { + goto really_bad_gzip_header; + } + /* there's a comment */ + offset += strlen(&data[offset]) + 1 /* NUL */; + } + if ((data[3] & 0x2) == 0x2) { + /* there's a CRC16 of the header */ + offset += 2; + if (data_len <= offset) { + goto really_bad_gzip_header; + } else { + unsigned long crc, cmp; + + cmp = (unsigned) ((data[offset-2] & 0xFF)); + cmp += (unsigned) ((data[offset-1] & 0xFF) << 8); + + crc = crc32(0L, Z_NULL, 0); + crc = crc32(crc, data, sizeof(http_gzencode_header)); + + if (cmp != (crc & 0xFFFF)) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "GZIP headers CRC checksums so not match (%lu, %lu)", cmp, crc & 0xFFFF); + return FAILURE; + } + } + } + + if (data_len < offset + 8) { + http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP footer"); + return FAILURE; + } + + if (encoded) { + *encoded = data + offset; + } + if (encoded_len) { + *encoded_len = data_len - offset - 8 /* size of the assumed GZIP footer */; + } + + return SUCCESS; + +really_bad_gzip_header: + http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP header"); + return FAILURE; +} + +inline STATUS http_verify_gzdecode_buffer(const char *data, size_t data_len, const char *decoded, size_t decoded_len, int error_level TSRMLS_DC) +{ + STATUS status = SUCCESS; + unsigned long len, cmp, crc; + + crc = crc32(0L, Z_NULL, 0); + crc = crc32(crc, (const Bytef *) decoded, decoded_len); + + cmp = (unsigned) ((data[data_len-8] & 0xFF)); + cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8); + cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16); + cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24); + len = (unsigned) ((data[data_len-4] & 0xFF)); + len += (unsigned) ((data[data_len-3] & 0xFF) << 8); + len += (unsigned) ((data[data_len-2] & 0xFF) << 16); + len += (unsigned) ((data[data_len-1] & 0xFF) << 24); + + if (cmp != crc) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc); + status = FAILURE; + } + if (len != decoded_len) { + http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, decoded_len); + status = FAILURE; + } + return status; +} + +PHP_HTTP_API STATUS _http_encode(http_encoding_type type, int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC) +{ + STATUS status = SUCCESS; + + switch (type) + { + case HTTP_ENCODING_ANY: + case HTTP_ENCODING_GZIP: + status = http_encoding_gzencode(level, data, data_len, encoded, encoded_len); + break; + + case HTTP_ENCODING_DEFLATE: + status = http_encoding_deflate(level, data, data_len, encoded, encoded_len); + break; + + case HTTP_ENCODING_COMPRESS: + status = http_encoding_compress(level, data, data_len, encoded, encoded_len); + break; + + case HTTP_ENCODING_NONE: + default: + *encoded = estrndup(data, data_len); + *encoded_len = data_len; + break; + } + + return status; +} + +PHP_HTTP_API STATUS _http_decode(http_encoding_type type, const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC) +{ + STATUS status = SUCCESS; + + switch (type) + { + case HTTP_ENCODING_ANY: + if ( SUCCESS != http_encoding_gzdecode(data, data_len, decoded, decoded_len) && + SUCCESS != http_encoding_inflate(data, data_len, decoded, decoded_len) && + SUCCESS != http_encoding_uncompress(data, data_len, decoded, decoded_len)) { + status = FAILURE; + } + break; + + case HTTP_ENCODING_GZIP: + status = http_encoding_gzdecode(data, data_len, decoded, decoded_len); + break; + + case HTTP_ENCODING_DEFLATE: + status = http_encoding_inflate(data, data_len, decoded, decoded_len); + break; + + case HTTP_ENCODING_COMPRESS: + status = http_encoding_uncompress(data, data_len, decoded, decoded_len); + break; + + case HTTP_ENCODING_NONE: + default: + *decoded = estrndup(data, data_len); + *decoded_len = data_len; + break; + } + + return status; +} PHP_HTTP_API STATUS _http_encoding_gzencode(int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC) { @@ -242,38 +429,15 @@ PHP_HTTP_API STATUS _http_encoding_compress(int level, const char *data, size_t PHP_HTTP_API STATUS _http_encoding_gzdecode(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC) { - const char *encoded = data + sizeof(http_gzencode_header); + const char *encoded; size_t encoded_len; - if (data_len <= sizeof(http_gzencode_header) + 8) { - http_error(HE_WARNING, HTTP_E_ENCODING, "Could not gzdecode data: too short data length"); - } else { - encoded_len = data_len - sizeof(http_gzencode_header) - 8; - - if (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len)) { - unsigned long len = 0, cmp = 0, crc = crc32(0L, Z_NULL, 0); - - crc = crc32(crc, (const Bytef *) *decoded, *decoded_len); - - cmp = (unsigned) ((data[data_len-8] & 0xFF)); - cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8); - cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16); - cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24); - len = (unsigned) ((data[data_len-4] & 0xFF)); - len += (unsigned) ((data[data_len-3] & 0xFF) << 8); - len += (unsigned) ((data[data_len-2] & 0xFF) << 16); - len += (unsigned) ((data[data_len-1] & 0xFF) << 24); - - if (cmp != crc) { - http_error_ex(HE_NOTICE, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc); - } - if (len != *decoded_len) { - http_error_ex(HE_NOTICE, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, *decoded_len); - } - - return SUCCESS; - } + if ( (SUCCESS == http_verify_gzencode_buffer(data, data_len, &encoded, &encoded_len, HE_NOTICE)) && + (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len))) { + http_verify_gzdecode_buffer(data, data_len, *decoded, *decoded_len, HE_NOTICE); + return SUCCESS; } + return FAILURE; } @@ -293,8 +457,9 @@ PHP_HTTP_API STATUS _http_encoding_inflate(const char *data, size_t data_len, ch } } } - } while (max < HTTP_GZMAXTRY); + } while (max < HTTP_GZMAXTRY && status == Z_BUF_ERROR); + efree(*decoded); http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not inflate data: %s", zError(status)); return FAILURE; } @@ -303,21 +468,14 @@ PHP_HTTP_API STATUS _http_encoding_uncompress(const char *data, size_t data_len, { int max = 0; STATUS status; - size_t want = data_len * 2; - - *decoded = emalloc(want + 1); - if (Z_BUF_ERROR == (status = uncompress(*decoded, &want, data, data_len))) do { - /* this is a lot faster with large data than gzuncompress(), - but could be a problem with a low memory limit */ - want <<= 2; - *decoded = erealloc(*decoded, want + 1); - status = uncompress(*decoded, &want, data, data_len); - } while (++max < HTTP_GZMAXTRY && status == Z_BUF_ERROR); - - if (Z_OK == status) { - *decoded_len = http_finish_buffer(want, decoded); - return SUCCESS; - } + + do { + http_init_uncompress_buffer(data_len, decoded, decoded_len, max++); + if (Z_OK == (status = uncompress(*decoded, decoded_len, data, data_len))) { + http_finish_buffer(*decoded_len, decoded); + return SUCCESS; + } + } while (max < HTTP_GZMAXTRY && status == Z_BUF_ERROR); efree(*decoded); http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not uncompress data: %s", zError(status));