X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=blobdiff_plain;f=http_encoding_api.c;h=28ea4232d2627dcf2d5d5e61e51f6ec1ac0eafe1;hp=a1893df44395e0b2de4472ce6a3e05f4ed030406;hb=61e3ea78efcc501cf7ec2df4749aa92935c0964e;hpb=a6b4fe496b44ab45fbc84d0b491ce7322e7532f7

diff --git a/http_encoding_api.c b/http_encoding_api.c
index a1893df..28ea423 100644
--- a/http_encoding_api.c
+++ b/http_encoding_api.c
@@ -95,15 +95,24 @@ PHP_HTTP_API const char *_http_encoding_dechunk(const char *encoded, size_t enco
 #ifdef HTTP_HAVE_ZLIB
 #include <zlib.h>
 
+/* max number of uncompress attempts; the output buffer size is multiplied by 4 (<<= 2) on each retry */
 #define HTTP_GZMAXTRY 10
-#define HTTP_GZBUFLEN(l) (l + (l / 1000) + 16 + 1)
+/* safety padding in bytes */
+#define HTTP_GZSAFPAD 10
+/* input length plus 1% plus padding, in case we need to encode widely differing (binary) data; (l) is parenthesized so that expression arguments expand correctly */
+#define HTTP_GZBUFLEN(l) ((l) + ((l) / 100) + HTTP_GZSAFPAD)
 
 static const char http_gzencode_header[] = {
-	(const char) 0x1f, 
-	(const char) 0x8b, 
-	(const char) Z_DEFLATED, 
-	0, 0, 0, 0, 0, 0, 
-	(const char) 0x03
+	(const char) 0x1f,			/* ID1: fixed value */
+	(const char) 0x8b,			/* ID2: fixed value */
+	(const char) Z_DEFLATED,	/* CM: compression algorithm */
+	(const char) 0,				/* FLG: none of the possible flags defined by the GZIP RFC (1952) */
+	(const char) 0,				/* MTIME: no modification time available (4 bytes) */
+	(const char) 0,				/* MTIME, 2nd byte */
+	(const char) 0,				/* MTIME, 3rd byte */
+	(const char) 0,				/* MTIME, 4th byte */
+	(const char) 0,				/* XFL: extra flags hinting at the compression level, none set */
+	(const char) 0x03			/* OS: assume *nix */
 };
 
 inline void http_init_gzencode_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr)
@@ -114,9 +123,9 @@ inline void http_init_gzencode_buffer(z_stream *Z, const char *data, size_t data
 	
 	Z->next_in   = (Bytef *) data;
 	Z->avail_in  = data_len;
-	Z->avail_out = HTTP_GZBUFLEN(data_len) - 1;
+	Z->avail_out = HTTP_GZBUFLEN(data_len) + HTTP_GZSAFPAD - 1;
 	
-	*buf_ptr = emalloc(Z->avail_out + sizeof(http_gzencode_header));
+	*buf_ptr = emalloc(HTTP_GZBUFLEN(data_len) + sizeof(http_gzencode_header) + HTTP_GZSAFPAD);
 	memcpy(*buf_ptr, http_gzencode_header, sizeof(http_gzencode_header));
 	
 	Z->next_out = *buf_ptr + sizeof(http_gzencode_header);
@@ -128,20 +137,17 @@ inline void http_init_deflate_buffer(z_stream *Z, const char *data, size_t data_
 	Z->zfree  = Z_NULL;
 	Z->opaque = Z_NULL;
 
-	Z->data_type = Z_ASCII;
+	Z->data_type = Z_UNKNOWN;
 	Z->next_in   = (Bytef *) data;
 	Z->avail_in  = data_len;
 	Z->avail_out = HTTP_GZBUFLEN(data_len) - 1;
-	Z->next_out  = emalloc(Z->avail_out);
+	Z->next_out  = emalloc(HTTP_GZBUFLEN(data_len));
 	
 	*buf_ptr = Z->next_out;
 }
 
-inline void http_init_inflate_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr, size_t *buf_len, int iteration)
+inline void http_init_uncompress_buffer(size_t data_len, char **buf_ptr, size_t *buf_len, int iteration)
 {
-	Z->zalloc = Z_NULL;
-	Z->zfree  = Z_NULL;
-	
 	if (!iteration) {
 		*buf_len = data_len * 2;
 		*buf_ptr = emalloc(*buf_len + 1);
@@ -149,6 +155,14 @@ inline void http_init_inflate_buffer(z_stream *Z, const char *data, size_t data_
 		*buf_len <<= 2;
 		*buf_ptr = erealloc(*buf_ptr, *buf_len + 1);
 	}
+}
+
+inline void http_init_inflate_buffer(z_stream *Z, const char *data, size_t data_len, char **buf_ptr, size_t *buf_len, int iteration)
+{
+	Z->zalloc = Z_NULL;
+	Z->zfree  = Z_NULL;
+	
+	http_init_uncompress_buffer(data_len, buf_ptr, buf_len, iteration);
 	
 	Z->next_in   = (Bytef *) data;
 	Z->avail_in  = data_len;
@@ -185,6 +199,179 @@ inline size_t http_finish_gzencode_buffer(z_stream *Z, const char *data, size_t
 	return http_finish_buffer(Z->total_out + sizeof(http_gzencode_header) + 8, buf_ptr);
 }
 
+/* Validate the GZIP member header at the start of data (field layout per RFC 1952).
+ * On SUCCESS the optional out params encoded/encoded_len receive the span between
+ * the header and the assumed 8 byte footer; on FAILURE they are left untouched.
+ * Problems are reported with error_level; no read goes beyond data_len. */
+inline STATUS http_verify_gzencode_buffer(const char *data, size_t data_len, const char **encoded, size_t *encoded_len, int error_level TSRMLS_DC)
+{
+	size_t offset = sizeof(http_gzencode_header);
+	const char *nul;
+	
+	if (data_len < offset) {
+		goto really_bad_gzip_header;
+	}
+	
+	if (data[0] != (const char) 0x1F || data[1] != (const char) 0x8B) {
+		http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized GZIP header start: 0x%02X 0x%02X", (int) (data[0] & 0xFF), (int) (data[1] & 0xFF));
+		return FAILURE;
+	}
+	
+	if (data[2] != (const char) Z_DEFLATED) {
+		http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Unrecognized compression format (%d)", (int) (data[2] & 0xFF));
+		/* still try to decode */
+	}
+	if ((data[3] & 0x4) == 0x4) {
+		if (data_len < offset + 2) {
+			goto really_bad_gzip_header;
+		}
+		/* extra fields: 2 length bytes (LSB first), both read before offset moves, then the fields */
+		offset += 2 + (size_t) (data[offset] & 0xFF) + ((size_t) (data[offset + 1] & 0xFF) << 8);
+	}
+	if ((data[3] & 0x8) == 0x8) {
+		/* there's a NUL terminated file name; the search must not leave the buffer */
+		if (data_len <= offset || !(nul = memchr(&data[offset], 0, data_len - offset))) {
+			goto really_bad_gzip_header;
+		}
+		offset = (size_t) (nul - data) + 1 /* NUL */;
+	}
+	if ((data[3] & 0x10) == 0x10) {
+		/* there's a NUL terminated comment; the search must not leave the buffer */
+		if (data_len <= offset || !(nul = memchr(&data[offset], 0, data_len - offset))) {
+			goto really_bad_gzip_header;
+		}
+		offset = (size_t) (nul - data) + 1 /* NUL */;
+	}
+	if ((data[3] & 0x2) == 0x2) {
+		/* there's a CRC16: the low 16 bits of the CRC32 of all header bytes before it */
+		offset += 2;
+		if (data_len <= offset) {
+			goto really_bad_gzip_header;
+		} else {
+			unsigned long crc, cmp;
+			
+			cmp = (unsigned long) (data[offset-2] & 0xFF) + ((unsigned long) (data[offset-1] & 0xFF) << 8);
+			crc = crc32(0L, Z_NULL, 0);
+			crc = crc32(crc, (const Bytef *) data, (uInt) (offset - 2));
+			
+			if (cmp != (crc & 0xFFFF)) {
+				http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "GZIP headers CRC checksums do not match (%lu, %lu)", cmp, crc & 0xFFFF);
+				return FAILURE;
+			}
+		}
+	}
+	
+	if (data_len < offset + 8) {
+		http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP footer");
+		return FAILURE;
+	}
+	
+	if (encoded) {
+		*encoded = data + offset;
+	}
+	if (encoded_len) {
+		*encoded_len = data_len - offset - 8 /* size of the assumed GZIP footer */;
+	}
+	
+	return SUCCESS;
+	
+really_bad_gzip_header:
+	http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP header");
+	return FAILURE;
+}
+
+/* Check decoded against the last 8 bytes of data, read as a CRC32 followed by a
+ * length (both LSB first). Mismatches are reported with error_level and yield FAILURE. */
+inline STATUS http_verify_gzdecode_buffer(const char *data, size_t data_len, const char *decoded, size_t decoded_len, int error_level TSRMLS_DC)
+{
+	STATUS status = SUCCESS;
+	unsigned long len, cmp, crc;
+	const unsigned char *footer;
+	
+	if (data_len < 8) {
+		http_error(error_level TSRMLS_CC, HTTP_E_ENCODING, "Missing or truncated GZIP footer");
+		return FAILURE;
+	}
+	footer = (const unsigned char *) data + data_len - 8;
+	crc = crc32(crc32(0L, Z_NULL, 0), (const Bytef *) decoded, decoded_len);
+	/* widen each byte before shifting; an int holding 0xFF shifted left by 24 overflows */
+	cmp = (unsigned long) footer[0] | ((unsigned long) footer[1] << 8) | ((unsigned long) footer[2] << 16) | ((unsigned long) footer[3] << 24);
+	len = (unsigned long) footer[4] | ((unsigned long) footer[5] << 8) | ((unsigned long) footer[6] << 16) | ((unsigned long) footer[7] << 24);
+	if (cmp != crc) {
+		http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc);
+		status = FAILURE;
+	}
+	if (len != decoded_len) {
+		http_error_ex(error_level TSRMLS_CC, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, (unsigned long) decoded_len);
+		status = FAILURE;
+	}
+	return status;
+}
+
+/* Encode data with the encoding selected by type and hand the result back in
+ * *encoded / *encoded_len. HTTP_ENCODING_ANY is treated like HTTP_ENCODING_GZIP;
+ * HTTP_ENCODING_NONE and unknown types yield a plain estrndup() copy of data. */
+PHP_HTTP_API STATUS _http_encode(http_encoding_type type, int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
+{
+	switch (type)
+	{
+		case HTTP_ENCODING_DEFLATE:
+			return http_encoding_deflate(level, data, data_len, encoded, encoded_len);
+		
+		case HTTP_ENCODING_COMPRESS:
+			return http_encoding_compress(level, data, data_len, encoded, encoded_len);
+		
+		case HTTP_ENCODING_GZIP:
+		case HTTP_ENCODING_ANY:
+			return http_encoding_gzencode(level, data, data_len, encoded, encoded_len);
+		
+		case HTTP_ENCODING_NONE:
+		default:
+			/* nothing to compress with, fall out of the switch */
+		break;
+	}
+	
+	/* pass the data through unencoded */
+	*encoded = estrndup(data, data_len);
+	*encoded_len = data_len;
+	return SUCCESS;
+}
+
+/* Decode data according to type; the result is handed back in *decoded / *decoded_len.
+ * HTTP_ENCODING_ANY tries gzdecode, inflate and uncompress in that order and stops
+ * at the first success. HTTP_ENCODING_NONE and unknown types yield a plain
+ * estrndup() copy of data. */
+PHP_HTTP_API STATUS _http_decode(http_encoding_type type, const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
+{
+	switch (type)
+	{
+		case HTTP_ENCODING_GZIP:
+			return http_encoding_gzdecode(data, data_len, decoded, decoded_len);
+		
+		case HTTP_ENCODING_DEFLATE:
+			return http_encoding_inflate(data, data_len, decoded, decoded_len);
+		
+		case HTTP_ENCODING_COMPRESS:
+			return http_encoding_uncompress(data, data_len, decoded, decoded_len);
+		
+		case HTTP_ENCODING_ANY:
+			/* || short-circuits, so a later decoder only runs if the earlier ones failed */
+			if (	SUCCESS == http_encoding_gzdecode(data, data_len, decoded, decoded_len) ||
+					SUCCESS == http_encoding_inflate(data, data_len, decoded, decoded_len) ||
+					SUCCESS == http_encoding_uncompress(data, data_len, decoded, decoded_len)) {
+				return SUCCESS;
+			}
+			return FAILURE;
+		
+		case HTTP_ENCODING_NONE:
+		default:
+		break;
+	}
+	
+	*decoded = estrndup(data, data_len);
+	*decoded_len = data_len;
+	return SUCCESS;
+}
 
 PHP_HTTP_API STATUS _http_encoding_gzencode(int level, const char *data, size_t data_len, char **encoded, size_t *encoded_len TSRMLS_DC)
 {
@@ -242,38 +429,15 @@ PHP_HTTP_API STATUS _http_encoding_compress(int level, const char *data, size_t
 
 PHP_HTTP_API STATUS _http_encoding_gzdecode(const char *data, size_t data_len, char **decoded, size_t *decoded_len TSRMLS_DC)
 {
-	const char *encoded = data + sizeof(http_gzencode_header);
+	const char *encoded;	/* start of the payload behind the GZIP header, set by http_verify_gzencode_buffer() */
 	size_t encoded_len;
 	
-	if (data_len <= sizeof(http_gzencode_header) + 8) {
-		http_error(HE_WARNING, HTTP_E_ENCODING, "Could not gzdecode data: too short data length");
-	} else {
-		encoded_len = data_len - sizeof(http_gzencode_header) - 8;
-		
-		if (SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len)) {
-			unsigned long len = 0, cmp = 0, crc = crc32(0L, Z_NULL, 0);
-			
-			crc = crc32(crc, (const Bytef *) *decoded, *decoded_len);
-			
-			cmp  = (unsigned) ((data[data_len-8] & 0xFF));
-			cmp += (unsigned) ((data[data_len-7] & 0xFF) << 8);
-			cmp += (unsigned) ((data[data_len-6] & 0xFF) << 16);
-			cmp += (unsigned) ((data[data_len-5] & 0xFF) << 24);
-			len  = (unsigned) ((data[data_len-4] & 0xFF));
-			len += (unsigned) ((data[data_len-3] & 0xFF) << 8);
-			len += (unsigned) ((data[data_len-2] & 0xFF) << 16);
-			len += (unsigned) ((data[data_len-1] & 0xFF) << 24);
-			
-			if (cmp != crc) {
-				http_error_ex(HE_NOTICE, HTTP_E_ENCODING, "Could not verify data integrity: CRC checksums do not match (%lu, %lu)", cmp, crc);
-			}
-			if (len != *decoded_len) {
-				http_error_ex(HE_NOTICE, HTTP_E_ENCODING, "Could not verify data integrity: data sizes do not match (%lu, %lu)", len, *decoded_len);
-			}
-			
-			return SUCCESS;
-		}
+	if (	(SUCCESS == http_verify_gzencode_buffer(data, data_len, &encoded, &encoded_len, HE_NOTICE)) &&
+			(SUCCESS == http_encoding_inflate(encoded, encoded_len, decoded, decoded_len))) {
+		http_verify_gzdecode_buffer(data, data_len, *decoded, *decoded_len, HE_NOTICE); /* result is not checked: a footer mismatch is reported but does not fail the decode */
+		return SUCCESS;
 	}
+	/* the GZIP header did not verify or the payload could not be inflated */
 	return FAILURE;
 }
 
@@ -293,8 +457,9 @@ PHP_HTTP_API STATUS _http_encoding_inflate(const char *data, size_t data_len, ch
 				}
 			}
 		}
-	} while (max < HTTP_GZMAXTRY);
+	} while (max < HTTP_GZMAXTRY && status == Z_BUF_ERROR);
 	
+	efree(*decoded);
 	http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not inflate data: %s", zError(status));
 	return FAILURE;
 }
@@ -303,21 +468,14 @@ PHP_HTTP_API STATUS _http_encoding_uncompress(const char *data, size_t data_len,
 {
 	int max = 0;
 	STATUS status;
-	size_t want = data_len * 2;
-	
-	*decoded = emalloc(want + 1);
-	if (Z_BUF_ERROR == (status = uncompress(*decoded, &want, data, data_len))) do {
-		/*	this is a lot faster with large data than gzuncompress(),
-			but could be a problem with a low memory limit */
-		want <<= 2;
-		*decoded = erealloc(*decoded, want + 1);
-		status = uncompress(*decoded, &want, data, data_len);
-	} while (++max < HTTP_GZMAXTRY && status == Z_BUF_ERROR);
-	
-	if (Z_OK == status) {
-		*decoded_len = http_finish_buffer(want, decoded);
-		return SUCCESS;
-	}
+	
+	do {
+		http_init_uncompress_buffer(data_len, decoded, decoded_len, max++);
+		if (Z_OK == (status = uncompress(*decoded, decoded_len, data, data_len))) {
+			http_finish_buffer(*decoded_len, decoded);
+			return SUCCESS;
+		}
+	} while (max < HTTP_GZMAXTRY && status == Z_BUF_ERROR);
 	
 	efree(*decoded);
 	http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Could not uncompress data: %s", zError(status));