From: Michael Wallner Date: Fri, 28 Oct 2005 10:51:11 +0000 (+0000) Subject: - fix request errors hidden under the hood of the request pool X-Git-Tag: RELEASE_0_17_0~14 X-Git-Url: https://git.m6w6.name/?p=m6w6%2Fext-http;a=commitdiff_plain;h=f0d2eafadc9967abd53546c6b7f395ce095255e9 - fix request errors hidden under the hood of the request pool - smarter chunked decoder - add exhaustive HttpRequestPool test - add two tests for truncated chunked encoded data --- diff --git a/http_encoding_api.c b/http_encoding_api.c index 0062623..b4244f0 100644 --- a/http_encoding_api.c +++ b/http_encoding_api.c @@ -29,13 +29,13 @@ ZEND_EXTERN_MODULE_GLOBALS(http); -static inline int eol_match(char **line, int *EOL_len) +static inline int eol_match(char **line, int *eol_len) { char *ptr = *line; while (0x20 == *ptr) ++ptr; - if (ptr == http_locate_eol(*line, EOL_len)) { + if (ptr == http_locate_eol(*line, eol_len)) { *line = ptr; return 1; } else { @@ -46,63 +46,69 @@ static inline int eol_match(char **line, int *EOL_len) /* {{{ char *http_encoding_dechunk(char *, size_t, char **, size_t *) */ PHP_HTTP_API const char *_http_encoding_dechunk(const char *encoded, size_t encoded_len, char **decoded, size_t *decoded_len TSRMLS_DC) { - const char *e_ptr; - char *d_ptr; - long rest; + int eol_len = 0; + char *n_ptr = NULL; + const char *e_ptr = encoded; *decoded_len = 0; *decoded = ecalloc(1, encoded_len); - d_ptr = *decoded; - e_ptr = encoded; - - while ((rest = encoded + encoded_len - e_ptr) > 0) { - long chunk_len = 0; - int EOL_len = 0, eol_mismatch = 0; - char *n_ptr; - - chunk_len = strtol(e_ptr, &n_ptr, 16); - - /* check if: - * - we could not read in chunk size - * - we got a negative chunk size - * - chunk size is greater then remaining size - * - chunk size is not followed by (CR)LF|NUL - */ - if ( (n_ptr == e_ptr) || (chunk_len < 0) || (chunk_len > rest) || - (*n_ptr && (eol_mismatch = !eol_match(&n_ptr, &EOL_len)))) { - /* don't fail on apperently not encoded data */ + + while ((encoded + encoded_len - e_ptr) > 0) { + unsigned long chunk_len = 0, rest; + + chunk_len = strtoul(e_ptr, &n_ptr, 16); + + /* we could not read in chunk size */ + if (n_ptr == e_ptr) { + /* + * if this is the first turn and there doesn't seem to be a chunk + * size at the begining of the body, do not fail on apparently + * not encoded data and return a copy + */ if (e_ptr == encoded) { + http_error(HE_NOTICE, HTTP_E_ENCODING, "Data does not seem to be chunked encoded"); memcpy(*decoded, encoded, encoded_len); *decoded_len = encoded_len; return encoded + encoded_len; } else { efree(*decoded); - if (eol_mismatch) { - if (EOL_len == 2) { - http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Invalid character (expected 0x0D 0x0A; got: 0x%02X 0x%02X)", *n_ptr, *(n_ptr + 1)); - } else { - http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Invalid character (expected 0x0A; got: 0x%02X)", *n_ptr); - } - } else { - char *error = estrndup(e_ptr, strcspn(n_ptr, "\r\n ")); - http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Invalid chunk size: '%s' at pos %ld of %lu", error, (long) (n_ptr - encoded), (unsigned long) encoded_len); - efree(error); - } + http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected chunk size at pos %lu of %lu but got trash", (unsigned long) (n_ptr - encoded), (unsigned long) encoded_len); return NULL; } - } else { - e_ptr = n_ptr; } - + /* reached the end */ if (!chunk_len) { break; } - memcpy(d_ptr, e_ptr += EOL_len, chunk_len); - d_ptr += chunk_len; - e_ptr += chunk_len + EOL_len; + /* there should be CRLF after the chunk size, but we'll ignore SP+ too */ + if (*n_ptr && !eol_match(&n_ptr, &eol_len)) { + if (eol_len == 2) { + http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected CRLF at pos %lu of %lu but got 0x%02X 0x%02X", (unsigned long) (n_ptr - encoded), (unsigned long) encoded_len, *n_ptr, *(n_ptr + 1)); + } else { + http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Expected LF at pos %lu of %lu but got 0x%02X", (unsigned long) (n_ptr - encoded), (unsigned long) encoded_len, *n_ptr); + } + } + n_ptr += eol_len; + + /* chunk size pretends more data than we actually got, so it's probably a truncated message */ + if (chunk_len > (rest = encoded + encoded_len - n_ptr)) { + http_error_ex(HE_WARNING, HTTP_E_ENCODING, "Truncated message: chunk size %lu exceeds remaining data size %lu at pos %lu of %lu", chunk_len, rest, (unsigned long) (n_ptr - encoded), (unsigned long) encoded_len); + chunk_len = rest; + } + + /* copy the chunk */ + memcpy(*decoded + *decoded_len, n_ptr, chunk_len); *decoded_len += chunk_len; + + if (chunk_len == rest) { + e_ptr = n_ptr + chunk_len; + break; + } else { + /* advance to next chunk */ + e_ptr = n_ptr + chunk_len + eol_len; + } } return e_ptr; diff --git a/http_request_pool_api.c b/http_request_pool_api.c index ca7aec7..0faf275 100644 --- a/http_request_pool_api.c +++ b/http_request_pool_api.c @@ -239,6 +239,12 @@ PHP_HTTP_API void _http_request_pool_dtor(http_request_pool *pool TSRMLS_DC) } /* }}} */ +#ifdef PHP_WIN32 +# define SELECT_ERROR SOCKET_ERROR +#else +# define SELECT_ERROR -1 +#endif + /* {{{ STATUS http_request_pool_select(http_request_pool *) */ PHP_HTTP_API STATUS _http_request_pool_select(http_request_pool *pool) { @@ -250,12 +256,12 @@ PHP_HTTP_API STATUS _http_request_pool_select(http_request_pool *pool) FD_ZERO(&W); FD_ZERO(&E); - curl_multi_fdset(pool->ch, &R, &W, &E, &MAX); -#ifdef PHP_WIN32 - return (SOCKET_ERROR != select(MAX + 1, &R, &W, &E, &timeout)) ? SUCCESS : FAILURE; -#else - return (-1 != select(MAX + 1, &R, &W, &E, &timeout)) ? SUCCESS : FAILURE; -#endif + if (CURLM_OK == curl_multi_fdset(pool->ch, &R, &W, &E, &MAX)) { + if (MAX == -1 || SELECT_ERROR != select(MAX + 1, &R, &W, &E, &timeout)) { + return SUCCESS; + } + } + return FAILURE; } /* }}} */ @@ -269,11 +275,9 @@ PHP_HTTP_API int _http_request_pool_perform(http_request_pool *pool TSRMLS_DC) while (msg = curl_multi_info_read(pool->ch, &remaining)) { if (CURLMSG_DONE == msg->msg) { - -#if HTTP_DEBUG_REQPOOLS - fprintf(stderr, "Done CURL handle: %p (remaining: %d)\n", msg->easy_handle, remaining); -#endif - + if (CURLE_OK != msg->data.result) { + http_error(HE_WARNING, HTTP_E_REQUEST, curl_easy_strerror(msg->data.result)); + } zend_llist_apply_with_argument(&pool->handles, (llist_apply_with_arg_func_t) http_request_pool_responsehandler, msg->easy_handle TSRMLS_CC); } } diff --git a/package.xml b/package.xml index 74d2221..4cda6ae 100644 --- a/package.xml +++ b/package.xml @@ -24,12 +24,17 @@ 0.16.0 - 2005-10-24 + 2005-10-28 BSD, revised beta + Added ext/zlib independant GZIP support + Added HttpRequestPool::getAttachedRequests() and getFinishedRequests() -+ Added experimental thread safety for curl-gnutls builds ++ Added experimental thread safety for builds linking against libcurl-gnutls ++ Improved the chunked decoder + +- License change! + +* Fixed a bug where HttpRequest warnings were hidden within the HttpRequestPool @@ -66,8 +71,12 @@ + + + + @@ -85,6 +94,7 @@ + @@ -95,6 +105,7 @@ + @@ -105,6 +116,7 @@ + @@ -122,6 +134,7 @@ + @@ -149,6 +162,7 @@ + diff --git a/package2.xml b/package2.xml index 2bab9b3..bccc86e 100644 --- a/package2.xml +++ b/package2.xml @@ -41,7 +41,12 @@ @@ -118,14 +123,19 @@ + + + + + @@ -142,6 +152,7 @@ + @@ -151,6 +162,7 @@ + @@ -161,6 +173,7 @@ + diff --git a/tests/HttpRequestPool_003.phpt b/tests/HttpRequestPool_003.phpt new file mode 100644 index 0000000..c89efa4 --- /dev/null +++ b/tests/HttpRequestPool_003.phpt @@ -0,0 +1,171 @@ +--TEST-- +HttpRequestPool chain +--SKIPIF-- + +--FILE-- +dir = (is_dir($cache_dir) or @mkdir($cache_dir)) ? $cache_dir : null; + + foreach (array_map('trim', file($urls_file)) as $url) { + $this->all[$url] = $this->dir ? $this->dir .'/'. md5($url) : null; + } + + $this->send(); + } + + public final function send() + { + if (RMAX) { + $now = array_slice($this->all, 0, RMAX); + $this->rem = array_slice($this->all, RMAX); + } else { + $now = $urls; + $this->rem = array(); + } + + foreach ($now as $url => $file) { + $this->attach( + new HttpRequest( + $url, + HttpRequest::METH_GET, + array( + 'redirect' => 5, + 'timeout' => 50, + 'connecttimeout' => 10, + 'lastmodified' => is_file($file)?filemtime($file):0 + ) + ) + ); + } + + while ($this->socketPerform()) { + if (!$this->socketSelect()) { + throw new HttpSocketException; + } + } + } + + protected final function socketPerform() + { + try { + $rc = parent::socketPerform(); + } catch (HttpRequestException $x) { + // a request may have thrown an exception, + // but it is still save to continue + echo $x->getMessage(), "\n"; + } + + foreach ($this->getFinishedRequests() as $r) { + $this->detach($r); + + $u = $r->getUrl(); + $c = $r->getResponseCode(); + $b = $r->getResponseBody(); + + printf("%d %s %d\n", $c, $u, strlen($b)); + + if ($c == 200 && $this->dir) { + file_put_contents($this->all[$u], $b); + } + + if ($a = each($this->rem)) { + list($url, $file) = $a; + $this->attach( + new HttpRequest( + $url, + HttpRequest::METH_GET, + array( + 'redirect' => 3, + 'timeout' => 25, + 'compress' => true, + 'connecttimeout' => 10, + 'lastmodified' => is_file($file)?filemtime($file):0 + ) + ) + ); + } + } + return $rc; + } +} + +define('RMAX', 10); +chdir(dirname(__FILE__)); + +$time = microtime(true); +new Pool(); +printf("Elapsed: %0.3fs\n", microtime(true)-$time); + +echo "Done\n"; +?> +--EXPECTF-- +%sTEST +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +%d %s %d +Elapsed: %fs +Done diff --git a/tests/chunked_decode_003.phpt b/tests/chunked_decode_003.phpt new file mode 100644 index 0000000..5af29a4 --- /dev/null +++ b/tests/chunked_decode_003.phpt @@ -0,0 +1,27 @@ +--TEST-- +http_chunked_decode() truncated message +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +%sTEST +%sWarning%s:%shttp_chunked_decode()%sTruncated message: chunk size %d exceeds remaining data size %d at pos %d of %d%s +string(24) "abra +cadabra +all we got +" diff --git a/tests/chunked_decode_004.phpt b/tests/chunked_decode_004.phpt new file mode 100644 index 0000000..77aed28 --- /dev/null +++ b/tests/chunked_decode_004.phpt @@ -0,0 +1,26 @@ +--TEST-- +http_chunked_decode() truncated message ending with NUL after a chunk +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +%sTEST +string(24) "abra +cadabra +all we got +"