} current;
unsigned quotes:1;
unsigned escape:1;
+ unsigned rfc5987:1;
} php_http_params_state_t;
static inline void sanitize_default(zval *zv TSRMLS_DC)
--ht->nApplyCount;
}
-static inline void sanitize_key(unsigned flags, char *str, size_t len, zval *zv TSRMLS_DC)
+static inline void sanitize_key(unsigned flags, char *str, size_t len, zval *zv, zend_bool *rfc5987 TSRMLS_DC)
{
+ char *eos;
+
zval_dtor(zv);
php_trim(str, len, NULL, 0, zv, 3 TSRMLS_CC);
sanitize_default(zv TSRMLS_CC);
}
+ eos = &Z_STRVAL_P(zv)[Z_STRLEN_P(zv)-1];
+ if (*eos == '*') {
+ *eos = '\0';
+ *rfc5987 = 1;
+ Z_STRLEN_P(zv) -= 1;
+ }
+
if (flags & PHP_HTTP_PARAMS_URLENCODED) {
sanitize_urlencoded(zv TSRMLS_CC);
}
}
}
-static inline void sanitize_value(unsigned flags, char *str, size_t len, zval *zv TSRMLS_DC)
+/*
+ * Split an extended parameter value of the form
+ *     charset'language'remainder
+ * held as a string in zv.
+ *
+ * examples:
+ *     iso-8859-1'de'bl%f6der%20schei%df%21
+ *     utf-8'de-DE'bl%c3%b6der%20schei%c3%9f%21
+ *
+ * Only the charsets "iso-8859-1" and "utf-8" (compared case-insensitively)
+ * are recognized; for any other prefix nothing is touched.
+ *
+ * zv       in/out: on success its string is replaced by a copy of the
+ *          remainder following the second quote
+ * language out: on success set to a newly allocated copy of the language
+ *          tag; set to NULL if the closing quote is missing
+ * latin1   out: set to 1 for iso-8859-1, 0 for utf-8
+ */
+static inline void sanitize_rfc5987(zval *zv, char **language, zend_bool *latin1 TSRMLS_DC)
+{
+	char *value = Z_STRVAL_P(zv);
+	char *cursor, *lang_start, *rest;
+
+	/* identify the charset prefix */
+	if (!strncasecmp(value, ZEND_STRL("iso-8859-1"))) {
+		*latin1 = 1;
+		cursor = value + lenof("iso-8859-1");
+	} else if (!strncasecmp(value, ZEND_STRL("utf-8"))) {
+		*latin1 = 0;
+		cursor = value + lenof("utf-8");
+	} else {
+		return;
+	}
+
+	/* the charset must be followed directly by the opening quote */
+	if (*cursor != '\'') {
+		return;
+	}
+
+	/* extract language: everything up to the closing quote */
+	lang_start = ++cursor;
+	while (*cursor && *cursor != '\'') {
+		++cursor;
+	}
+	if (!*cursor) {
+		/* unterminated language tag */
+		*language = NULL;
+		return;
+	}
+	*language = estrndup(lang_start, cursor - lang_start);
+
+	/* remainder: duplicate it before the buffer it lives in is destroyed */
+	rest = estrdup(cursor + 1);
+	zval_dtor(zv);
+	ZVAL_STRING(zv, rest, 0);
+}
+
+/*
+ * Re-encode the string held in zv from ISO-8859-1 to UTF-8, replacing the
+ * zval's buffer with a newly allocated one.
+ *
+ * Bytes below 0x80 are copied unchanged; every byte b >= 0x80 becomes the
+ * two-byte sequence 0xC0 | (b >> 6), 0x80 | (b & 0x3F).
+ *
+ * Both the sizing pass and the copy pass are bounded by Z_STRLEN_P(zv).
+ * Sizing by NUL terminator instead would under-allocate whenever the value
+ * contains an embedded NUL byte, and the length-bounded copy pass would
+ * then write past the end of the new buffer.
+ */
+static void utf8encode(zval *zv)
{
+	const unsigned char *src = (const unsigned char *) Z_STRVAL_P(zv);
+	size_t src_len = (size_t) Z_STRLEN_P(zv);
+	size_t i, pos, len = 0;
+	unsigned char *dst;
+
+	/* sizing pass: one byte per ASCII byte, two per high byte */
+	for (i = 0; i < src_len; ++i) {
+		len += (src[i] >= 0x80) ? 2 : 1;
+	}
+
+	/* len payload bytes plus one for the terminating NUL */
+	dst = safe_emalloc(1, len, 1);
+	for (i = 0, pos = 0; i < src_len; ++i) {
+		if (src[i] >= 0x80) {
+			dst[pos++] = 0xc0 | ((src[i] >> 6) & 0x1f);
+			dst[pos++] = 0x80 | (src[i] & 0x3f);
+		} else {
+			dst[pos++] = src[i];
+		}
+	}
+	dst[pos] = '\0';
+
+	zval_dtor(zv);
+	ZVAL_STRINGL(zv, (char *) dst, pos, 0);
+}
+
+/*
+ * Store the sanitized form of the raw value str/len in zv.
+ *
+ * zv's previous contents are destroyed and replaced by the php_trim()
+ * result for str. When rfc5987 is set, the value is first handed to
+ * sanitize_rfc5987(), which may report a language tag. If a language tag
+ * was reported, the value is passed through sanitize_urlencoded() even
+ * without PHP_HTTP_PARAMS_URLENCODED, through utf8encode() when latin1 was
+ * reported, and zv finally becomes an array holding the value under the
+ * language tag as key.
+ */
+static inline void sanitize_value(unsigned flags, char *str, size_t len, zval *zv, zend_bool rfc5987 TSRMLS_DC)
+{
+ char *language = NULL;
+ zend_bool latin1 = 0;
+
zval_dtor(zv);
php_trim(str, len, NULL, 0, zv, 3 TSRMLS_CC);
+ /* runs before the other sanitizers so they only see the remainder after charset'language' */
+ if (rfc5987) {
+ sanitize_rfc5987(zv, &language, &latin1 TSRMLS_CC);
+ }
+
if (flags & PHP_HTTP_PARAMS_DEFAULT) {
sanitize_default(zv TSRMLS_CC);
}
- if (flags & PHP_HTTP_PARAMS_URLENCODED) {
+ if ((flags & PHP_HTTP_PARAMS_URLENCODED) || (rfc5987 && language)) {
sanitize_urlencoded(zv TSRMLS_CC);
}
+
+ if (rfc5987 && language) {
+ zval *tmp;
+
+ if (latin1) {
+ utf8encode(zv);
+ }
+
+ /* hand the value over to tmp, then rebuild zv as array(language => tmp) */
+ MAKE_STD_ZVAL(tmp);
+ ZVAL_COPY_VALUE(tmp, zv);
+ array_init(zv);
+ add_assoc_zval(zv, language, tmp);
+ /* language was allocated by sanitize_rfc5987(); released here after its use as the key */
+ STR_FREE(language);
+ }
}
static inline void prepare_key(unsigned flags, char *old_key, size_t old_len, char **new_key, size_t *new_len TSRMLS_DC)
{
if (state->val.str) {
if (0 < (state->val.len = state->input.str - state->val.str)) {
- sanitize_value(opts->flags, state->val.str, state->val.len, *(state->current.val) TSRMLS_CC);
+ sanitize_value(opts->flags, state->val.str, state->val.len, *(state->current.val), state->rfc5987 TSRMLS_CC);
}
+ state->rfc5987 = 0;
} else if (state->arg.str) {
if (0 < (state->arg.len = state->input.str - state->arg.str)) {
zval *val, key;
+ zend_bool rfc5987 = 0;
INIT_PZVAL(&key);
ZVAL_NULL(&key);
- sanitize_key(opts->flags, state->arg.str, state->arg.len, &key TSRMLS_CC);
+ sanitize_key(opts->flags, state->arg.str, state->arg.len, &key, &rfc5987 TSRMLS_CC);
+ state->rfc5987 = rfc5987;
if (Z_TYPE(key) == IS_STRING && Z_STRLEN(key)) {
MAKE_STD_ZVAL(val);
ZVAL_TRUE(val);
} else if (state->param.str) {
if (0 < (state->param.len = state->input.str - state->param.str)) {
zval *prm, *arg, *val, *key;
+ zend_bool rfc5987 = 0;
MAKE_STD_ZVAL(key);
ZVAL_NULL(key);
- sanitize_key(opts->flags, state->param.str, state->param.len, key TSRMLS_CC);
+ sanitize_key(opts->flags, state->param.str, state->param.len, key, &rfc5987 TSRMLS_CC);
+ state->rfc5987 = rfc5987;
if (Z_TYPE_P(key) != IS_STRING) {
merge_param(params, key, &state->current.val, &state->current.args TSRMLS_CC);
} else if (Z_STRLEN_P(key)) {