split utf8 ranges and single chars
authorMichael Wallner <mike@php.net>
Tue, 6 Mar 2018 12:50:04 +0000 (13:50 +0100)
committerMichael Wallner <mike@php.net>
Tue, 6 Mar 2018 12:50:04 +0000 (13:50 +0100)
scripts/gen_utf8.php
src/php_http_url.c
src/php_http_utf8.h

index 865a2f5d5f4e92159e925e76bf4becfa783403e7..43b49e382fdc2c64c4dac5e216ec0453b6f2e7c2 100755 (executable)
@@ -11,6 +11,8 @@ $i18n = $argc >= 2 ? $argv[1] : "/usr/share/i18n/locales/i18n";
 $f = fopen($i18n, "r");
 $c = false;
 $a = false;
+$r = [];
+$n = [];
 
 ob_start(null, 0xffff);
 while (!feof($f)) {
@@ -55,23 +57,14 @@ while (!feof($f)) {
                                        sscanf($sstart, "<U%X>", $start);
                                        break;
                                }
-                               print "\t{";
-                               if ($start >= 0xffff) {
-                                       printf("0x%08X, ", $start);
-                                       if ($end) {
-                                               printf("0x%08X, ", $end);
-                                       } else {
-                                               print("         0, ");
+                               if ($end) {
+                                       if ($step != 1) {
+                                               die("UNEXPECTED step=$step\n");
                                        }
+                                       $r[] = [$start, $end];
                                } else {
-                                       printf("    0x%04X, ", $start);
-                                       if ($end) {
-                                               printf("    0x%04X, ", $end);
-                                       } else {
-                                               print("         0, ");
-                                       }
+                                       $n[] = $start;
                                }
-                               printf("%d},\n", $step);
                        }
                }
                break;
@@ -85,6 +78,29 @@ while (!feof($f)) {
        }
 }
 
+$maxstep = 0;
+printf("static const utf8_range_t utf8_ranges[] = {\n\t{");
+foreach ($r as $i => list($start, $end)) {
+       if ($i) if ($i%3) {
+               printf(", {");
+       } else {
+               printf(",\n\t{");
+       }
+               
+       printf("0x%08X, 0x%08X}", $start, $end);
+}
+printf("\n};\n\n");
+printf("static const unsigned utf8_chars[] = {\n\t");
+foreach ($n as $i => $u) {
+       if ($i) if (($i%6)) {
+               printf(", ");
+       } else {
+               printf(",\n\t");
+       }
+       printf("0x%08X", $u);
+}
+printf("\n};\n");
+
 file_put_contents("php_http_utf8.h",
        preg_replace('/(\/\* BEGIN::UTF8TABLE \*\/\n).*(\n\s*\/\* END::UTF8TABLE \*\/)/s', '$1'. ob_get_contents() .'$2',
                file_get_contents("php_http_utf8.h")));
index 01b556c5fe1cf20845aebd457e344f65f7e75ffb..71ca6e958a52301274b4a6cc9c090d696c0fae72 100644 (file)
@@ -743,7 +743,7 @@ static inline size_t parse_mb(struct parse_state *state, parse_mb_what_t what, c
                        if (what == PARSE_HOSTINFO && (state->flags & PHP_HTTP_URL_PARSE_TOIDN)) {
                                /* idna */
                        } else if (state->flags & PHP_HTTP_URL_PARSE_MBUTF8) {
-#if 0&&PHP_HTTP_HAVE_LIBICU
+#if PHP_HTTP_HAVE_LIBICU
                                if (!u_isalnum(wchar)) {
 #else
                                if (!isualnum(wchar)) {
index 74fd19bb0046a543990f8cc32c5e376b735dbd18..82a58d105d02aaf515131fe9e6ea112b296c2440 100644 (file)
 #ifndef PHP_HTTP_UTF8_H
 #define PHP_HTTP_UTF8_H
 
-typedef struct utf8_range {
-       unsigned int start;
-       unsigned int end;
-       unsigned char step;
-} utf8_range_t;
-
 static const unsigned char utf8_mblen[256] = {
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -42,706 +36,234 @@ static const unsigned char utf8_mask[] = {
                0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01
 };
 
-static const utf8_range_t utf8_ranges[] = {
+typedef struct utf8_range {
+       const unsigned int start;
+       const unsigned int end;
+} utf8_range_t;
+
 /* BEGIN::UTF8TABLE */
-       {    0x0041,     0x005A, 1},
-       {    0x0061,     0x007A, 1},
-       {    0x00AA,          0, 0},
-       {    0x00B5,          0, 0},
-       {    0x00BA,          0, 0},
-       {    0x00C0,     0x00D6, 1},
-       {    0x00D8,     0x00F6, 1},
-       {    0x00F8,     0x02C1, 1},
-       {    0x02C6,     0x02D1, 1},
-       {    0x02E0,     0x02E4, 1},
-       {    0x02EC,          0, 0},
-       {    0x02EE,          0, 0},
-       {    0x0345,          0, 0},
-       {    0x0370,     0x0374, 1},
-       {    0x0376,     0x0377, 1},
-       {    0x037A,     0x037D, 1},
-       {    0x037F,          0, 0},
-       {    0x0386,          0, 0},
-       {    0x0388,     0x038A, 1},
-       {    0x038C,          0, 0},
-       {    0x038E,     0x03A1, 1},
-       {    0x03A3,     0x03F5, 1},
-       {    0x03F7,     0x0481, 1},
-       {    0x048A,     0x052F, 1},
-       {    0x0531,     0x0556, 1},
-       {    0x0559,          0, 0},
-       {    0x0561,     0x0587, 1},
-       {    0x05B0,     0x05BD, 1},
-       {    0x05BF,          0, 0},
-       {    0x05C1,     0x05C2, 1},
-       {    0x05C4,     0x05C5, 1},
-       {    0x05C7,          0, 0},
-       {    0x05D0,     0x05EA, 1},
-       {    0x05F0,     0x05F2, 1},
-       {    0x0610,     0x061A, 1},
-       {    0x0620,     0x0657, 1},
-       {    0x0659,     0x0669, 1},
-       {    0x066E,     0x06D3, 1},
-       {    0x06D5,     0x06DC, 1},
-       {    0x06E1,     0x06E8, 1},
-       {    0x06ED,     0x06FC, 1},
-       {    0x06FF,          0, 0},
-       {    0x0710,     0x073F, 1},
-       {    0x074D,     0x07B1, 1},
-       {    0x07C0,     0x07EA, 1},
-       {    0x07F4,     0x07F5, 1},
-       {    0x07FA,          0, 0},
-       {    0x0800,     0x0817, 1},
-       {    0x081A,     0x082C, 1},
-       {    0x0840,     0x0858, 1},
-       {    0x0860,     0x086A, 1},
-       {    0x08A0,     0x08B4, 1},
-       {    0x08B6,     0x08BD, 1},
-       {    0x08D4,     0x08DF, 1},
-       {    0x08E3,     0x08E9, 1},
-       {    0x08F0,     0x093B, 1},
-       {    0x093D,     0x094C, 1},
-       {    0x094E,     0x0950, 1},
-       {    0x0955,     0x0963, 1},
-       {    0x0966,     0x096F, 1},
-       {    0x0971,     0x0983, 1},
-       {    0x0985,     0x098C, 1},
-       {    0x098F,     0x0990, 1},
-       {    0x0993,     0x09A8, 1},
-       {    0x09AA,     0x09B0, 1},
-       {    0x09B2,          0, 0},
-       {    0x09B6,     0x09B9, 1},
-       {    0x09BD,     0x09C4, 1},
-       {    0x09C7,     0x09C8, 1},
-       {    0x09CB,     0x09CC, 1},
-       {    0x09CE,          0, 0},
-       {    0x09D7,          0, 0},
-       {    0x09DC,     0x09DD, 1},
-       {    0x09DF,     0x09E3, 1},
-       {    0x09E6,     0x09F1, 1},
-       {    0x09FC,          0, 0},
-       {    0x0A01,     0x0A03, 1},
-       {    0x0A05,     0x0A0A, 1},
-       {    0x0A0F,     0x0A10, 1},
-       {    0x0A13,     0x0A28, 1},
-       {    0x0A2A,     0x0A30, 1},
-       {    0x0A32,     0x0A33, 1},
-       {    0x0A35,     0x0A36, 1},
-       {    0x0A38,     0x0A39, 1},
-       {    0x0A3E,     0x0A42, 1},
-       {    0x0A47,     0x0A48, 1},
-       {    0x0A4B,     0x0A4C, 1},
-       {    0x0A51,          0, 0},
-       {    0x0A59,     0x0A5C, 1},
-       {    0x0A5E,          0, 0},
-       {    0x0A66,     0x0A75, 1},
-       {    0x0A81,     0x0A83, 1},
-       {    0x0A85,     0x0A8D, 1},
-       {    0x0A8F,     0x0A91, 1},
-       {    0x0A93,     0x0AA8, 1},
-       {    0x0AAA,     0x0AB0, 1},
-       {    0x0AB2,     0x0AB3, 1},
-       {    0x0AB5,     0x0AB9, 1},
-       {    0x0ABD,     0x0AC5, 1},
-       {    0x0AC7,     0x0AC9, 1},
-       {    0x0ACB,     0x0ACC, 1},
-       {    0x0AD0,          0, 0},
-       {    0x0AE0,     0x0AE3, 1},
-       {    0x0AE6,     0x0AEF, 1},
-       {    0x0AF9,     0x0AFC, 1},
-       {    0x0B01,     0x0B03, 1},
-       {    0x0B05,     0x0B0C, 1},
-       {    0x0B0F,     0x0B10, 1},
-       {    0x0B13,     0x0B28, 1},
-       {    0x0B2A,     0x0B30, 1},
-       {    0x0B32,     0x0B33, 1},
-       {    0x0B35,     0x0B39, 1},
-       {    0x0B3D,     0x0B44, 1},
-       {    0x0B47,     0x0B48, 1},
-       {    0x0B4B,     0x0B4C, 1},
-       {    0x0B56,     0x0B57, 1},
-       {    0x0B5C,     0x0B5D, 1},
-       {    0x0B5F,     0x0B63, 1},
-       {    0x0B66,     0x0B6F, 1},
-       {    0x0B71,          0, 0},
-       {    0x0B82,     0x0B83, 1},
-       {    0x0B85,     0x0B8A, 1},
-       {    0x0B8E,     0x0B90, 1},
-       {    0x0B92,     0x0B95, 1},
-       {    0x0B99,     0x0B9A, 1},
-       {    0x0B9C,          0, 0},
-       {    0x0B9E,     0x0B9F, 1},
-       {    0x0BA3,     0x0BA4, 1},
-       {    0x0BA8,     0x0BAA, 1},
-       {    0x0BAE,     0x0BB9, 1},
-       {    0x0BBE,     0x0BC2, 1},
-       {    0x0BC6,     0x0BC8, 1},
-       {    0x0BCA,     0x0BCC, 1},
-       {    0x0BD0,          0, 0},
-       {    0x0BD7,          0, 0},
-       {    0x0BE6,     0x0BEF, 1},
-       {    0x0C00,     0x0C03, 1},
-       {    0x0C05,     0x0C0C, 1},
-       {    0x0C0E,     0x0C10, 1},
-       {    0x0C12,     0x0C28, 1},
-       {    0x0C2A,     0x0C39, 1},
-       {    0x0C3D,     0x0C44, 1},
-       {    0x0C46,     0x0C48, 1},
-       {    0x0C4A,     0x0C4C, 1},
-       {    0x0C55,     0x0C56, 1},
-       {    0x0C58,     0x0C5A, 1},
-       {    0x0C60,     0x0C63, 1},
-       {    0x0C66,     0x0C6F, 1},
-       {    0x0C80,     0x0C83, 1},
-       {    0x0C85,     0x0C8C, 1},
-       {    0x0C8E,     0x0C90, 1},
-       {    0x0C92,     0x0CA8, 1},
-       {    0x0CAA,     0x0CB3, 1},
-       {    0x0CB5,     0x0CB9, 1},
-       {    0x0CBD,     0x0CC4, 1},
-       {    0x0CC6,     0x0CC8, 1},
-       {    0x0CCA,     0x0CCC, 1},
-       {    0x0CD5,     0x0CD6, 1},
-       {    0x0CDE,          0, 0},
-       {    0x0CE0,     0x0CE3, 1},
-       {    0x0CE6,     0x0CEF, 1},
-       {    0x0CF1,     0x0CF2, 1},
-       {    0x0D00,     0x0D03, 1},
-       {    0x0D05,     0x0D0C, 1},
-       {    0x0D0E,     0x0D10, 1},
-       {    0x0D12,     0x0D3A, 1},
-       {    0x0D3D,     0x0D44, 1},
-       {    0x0D46,     0x0D48, 1},
-       {    0x0D4A,     0x0D4C, 1},
-       {    0x0D4E,          0, 0},
-       {    0x0D54,     0x0D57, 1},
-       {    0x0D5F,     0x0D63, 1},
-       {    0x0D66,     0x0D6F, 1},
-       {    0x0D7A,     0x0D7F, 1},
-       {    0x0D82,     0x0D83, 1},
-       {    0x0D85,     0x0D96, 1},
-       {    0x0D9A,     0x0DB1, 1},
-       {    0x0DB3,     0x0DBB, 1},
-       {    0x0DBD,          0, 0},
-       {    0x0DC0,     0x0DC6, 1},
-       {    0x0DCF,     0x0DD4, 1},
-       {    0x0DD6,          0, 0},
-       {    0x0DD8,     0x0DDF, 1},
-       {    0x0DE6,     0x0DEF, 1},
-       {    0x0DF2,     0x0DF3, 1},
-       {    0x0E01,     0x0E3A, 1},
-       {    0x0E40,     0x0E46, 1},
-       {    0x0E4D,          0, 0},
-       {    0x0E50,     0x0E59, 1},
-       {    0x0E81,     0x0E82, 1},
-       {    0x0E84,          0, 0},
-       {    0x0E87,     0x0E88, 1},
-       {    0x0E8A,          0, 0},
-       {    0x0E8D,          0, 0},
-       {    0x0E94,     0x0E97, 1},
-       {    0x0E99,     0x0E9F, 1},
-       {    0x0EA1,     0x0EA3, 1},
-       {    0x0EA5,          0, 0},
-       {    0x0EA7,          0, 0},
-       {    0x0EAA,     0x0EAB, 1},
-       {    0x0EAD,     0x0EB9, 1},
-       {    0x0EBB,     0x0EBD, 1},
-       {    0x0EC0,     0x0EC4, 1},
-       {    0x0EC6,          0, 0},
-       {    0x0ECD,          0, 0},
-       {    0x0ED0,     0x0ED9, 1},
-       {    0x0EDC,     0x0EDF, 1},
-       {    0x0F00,          0, 0},
-       {    0x0F20,     0x0F29, 1},
-       {    0x0F40,     0x0F47, 1},
-       {    0x0F49,     0x0F6C, 1},
-       {    0x0F71,     0x0F81, 1},
-       {    0x0F88,     0x0F97, 1},
-       {    0x0F99,     0x0FBC, 1},
-       {    0x1000,     0x1036, 1},
-       {    0x1038,          0, 0},
-       {    0x103B,     0x1049, 1},
-       {    0x1050,     0x1062, 1},
-       {    0x1065,     0x1068, 1},
-       {    0x106E,     0x1086, 1},
-       {    0x108E,          0, 0},
-       {    0x1090,     0x1099, 1},
-       {    0x109C,     0x109D, 1},
-       {    0x10A0,     0x10C5, 1},
-       {    0x10C7,          0, 0},
-       {    0x10CD,          0, 0},
-       {    0x10D0,     0x10FA, 1},
-       {    0x10FC,     0x1248, 1},
-       {    0x124A,     0x124D, 1},
-       {    0x1250,     0x1256, 1},
-       {    0x1258,          0, 0},
-       {    0x125A,     0x125D, 1},
-       {    0x1260,     0x1288, 1},
-       {    0x128A,     0x128D, 1},
-       {    0x1290,     0x12B0, 1},
-       {    0x12B2,     0x12B5, 1},
-       {    0x12B8,     0x12BE, 1},
-       {    0x12C0,          0, 0},
-       {    0x12C2,     0x12C5, 1},
-       {    0x12C8,     0x12D6, 1},
-       {    0x12D8,     0x1310, 1},
-       {    0x1312,     0x1315, 1},
-       {    0x1318,     0x135A, 1},
-       {    0x135F,          0, 0},
-       {    0x1380,     0x138F, 1},
-       {    0x13A0,     0x13F5, 1},
-       {    0x13F8,     0x13FD, 1},
-       {    0x1401,     0x166C, 1},
-       {    0x166F,     0x167F, 1},
-       {    0x1681,     0x169A, 1},
-       {    0x16A0,     0x16EA, 1},
-       {    0x16EE,     0x16F8, 1},
-       {    0x1700,     0x170C, 1},
-       {    0x170E,     0x1713, 1},
-       {    0x1720,     0x1733, 1},
-       {    0x1740,     0x1753, 1},
-       {    0x1760,     0x176C, 1},
-       {    0x176E,     0x1770, 1},
-       {    0x1772,     0x1773, 1},
-       {    0x1780,     0x17B3, 1},
-       {    0x17B6,     0x17C8, 1},
-       {    0x17D7,          0, 0},
-       {    0x17DC,          0, 0},
-       {    0x17E0,     0x17E9, 1},
-       {    0x1810,     0x1819, 1},
-       {    0x1820,     0x1877, 1},
-       {    0x1880,     0x18AA, 1},
-       {    0x18B0,     0x18F5, 1},
-       {    0x1900,     0x191E, 1},
-       {    0x1920,     0x192B, 1},
-       {    0x1930,     0x1938, 1},
-       {    0x1946,     0x196D, 1},
-       {    0x1970,     0x1974, 1},
-       {    0x1980,     0x19AB, 1},
-       {    0x19B0,     0x19C9, 1},
-       {    0x19D0,     0x19D9, 1},
-       {    0x1A00,     0x1A1B, 1},
-       {    0x1A20,     0x1A5E, 1},
-       {    0x1A61,     0x1A74, 1},
-       {    0x1A80,     0x1A89, 1},
-       {    0x1A90,     0x1A99, 1},
-       {    0x1AA7,          0, 0},
-       {    0x1B00,     0x1B33, 1},
-       {    0x1B35,     0x1B43, 1},
-       {    0x1B45,     0x1B4B, 1},
-       {    0x1B50,     0x1B59, 1},
-       {    0x1B80,     0x1BA9, 1},
-       {    0x1BAC,     0x1BE5, 1},
-       {    0x1BE7,     0x1BF1, 1},
-       {    0x1C00,     0x1C35, 1},
-       {    0x1C40,     0x1C49, 1},
-       {    0x1C4D,     0x1C7D, 1},
-       {    0x1C80,     0x1C88, 1},
-       {    0x1CE9,     0x1CEC, 1},
-       {    0x1CEE,     0x1CF3, 1},
-       {    0x1CF5,     0x1CF6, 1},
-       {    0x1D00,     0x1DBF, 1},
-       {    0x1DE7,     0x1DF4, 1},
-       {    0x1E00,     0x1F15, 1},
-       {    0x1F18,     0x1F1D, 1},
-       {    0x1F20,     0x1F45, 1},
-       {    0x1F48,     0x1F4D, 1},
-       {    0x1F50,     0x1F57, 1},
-       {    0x1F59,          0, 0},
-       {    0x1F5B,          0, 0},
-       {    0x1F5D,          0, 0},
-       {    0x1F5F,     0x1F7D, 1},
-       {    0x1F80,     0x1FB4, 1},
-       {    0x1FB6,     0x1FBC, 1},
-       {    0x1FBE,          0, 0},
-       {    0x1FC2,     0x1FC4, 1},
-       {    0x1FC6,     0x1FCC, 1},
-       {    0x1FD0,     0x1FD3, 1},
-       {    0x1FD6,     0x1FDB, 1},
-       {    0x1FE0,     0x1FEC, 1},
-       {    0x1FF2,     0x1FF4, 1},
-       {    0x1FF6,     0x1FFC, 1},
-       {    0x2071,          0, 0},
-       {    0x207F,          0, 0},
-       {    0x2090,     0x209C, 1},
-       {    0x2102,          0, 0},
-       {    0x2107,          0, 0},
-       {    0x210A,     0x2113, 1},
-       {    0x2115,          0, 0},
-       {    0x2119,     0x211D, 1},
-       {    0x2124,          0, 0},
-       {    0x2126,          0, 0},
-       {    0x2128,          0, 0},
-       {    0x212A,     0x212D, 1},
-       {    0x212F,     0x2139, 1},
-       {    0x213C,     0x213F, 1},
-       {    0x2145,     0x2149, 1},
-       {    0x214E,          0, 0},
-       {    0x2160,     0x2188, 1},
-       {    0x24B6,     0x24E9, 1},
-       {    0x2C00,     0x2C2E, 1},
-       {    0x2C30,     0x2C5E, 1},
-       {    0x2C60,     0x2CE4, 1},
-       {    0x2CEB,     0x2CEE, 1},
-       {    0x2CF2,     0x2CF3, 1},
-       {    0x2D00,     0x2D25, 1},
-       {    0x2D27,          0, 0},
-       {    0x2D2D,          0, 0},
-       {    0x2D30,     0x2D67, 1},
-       {    0x2D6F,          0, 0},
-       {    0x2D80,     0x2D96, 1},
-       {    0x2DA0,     0x2DA6, 1},
-       {    0x2DA8,     0x2DAE, 1},
-       {    0x2DB0,     0x2DB6, 1},
-       {    0x2DB8,     0x2DBE, 1},
-       {    0x2DC0,     0x2DC6, 1},
-       {    0x2DC8,     0x2DCE, 1},
-       {    0x2DD0,     0x2DD6, 1},
-       {    0x2DD8,     0x2DDE, 1},
-       {    0x2DE0,     0x2DFF, 1},
-       {    0x2E2F,          0, 0},
-       {    0x3005,     0x3007, 1},
-       {    0x3021,     0x3029, 1},
-       {    0x3031,     0x3035, 1},
-       {    0x3038,     0x303C, 1},
-       {    0x3041,     0x3096, 1},
-       {    0x309D,     0x309F, 1},
-       {    0x30A1,     0x30FA, 1},
-       {    0x30FC,     0x30FF, 1},
-       {    0x3105,     0x312E, 1},
-       {    0x3131,     0x318E, 1},
-       {    0x31A0,     0x31BA, 1},
-       {    0x31F0,     0x31FF, 1},
-       {    0x3400,     0x4DB5, 1},
-       {    0x4E00,     0x9FEA, 1},
-       {    0xA000,     0xA48C, 1},
-       {    0xA4D0,     0xA4FD, 1},
-       {    0xA500,     0xA60C, 1},
-       {    0xA610,     0xA62B, 1},
-       {    0xA640,     0xA66E, 1},
-       {    0xA674,     0xA67B, 1},
-       {    0xA67F,     0xA6EF, 1},
-       {    0xA717,     0xA71F, 1},
-       {    0xA722,     0xA788, 1},
-       {    0xA78B,     0xA7AE, 1},
-       {    0xA7B0,     0xA7B7, 1},
-       {    0xA7F7,     0xA801, 1},
-       {    0xA803,     0xA805, 1},
-       {    0xA807,     0xA80A, 1},
-       {    0xA80C,     0xA827, 1},
-       {    0xA840,     0xA873, 1},
-       {    0xA880,     0xA8C3, 1},
-       {    0xA8C5,          0, 0},
-       {    0xA8D0,     0xA8D9, 1},
-       {    0xA8F2,     0xA8F7, 1},
-       {    0xA8FB,          0, 0},
-       {    0xA8FD,          0, 0},
-       {    0xA900,     0xA92A, 1},
-       {    0xA930,     0xA952, 1},
-       {    0xA960,     0xA97C, 1},
-       {    0xA980,     0xA9B2, 1},
-       {    0xA9B4,     0xA9BF, 1},
-       {    0xA9CF,     0xA9D9, 1},
-       {    0xA9E0,     0xA9E4, 1},
-       {    0xA9E6,     0xA9FE, 1},
-       {    0xAA00,     0xAA36, 1},
-       {    0xAA40,     0xAA4D, 1},
-       {    0xAA50,     0xAA59, 1},
-       {    0xAA60,     0xAA76, 1},
-       {    0xAA7A,          0, 0},
-       {    0xAA7E,     0xAABE, 1},
-       {    0xAAC0,          0, 0},
-       {    0xAAC2,          0, 0},
-       {    0xAADB,     0xAADD, 1},
-       {    0xAAE0,     0xAAEF, 1},
-       {    0xAAF2,     0xAAF5, 1},
-       {    0xAB01,     0xAB06, 1},
-       {    0xAB09,     0xAB0E, 1},
-       {    0xAB11,     0xAB16, 1},
-       {    0xAB20,     0xAB26, 1},
-       {    0xAB28,     0xAB2E, 1},
-       {    0xAB30,     0xAB5A, 1},
-       {    0xAB5C,     0xAB65, 1},
-       {    0xAB70,     0xABEA, 1},
-       {    0xABF0,     0xABF9, 1},
-       {    0xAC00,     0xD7A3, 1},
-       {    0xD7B0,     0xD7C6, 1},
-       {    0xD7CB,     0xD7FB, 1},
-       {    0xF900,     0xFA6D, 1},
-       {    0xFA70,     0xFAD9, 1},
-       {    0xFB00,     0xFB06, 1},
-       {    0xFB13,     0xFB17, 1},
-       {    0xFB1D,     0xFB28, 1},
-       {    0xFB2A,     0xFB36, 1},
-       {    0xFB38,     0xFB3C, 1},
-       {    0xFB3E,          0, 0},
-       {    0xFB40,     0xFB41, 1},
-       {    0xFB43,     0xFB44, 1},
-       {    0xFB46,     0xFBB1, 1},
-       {    0xFBD3,     0xFD3D, 1},
-       {    0xFD50,     0xFD8F, 1},
-       {    0xFD92,     0xFDC7, 1},
-       {    0xFDF0,     0xFDFB, 1},
-       {    0xFE70,     0xFE74, 1},
-       {    0xFE76,     0xFEFC, 1},
-       {    0xFF10,     0xFF19, 1},
-       {    0xFF21,     0xFF3A, 1},
-       {    0xFF41,     0xFF5A, 1},
-       {    0xFF66,     0xFFBE, 1},
-       {    0xFFC2,     0xFFC7, 1},
-       {    0xFFCA,     0xFFCF, 1},
-       {    0xFFD2,     0xFFD7, 1},
-       {    0xFFDA,     0xFFDC, 1},
-       {0x00010000, 0x0001000B, 1},
-       {0x0001000D, 0x00010026, 1},
-       {0x00010028, 0x0001003A, 1},
-       {0x0001003C, 0x0001003D, 1},
-       {0x0001003F, 0x0001004D, 1},
-       {0x00010050, 0x0001005D, 1},
-       {0x00010080, 0x000100FA, 1},
-       {0x00010140, 0x00010174, 1},
-       {0x00010280, 0x0001029C, 1},
-       {0x000102A0, 0x000102D0, 1},
-       {0x00010300, 0x0001031F, 1},
-       {0x0001032D, 0x0001034A, 1},
-       {0x00010350, 0x0001037A, 1},
-       {0x00010380, 0x0001039D, 1},
-       {0x000103A0, 0x000103C3, 1},
-       {0x000103C8, 0x000103CF, 1},
-       {0x000103D1, 0x000103D5, 1},
-       {0x00010400, 0x0001049D, 1},
-       {0x000104A0, 0x000104A9, 1},
-       {0x000104B0, 0x000104D3, 1},
-       {0x000104D8, 0x000104FB, 1},
-       {0x00010500, 0x00010527, 1},
-       {0x00010530, 0x00010563, 1},
-       {0x00010600, 0x00010736, 1},
-       {0x00010740, 0x00010755, 1},
-       {0x00010760, 0x00010767, 1},
-       {0x00010800, 0x00010805, 1},
-       {0x00010808,          0, 0},
-       {0x0001080A, 0x00010835, 1},
-       {0x00010837, 0x00010838, 1},
-       {0x0001083C,          0, 0},
-       {0x0001083F, 0x00010855, 1},
-       {0x00010860, 0x00010876, 1},
-       {0x00010880, 0x0001089E, 1},
-       {0x000108E0, 0x000108F2, 1},
-       {0x000108F4, 0x000108F5, 1},
-       {0x00010900, 0x00010915, 1},
-       {0x00010920, 0x00010939, 1},
-       {0x00010980, 0x000109B7, 1},
-       {0x000109BE, 0x000109BF, 1},
-       {0x00010A00, 0x00010A03, 1},
-       {0x00010A05, 0x00010A06, 1},
-       {0x00010A0C, 0x00010A13, 1},
-       {0x00010A15, 0x00010A17, 1},
-       {0x00010A19, 0x00010A33, 1},
-       {0x00010A60, 0x00010A7C, 1},
-       {0x00010A80, 0x00010A9C, 1},
-       {0x00010AC0, 0x00010AC7, 1},
-       {0x00010AC9, 0x00010AE4, 1},
-       {0x00010B00, 0x00010B35, 1},
-       {0x00010B40, 0x00010B55, 1},
-       {0x00010B60, 0x00010B72, 1},
-       {0x00010B80, 0x00010B91, 1},
-       {0x00010C00, 0x00010C48, 1},
-       {0x00010C80, 0x00010CB2, 1},
-       {0x00010CC0, 0x00010CF2, 1},
-       {0x00011000, 0x00011045, 1},
-       {0x00011066, 0x0001106F, 1},
-       {0x00011082, 0x000110B8, 1},
-       {0x000110D0, 0x000110E8, 1},
-       {0x000110F0, 0x000110F9, 1},
-       {0x00011100, 0x00011132, 1},
-       {0x00011136, 0x0001113F, 1},
-       {0x00011150, 0x00011172, 1},
-       {0x00011176,          0, 0},
-       {0x00011180, 0x000111BF, 1},
-       {0x000111C1, 0x000111C4, 1},
-       {0x000111D0, 0x000111DA, 1},
-       {0x000111DC,          0, 0},
-       {0x00011200, 0x00011211, 1},
-       {0x00011213, 0x00011234, 1},
-       {0x00011237,          0, 0},
-       {0x0001123E,          0, 0},
-       {0x00011280, 0x00011286, 1},
-       {0x00011288,          0, 0},
-       {0x0001128A, 0x0001128D, 1},
-       {0x0001128F, 0x0001129D, 1},
-       {0x0001129F, 0x000112A8, 1},
-       {0x000112B0, 0x000112E8, 1},
-       {0x000112F0, 0x000112F9, 1},
-       {0x00011300, 0x00011303, 1},
-       {0x00011305, 0x0001130C, 1},
-       {0x0001130F, 0x00011310, 1},
-       {0x00011313, 0x00011328, 1},
-       {0x0001132A, 0x00011330, 1},
-       {0x00011332, 0x00011333, 1},
-       {0x00011335, 0x00011339, 1},
-       {0x0001133D, 0x00011344, 1},
-       {0x00011347, 0x00011348, 1},
-       {0x0001134B, 0x0001134C, 1},
-       {0x00011350,          0, 0},
-       {0x00011357,          0, 0},
-       {0x0001135D, 0x00011363, 1},
-       {0x00011400, 0x00011441, 1},
-       {0x00011443, 0x00011445, 1},
-       {0x00011447, 0x0001144A, 1},
-       {0x00011450, 0x00011459, 1},
-       {0x00011480, 0x000114C1, 1},
-       {0x000114C4, 0x000114C5, 1},
-       {0x000114C7,          0, 0},
-       {0x000114D0, 0x000114D9, 1},
-       {0x00011580, 0x000115B5, 1},
-       {0x000115B8, 0x000115BE, 1},
-       {0x000115D8, 0x000115DD, 1},
-       {0x00011600, 0x0001163E, 1},
-       {0x00011640,          0, 0},
-       {0x00011644,          0, 0},
-       {0x00011650, 0x00011659, 1},
-       {0x00011680, 0x000116B5, 1},
-       {0x000116C0, 0x000116C9, 1},
-       {0x00011700, 0x00011719, 1},
-       {0x0001171D, 0x0001172A, 1},
-       {0x00011730, 0x00011739, 1},
-       {0x000118A0, 0x000118E9, 1},
-       {0x000118FF,          0, 0},
-       {0x00011A00, 0x00011A32, 1},
-       {0x00011A35, 0x00011A3E, 1},
-       {0x00011A50, 0x00011A83, 1},
-       {0x00011A86, 0x00011A97, 1},
-       {0x00011AC0, 0x00011AF8, 1},
-       {0x00011C00, 0x00011C08, 1},
-       {0x00011C0A, 0x00011C36, 1},
-       {0x00011C38, 0x00011C3E, 1},
-       {0x00011C40,          0, 0},
-       {0x00011C50, 0x00011C59, 1},
-       {0x00011C72, 0x00011C8F, 1},
-       {0x00011C92, 0x00011CA7, 1},
-       {0x00011CA9, 0x00011CB6, 1},
-       {0x00011D00, 0x00011D06, 1},
-       {0x00011D08, 0x00011D09, 1},
-       {0x00011D0B, 0x00011D36, 1},
-       {0x00011D3A,          0, 0},
-       {0x00011D3C, 0x00011D3D, 1},
-       {0x00011D3F, 0x00011D41, 1},
-       {0x00011D43,          0, 0},
-       {0x00011D46, 0x00011D47, 1},
-       {0x00011D50, 0x00011D59, 1},
-       {0x00012000, 0x00012399, 1},
-       {0x00012400, 0x0001246E, 1},
-       {0x00012480, 0x00012543, 1},
-       {0x00013000, 0x0001342E, 1},
-       {0x00014400, 0x00014646, 1},
-       {0x00016800, 0x00016A38, 1},
-       {0x00016A40, 0x00016A5E, 1},
-       {0x00016A60, 0x00016A69, 1},
-       {0x00016AD0, 0x00016AED, 1},
-       {0x00016B00, 0x00016B36, 1},
-       {0x00016B40, 0x00016B43, 1},
-       {0x00016B50, 0x00016B59, 1},
-       {0x00016B63, 0x00016B77, 1},
-       {0x00016B7D, 0x00016B8F, 1},
-       {0x00016F00, 0x00016F44, 1},
-       {0x00016F50, 0x00016F7E, 1},
-       {0x00016F93, 0x00016F9F, 1},
-       {0x00016FE0, 0x00016FE1, 1},
-       {0x00017000, 0x000187EC, 1},
-       {0x00018800, 0x00018AF2, 1},
-       {0x0001B000, 0x0001B11E, 1},
-       {0x0001B170, 0x0001B2FB, 1},
-       {0x0001BC00, 0x0001BC6A, 1},
-       {0x0001BC70, 0x0001BC7C, 1},
-       {0x0001BC80, 0x0001BC88, 1},
-       {0x0001BC90, 0x0001BC99, 1},
-       {0x0001BC9E,          0, 0},
-       {0x0001D400, 0x0001D454, 1},
-       {0x0001D456, 0x0001D49C, 1},
-       {0x0001D49E, 0x0001D49F, 1},
-       {0x0001D4A2,          0, 0},
-       {0x0001D4A5, 0x0001D4A6, 1},
-       {0x0001D4A9, 0x0001D4AC, 1},
-       {0x0001D4AE, 0x0001D4B9, 1},
-       {0x0001D4BB,          0, 0},
-       {0x0001D4BD, 0x0001D4C3, 1},
-       {0x0001D4C5, 0x0001D505, 1},
-       {0x0001D507, 0x0001D50A, 1},
-       {0x0001D50D, 0x0001D514, 1},
-       {0x0001D516, 0x0001D51C, 1},
-       {0x0001D51E, 0x0001D539, 1},
-       {0x0001D53B, 0x0001D53E, 1},
-       {0x0001D540, 0x0001D544, 1},
-       {0x0001D546,          0, 0},
-       {0x0001D54A, 0x0001D550, 1},
-       {0x0001D552, 0x0001D6A5, 1},
-       {0x0001D6A8, 0x0001D6C0, 1},
-       {0x0001D6C2, 0x0001D6DA, 1},
-       {0x0001D6DC, 0x0001D6FA, 1},
-       {0x0001D6FC, 0x0001D714, 1},
-       {0x0001D716, 0x0001D734, 1},
-       {0x0001D736, 0x0001D74E, 1},
-       {0x0001D750, 0x0001D76E, 1},
-       {0x0001D770, 0x0001D788, 1},
-       {0x0001D78A, 0x0001D7A8, 1},
-       {0x0001D7AA, 0x0001D7C2, 1},
-       {0x0001D7C4, 0x0001D7CB, 1},
-       {0x0001D7CE, 0x0001D7FF, 1},
-       {0x0001E000, 0x0001E006, 1},
-       {0x0001E008, 0x0001E018, 1},
-       {0x0001E01B, 0x0001E021, 1},
-       {0x0001E023, 0x0001E024, 1},
-       {0x0001E026, 0x0001E02A, 1},
-       {0x0001E800, 0x0001E8C4, 1},
-       {0x0001E900, 0x0001E943, 1},
-       {0x0001E947,          0, 0},
-       {0x0001E950, 0x0001E959, 1},
-       {0x0001EE00, 0x0001EE03, 1},
-       {0x0001EE05, 0x0001EE1F, 1},
-       {0x0001EE21, 0x0001EE22, 1},
-       {0x0001EE24,          0, 0},
-       {0x0001EE27,          0, 0},
-       {0x0001EE29, 0x0001EE32, 1},
-       {0x0001EE34, 0x0001EE37, 1},
-       {0x0001EE39,          0, 0},
-       {0x0001EE3B,          0, 0},
-       {0x0001EE42,          0, 0},
-       {0x0001EE47,          0, 0},
-       {0x0001EE49,          0, 0},
-       {0x0001EE4B,          0, 0},
-       {0x0001EE4D, 0x0001EE4F, 1},
-       {0x0001EE51, 0x0001EE52, 1},
-       {0x0001EE54,          0, 0},
-       {0x0001EE57,          0, 0},
-       {0x0001EE59,          0, 0},
-       {0x0001EE5B,          0, 0},
-       {0x0001EE5D,          0, 0},
-       {0x0001EE5F,          0, 0},
-       {0x0001EE61, 0x0001EE62, 1},
-       {0x0001EE64,          0, 0},
-       {0x0001EE67, 0x0001EE6A, 1},
-       {0x0001EE6C, 0x0001EE72, 1},
-       {0x0001EE74, 0x0001EE77, 1},
-       {0x0001EE79, 0x0001EE7C, 1},
-       {0x0001EE7E,          0, 0},
-       {0x0001EE80, 0x0001EE89, 1},
-       {0x0001EE8B, 0x0001EE9B, 1},
-       {0x0001EEA1, 0x0001EEA3, 1},
-       {0x0001EEA5, 0x0001EEA9, 1},
-       {0x0001EEAB, 0x0001EEBB, 1},
-       {0x0001F130, 0x0001F149, 1},
-       {0x0001F150, 0x0001F169, 1},
-       {0x0001F170, 0x0001F189, 1},
-       {0x00020000, 0x0002A6D6, 1},
-       {0x0002A700, 0x0002B734, 1},
-       {0x0002B740, 0x0002B81D, 1},
-       {0x0002B820, 0x0002CEA1, 1},
-       {0x0002CEB0, 0x0002EBE0, 1},
-       {0x0002F800, 0x0002FA1D, 1},
+static const utf8_range_t utf8_ranges[] = {
+       {0x00000041, 0x0000005A}, {0x00000061, 0x0000007A}, {0x000000C0, 0x000000D6},
+       {0x000000D8, 0x000000F6}, {0x000000F8, 0x000002C1}, {0x000002C6, 0x000002D1},
+       {0x000002E0, 0x000002E4}, {0x00000370, 0x00000374}, {0x00000376, 0x00000377},
+       {0x0000037A, 0x0000037D}, {0x00000388, 0x0000038A}, {0x0000038E, 0x000003A1},
+       {0x000003A3, 0x000003F5}, {0x000003F7, 0x00000481}, {0x0000048A, 0x0000052F},
+       {0x00000531, 0x00000556}, {0x00000561, 0x00000587}, {0x000005B0, 0x000005BD},
+       {0x000005C1, 0x000005C2}, {0x000005C4, 0x000005C5}, {0x000005D0, 0x000005EA},
+       {0x000005F0, 0x000005F2}, {0x00000610, 0x0000061A}, {0x00000620, 0x00000657},
+       {0x00000659, 0x00000669}, {0x0000066E, 0x000006D3}, {0x000006D5, 0x000006DC},
+       {0x000006E1, 0x000006E8}, {0x000006ED, 0x000006FC}, {0x00000710, 0x0000073F},
+       {0x0000074D, 0x000007B1}, {0x000007C0, 0x000007EA}, {0x000007F4, 0x000007F5},
+       {0x00000800, 0x00000817}, {0x0000081A, 0x0000082C}, {0x00000840, 0x00000858},
+       {0x00000860, 0x0000086A}, {0x000008A0, 0x000008B4}, {0x000008B6, 0x000008BD},
+       {0x000008D4, 0x000008DF}, {0x000008E3, 0x000008E9}, {0x000008F0, 0x0000093B},
+       {0x0000093D, 0x0000094C}, {0x0000094E, 0x00000950}, {0x00000955, 0x00000963},
+       {0x00000966, 0x0000096F}, {0x00000971, 0x00000983}, {0x00000985, 0x0000098C},
+       {0x0000098F, 0x00000990}, {0x00000993, 0x000009A8}, {0x000009AA, 0x000009B0},
+       {0x000009B6, 0x000009B9}, {0x000009BD, 0x000009C4}, {0x000009C7, 0x000009C8},
+       {0x000009CB, 0x000009CC}, {0x000009DC, 0x000009DD}, {0x000009DF, 0x000009E3},
+       {0x000009E6, 0x000009F1}, {0x00000A01, 0x00000A03}, {0x00000A05, 0x00000A0A},
+       {0x00000A0F, 0x00000A10}, {0x00000A13, 0x00000A28}, {0x00000A2A, 0x00000A30},
+       {0x00000A32, 0x00000A33}, {0x00000A35, 0x00000A36}, {0x00000A38, 0x00000A39},
+       {0x00000A3E, 0x00000A42}, {0x00000A47, 0x00000A48}, {0x00000A4B, 0x00000A4C},
+       {0x00000A59, 0x00000A5C}, {0x00000A66, 0x00000A75}, {0x00000A81, 0x00000A83},
+       {0x00000A85, 0x00000A8D}, {0x00000A8F, 0x00000A91}, {0x00000A93, 0x00000AA8},
+       {0x00000AAA, 0x00000AB0}, {0x00000AB2, 0x00000AB3}, {0x00000AB5, 0x00000AB9},
+       {0x00000ABD, 0x00000AC5}, {0x00000AC7, 0x00000AC9}, {0x00000ACB, 0x00000ACC},
+       {0x00000AE0, 0x00000AE3}, {0x00000AE6, 0x00000AEF}, {0x00000AF9, 0x00000AFC},
+       {0x00000B01, 0x00000B03}, {0x00000B05, 0x00000B0C}, {0x00000B0F, 0x00000B10},
+       {0x00000B13, 0x00000B28}, {0x00000B2A, 0x00000B30}, {0x00000B32, 0x00000B33},
+       {0x00000B35, 0x00000B39}, {0x00000B3D, 0x00000B44}, {0x00000B47, 0x00000B48},
+       {0x00000B4B, 0x00000B4C}, {0x00000B56, 0x00000B57}, {0x00000B5C, 0x00000B5D},
+       {0x00000B5F, 0x00000B63}, {0x00000B66, 0x00000B6F}, {0x00000B82, 0x00000B83},
+       {0x00000B85, 0x00000B8A}, {0x00000B8E, 0x00000B90}, {0x00000B92, 0x00000B95},
+       {0x00000B99, 0x00000B9A}, {0x00000B9E, 0x00000B9F}, {0x00000BA3, 0x00000BA4},
+       {0x00000BA8, 0x00000BAA}, {0x00000BAE, 0x00000BB9}, {0x00000BBE, 0x00000BC2},
+       {0x00000BC6, 0x00000BC8}, {0x00000BCA, 0x00000BCC}, {0x00000BE6, 0x00000BEF},
+       {0x00000C00, 0x00000C03}, {0x00000C05, 0x00000C0C}, {0x00000C0E, 0x00000C10},
+       {0x00000C12, 0x00000C28}, {0x00000C2A, 0x00000C39}, {0x00000C3D, 0x00000C44},
+       {0x00000C46, 0x00000C48}, {0x00000C4A, 0x00000C4C}, {0x00000C55, 0x00000C56},
+       {0x00000C58, 0x00000C5A}, {0x00000C60, 0x00000C63}, {0x00000C66, 0x00000C6F},
+       {0x00000C80, 0x00000C83}, {0x00000C85, 0x00000C8C}, {0x00000C8E, 0x00000C90},
+       {0x00000C92, 0x00000CA8}, {0x00000CAA, 0x00000CB3}, {0x00000CB5, 0x00000CB9},
+       {0x00000CBD, 0x00000CC4}, {0x00000CC6, 0x00000CC8}, {0x00000CCA, 0x00000CCC},
+       {0x00000CD5, 0x00000CD6}, {0x00000CE0, 0x00000CE3}, {0x00000CE6, 0x00000CEF},
+       {0x00000CF1, 0x00000CF2}, {0x00000D00, 0x00000D03}, {0x00000D05, 0x00000D0C},
+       {0x00000D0E, 0x00000D10}, {0x00000D12, 0x00000D3A}, {0x00000D3D, 0x00000D44},
+       {0x00000D46, 0x00000D48}, {0x00000D4A, 0x00000D4C}, {0x00000D54, 0x00000D57},
+       {0x00000D5F, 0x00000D63}, {0x00000D66, 0x00000D6F}, {0x00000D7A, 0x00000D7F},
+       {0x00000D82, 0x00000D83}, {0x00000D85, 0x00000D96}, {0x00000D9A, 0x00000DB1},
+       {0x00000DB3, 0x00000DBB}, {0x00000DC0, 0x00000DC6}, {0x00000DCF, 0x00000DD4},
+       {0x00000DD8, 0x00000DDF}, {0x00000DE6, 0x00000DEF}, {0x00000DF2, 0x00000DF3},
+       {0x00000E01, 0x00000E3A}, {0x00000E40, 0x00000E46}, {0x00000E50, 0x00000E59},
+       {0x00000E81, 0x00000E82}, {0x00000E87, 0x00000E88}, {0x00000E94, 0x00000E97},
+       {0x00000E99, 0x00000E9F}, {0x00000EA1, 0x00000EA3}, {0x00000EAA, 0x00000EAB},
+       {0x00000EAD, 0x00000EB9}, {0x00000EBB, 0x00000EBD}, {0x00000EC0, 0x00000EC4},
+       {0x00000ED0, 0x00000ED9}, {0x00000EDC, 0x00000EDF}, {0x00000F20, 0x00000F29},
+       {0x00000F40, 0x00000F47}, {0x00000F49, 0x00000F6C}, {0x00000F71, 0x00000F81},
+       {0x00000F88, 0x00000F97}, {0x00000F99, 0x00000FBC}, {0x00001000, 0x00001036},
+       {0x0000103B, 0x00001049}, {0x00001050, 0x00001062}, {0x00001065, 0x00001068},
+       {0x0000106E, 0x00001086}, {0x00001090, 0x00001099}, {0x0000109C, 0x0000109D},
+       {0x000010A0, 0x000010C5}, {0x000010D0, 0x000010FA}, {0x000010FC, 0x00001248},
+       {0x0000124A, 0x0000124D}, {0x00001250, 0x00001256}, {0x0000125A, 0x0000125D},
+       {0x00001260, 0x00001288}, {0x0000128A, 0x0000128D}, {0x00001290, 0x000012B0},
+       {0x000012B2, 0x000012B5}, {0x000012B8, 0x000012BE}, {0x000012C2, 0x000012C5},
+       {0x000012C8, 0x000012D6}, {0x000012D8, 0x00001310}, {0x00001312, 0x00001315},
+       {0x00001318, 0x0000135A}, {0x00001380, 0x0000138F}, {0x000013A0, 0x000013F5},
+       {0x000013F8, 0x000013FD}, {0x00001401, 0x0000166C}, {0x0000166F, 0x0000167F},
+       {0x00001681, 0x0000169A}, {0x000016A0, 0x000016EA}, {0x000016EE, 0x000016F8},
+       {0x00001700, 0x0000170C}, {0x0000170E, 0x00001713}, {0x00001720, 0x00001733},
+       {0x00001740, 0x00001753}, {0x00001760, 0x0000176C}, {0x0000176E, 0x00001770},
+       {0x00001772, 0x00001773}, {0x00001780, 0x000017B3}, {0x000017B6, 0x000017C8},
+       {0x000017E0, 0x000017E9}, {0x00001810, 0x00001819}, {0x00001820, 0x00001877},
+       {0x00001880, 0x000018AA}, {0x000018B0, 0x000018F5}, {0x00001900, 0x0000191E},
+       {0x00001920, 0x0000192B}, {0x00001930, 0x00001938}, {0x00001946, 0x0000196D},
+       {0x00001970, 0x00001974}, {0x00001980, 0x000019AB}, {0x000019B0, 0x000019C9},
+       {0x000019D0, 0x000019D9}, {0x00001A00, 0x00001A1B}, {0x00001A20, 0x00001A5E},
+       {0x00001A61, 0x00001A74}, {0x00001A80, 0x00001A89}, {0x00001A90, 0x00001A99},
+       {0x00001B00, 0x00001B33}, {0x00001B35, 0x00001B43}, {0x00001B45, 0x00001B4B},
+       {0x00001B50, 0x00001B59}, {0x00001B80, 0x00001BA9}, {0x00001BAC, 0x00001BE5},
+       {0x00001BE7, 0x00001BF1}, {0x00001C00, 0x00001C35}, {0x00001C40, 0x00001C49},
+       {0x00001C4D, 0x00001C7D}, {0x00001C80, 0x00001C88}, {0x00001CE9, 0x00001CEC},
+       {0x00001CEE, 0x00001CF3}, {0x00001CF5, 0x00001CF6}, {0x00001D00, 0x00001DBF},
+       {0x00001DE7, 0x00001DF4}, {0x00001E00, 0x00001F15}, {0x00001F18, 0x00001F1D},
+       {0x00001F20, 0x00001F45}, {0x00001F48, 0x00001F4D}, {0x00001F50, 0x00001F57},
+       {0x00001F5F, 0x00001F7D}, {0x00001F80, 0x00001FB4}, {0x00001FB6, 0x00001FBC},
+       {0x00001FC2, 0x00001FC4}, {0x00001FC6, 0x00001FCC}, {0x00001FD0, 0x00001FD3},
+       {0x00001FD6, 0x00001FDB}, {0x00001FE0, 0x00001FEC}, {0x00001FF2, 0x00001FF4},
+       {0x00001FF6, 0x00001FFC}, {0x00002090, 0x0000209C}, {0x0000210A, 0x00002113},
+       {0x00002119, 0x0000211D}, {0x0000212A, 0x0000212D}, {0x0000212F, 0x00002139},
+       {0x0000213C, 0x0000213F}, {0x00002145, 0x00002149}, {0x00002160, 0x00002188},
+       {0x000024B6, 0x000024E9}, {0x00002C00, 0x00002C2E}, {0x00002C30, 0x00002C5E},
+       {0x00002C60, 0x00002CE4}, {0x00002CEB, 0x00002CEE}, {0x00002CF2, 0x00002CF3},
+       {0x00002D00, 0x00002D25}, {0x00002D30, 0x00002D67}, {0x00002D80, 0x00002D96},
+       {0x00002DA0, 0x00002DA6}, {0x00002DA8, 0x00002DAE}, {0x00002DB0, 0x00002DB6},
+       {0x00002DB8, 0x00002DBE}, {0x00002DC0, 0x00002DC6}, {0x00002DC8, 0x00002DCE},
+       {0x00002DD0, 0x00002DD6}, {0x00002DD8, 0x00002DDE}, {0x00002DE0, 0x00002DFF},
+       {0x00003005, 0x00003007}, {0x00003021, 0x00003029}, {0x00003031, 0x00003035},
+       {0x00003038, 0x0000303C}, {0x00003041, 0x00003096}, {0x0000309D, 0x0000309F},
+       {0x000030A1, 0x000030FA}, {0x000030FC, 0x000030FF}, {0x00003105, 0x0000312E},
+       {0x00003131, 0x0000318E}, {0x000031A0, 0x000031BA}, {0x000031F0, 0x000031FF},
+       {0x00003400, 0x00004DB5}, {0x00004E00, 0x00009FEA}, {0x0000A000, 0x0000A48C},
+       {0x0000A4D0, 0x0000A4FD}, {0x0000A500, 0x0000A60C}, {0x0000A610, 0x0000A62B},
+       {0x0000A640, 0x0000A66E}, {0x0000A674, 0x0000A67B}, {0x0000A67F, 0x0000A6EF},
+       {0x0000A717, 0x0000A71F}, {0x0000A722, 0x0000A788}, {0x0000A78B, 0x0000A7AE},
+       {0x0000A7B0, 0x0000A7B7}, {0x0000A7F7, 0x0000A801}, {0x0000A803, 0x0000A805},
+       {0x0000A807, 0x0000A80A}, {0x0000A80C, 0x0000A827}, {0x0000A840, 0x0000A873},
+       {0x0000A880, 0x0000A8C3}, {0x0000A8D0, 0x0000A8D9}, {0x0000A8F2, 0x0000A8F7},
+       {0x0000A900, 0x0000A92A}, {0x0000A930, 0x0000A952}, {0x0000A960, 0x0000A97C},
+       {0x0000A980, 0x0000A9B2}, {0x0000A9B4, 0x0000A9BF}, {0x0000A9CF, 0x0000A9D9},
+       {0x0000A9E0, 0x0000A9E4}, {0x0000A9E6, 0x0000A9FE}, {0x0000AA00, 0x0000AA36},
+       {0x0000AA40, 0x0000AA4D}, {0x0000AA50, 0x0000AA59}, {0x0000AA60, 0x0000AA76},
+       {0x0000AA7E, 0x0000AABE}, {0x0000AADB, 0x0000AADD}, {0x0000AAE0, 0x0000AAEF},
+       {0x0000AAF2, 0x0000AAF5}, {0x0000AB01, 0x0000AB06}, {0x0000AB09, 0x0000AB0E},
+       {0x0000AB11, 0x0000AB16}, {0x0000AB20, 0x0000AB26}, {0x0000AB28, 0x0000AB2E},
+       {0x0000AB30, 0x0000AB5A}, {0x0000AB5C, 0x0000AB65}, {0x0000AB70, 0x0000ABEA},
+       {0x0000ABF0, 0x0000ABF9}, {0x0000AC00, 0x0000D7A3}, {0x0000D7B0, 0x0000D7C6},
+       {0x0000D7CB, 0x0000D7FB}, {0x0000F900, 0x0000FA6D}, {0x0000FA70, 0x0000FAD9},
+       {0x0000FB00, 0x0000FB06}, {0x0000FB13, 0x0000FB17}, {0x0000FB1D, 0x0000FB28},
+       {0x0000FB2A, 0x0000FB36}, {0x0000FB38, 0x0000FB3C}, {0x0000FB40, 0x0000FB41},
+       {0x0000FB43, 0x0000FB44}, {0x0000FB46, 0x0000FBB1}, {0x0000FBD3, 0x0000FD3D},
+       {0x0000FD50, 0x0000FD8F}, {0x0000FD92, 0x0000FDC7}, {0x0000FDF0, 0x0000FDFB},
+       {0x0000FE70, 0x0000FE74}, {0x0000FE76, 0x0000FEFC}, {0x0000FF10, 0x0000FF19},
+       {0x0000FF21, 0x0000FF3A}, {0x0000FF41, 0x0000FF5A}, {0x0000FF66, 0x0000FFBE},
+       {0x0000FFC2, 0x0000FFC7}, {0x0000FFCA, 0x0000FFCF}, {0x0000FFD2, 0x0000FFD7},
+       {0x0000FFDA, 0x0000FFDC}, {0x00010000, 0x0001000B}, {0x0001000D, 0x00010026},
+       {0x00010028, 0x0001003A}, {0x0001003C, 0x0001003D}, {0x0001003F, 0x0001004D},
+       {0x00010050, 0x0001005D}, {0x00010080, 0x000100FA}, {0x00010140, 0x00010174},
+       {0x00010280, 0x0001029C}, {0x000102A0, 0x000102D0}, {0x00010300, 0x0001031F},
+       {0x0001032D, 0x0001034A}, {0x00010350, 0x0001037A}, {0x00010380, 0x0001039D},
+       {0x000103A0, 0x000103C3}, {0x000103C8, 0x000103CF}, {0x000103D1, 0x000103D5},
+       {0x00010400, 0x0001049D}, {0x000104A0, 0x000104A9}, {0x000104B0, 0x000104D3},
+       {0x000104D8, 0x000104FB}, {0x00010500, 0x00010527}, {0x00010530, 0x00010563},
+       {0x00010600, 0x00010736}, {0x00010740, 0x00010755}, {0x00010760, 0x00010767},
+       {0x00010800, 0x00010805}, {0x0001080A, 0x00010835}, {0x00010837, 0x00010838},
+       {0x0001083F, 0x00010855}, {0x00010860, 0x00010876}, {0x00010880, 0x0001089E},
+       {0x000108E0, 0x000108F2}, {0x000108F4, 0x000108F5}, {0x00010900, 0x00010915},
+       {0x00010920, 0x00010939}, {0x00010980, 0x000109B7}, {0x000109BE, 0x000109BF},
+       {0x00010A00, 0x00010A03}, {0x00010A05, 0x00010A06}, {0x00010A0C, 0x00010A13},
+       {0x00010A15, 0x00010A17}, {0x00010A19, 0x00010A33}, {0x00010A60, 0x00010A7C},
+       {0x00010A80, 0x00010A9C}, {0x00010AC0, 0x00010AC7}, {0x00010AC9, 0x00010AE4},
+       {0x00010B00, 0x00010B35}, {0x00010B40, 0x00010B55}, {0x00010B60, 0x00010B72},
+       {0x00010B80, 0x00010B91}, {0x00010C00, 0x00010C48}, {0x00010C80, 0x00010CB2},
+       {0x00010CC0, 0x00010CF2}, {0x00011000, 0x00011045}, {0x00011066, 0x0001106F},
+       {0x00011082, 0x000110B8}, {0x000110D0, 0x000110E8}, {0x000110F0, 0x000110F9},
+       {0x00011100, 0x00011132}, {0x00011136, 0x0001113F}, {0x00011150, 0x00011172},
+       {0x00011180, 0x000111BF}, {0x000111C1, 0x000111C4}, {0x000111D0, 0x000111DA},
+       {0x00011200, 0x00011211}, {0x00011213, 0x00011234}, {0x00011280, 0x00011286},
+       {0x0001128A, 0x0001128D}, {0x0001128F, 0x0001129D}, {0x0001129F, 0x000112A8},
+       {0x000112B0, 0x000112E8}, {0x000112F0, 0x000112F9}, {0x00011300, 0x00011303},
+       {0x00011305, 0x0001130C}, {0x0001130F, 0x00011310}, {0x00011313, 0x00011328},
+       {0x0001132A, 0x00011330}, {0x00011332, 0x00011333}, {0x00011335, 0x00011339},
+       {0x0001133D, 0x00011344}, {0x00011347, 0x00011348}, {0x0001134B, 0x0001134C},
+       {0x0001135D, 0x00011363}, {0x00011400, 0x00011441}, {0x00011443, 0x00011445},
+       {0x00011447, 0x0001144A}, {0x00011450, 0x00011459}, {0x00011480, 0x000114C1},
+       {0x000114C4, 0x000114C5}, {0x000114D0, 0x000114D9}, {0x00011580, 0x000115B5},
+       {0x000115B8, 0x000115BE}, {0x000115D8, 0x000115DD}, {0x00011600, 0x0001163E},
+       {0x00011650, 0x00011659}, {0x00011680, 0x000116B5}, {0x000116C0, 0x000116C9},
+       {0x00011700, 0x00011719}, {0x0001171D, 0x0001172A}, {0x00011730, 0x00011739},
+       {0x000118A0, 0x000118E9}, {0x00011A00, 0x00011A32}, {0x00011A35, 0x00011A3E},
+       {0x00011A50, 0x00011A83}, {0x00011A86, 0x00011A97}, {0x00011AC0, 0x00011AF8},
+       {0x00011C00, 0x00011C08}, {0x00011C0A, 0x00011C36}, {0x00011C38, 0x00011C3E},
+       {0x00011C50, 0x00011C59}, {0x00011C72, 0x00011C8F}, {0x00011C92, 0x00011CA7},
+       {0x00011CA9, 0x00011CB6}, {0x00011D00, 0x00011D06}, {0x00011D08, 0x00011D09},
+       {0x00011D0B, 0x00011D36}, {0x00011D3C, 0x00011D3D}, {0x00011D3F, 0x00011D41},
+       {0x00011D46, 0x00011D47}, {0x00011D50, 0x00011D59}, {0x00012000, 0x00012399},
+       {0x00012400, 0x0001246E}, {0x00012480, 0x00012543}, {0x00013000, 0x0001342E},
+       {0x00014400, 0x00014646}, {0x00016800, 0x00016A38}, {0x00016A40, 0x00016A5E},
+       {0x00016A60, 0x00016A69}, {0x00016AD0, 0x00016AED}, {0x00016B00, 0x00016B36},
+       {0x00016B40, 0x00016B43}, {0x00016B50, 0x00016B59}, {0x00016B63, 0x00016B77},
+       {0x00016B7D, 0x00016B8F}, {0x00016F00, 0x00016F44}, {0x00016F50, 0x00016F7E},
+       {0x00016F93, 0x00016F9F}, {0x00016FE0, 0x00016FE1}, {0x00017000, 0x000187EC},
+       {0x00018800, 0x00018AF2}, {0x0001B000, 0x0001B11E}, {0x0001B170, 0x0001B2FB},
+       {0x0001BC00, 0x0001BC6A}, {0x0001BC70, 0x0001BC7C}, {0x0001BC80, 0x0001BC88},
+       {0x0001BC90, 0x0001BC99}, {0x0001D400, 0x0001D454}, {0x0001D456, 0x0001D49C},
+       {0x0001D49E, 0x0001D49F}, {0x0001D4A5, 0x0001D4A6}, {0x0001D4A9, 0x0001D4AC},
+       {0x0001D4AE, 0x0001D4B9}, {0x0001D4BD, 0x0001D4C3}, {0x0001D4C5, 0x0001D505},
+       {0x0001D507, 0x0001D50A}, {0x0001D50D, 0x0001D514}, {0x0001D516, 0x0001D51C},
+       {0x0001D51E, 0x0001D539}, {0x0001D53B, 0x0001D53E}, {0x0001D540, 0x0001D544},
+       {0x0001D54A, 0x0001D550}, {0x0001D552, 0x0001D6A5}, {0x0001D6A8, 0x0001D6C0},
+       {0x0001D6C2, 0x0001D6DA}, {0x0001D6DC, 0x0001D6FA}, {0x0001D6FC, 0x0001D714},
+       {0x0001D716, 0x0001D734}, {0x0001D736, 0x0001D74E}, {0x0001D750, 0x0001D76E},
+       {0x0001D770, 0x0001D788}, {0x0001D78A, 0x0001D7A8}, {0x0001D7AA, 0x0001D7C2},
+       {0x0001D7C4, 0x0001D7CB}, {0x0001D7CE, 0x0001D7FF}, {0x0001E000, 0x0001E006},
+       {0x0001E008, 0x0001E018}, {0x0001E01B, 0x0001E021}, {0x0001E023, 0x0001E024},
+       {0x0001E026, 0x0001E02A}, {0x0001E800, 0x0001E8C4}, {0x0001E900, 0x0001E943},
+       {0x0001E950, 0x0001E959}, {0x0001EE00, 0x0001EE03}, {0x0001EE05, 0x0001EE1F},
+       {0x0001EE21, 0x0001EE22}, {0x0001EE29, 0x0001EE32}, {0x0001EE34, 0x0001EE37},
+       {0x0001EE4D, 0x0001EE4F}, {0x0001EE51, 0x0001EE52}, {0x0001EE61, 0x0001EE62},
+       {0x0001EE67, 0x0001EE6A}, {0x0001EE6C, 0x0001EE72}, {0x0001EE74, 0x0001EE77},
+       {0x0001EE79, 0x0001EE7C}, {0x0001EE80, 0x0001EE89}, {0x0001EE8B, 0x0001EE9B},
+       {0x0001EEA1, 0x0001EEA3}, {0x0001EEA5, 0x0001EEA9}, {0x0001EEAB, 0x0001EEBB},
+       {0x0001F130, 0x0001F149}, {0x0001F150, 0x0001F169}, {0x0001F170, 0x0001F189},
+       {0x00020000, 0x0002A6D6}, {0x0002A700, 0x0002B734}, {0x0002B740, 0x0002B81D},
+       {0x0002B820, 0x0002CEA1}, {0x0002CEB0, 0x0002EBE0}, {0x0002F800, 0x0002FA1D}
+};
 
-/* END::UTF8TABLE */
+static const unsigned utf8_chars[] = { /* lone code points, ascending; binary-searched by isualpha() */
+       0x000000AA, 0x000000B5, 0x000000BA, 0x000002EC, 0x000002EE, 0x00000345,
+       0x0000037F, 0x00000386, 0x0000038C, 0x00000559, 0x000005BF, 0x000005C7,
+       0x000006FF, 0x000007FA, 0x000009B2, 0x000009CE, 0x000009D7, 0x000009FC,
+       0x00000A51, 0x00000A5E, 0x00000AD0, 0x00000B71, 0x00000B9C, 0x00000BD0,
+       0x00000BD7, 0x00000CDE, 0x00000D4E, 0x00000DBD, 0x00000DD6, 0x00000E4D,
+       0x00000E84, 0x00000E8A, 0x00000E8D, 0x00000EA5, 0x00000EA7, 0x00000EC6,
+       0x00000ECD, 0x00000F00, 0x00001038, 0x0000108E, 0x000010C7, 0x000010CD,
+       0x00001258, 0x000012C0, 0x0000135F, 0x000017D7, 0x000017DC, 0x00001AA7,
+       0x00001F59, 0x00001F5B, 0x00001F5D, 0x00001FBE, 0x00002071, 0x0000207F,
+       0x00002102, 0x00002107, 0x00002115, 0x00002124, 0x00002126, 0x00002128,
+       0x0000214E, 0x00002D27, 0x00002D2D, 0x00002D6F, 0x00002E2F, 0x0000A8C5,
+       0x0000A8FB, 0x0000A8FD, 0x0000AA7A, 0x0000AAC0, 0x0000AAC2, 0x0000FB3E,
+       0x00010808, 0x0001083C, 0x00011176, 0x000111DC, 0x00011237, 0x0001123E,
+       0x00011288, 0x00011350, 0x00011357, 0x000114C7, 0x00011640, 0x00011644,
+       0x000118FF, 0x00011C40, 0x00011D3A, 0x00011D43, 0x0001BC9E, 0x0001D4A2,
+       0x0001D4BB, 0x0001D546, 0x0001E947, 0x0001EE24, 0x0001EE27, 0x0001EE39,
+       0x0001EE3B, 0x0001EE42, 0x0001EE47, 0x0001EE49, 0x0001EE4B, 0x0001EE54,
+       0x0001EE57, 0x0001EE59, 0x0001EE5B, 0x0001EE5D, 0x0001EE5F, 0x0001EE64,
+       0x0001EE7E
 };
 
+/* END::UTF8TABLE */
+
 static inline size_t utf8towc(unsigned *wc, const unsigned char *uc, size_t len)
 {
        unsigned char ub = utf8_mblen[*uc];
@@ -786,11 +308,8 @@ static inline size_t utf8towc(unsigned *wc, const unsigned char *uc, size_t len)
 
 static inline zend_bool isualpha(unsigned ch)
 {
-       unsigned count = sizeof(utf8_ranges)/sizeof(utf8_range_t),
-                       lo = 0,
-                       hi = count-1,
-                       cur = (count-1)/2,
-                       prev;
+       unsigned hi = sizeof(utf8_ranges)/sizeof(utf8_ranges[0])-1,
+                       cur = hi/2, lo = 0, prev;
 
 #undef u
 #define u (utf8_ranges[cur])
@@ -799,20 +318,8 @@ static inline zend_bool isualpha(unsigned ch)
 #if 0
                fprintf(stderr, "=> cur=%u lo=%u hi=%u (%u in %u-%u)\n", cur, lo, hi, ch, u.start, u.end);
 #endif
-               if (u.start == ch) {
+               if (u.start <= ch && u.end >= ch) {
                        return 1;
-               } else if (u.start <= ch && u.end >= ch) {
-                       unsigned j;
-
-                       if (u.step == 1) {
-                               return 1;
-                       }
-                       for (j = u.start; j <= u.end; j+= u.step) {
-                               if (ch == j) {
-                                       return 1;
-                               }
-                       }
-                       return 0;
                }
 
                prev = cur;
@@ -824,13 +331,38 @@ static inline zend_bool isualpha(unsigned ch)
                        lo = cur;
                        cur += (hi - cur) / 2;
                } else {
-                       return 0;
+                       break;
                }
 
        } while (cur != prev);
 
 #undef u
-#undef check
+#define u (utf8_chars[cur])
+
+       /* Single code points: plain binary search over the half-open window
+        * [lo, hi). The midpoint is recomputed every round so that index 0 is
+        * probed as well; stepping cur towards lo by (cur - lo) / 2 stalls at
+        * index 1 and would never match the first entry of utf8_chars. */
+       lo = 0;
+       hi = sizeof(utf8_chars)/sizeof(utf8_chars[0]);
+
+       while (lo < hi) {
+               cur = lo + (hi - lo) / 2;
+#if 0
+               fprintf(stderr, "=> cur=%u lo=%u hi=%u (%u is %u)\n", cur, lo, hi, ch, u);
+#endif
+               if (u == ch) {
+                       return 1;
+               }
+
+               if (u > ch) {
+                       hi = cur;       /* match, if any, is below cur */
+               } else {
+                       lo = cur + 1;   /* match, if any, is above cur */
+               }
+       }
+
+#undef u
 
        return 0;
 }