From: Michael Wallner Date: Wed, 5 Nov 2014 13:37:35 +0000 (+0100) Subject: strip comments X-Git-Tag: RELEASE_2_2_0_RC1~9^2~8 X-Git-Url: https://git.m6w6.name/?a=commitdiff_plain;h=14aacd330776c33d6852e9a43be68d89e96cce4c;p=m6w6%2Fext-http strip comments --- diff --git a/gen_utf8.php b/gen_utf8.php index 2675f15..865a2f5 100755 --- a/gen_utf8.php +++ b/gen_utf8.php @@ -24,9 +24,6 @@ while (!feof($f)) { } switch($line{0}) { case "%": - if ($a) { - printf("/* %s */\n", trim($line, "%\n/ ")); - } break; case "\n": if ($a) { diff --git a/php_http_utf8.h b/php_http_utf8.h index 2baec7d..c7bcb49 100644 --- a/php_http_utf8.h +++ b/php_http_utf8.h @@ -44,30 +44,22 @@ static const unsigned char utf8_mask[] = { static const utf8_range_t utf8_ranges[] = { /* BEGIN::UTF8TABLE */ -/* BASIC LATIN */ { 0x0041, 0x005A, 1}, { 0x0061, 0x007A, 1}, -/* LATIN-1 SUPPLEMENT */ { 0x00AA, 0, 0}, { 0x00B5, 0, 0}, { 0x00BA, 0, 0}, { 0x00C0, 0x00D6, 1}, { 0x00D8, 0x00F6, 1}, { 0x00F8, 0x00FF, 1}, -/* LATIN EXTENDED-A */ { 0x0100, 0x017F, 1}, -/* LATIN EXTENDED-B */ { 0x0180, 0x024F, 1}, -/* IPA EXTENSIONS */ { 0x0250, 0x02AF, 1}, -/* SPACING MODIFIER LETTERS */ { 0x02B0, 0x02C1, 1}, { 0x02C6, 0x02D1, 1}, { 0x02E0, 0x02E4, 1}, { 0x02EE, 0, 0}, -/* COMBINING DIACRITICAL MARKS */ { 0x0345, 0, 0}, -/* BASIC GREEK */ { 0x0370, 0x0373, 1}, { 0x0376, 0x0377, 1}, { 0x037A, 0x037D, 1}, @@ -76,22 +68,16 @@ static const utf8_range_t utf8_ranges[] = { { 0x038C, 0, 0}, { 0x038E, 0x03A1, 1}, { 0x03A3, 0x03CE, 1}, -/* GREEK SYMBOLS AND COPTIC */ { 0x03D0, 0x03F5, 1}, { 0x03F7, 0x03FF, 1}, -/* CYRILLIC */ { 0x0400, 0x0481, 1}, { 0x048A, 0x04FF, 1}, -/* CYRILLIC SUPPLEMENT */ { 0x0500, 0x0523, 1}, -/* ARMENIAN */ { 0x0531, 0x0556, 1}, { 0x0559, 0, 0}, { 0x0561, 0x0587, 1}, -/* HEBREW */ { 0x05D0, 0x05EA, 1}, { 0x05F0, 0x05F2, 1}, -/* ARABIC */ { 0x0621, 0x064A, 1}, { 0x066E, 0x066F, 1}, { 0x0671, 0x06D3, 1}, @@ -100,22 +86,15 @@ static const utf8_range_t utf8_ranges[] = { { 0x06EE, 0x06EF, 1}, { 0x06FA, 0x06FC, 1}, { 0x06FF, 0, 0}, -/* SYRIAC */ { 0x0710, 0, 0}, { 0x0712, 0x072F, 1}, { 0x074D, 0x074F, 1}, -/* ARABIC SUPPLEMENT */ { 0x0750, 0x077F, 1}, -/* THAANA */ { 0x0780, 0x07A5, 1}, { 0x07B1, 0, 0}, -/* NKO */ { 0x07C0, 0x07EA, 1}, { 0x07F4, 0x07F5, 1}, { 0x07FA, 0, 0}, -/* - All Matras of Indic and Sinhala are moved from punct to alpha class */ -/* - Added Unicode 5.1 charctares of Indic scripts */ -/* DEVANAGARI */ { 0x0901, 0x0939, 1}, { 0x093C, 0x094D, 1}, { 0x0950, 0x0954, 1}, @@ -124,7 +103,6 @@ static const utf8_range_t utf8_ranges[] = { { 0x0963, 0, 0}, { 0x0972, 0, 0}, { 0x097B, 0x097F, 1}, -/* TABLE 18 BENGALI */ { 0x0981, 0x0983, 1}, { 0x0985, 0x098C, 1}, { 0x098F, 0, 0}, @@ -142,7 +120,6 @@ static const utf8_range_t utf8_ranges[] = { { 0x09DD, 0, 0}, { 0x09DF, 0x09E3, 1}, { 0x09F0, 0x09FA, 1}, -/* GURMUKHI */ { 0x0A01, 0x0A03, 1}, { 0x0A05, 0x0A0A, 1}, { 0x0A0F, 0, 0}, @@ -164,7 +141,6 @@ static const utf8_range_t utf8_ranges[] = { { 0x0A59, 0x0A5C, 1}, { 0x0A5E, 0, 0}, { 0x0A70, 0x0A75, 1}, -/* GUJARATI */ { 0x0A81, 0x0A83, 1}, { 0x0A85, 0x0A8D, 1}, { 0x0A8F, 0x0A91, 1}, @@ -179,7 +155,6 @@ static const utf8_range_t utf8_ranges[] = { { 0x0AD0, 0, 0}, { 0x0AE0, 0x0AE3, 1}, { 0x0AF1, 0, 0}, -/* ORIYA */ { 0x0B01, 0x0B03, 1}, { 0x0B05, 0x0B0C, 1}, { 0x0B0F, 0, 0}, @@ -198,7 +173,6 @@ static const utf8_range_t utf8_ranges[] = { { 0x0B5F, 0x0B63, 1}, { 0x0B70, 0, 0}, { 0x0B71, 0, 0}, -/* TAMIL */ { 0x0B82, 0, 0}, { 0x0B83, 0, 0}, { 0x0B85, 0x0B8A, 1}, @@ -219,7 +193,6 @@ static const utf8_range_t utf8_ranges[] = { { 0x0BD0, 0, 0}, { 0x0BD7, 0, 0}, { 0x0BF0, 0x0BFA, 1}, -/* TELUGU */ { 0x0C01, 0x0C03, 1}, { 0x0C05, 0x0C0C, 1}, { 0x0C0E, 0x0C10, 1}, @@ -232,7 +205,6 @@ static const utf8_range_t utf8_ranges[] = { { 0x0C55, 0x0C56, 1}, { 0x0C58, 0x0C59, 1}, { 0x0C60, 0x0C63, 1}, -/* KANNADA */ { 0x0C82, 0x0C83, 1}, { 0x0C85, 0x0C8C, 1}, { 0x0C8E, 0x0C90, 1}, @@ -247,7 +219,6 @@ static const utf8_range_t utf8_ranges[] = { { 0x0CE0, 0x0CE3, 1}, { 0x0CF1, 0, 0}, { 0x0CF2, 0, 0}, -/* MALAYALAM */ { 0x0D02, 0x0D03, 1}, { 0x0D05, 0x0D0C, 1}, { 0x0D0E, 0x0D10, 1}, @@ -259,7 +230,6 @@ static const utf8_range_t utf8_ranges[] = { { 0x0D57, 0, 0}, { 0x0D60, 0x0D63, 1}, { 0x0D79, 0x0D7F, 1}, -/* SINHALA */ { 0x0D82, 0x0D83, 1}, { 0x0D85, 0x0D96, 1}, { 0x0D9A, 0x0DB1, 1}, @@ -271,12 +241,10 @@ static const utf8_range_t utf8_ranges[] = { { 0x0DD6, 0, 0}, { 0x0DD8, 0x0DDF, 1}, { 0x0DF2, 0x0DF4, 1}, -/* THAI */ { 0x0E01, 0x0E2E, 1}, { 0x0E30, 0x0E3A, 1}, { 0x0E40, 0x0E45, 1}, { 0x0E47, 0x0E4E, 1}, -/* LAO */ { 0x0E81, 0x0E82, 1}, { 0x0E84, 0, 0}, { 0x0E87, 0x0E88, 1}, @@ -294,12 +262,10 @@ static const utf8_range_t utf8_ranges[] = { { 0x0EC0, 0x0EC4, 1}, { 0x0EC6, 0, 0}, { 0x0EDC, 0x0EDD, 1}, -/* TIBETAN */ { 0x0F00, 0, 0}, { 0x0F40, 0x0F47, 1}, { 0x0F49, 0x0F6C, 1}, { 0x0F88, 0x0F8B, 1}, -/* MYANMAR */ { 0x1000, 0x102A, 1}, { 0x1050, 0x1055, 1}, { 0x105A, 0x105D, 1}, @@ -309,15 +275,12 @@ static const utf8_range_t utf8_ranges[] = { { 0x106E, 0x1070, 1}, { 0x1075, 0x1081, 1}, { 0x108E, 0, 0}, -/* GEORGIAN */ { 0x10A0, 0x10C5, 1}, { 0x10D0, 0x10FA, 1}, { 0x10FC, 0, 0}, -/* HANGUL JAMO */ { 0x1100, 0x1159, 1}, { 0x115F, 0x11A2, 1}, { 0x11A8, 0x11F9, 1}, -/* ETHIOPIC */ { 0x1200, 0x1248, 1}, { 0x124A, 0x124D, 1}, { 0x1250, 0x1256, 1}, @@ -334,66 +297,44 @@ static const utf8_range_t utf8_ranges[] = { { 0x12D8, 0x1310, 1}, { 0x1312, 0x1315, 1}, { 0x1318, 0x135A, 1}, -/* ETHIOPIC EXTENDED */ { 0x1380, 0x138F, 1}, -/* CHEROKEE */ { 0x13A0, 0x13F4, 1}, -/* UNIFIED CANADIAN ABORIGINAL SYLLABICS */ { 0x1401, 0x166C, 1}, { 0x166F, 0x1676, 1}, -/* OGHAM */ { 0x1681, 0x169A, 1}, -/* RUNIC */ { 0x16A0, 0x16EA, 1}, { 0x16EE, 0x16F0, 1}, -/* TAGALOG */ { 0x1700, 0x170C, 1}, { 0x170E, 0x1711, 1}, -/* HANUNOO */ { 0x1720, 0x1731, 1}, -/* BUHID */ { 0x1740, 0x1751, 1}, -/* TAGBANWA */ { 0x1760, 0x176C, 1}, { 0x176E, 0x1770, 1}, -/* KHMER */ { 0x1780, 0x17B3, 1}, { 0x17D7, 0, 0}, { 0x17DC, 0, 0}, -/* MONGOLIAN */ { 0x1820, 0x1877, 1}, { 0x1880, 0x18A8, 1}, { 0x18AA, 0, 0}, -/* LIMBU */ { 0x1900, 0x191C, 1}, { 0x1946, 0x194F, 1}, -/* TAI LE */ { 0x1950, 0x196D, 1}, { 0x1970, 0x1974, 1}, -/* NEW TAI LUE */ { 0x1980, 0x19A9, 1}, { 0x19C1, 0x19C7, 1}, { 0x19D0, 0x19D9, 1}, -/* BUGINESE */ { 0x1A00, 0x1A16, 1}, -/* BALINESE */ { 0x1B05, 0x1B33, 1}, { 0x1B45, 0x1B4B, 1}, { 0x1B50, 0x1B59, 1}, -/* SUNDANESE */ { 0x1B83, 0x1BA0, 1}, { 0x1BAE, 0x1BAF, 1}, -/* LEPCHA */ { 0x1C00, 0x1C23, 1}, { 0x1C4D, 0x1C4F, 1}, -/* OL CHIKI */ { 0x1C5A, 0x1C7D, 1}, -/* PHONETIC EXTENSIONS */ { 0x1D00, 0x1DBF, 1}, -/* LATIN EXTENDED ADDITIONAL */ { 0x1E00, 0x1E9F, 1}, { 0x1EA0, 0x1EFF, 1}, -/* GREEK EXTENDED */ { 0x1F00, 0x1F15, 1}, { 0x1F18, 0x1F1D, 1}, { 0x1F20, 0x1F45, 1}, @@ -413,11 +354,9 @@ static const utf8_range_t utf8_ranges[] = { { 0x1FE0, 0x1FEC, 1}, { 0x1FF2, 0x1FF4, 1}, { 0x1FF6, 0x1FFC, 1}, -/* SUPERSCRIPTS AND SUBSCRIPTS */ { 0x2071, 0, 0}, { 0x207F, 0, 0}, { 0x2090, 0x2094, 1}, -/* LETTERLIKE SYMBOLS */ { 0x2102, 0, 0}, { 0x2107, 0, 0}, { 0x210A, 0x2113, 1}, @@ -430,24 +369,16 @@ static const utf8_range_t utf8_ranges[] = { { 0x213C, 0x213F, 1}, { 0x2145, 0x2149, 1}, { 0x214E, 0, 0}, -/* NUMBER FORMS */ { 0x2160, 0x2188, 1}, -/* ENCLOSED ALPHANUMERICS */ { 0x249C, 0x24E9, 1}, -/* GLAGOLITIC */ { 0x2C00, 0x2C2E, 1}, { 0x2C30, 0x2C5E, 1}, -/* LATIN EXTENDED-C */ { 0x2C60, 0x2C6F, 1}, { 0x2C71, 0x2C7D, 1}, -/* COPTIC */ { 0x2C80, 0x2CE4, 1}, -/* GEORGIAN SUPPLEMENT */ { 0x2D00, 0x2D25, 1}, -/* TIFINAGH */ { 0x2D30, 0x2D65, 1}, { 0x2D6F, 0, 0}, -/* ETHIOPIC EXTENDED */ { 0x2D80, 0x2D96, 1}, { 0x2DA0, 0x2DA6, 1}, { 0x2DA8, 0x2DAE, 1}, @@ -457,68 +388,46 @@ static const utf8_range_t utf8_ranges[] = { { 0x2DC8, 0x2DCE, 1}, { 0x2DD0, 0x2DD6, 1}, { 0x2DD8, 0x2DDE, 1}, -/* CJK SYMBOLS AND PUNCTUATION */ { 0x3005, 0x3007, 1}, { 0x3021, 0x3029, 1}, { 0x3031, 0x3035, 1}, { 0x3038, 0x303C, 1}, -/* HIRAGANA */ { 0x3041, 0x3096, 1}, { 0x309D, 0x309F, 1}, -/* KATAKANA */ { 0x30A1, 0x30FA, 1}, { 0x30FC, 0x30FF, 1}, -/* BOPOMOFO */ { 0x3105, 0x312D, 1}, -/* HANGUL COMPATIBILITY JAMO */ { 0x3131, 0x318E, 1}, -/* BOPOMOFO EXTENDED */ { 0x31A0, 0x31B7, 1}, -/* KATAKANA PHONETIC EXTENSIONS */ { 0x31F0, 0x31FF, 1}, -/* CJK UNIFIED IDEOGRAPHS EXTENSION */ { 0x3400, 0x4DB5, 1}, -/* CJK UNIFIED IDEOGRAPHS */ { 0x4E00, 0x9FBB, 1}, -/* YI SYLLABLES */ { 0xA000, 0xA48C, 1}, -/* VAI SYLLABLES */ { 0xA500, 0xA60B, 1}, { 0xA610, 0xA61F, 1}, { 0xA62A, 0xA62B, 1}, -/* CYRILLIC SUPPLEMENT 2 */ { 0xA640, 0xA65F, 1}, { 0xA662, 0xA66E, 1}, { 0xA680, 0xA697, 1}, -/* LATIN EXTENDED-D */ { 0xA717, 0xA71F, 1}, { 0xA722, 0xA78C, 1}, { 0xA7FB, 0xA7FF, 1}, -/* SYLOTI NEGRI */ { 0xA800, 0, 0}, { 0xA801, 0, 0}, { 0xA803, 0xA805, 1}, { 0xA807, 0xA80A, 1}, { 0xA80C, 0xA822, 1}, -/* PHAGS PA */ { 0xA840, 0xA873, 1}, -/* SAURASHTRA */ { 0xA882, 0xA8B3, 1}, -/* KAYAH LI */ { 0xA90A, 0xA92D, 1}, -/* REJANG */ { 0xA930, 0xA946, 1}, -/* CHAM */ { 0xAA00, 0xAA28, 1}, { 0xAA40, 0xAA42, 1}, { 0xAA44, 0xAA4B, 1}, -/* HANGUL SYLLABLES */ { 0xAC00, 0xD7A3, 1}, -/* CJK COMPATIBILITY IDEOGRAPHS */ { 0xF900, 0xFA2D, 1}, { 0xFA30, 0xFA6A, 1}, { 0xFA70, 0xFAD9, 1}, -/* ALPHABETIC PRESENTATION FORMS */ { 0xFB00, 0xFB06, 1}, { 0xFB13, 0xFB17, 1}, { 0xFB1D, 0, 0}, @@ -531,16 +440,13 @@ static const utf8_range_t utf8_ranges[] = { { 0xFB43, 0, 0}, { 0xFB44, 0, 0}, { 0xFB46, 0xFB4F, 1}, -/* ARABIC PRESENTATION FORMS-A */ { 0xFB50, 0xFBB1, 1}, { 0xFBD3, 0xFD3D, 1}, { 0xFD50, 0xFD8F, 1}, { 0xFD92, 0xFDC7, 1}, { 0xFDF0, 0xFDFB, 1}, -/* ARABIC PRESENTATION FORMS-B */ { 0xFE70, 0xFE74, 1}, { 0xFE76, 0xFEFC, 1}, -/* HALFWIDTH AND FULLWIDTH FORMS */ { 0xFF21, 0xFF3A, 1}, { 0xFF41, 0xFF5A, 1}, { 0xFF66, 0xFFBE, 1}, @@ -548,58 +454,39 @@ static const utf8_range_t utf8_ranges[] = { { 0xFFCA, 0xFFCF, 1}, { 0xFFD2, 0xFFD7, 1}, { 0xFFDA, 0xFFDC, 1}, -/* LINEAR B SYLLABARY */ {0x00010000, 0x0001000B, 1}, {0x0001000D, 0x00010026, 1}, {0x00010028, 0x0001003A, 1}, {0x0001003C, 0x0001003D, 1}, {0x0001003F, 0x0001004D, 1}, {0x00010050, 0x0001005D, 1}, -/* LINEAR B IDEOGRAMS */ {0x00010080, 0x000100FA, 1}, -/* ANCIENT GREEK NUMBERS */ {0x00010140, 0x00010174, 1}, -/* LYCIAN */ {0x00010280, 0x0001029C, 1}, -/* CARIAN */ {0x000102A0, 0x000102D0, 1}, -/* OLD ITALIC */ {0x00010300, 0x0001031E, 1}, -/* GOTHIC */ {0x00010330, 0x0001034A, 1}, -/* UGARITIC */ {0x00010380, 0x0001039D, 1}, -/* OLD PERSIAN */ {0x000103A0, 0x000103C3, 1}, {0x000103C8, 0x000103CF, 1}, {0x000103D1, 0x000103D5, 1}, -/* DESERET */ {0x00010400, 0x0001044F, 1}, -/* SHAVIAN */ {0x00010450, 0x0001047F, 1}, -/* OSMANYA */ {0x00010480, 0x0001049D, 1}, {0x000104A0, 0x000104A9, 1}, -/* CYPRIOT SYLLABARY */ {0x00010800, 0x00010805, 1}, {0x00010808, 0, 0}, {0x0001080A, 0x00010835, 1}, {0x00010837, 0x00010838, 1}, {0x0001083C, 0, 0}, {0x0001083F, 0, 0}, -/* PHOENICIAN */ {0x00010900, 0x00010915, 1}, {0x00010A00, 0, 0}, {0x00010A10, 0x00010A13, 1}, -/* KHAROSHTI */ {0x00010A15, 0x00010A17, 1}, {0x00010A19, 0x00010A33, 1}, -/* CUNEIFORM */ {0x00012000, 0x0001236E, 1}, -/* CUNEIFORM NUMBERS AND PONCTUATION */ {0x00012400, 0x00012462, 1}, -/* BYZANTINE MUSICAL SYMBOLS */ -/* MATHEMATICAL ALPHANUMERIC SYMBOLS */ {0x0001D400, 0x0001D454, 1}, {0x0001D456, 0x0001D49C, 1}, {0x0001D49E, 0x0001D49F, 1}, @@ -631,63 +518,34 @@ static const utf8_range_t utf8_ranges[] = { {0x0001D7AA, 0x0001D7C2, 1}, {0x0001D7C4, 0x0001D7CB, 1}, {0x0001D7CE, 0x0001D7FF, 1}, -/* CJK UNIFIED IDEOGRAPHS EXTENSION */ {0x00020000, 0x0002A6D6, 1}, -/* CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT */ {0x0002F800, 0x0002FA1D, 1}, -/* The non-ASCII number characters are included here because ISO C 99 */ -/* forbids us to classify them as digits; however, they behave more like */ -/* alphanumeric than like punctuation. */ -/* ARABIC */ { 0x0660, 0x0669, 1}, { 0x06F0, 0x06F9, 1}, -/* DEVANAGARI */ { 0x0966, 0x096F, 1}, -/* BENGALI */ { 0x09E6, 0x09EF, 1}, -/* GURMUKHI */ { 0x0A66, 0x0A6F, 1}, -/* GUJARATI */ { 0x0AE6, 0x0AEF, 1}, -/* ORIYA */ { 0x0B66, 0x0B6F, 1}, -/* TAMIL */ { 0x0BE6, 0x0BEF, 1}, -/* TELUGU */ { 0x0C66, 0x0C6F, 1}, { 0x0C78, 0x0C7F, 1}, -/* KANNADA */ { 0x0CE6, 0x0CEF, 1}, -/* MALAYALAM */ { 0x0D66, 0x0D75, 1}, { 0x0D70, 0x0D75, 1}, -/* THAI */ { 0x0E50, 0x0E59, 1}, -/* LAO */ { 0x0ED0, 0x0ED9, 1}, -/* TIBETAN */ { 0x0F20, 0x0F29, 1}, -/* MYANMAR */ { 0x1040, 0x1049, 1}, -/* KHMER */ { 0x17E0, 0x17E9, 1}, -/* MONGOLIAN */ { 0x1810, 0x1819, 1}, -/* SUNDANESE */ { 0x1BB0, 0x1BB9, 1}, -/* LEPCHA */ { 0x1C40, 0x1C49, 1}, -/* OL CHIKI */ { 0x1C50, 0x1C59, 1}, -/* VAI */ { 0xA620, 0xA629, 1}, -/* SAURASHTRA */ { 0xA8D0, 0xA8D9, 1}, -/* KAYAH LI */ { 0xA900, 0xA909, 1}, -/* CHAM */ { 0xAA50, 0xAA59, 1}, -/* HALFWIDTH AND FULLWIDTH FORMS */ { 0xFF10, 0xFF19, 1}, /* END::UTF8TABLE */