update NFC data from Unicode 12.1.0 to Unicode 13.0.0. :-)

MIME-14
Roker 3 years ago
parent 9492771941
commit b88f6cd298

@ -131,6 +131,7 @@ const std::set<unsigned> NFC_Maybe = {
0x11B0,0x11B1,0x11B2,0x11B3,0x11B4,0x11B5,0x11B6,0x11B7,0x11B8,0x11B9,
0x11BA,0x11BB,0x11BC,0x11BD,0x11BE,0x11BF,0x11C0,0x11C1,0x11C2,0x1B35,
0x3099,0x309A,0x110BA,0x11127,0x1133E,0x11357,0x114B0,0x114BA,0x114BD,0x115AF,
0x11930,
};
const std::map<unsigned, unsigned char> NFC_CombiningClass = {
@ -558,6 +559,8 @@ const std::map<unsigned, unsigned char> NFC_CombiningClass = {
{0x1ABB, 230},
{0x1ABC, 230},
{0x1ABD, 220},
{0x1ABF, 220},
{0x1AC0, 220},
{0x1B34, 7},
{0x1B44, 9},
{0x1B6B, 230},
@ -751,6 +754,7 @@ const std::map<unsigned, unsigned char> NFC_CombiningClass = {
{0xA6F0, 230},
{0xA6F1, 230},
{0xA806, 9},
{0xA82C, 9},
{0xA8C4, 9},
{0xA8E0, 230},
{0xA8E1, 230},
@ -823,6 +827,8 @@ const std::map<unsigned, unsigned char> NFC_CombiningClass = {
{0x10D25, 230},
{0x10D26, 230},
{0x10D27, 230},
{0x10EAB, 230},
{0x10EAC, 230},
{0x10F46, 220},
{0x10F47, 220},
{0x10F48, 230},
@ -878,6 +884,9 @@ const std::map<unsigned, unsigned char> NFC_CombiningClass = {
{0x1172B, 9},
{0x11839, 9},
{0x1183A, 7},
{0x1193D, 9},
{0x1193E, 9},
{0x11943, 7},
{0x119E0, 9},
{0x11A34, 9},
{0x11A47, 9},
@ -899,6 +908,8 @@ const std::map<unsigned, unsigned char> NFC_CombiningClass = {
{0x16B34, 230},
{0x16B35, 230},
{0x16B36, 230},
{0x16FF0, 6},
{0x16FF1, 6},
{0x1BC9E, 1},
{0x1D165, 216},
{0x1D166, 216},
@ -2504,6 +2515,7 @@ const std::map<unsigned, std::pair<int,int>> NFC_Decompose = {
{0x114BE, {0x114B9, 0x114BD}},
{0x115BA, {0x115B8, 0x115AF}},
{0x115BB, {0x115B9, 0x115AF}},
{0x11938, {0x11935, 0x11930}},
{0x1D15E, {0x1D157, 0x1D165}},
{0x1D15F, {0x1D158, 0x1D165}},
{0x1D160, {0x1D15F, 0x1D16E}},

@ -7,14 +7,15 @@
#
# Generates the file nfc_sets.hh and nfc_sets.cc
#
# Reads the file /usr/share/unicode/DerivedNormalizationProps.txt (Debian package: unicode-data)
# Reads the files DerivedNormalizationProps.txt and UnicodeData.txt
# (Debian package: unicode-data, or download them from Unicode.org)
# and generates C++ code for the std::set<> and std::map<> tables containing the normalization properties
#
# RUN THIS SCRIPT TO UPDATE "nfc_sets.cc" TO A NEW UNICODE VERSION!
#
################################# ### ## # # # #
cat /usr/share/unicode/DerivedNormalizationProps.txt | sed -e 's/#.*//g' | grep NFC_QC | sed -e 's/; NFC_QC;//g' |
cat DerivedNormalizationProps.txt | sed -e 's/#.*//g' | grep NFC_QC | sed -e 's/; NFC_QC;//g' |
(
@ -84,7 +85,7 @@ echo -en '\n\t};\n\n'
echo 'const std::map<unsigned, unsigned char> NFC_CombiningClass = {'
cat /usr/share/unicode/UnicodeData.txt | cut -d';' -f 1,4 | grep -v -E ';0$' | sed 's/\([0-9A-F]*\);\([0-9]*\)/ {0x\1, \2},/g'
cat UnicodeData.txt | cut -d';' -f 1,4 | grep -v -E ';0$' | sed 's/\([0-9A-F]*\);\([0-9]*\)/ {0x\1, \2},/g'
echo -en '};\n\n'
@ -92,7 +93,7 @@ echo -en '};\n\n'
echo 'const std::map<unsigned, std::pair<int,int>> NFC_Decompose = {'
# cut codepoint and Decomposition_Mapping, remove compat mappings (containing <…>), add -1 for one-element mappings:
cat /usr/share/unicode/UnicodeData.txt | cut -d';' -f 1,6 | grep -v '<' | \
cat UnicodeData.txt | cut -d';' -f 1,6 | grep -v '<' | \
sed -e 's/\([0-9A-F]*\);\([0-9A-F ]*\)/\1 @\2@/g' | grep -v @@ | \
sed -e 's/@\([0-9A-F]*\) \([0-9A-F]*\)@/0x\1 0x\2/' | \
sed -e 's/@\([0-9A-F]*\)@/0x\1 -1/' | \

Loading…
Cancel
Save