Add NFC_Compose mapping, which is generated algorithmically from NFC_Decompose

JSON-95
Roker 5 years ago
parent 7f59b07420
commit 900aac34cb

@ -371,3 +371,19 @@ std::string toNFC(std::string s)
// TODO:
throw std::logic_error("NFC normalization is necessary, but unimplemented. Sorry.");
}
// Inverts NFC_Decompose into the lookup table behind NFC_Compose:
// every two-member decomposition (first, second) becomes a key that maps
// back to the codepoint it was decomposed from.
// Used only to initialize the NFC Compose mapping.
// NOTE(review): no composition-exclusion filtering is done in here -- confirm
// that NFC_Decompose holds no pairs that must not be recomposed.
std::map< std::pair<unsigned, unsigned>, unsigned> generate_nfc_compose()
{
	std::map< std::pair<unsigned, unsigned>, unsigned> compose;
	for(auto it = NFC_Decompose.begin(); it != NFC_Decompose.end(); ++it)
	{
		const auto& parts = it->second;
		if(parts.second < 0)
		{
			// singleton decomposition (negative second member): there is no pair to compose
			continue;
		}
		
		// the signed pair is converted implicitly to the unsigned key type
		compose[ parts ] = it->first;
	}
	return compose;
}

@ -3013,3 +3013,5 @@ const std::map<unsigned, std::pair<int,int>> NFC_Decompose = {
{0x2FA1D, {0x2A600, -1}},
};
// Declaration of the generator function, so it can be called in the initializer below.
std::map< std::pair<unsigned, unsigned>, unsigned> generate_nfc_compose();
// NFC_Compose is computed once, when this variable is initialized.
// generate_nfc_compose() reads NFC_Decompose, so this definition has to stay
// below the definition of NFC_Decompose in this file: that way NFC_Decompose
// is already initialized when the generator runs.
const std::map< std::pair<unsigned, unsigned>, unsigned> NFC_Compose = generate_nfc_compose();

@ -8,6 +8,8 @@
// from Unicode's DerivedNormalizationProps.txt and UnicodeData.txt.
// see scripts/ subdirectory
// TODO: (maybe) Replace them by flat_map or sorted arrays, because these might be faster. But make benchmarks first!
// Contains all codepoints with NFC_No property.
extern const std::set<unsigned> NFC_No;
@ -20,4 +22,7 @@ extern const std::map<unsigned, unsigned char> NFC_CombiningClass;
// Maps a codepoint to its canonical decomposition, stored as a (first, second) pair.
// The second member is -1 for singleton decompositions, i.e. those that consist of one member only.
extern const std::map<unsigned, std::pair<int,int>> NFC_Decompose;
// Canonical composing mapping, except excluded ones according to Unicode TR-15.
// The key is the (first, second) pair of a canonical decomposition, the value is the composed codepoint.
// The table is filled by generate_nfc_compose() when this variable is initialized.
extern const std::map< std::pair<unsigned, unsigned>, unsigned> NFC_Compose;
#endif // NFC_SETS_HH

@ -93,4 +93,7 @@ cat /usr/share/unicode/UnicodeData.txt | cut -d';' -f 1,6 | grep -v '<' | \
# emit the closing '};' followed by an empty line
echo -en '};\n\n'
# emit the declaration of the generator function into the generated C++ source ...
echo 'std::map< std::pair<unsigned, unsigned>, unsigned> generate_nfc_compose();'
# ... and the definition of NFC_Compose, which calls that generator in its initializer
echo -en 'const std::map< std::pair<unsigned, unsigned>, unsigned> NFC_Compose = generate_nfc_compose();\n\n'
# end of file

Loading…
Cancel
Save