You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
766 lines
16 KiB
766 lines
16 KiB
// This file is under GNU General Public License 3.0
|
|
// see LICENSE.txt
|
|
|
|
// converts a C++ string into NFC form
|
|
|
|
#include "nfc.hh"
|
|
#include <cstdint>
|
|
#include <set>
|
|
#include <ostream>
|
|
#include <algorithm>
|
|
|
|
#include "nfc_sets.hh"
|
|
|
|
#include <pEp/pEp_string.h>
|
|
|
|
namespace
|
|
{
|
|
// Renders a Unicode code point as text of the form "<U+XXXX>"
// (upper-case hex, zero-padded to at least four digits).
std::string u2h(unsigned u)
{
    char text[16] = {};
    snprintf(text, sizeof(text) - 1, "<U+%04X>", u);
    return std::string(text);
}
|
|
|
|
// Renders a single octet as text of the form "0xXX" (two upper-case hex digits).
std::string o2h(uint8_t octet)
{
    char text[16] = {};
    snprintf(text, sizeof(text) - 1, "0x%02hhX", octet);
    return std::string(text);
}
|
|
|
|
// Renders a 16-bit code unit as text of the form "0xXXXX" (four upper-case hex digits).
std::string hex16(char16_t u)
{
    char text[16] = {};
    // widen explicitly so the argument matches the %X conversion
    snprintf(text, sizeof(text) - 1, "0x%04X", static_cast<unsigned>(u));
    return std::string(text);
}
|
|
|
|
|
|
// Abstract base class of the file-internal UTF decoding/encoding errors.
// The offending code unit is stored twice: truncated to its low 8 bits
// in `octet` and unmodified (16 bits) in `value`.
class utf_exception
{
public:
    utf_exception(uint16_t u)
    : octet(static_cast<uint8_t>(u))
    , value(u)
    {}

    virtual ~utf_exception() = default;

    // Human-readable description of the error; provided by every subclass.
    virtual std::string reason() const = 0;

    uint8_t  octet;  // low 8 bits of the constructor argument
    uint16_t value;  // the constructor argument as given
};
|
|
|
|
|
|
class cont_without_start : public utf_exception
|
|
{
|
|
public:
|
|
cont_without_start(uint8_t u) : utf_exception(u) {}
|
|
std::string reason() const override { return "Continuation octet " + o2h(octet) + " without start octet"; }
|
|
};
|
|
|
|
|
|
class overlong_sequence : public utf_exception
|
|
{
|
|
public:
|
|
overlong_sequence(uint8_t octet, unsigned u) : utf_exception(octet), unicode(u) {}
|
|
std::string reason() const override { return "Overlong sequence for " + u2h(unicode); }
|
|
unsigned unicode;
|
|
};
|
|
|
|
|
|
class unexpected_end : public utf_exception
|
|
{
|
|
public:
|
|
unexpected_end(uint8_t u) : utf_exception(u) {}
|
|
std::string reason() const override { return "Unexpected end of string"; }
|
|
};
|
|
|
|
class surrogate : public utf_exception
|
|
{
|
|
public:
|
|
surrogate(uint8_t u, unsigned s) : utf_exception(u), surr(s) {}
|
|
std::string reason() const override { return "UTF-8-encoded UTF-16 surrogate " + u2h(surr) + " detected"; }
|
|
private:
|
|
unsigned surr;
|
|
};
|
|
|
|
class no_unicode : public utf_exception
|
|
{
|
|
public:
|
|
explicit no_unicode(uint8_t _octet) : utf_exception(_octet) {}
|
|
std::string reason() const override { return "Octet " + o2h(octet) + " is illegal in UTF-8"; }
|
|
};
|
|
|
|
class too_big : public utf_exception
|
|
{
|
|
public:
|
|
explicit too_big(uint8_t _octet, unsigned u) : utf_exception(_octet), unicode(u) {}
|
|
std::string reason() const override { return "Value " + u2h(unicode) + " is too big for Unicode"; }
|
|
unsigned unicode;
|
|
};
|
|
|
|
|
|
class unexpected_surrogate : public utf_exception
|
|
{
|
|
public:
|
|
explicit unexpected_surrogate(char16_t c) : utf_exception(c) {}
|
|
std::string reason() const override { return "Unexpected surogate " + hex16(value); }
|
|
};
|
|
|
|
|
|
class missing_low_surrogate : public utf_exception
|
|
{
|
|
public:
|
|
explicit missing_low_surrogate(char16_t c, char16_t _surr) : utf_exception(c), surr(_surr) {}
|
|
std::string reason() const override { return "Non-low surrogate value " + hex16(value) + " is unexpected after high surogate " + hex16(surr); }
|
|
private:
|
|
char16_t surr;
|
|
};
|
|
|
|
|
|
std::string escape(pEp::string_view s)
|
|
{
|
|
std::string ret; ret.reserve(s.size() + 16 );
|
|
for(char c : s)
|
|
{
|
|
const uint8_t u = c;
|
|
if(u>=32 && u<=126)
|
|
{
|
|
ret += c;
|
|
}else{
|
|
char buf[16];
|
|
snprintf(buf,15, "«%02x»", u );
|
|
ret += buf;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
std::string escape(pEp::u16string_view s)
|
|
{
|
|
std::string ret; ret.reserve(s.size() + 16 );
|
|
for(char16_t c : s)
|
|
{
|
|
if(c>=32 && c<=126)
|
|
{
|
|
ret += char(c);
|
|
}else{
|
|
char buf[16];
|
|
snprintf(buf,15, "«%04x»", c );
|
|
ret += buf;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
// returns the "CanonicalCombinincClass" of the given Unicode codpoint u
|
|
unsigned canonicalClass(unsigned u)
|
|
{
|
|
const auto q = NFC_CombiningClass.find(u);
|
|
if(q==NFC_CombiningClass.end())
|
|
{
|
|
return 0; // not found in map.
|
|
}else{
|
|
return q->second;
|
|
}
|
|
}
|
|
|
|
std::pair<int,int> decompose(unsigned u)
|
|
{
|
|
const auto q = NFC_Decompose.find(u);
|
|
if(q==NFC_Decompose.end())
|
|
{
|
|
return std::make_pair(-1, -1);
|
|
}else{
|
|
return q->second;
|
|
}
|
|
}
|
|
|
|
std::u32string decompose_full(unsigned u)
|
|
{
|
|
const std::pair<int,int> d = decompose(u);
|
|
if(d.first<0)
|
|
{
|
|
return std::u32string( 1, char32_t(u) );
|
|
}else{
|
|
if(d.second<0)
|
|
{
|
|
return decompose_full(d.first);
|
|
}
|
|
}
|
|
return decompose_full(d.first) + decompose_full(d.second);
|
|
}
|
|
|
|
|
|
// according to Unicode Standard, clause D108:
|
|
bool isReorderablePair(unsigned a, unsigned b)
|
|
{
|
|
const unsigned cca = canonicalClass(a);
|
|
const unsigned ccb = canonicalClass(b);
|
|
|
|
return (cca > ccb) && (ccb>0);
|
|
}
|
|
|
|
// Unicode standard requires bubble sort, for stability reasons?
|
|
void canonicalOrdering(std::u32string& us)
|
|
{
|
|
if(us.size()<2)
|
|
return;
|
|
|
|
for(unsigned n=us.size(); n>1; --n)
|
|
for(unsigned i=0; i<n-1; ++i)
|
|
{
|
|
char32_t& a = us[i];
|
|
char32_t& b = us[i+1];
|
|
if( isReorderablePair(a,b) )
|
|
{
|
|
std::swap(a,b);
|
|
}
|
|
}
|
|
}
|
|
|
|
} // end of anonymous namespace
|
|
|
|
|
|
namespace pEp {
|
|
|
|
std::string escape_utf16(u16string_view s)
|
|
{
|
|
return escape(s);
|
|
}
|
|
|
|
|
|
// Writes the name of the enumerator ("No", "Maybe" or "Yes") to the stream.
// Throws std::logic_error for a value that is none of the three enumerators.
std::ostream& operator<<(std::ostream& o, IsNFC is_nfc)
{
    const char* label = nullptr;
    switch(is_nfc)
    {
        case IsNFC::No    : label = "No";    break;
        case IsNFC::Maybe : label = "Maybe"; break;
        case IsNFC::Yes   : label = "Yes";   break;
    }

    if(label == nullptr)
    {
        throw std::logic_error("Unknown value of IsNFC");
    }

    return o << label;
}
|
|
|
|
|
|
uint32_t parseUtf8(const char*& c, const char* end)
|
|
{
|
|
while(c<end)
|
|
{
|
|
const uint8_t u = uint8_t(*c);
|
|
|
|
if (u<=0x7f)
|
|
{
|
|
return u;
|
|
} else if (u<=0xBF)
|
|
{
|
|
throw cont_without_start(u);
|
|
} else if (u<=0xC1) // 0xC0, 0xC1 would form "overlong sequences" and are therefore always illegal in UTF-8
|
|
{
|
|
throw no_unicode(u);
|
|
} else if (u<=0xDF) // 2 octet sequence
|
|
{
|
|
++c;
|
|
if(c==end) throw unexpected_end(u);
|
|
const uint8_t uu = uint8_t(*c);
|
|
if((uu & 0xC0) != 0x80)
|
|
{
|
|
throw unexpected_end(uu);
|
|
}
|
|
return ((u & 0x1F) << 6) + (uu & 0x3F);
|
|
} else if (u<=0xEF) // 3 octet sequence
|
|
{
|
|
++c;
|
|
if(c==end) throw unexpected_end(u);
|
|
const uint8_t uu = uint8_t(*c);
|
|
if((uu & 0xC0) != 0x80)
|
|
{
|
|
throw unexpected_end(uu);
|
|
}
|
|
++c;
|
|
if(c==end) throw unexpected_end(uu);
|
|
const uint8_t uuu = uint8_t(*c);
|
|
if((uuu & 0xC0) != 0x80)
|
|
{
|
|
throw unexpected_end(uuu);
|
|
}
|
|
|
|
const uint32_t ret = ((u & 0xF) << 12) + ((uu & 0x3F)<<6) + (uuu & 0x3F);
|
|
if(ret<0x800) throw overlong_sequence(u, ret);
|
|
if(ret>=0xD800 && ret<=0xDFFF) throw surrogate(u, ret);
|
|
return ret;
|
|
} else if (u<=0xF4) // 4 octet sequence
|
|
{
|
|
++c;
|
|
if(c==end) throw unexpected_end(u);
|
|
const uint8_t uu = uint8_t(*c);
|
|
if((uu & 0xC0) != 0x80)
|
|
{
|
|
throw unexpected_end(uu);
|
|
}
|
|
++c;
|
|
if(c==end) throw unexpected_end(uu);
|
|
const uint8_t uuu = uint8_t(*c);
|
|
if((uuu & 0xC0) != 0x80)
|
|
{
|
|
throw unexpected_end(uuu);
|
|
}
|
|
++c;
|
|
if(c==end) throw unexpected_end(uuu);
|
|
const uint8_t uuuu = uint8_t(*c);
|
|
if((uuuu & 0xC0) != 0x80)
|
|
{
|
|
throw unexpected_end(uuuu);
|
|
}
|
|
|
|
const uint32_t ret = ((u & 0xF) << 18) + ((uu & 0x3F)<<12) + ((uuu & 0x3F)<<6) + (uuuu & 0x3F);
|
|
if(ret<0x10000) throw overlong_sequence(u, ret);
|
|
if(ret>0x10FFFF) throw too_big(u, ret);
|
|
return ret;
|
|
} else
|
|
{
|
|
throw no_unicode(u);
|
|
}
|
|
}
|
|
|
|
throw unexpected_end(-1);
|
|
}
|
|
|
|
|
|
uint32_t parseUtf16(const char16_t*& c, const char16_t* end)
|
|
{
|
|
while(c<end)
|
|
{
|
|
const char16_t u = *c;
|
|
if(u<0xD800 || u>=0xE000)
|
|
{
|
|
return u;
|
|
}else{
|
|
if(u>=0xDC00)
|
|
{
|
|
throw unexpected_surrogate(u);
|
|
}
|
|
++c;
|
|
if(c==end) throw unexpected_end(u);
|
|
const uint16_t low = *c;
|
|
if(low < 0xDC00 || low > 0xDFFF)
|
|
{
|
|
throw missing_low_surrogate(low, u);
|
|
}
|
|
return (u-0xD800) * 1024 + (low-0xDC00) + 0x10000;
|
|
}
|
|
}
|
|
throw unexpected_end(-1);
|
|
}
|
|
|
|
|
|
template<>
|
|
uint32_t UTF<char>::parse(const char*& c, const char* end)
|
|
{
|
|
return parseUtf8(c,end);
|
|
}
|
|
|
|
template<>
|
|
uint32_t UTF<char16_t>::parse(const char16_t*& c, const char16_t* end)
|
|
{
|
|
return parseUtf16(c,end);
|
|
}
|
|
|
|
template<>
|
|
template<class OutIter>
|
|
void UTF<char>::generate(const char32_t c, OutIter& out)
|
|
{
|
|
if(c<=0x7F)
|
|
{
|
|
*out++ = char(c);
|
|
}else if(c<=0x7FF)
|
|
{
|
|
*out++ = char( 0xC0 + (c>>6) );
|
|
*out++ = char( 0x80 + (c & 63));
|
|
}else if(c<=0xFFFF)
|
|
{
|
|
if(c>=0xD800 && c<=0xDFFF)
|
|
{
|
|
throw unexpected_surrogate(c);
|
|
}
|
|
*out++ = char( 0xE0 + (c>>12) );
|
|
*out++ = char( 0x80 + ((c>>6) & 63));
|
|
*out++ = char( 0x80 + (c & 63));
|
|
}else if(c<=0x10FFFF)
|
|
{
|
|
*out++ = char( 0xF0 + (c>>18) );
|
|
*out++ = char( 0x80 + ((c>>12) & 63));
|
|
*out++ = char( 0x80 + ((c>>6) & 63));
|
|
*out++ = char( 0x80 + (c & 63));
|
|
}else{
|
|
throw too_big(0, c);
|
|
}
|
|
}
|
|
|
|
|
|
template<>
|
|
template<class OutIter>
|
|
void UTF<char16_t>::generate(const char32_t c, OutIter& out)
|
|
{
|
|
if(c <= 0xFFFF)
|
|
{
|
|
if(c>=0xD800 && c<=0xDFFF)
|
|
{
|
|
throw unexpected_surrogate(c);
|
|
}else{
|
|
*out++ = char16_t(c);
|
|
}
|
|
}else{ // surrogate pair
|
|
if(c>0x10FFFF)
|
|
{
|
|
throw too_big(0, c);
|
|
}else{
|
|
const uint32_t c_reduced = c - 0x10000;
|
|
*out++ = char16_t(0xD800 + (c_reduced >> 10)); // High Surrogate
|
|
*out++ = char16_t(0xDC00 + (c_reduced & 0x3FF)); // Low Surrogate
|
|
}
|
|
}
|
|
}
|
|
|
|
template<class CharT>
|
|
std::basic_string<CharT> UTF<CharT>::generate(const std::u32string& u32)
|
|
{
|
|
std::basic_string<CharT> ret;
|
|
auto out = std::back_inserter(ret);
|
|
for(char32_t c : u32)
|
|
{
|
|
generate(c, out);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
|
|
|
|
// Builds the error message for an illegal UTF-8 string from the escaped
// input, the position of the error and the reason text.
illegal_utf::illegal_utf( string_view s, unsigned position, const std::string& reason)
: std::runtime_error(
      "Illegal UTF-8 string \"" + escape(s)
      + "\" at position " + std::to_string(position)
      + ": " + reason )
{
}
|
|
|
|
// Builds the error message for an illegal UTF-16 string from the escaped
// input, the position of the error and the reason text.
illegal_utf::illegal_utf( u16string_view s, unsigned position, const std::string& reason)
: std::runtime_error(
      "Illegal UTF-16 string \"" + escape(s)
      + "\" at position " + std::to_string(position)
      + ": " + reason )
{
}
|
|
|
|
|
|
// Uses msg unchanged as the exception's message.
illegal_utf::illegal_utf( const std::string& msg )
: std::runtime_error( msg )
{
}
|
|
|
|
|
|
// Checks that s is well-formed UTF-8 by decoding it code point by code point.
// Throws illegal_utf (containing the position reached and the reason) for
// the first sequence the decoder rejects; returns normally otherwise.
void assert_utf8(string_view s)
{
    const char* pos = s.data();
    const char* const end = s.data() + s.size();
    try
    {
        // parse() leaves pos on the last octet of each sequence, hence the ++pos
        for(; pos<end; ++pos)
        {
            UTF8::parse(pos, end); // ignore the output
        }
    }
    catch(const utf_exception& e)
    {
        throw illegal_utf(s, pos - s.data(), e.reason());
    }
}
|
|
|
|
|
|
// creates a NFD string from s
|
|
template<class CharT>
|
|
std::u32string UTF<CharT>::fromUtf_decompose(basic_string_view<CharT> s)
|
|
{
|
|
std::u32string u32s;
|
|
u32s.reserve( static_cast<std::size_t>(s.size()*1.25) );
|
|
const CharT* begin = s.data();
|
|
const CharT* end = s.data() + s.size();
|
|
for(; begin<end; ++begin)
|
|
{
|
|
unsigned u = parse(begin, end);
|
|
u32s += decompose_full(u);
|
|
}
|
|
canonicalOrdering(u32s); // works inplace.
|
|
return u32s;
|
|
}
|
|
|
|
|
|
// Tells whether the character at C is "blocked" from the starter at L
// (Unicode Standard, clause D115): there is a character B between L and C
// whose combining class is 0 or is not lower than the combining class of C.
//
// combine() overwrites marks that were already merged into the starter with
// the sentinel -1. Those slots no longer hold a character and are skipped
// here; otherwise their (unknown, hence 0) combining class would block every
// further composition with the same starter.
template<class Iter>
bool blocked(Iter L, Iter C)
{
    const unsigned class_c = canonicalClass(*C);

    Iter B = L; ++B;
    for(;B!=C;++B)
    {
        if( int(*B) < 0 )
            continue; // already consumed by an earlier composition

        const unsigned class_b = canonicalClass(*B);
        if(class_b==0 || class_b>=class_c)
            return true;
    }
    return false;
}
|
|
|
|
|
|
template<class Iter>
|
|
void combine(std::u32string& nfc, Iter starter, Iter next_starter)
|
|
{
|
|
Iter c = starter; ++c;
|
|
for(;c!=next_starter; ++c)
|
|
{
|
|
if(!blocked(starter, c))
|
|
{
|
|
const unsigned starter_u = *starter;
|
|
const unsigned c_u = *c;
|
|
|
|
auto q = NFC_Compose.find( std::make_pair(starter_u,c_u) );
|
|
if(q!=NFC_Compose.end())
|
|
{
|
|
*starter = q->second;
|
|
*c = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// now add the remaining/changed characters to the NFC string:
|
|
for(Iter c = starter; c!=next_starter; ++c)
|
|
{
|
|
if( int(*c) >= 0)
|
|
{
|
|
nfc += *c;
|
|
}
|
|
}
|
|
}
|
|
|
|
// the nfd string is changed during composing process. So it works on a copy or call with std::move().
|
|
std::u32string createNFC(std::u32string nfd)
|
|
{
|
|
if(nfd.size()<=1)
|
|
return nfd;
|
|
|
|
std::u32string nfc;
|
|
nfc.reserve(nfd.size());
|
|
auto starter = nfd.begin();
|
|
while( starter != nfd.end() )
|
|
{
|
|
if( canonicalClass(*starter)!=0 )
|
|
{
|
|
nfc += *starter;
|
|
++starter;
|
|
}else{
|
|
auto next_starter = std::find_if(starter+1, nfd.end(), [](char32_t c){return canonicalClass(c)==0;} );
|
|
combine(nfc, starter, next_starter);
|
|
starter = next_starter;
|
|
}
|
|
}
|
|
return nfc;
|
|
}
|
|
|
|
|
|
template<class CharT>
|
|
IsNFC UTF<CharT>::isNFC_quick_check(basic_string_view<CharT> s)
|
|
{
|
|
const CharT* begin = s.data();
|
|
const CharT* const end = s.data() + s.size();
|
|
try
|
|
{
|
|
unsigned last_cc = 0;
|
|
while(begin<end)
|
|
{
|
|
const uint32_t u = parse(begin, end);
|
|
const unsigned cc = canonicalClass(u);
|
|
if( (cc!=0) && (last_cc > cc) )
|
|
{
|
|
return IsNFC::No;
|
|
}
|
|
if(NFC_No.count(u)) return IsNFC::No;
|
|
if(NFC_Maybe.count(u)) return IsNFC::Maybe;
|
|
++begin;
|
|
last_cc = cc;
|
|
}
|
|
}
|
|
catch(const utf_exception& e)
|
|
{
|
|
throw illegal_utf(s, begin - s.data(), e.reason());
|
|
}
|
|
return IsNFC::Yes;
|
|
}
|
|
|
|
|
|
template<class CharT>
|
|
bool UTF<CharT>::isNFC(basic_string_view<CharT> s)
|
|
{
|
|
switch( isNFC_quick_check(s) )
|
|
{
|
|
case IsNFC::Yes : return true;
|
|
case IsNFC::No : return false;
|
|
case IsNFC::Maybe:
|
|
{
|
|
return s == toNFC(s); // very expensive!
|
|
}
|
|
}
|
|
|
|
throw -1; // could never happen, but compiler is too dumb to see this.
|
|
}
|
|
|
|
|
|
|
|
template<>
|
|
bool UTF<char>::isUtf(const char* begin, const char* end)
|
|
try{
|
|
for(; begin<end; ++begin)
|
|
{
|
|
(void)parse(begin, end);
|
|
}
|
|
return true;
|
|
}catch(const illegal_utf&)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
|
|
// s is ''moved'' to the return value if possible so no copy is done here.
|
|
template<class CharT>
|
|
std::basic_string<CharT> UTF<CharT>::toNFC(basic_string_view<CharT> s)
|
|
{
|
|
if(isNFC_quick_check(s)==IsNFC::Yes)
|
|
return std::basic_string<CharT>{s};
|
|
|
|
return generate( createNFC( fromUtf_decompose(s) ));
|
|
}
|
|
|
|
|
|
template<>
|
|
size_t UTF<char>::utf_length(u32string_view s)
|
|
{
|
|
size_t len = 0;
|
|
for(const char32_t c : s)
|
|
{
|
|
if(c <= 0x7f)
|
|
{
|
|
len += 1;
|
|
}else if(c<=0x7ff)
|
|
{
|
|
len += 2;
|
|
}else if(c<=0xffff)
|
|
{
|
|
if(c>=0xD800 && c<=0xDFFF)
|
|
{
|
|
throw unexpected_surrogate(c);
|
|
}
|
|
len += 3;
|
|
}else if(c<=0x10ffff)
|
|
{
|
|
len += 4;
|
|
}else{
|
|
throw too_big(0, c);
|
|
}
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
|
|
template<>
|
|
size_t UTF<char16_t>::utf_length(u32string_view s)
|
|
{
|
|
size_t len = 0;
|
|
for(const char32_t c : s)
|
|
{
|
|
if(c <= 0xffff)
|
|
{
|
|
if(c>=0xD800 && c<=0xDFFF)
|
|
{
|
|
throw unexpected_surrogate(c);
|
|
}
|
|
len += 1;
|
|
}else if(c<=0x10ffff)
|
|
{
|
|
len += 2;
|
|
}else{
|
|
throw too_big(0, c);
|
|
}
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
|
|
// convenience function to avoid ::strdup(pEp::toNFC<char>(text).c_str());
|
|
// and unecessary temporary std::string etc.
|
|
char* strdup_NFC(string_view s)
|
|
{
|
|
if(UTF8::isNFC_quick_check(s)==IsNFC::Yes)
|
|
return ::new_string(s.data(), s.size());
|
|
|
|
// implement the hard way more efficient
|
|
const std::u32string& u32 = createNFC( UTF8::fromUtf_decompose(s) );
|
|
const size_t out_len = UTF8::utf_length(u32);
|
|
char* ret = ::new_string(nullptr, out_len );
|
|
char* iter{ret};
|
|
for(const char32_t c : u32)
|
|
{
|
|
UTF8::generate(c, iter);
|
|
}
|
|
|
|
if(iter > ret+out_len) // should never happen. ;)
|
|
{
|
|
throw std::logic_error("internal error: strdup_NFC() exceeded output string size");
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
// Duplicates a pEp_identity; the members address, fpr, user_id and username
// of the copy are converted to NFC, all other members are copied bitwise.
//
// A string member that is NULL in `value` stays NULL in the copy.
// Returns nullptr if value is nullptr.
// Throws std::bad_alloc if the struct cannot be allocated, and whatever
// strdup_NFC() throws (e.g. illegal_utf); nothing is leaked in that case.
pEp_identity *identity_dup_NFC(const ::pEp_identity* value)
{
    if (!value)
        return nullptr;

    ::pEp_identity* result = (::pEp_identity*) malloc(sizeof(::pEp_identity));
    if (!result)
        throw std::bad_alloc();

    memcpy(result, value, sizeof(::pEp_identity));

    // After the memcpy the string members still point to the strings owned by
    // `value`. Detach them first, so the cleanup below never frees foreign memory.
    result->address  = nullptr;
    result->fpr      = nullptr;
    result->user_id  = nullptr;
    result->username = nullptr;

    // NULL-tolerant wrapper: a string_view must not be built from a null pointer
    const auto dup = [](const char* str) -> char*
    {
        return str ? pEp::strdup_NFC(str) : nullptr;
    };

    try
    {
        result->address  = dup(value->address);
        result->fpr      = dup(value->fpr);
        result->user_id  = dup(value->user_id);
        result->username = dup(value->username);
    }
    catch(...)
    {
        free(result->address);
        free(result->fpr);
        free(result->user_id);
        free(result->username);
        free(result);
        throw;
    }

    return result;
}
|
|
|
|
// Duplicates an identity_list; every identity is copied with
// identity_dup_NFC(). Copying stops at the first node without an identity.
//
// Throws std::bad_alloc if a list node cannot be allocated, and whatever
// identity_dup_NFC() throws. The partially built list is released before
// the exception leaves this function.
::identity_list* identity_list_dup_NFC(const ::identity_list* value)
{
    ::identity_list* result = ::new_identity_list(nullptr);
    if (!result)
        throw std::bad_alloc();

    try
    {
        ::identity_list* ir = result;
        for (const ::identity_list* il = value; il && il->ident; il = il->next) {
            ::pEp_identity* copy = identity_dup_NFC(il->ident);
            ir = ::identity_list_add(ir, copy);
            if (!ir) {
                // presumably the list did not take ownership of `copy` when adding failed - verify
                ::free_identity(copy);
                throw std::bad_alloc();
            }
        }
    }
    catch(...)
    {
        ::free_identity_list(result);
        throw;
    }

    return result;
}
|
|
|
|
|
|
// Explicit instantiations: the member templates defined in this file are
// generated here for the UTF-8 (char) and UTF-16 (char16_t) flavours.
template class UTF<char>;
template class UTF<char16_t>;
|
|
|
|
|
|
// used only to initialize the NFC Compose mapping:
|
|
std::map< std::pair<unsigned, unsigned>, unsigned> generate_nfc_compose()
|
|
{
|
|
std::map< std::pair<unsigned, unsigned>, unsigned> m;
|
|
for(const auto& decomp : NFC_Decompose)
|
|
{
|
|
if(decomp.second.second >= 0) // skip singleton decompositions
|
|
{
|
|
m[ decomp.second ] = decomp.first;
|
|
}
|
|
}
|
|
|
|
return m;
|
|
}
|
|
|
|
} // end of namespace pEp
|