Browse Source

add UTF<char16_t>::utf_length(). Be more pedantic about surrogates where they're not allowed.

master
roker 2 months ago
parent
commit
f5f12e2105
1 changed files with 33 additions and 0 deletions
  1. +33
    -0
      src/nfc.cc

+ 33
- 0
src/nfc.cc View File

@ -378,6 +378,10 @@ void UTF<char>::generate(const char32_t c, OutIter& out)
*out++ = char( 0x80 + (c & 63));
}else if(c<=0xFFFF)
{
if(c>=0xD800 && c<=0xDFFF)
{
throw unexpected_surrogate(c);
}
*out++ = char( 0xE0 + (c>>12) );
*out++ = char( 0x80 + ((c>>6) & 63));
*out++ = char( 0x80 + (c & 63));
@ -637,6 +641,10 @@ size_t UTF<char>::utf_length(u32string_view s)
len += 2;
}else if(c<=0xffff)
{
if(c>=0xD800 && c<=0xDFFF)
{
throw unexpected_surrogate(c);
}
len += 3;
}else if(c<=0x10ffff)
{
@ -650,6 +658,31 @@ size_t UTF<char>::utf_length(u32string_view s)
}
// Returns the number of char16_t code units needed to represent the
// UTF-32 sequence `s`: one unit per code point up to U+FFFF, two units
// (a surrogate pair) per code point in U+10000..U+10FFFF.
//
// Throws unexpected_surrogate if `s` contains a code point in
// U+D800..U+DFFF, and too_big if a code point exceeds U+10FFFF.
template<>
size_t UTF<char16_t>::utf_length(u32string_view s)
{
	size_t units = 0;
	for(const char32_t cp : s)
	{
		// Reject values beyond the Unicode range first.
		// NOTE(review): the first too_big argument is passed as 0 here;
		// its meaning is defined elsewhere -- confirm against the exception class.
		if(cp > 0x10ffff)
		{
			throw too_big(0, cp);
		}

		// Surrogate code points are not allowed in the UTF-32 input.
		if(cp >= 0xD800 && cp <= 0xDFFF)
		{
			throw unexpected_surrogate(cp);
		}

		units += (cp <= 0xffff) ? 1 : 2;
	}
	return units;
}
// convenience function to avoid ::strdup(pEp::toNFC<char>(text).c_str());
// and unnecessary temporary std::string etc.
char* strdup_NFC(string_view s)


Loading…
Cancel
Save