Browse Source

Removed the single-char isNFC(), which makes no sense. Added free-standing operator+() overloads for nfc_string.

master
roker 2 months ago
parent
commit
86bfec58c7
2 changed files with 53 additions and 15 deletions
  1. +14
    -13
      src/nfc.cc
  2. +39
    -2
      src/nfc.hh

+ 14
- 13
src/nfc.cc View File

@ -554,24 +554,25 @@ std::u32string createNFC(std::u32string nfd)
}
template<>
IsNFC UTF<char>::isNFC(char c)
template<class CharT>
bool UTF<CharT>::is_safe_NFC_start(std::basic_string_view<CharT> s)
{
if( c & 0x80 )
throw illegal_utf("Single octet >0x80 is invalid UTF-8");
if(s.empty() || (s[0] & 0x80)==0 ) // shortcut for empty string or starts with ASCII char
{
return true;
}
const CharT* begin = s.data();
const CharT* const end = s.data() + s.size();
return IsNFC::Yes; // all ASCII characters are valid NFC.
const uint32_t u = parse(begin, end);
if(NFC_No.count(u)) return false;
if(NFC_Maybe.count(u)) return false;
return true;
}
/// Classifies a single UTF-16 code unit by looking it up in the NFC_No and
/// NFC_Maybe tables. A hit in NFC_No wins over a hit in NFC_Maybe; a value
/// found in neither table is reported as IsNFC::Yes.
template<>
IsNFC UTF<char16_t>::isNFC(char16_t c)
{
	// Guard clause: "No" has priority, so NFC_Maybe is only consulted afterwards.
	if(NFC_No.count(c) != 0)
	{
		return IsNFC::No;
	}

	return (NFC_Maybe.count(c) != 0) ? IsNFC::Maybe : IsNFC::Yes;
}
template<class CharT>
IsNFC UTF<CharT>::isNFC_quick_check(std::basic_string_view<CharT> s)


+ 39
- 2
src/nfc.hh View File

@ -47,9 +47,9 @@ public:
void generate(const char32_t c, OutIter& out);
/// returns the NFC class of a single character
/// returns whether the sequence starts with IsNFC==Yes char
static
IsNFC isNFC(CharT c);
bool is_safe_NFC_start(std::basic_string_view<CharT> s);
/// returns No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe"
/// might throw illegal_utf exception
@ -205,6 +205,10 @@ public:
return s.find( std::forward<Args>(args)... );
}
/// might throw illegal_utf, if a multi-char sequence is clipped.
nfc_string substr(std::size_t pos=0, std::size_t count=npos) const;
private:
std::basic_string<CharT> s;
@ -214,6 +218,39 @@ public:
};
};
/// Concatenates two nfc_string values: `left` is taken by value, `right` by
/// const reference. Only declared here; the definition is not part of this
/// section.
/// Can be more efficient than the generic operator+() overloads below.
/// NOTE(review): CharT occurs only inside `typename UTF<CharT>::nfc_string`,
/// which is a non-deduced context, so a plain `a + b` cannot deduce CharT for
/// this template -- confirm how callers are expected to reach it.
template<class CharT>
typename
UTF<CharT>::nfc_string operator+(
typename UTF<CharT>::nfc_string left,
const typename UTF<CharT>::nfc_string& right);
/// Returns `left` with `right` appended, for any `T` that nfc_string's
/// operator+=() accepts.
///
/// @param left   taken by value, so it serves as the working copy
/// @param right  the value to append
/// @return the extended string
///
/// NOTE(review): CharT appears only in a non-deduced context
/// (`typename UTF<CharT>::nfc_string`), so it cannot be deduced from `a + b`.
template<class CharT, class T>
inline
typename
UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string left, const T& right)
{
	// Append first, then return the parameter by name: a by-value parameter is
	// implicitly moved into the result, whereas `return left+=right;`
	// copy-constructs it from the reference that operator+=() yields.
	// Assumes operator+=() modifies `left` in place -- TODO confirm.
	left += right;
	return left;
}
/// Overload for an expiring left operand: appends `right` to `left` in place
/// and hands the buffer on to the result instead of copying it.
///
/// @param left   rvalue string that is extended and then moved from
/// @param right  the value to append; any `T` accepted by operator+=()
/// @return the extended string
///
/// NOTE(review): CharT appears only in a non-deduced context
/// (`typename UTF<CharT>::nfc_string`), so it cannot be deduced from `a + b`.
template<class CharT, class T>
inline
typename
UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string&& left, const T& right)
{
	// Assumes operator+=() modifies `left` in place -- TODO confirm.
	left += right;
	// A named rvalue-reference parameter is an lvalue; before C++20 it is not
	// moved implicitly on return, so the move has to be spelled out.
	return std::move(left);
}
/// Concatenation with a non-nfc_string left operand: builds an nfc_string from
/// `left`, appends `right` to it and returns the result.
///
/// @param left   value the temporary nfc_string is constructed from
///               (presumably a string-like type -- the constructor is not
///               visible here, TODO confirm)
/// @param right  the nfc_string to append
/// @return the concatenated string
///
/// NOTE(review): CharT appears only in a non-deduced context
/// (`typename UTF<CharT>::nfc_string`), so it cannot be deduced from `a + b`.
template<class CharT, class T>
inline
typename
UTF<CharT>::nfc_string operator+(const T& left, const typename UTF<CharT>::nfc_string& right)
{
	// The temporary must be the string type that is returned, not the
	// enclosing UTF<CharT> utility class.
	typename UTF<CharT>::nfc_string left_s{left};
	left_s += right;
	// Returning the local by name allows NRVO / implicit move instead of a copy.
	return left_s;
}
/// convenient alias names:
/// UTF8 is UTF instantiated for `char`, UTF16 is UTF instantiated for `char16_t`.
using UTF8 = UTF<char>;
using UTF16 = UTF<char16_t>;


Loading…
Cancel
Save