|
|
- // This file is under GNU General Public License 3.0
- // see LICENSE.txt
-
- #ifndef LIBPEPDATATYPES_NFC_HH
- #define LIBPEPDATATYPES_NFC_HH
-
- #include <cstddef>
- #include <cstdint>
- #include <iosfwd>
- #include <stdexcept>
- #include <string>
- #include <string_view>
- #include <utility>
- #include <boost/operators.hpp>
- #include <pEp/identity_list.h>
-
- namespace pEp {
-
- enum class IsNFC // classification type returned by UTF<CharT>::isNFC_quick_check()
- {
- No=0, // the input contains a character that cannot occur in NFC
- Maybe=1, // the input contains a character that is only allowed in certain positions in NFC
- Yes=2 // the input contains neither an invalid nor a partially valid character
- };
-
- std::ostream& operator<<(std::ostream& o, IsNFC is_nfc); // stream insertion for IsNFC values; defined out of line, the output format is not visible in this header
-
-
- class illegal_utf : public std::runtime_error // exception type named in the "might throw illegal_utf" notes of this header
- {
- public:
- illegal_utf( std::string_view, unsigned position, const std::string& reason); // char input, a position and a reason text; NOTE(review): unit of position (code units vs. code points) is not visible here
- illegal_utf(std::u16string_view, unsigned position, const std::string& reason); // same parameters, for char16_t input
- explicit illegal_utf(const std::string& message); // takes a complete message; presumably passed on to std::runtime_error -- confirm in the definition
- };
-
-
- /// Common class template that defines the same set of functions for every supported
- /// Unicode Transformation Format; CharT is the code unit type (char or char16_t, see the
- /// UTF8 / UTF16 aliases). All member functions are static and the class has no data members.
- template<class CharT>
- class UTF
- {
- public:
-     /// parses a sequence of input code units into one Unicode code point and updates the input iterator c.
-     /// \todo change to iterator templates?
-     static
-     uint32_t parse(const CharT*& c, const CharT* end);
-
-     /// generates the UTF sequence for the single Unicode code point c, using the output iterator out.
-     template<class OutIter>
-     static
-     void generate(const char32_t c, OutIter& out);
-
-
-     /// returns whether the sequence starts with an IsNFC==Yes character
-     static
-     bool is_safe_NFC_start(std::basic_string_view<CharT> s);
-
-     /// returns No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe".
-     /// might throw an illegal_utf exception
-     static
-     IsNFC isNFC_quick_check(std::basic_string_view<CharT> s);
-
-     /// runs the quick check first, and a deep test if the quick check returns "Maybe".
-     static
-     bool isNFC(std::basic_string_view<CharT> s);
-
-     /// returns true if [begin, end) is a valid UTF sequence.
-     /// Static like all sibling functions, so no UTF object is needed to call it.
-     /// NOTE(review): the comment used to say "valid UTF-8"; for CharT = char16_t this is
-     /// presumably the matching encoding (UTF-16) -- confirm against the definition.
-     static
-     bool isUtf(const CharT* begin, const CharT* end);
-
-     /// converts a C++ string (in UTF-8/-16) into NFC form
-     static
-     std::basic_string<CharT> toNFC(std::basic_string_view<CharT> s);
-
-     /// calculates the number of "code units" s needs in the target Unicode Transformation Format.
-     static
-     size_t utf_length(std::u32string_view s);
-
-     /// generates the UTF sequence for a whole u32string at once
-     static
-     std::basic_string<CharT> generate(const std::u32string& s);
-
-     /// creates an NFD u32string from the UTF-8/UTF-16 input string s
-     static
-     std::u32string fromUtf_decompose(std::basic_string_view<CharT> s);
-
-
-     /// class holding an NFC-conformant Unicode string.
-     /// content is mostly read-only, because arbitrary modifications might destroy NFC conformance.
-     class nfc_string : public boost::totally_ordered2<nfc_string, std::basic_string_view<CharT>>
-     {
-     public:
-         using String     = std::basic_string<CharT>;
-         using StringView = std::basic_string_view<CharT>;
-
-         /// only const_reference is supported.
-         using const_reference = typename String::const_reference;
-         using const_pointer   = typename String::const_pointer;
-
-         /// only forward iterator. Does a backward_iterator make sense in UTF-encoded strings?
-         using const_iterator = typename String::const_iterator;
-
-         static const size_t npos = String::npos;
-
-
-         /// construct from a view or from an rvalue string.
-         /// NOTE(review): presumably validates / normalizes src (cf. normalize()) -- confirm in the definitions.
-         explicit nfc_string(StringView src);
-         explicit nfc_string(String && src);
-
-         /// construct from a NUL-terminated src
-         explicit nfc_string(const CharT* src)
-         : nfc_string{ StringView{src} }
-         {}
-
-         /// construct from the first length code units of src
-         nfc_string(const CharT* src, size_t length)
-         : nfc_string{ StringView{src, length} }
-         {}
-
-         nfc_string(const nfc_string& src) = default;
-         nfc_string(      nfc_string&& src) = default;
-
-         nfc_string& operator=(const nfc_string& src) = default;
-         nfc_string& operator=(      nfc_string&& src) = default;
-
-         /// replace the content; counterparts of the two converting constructors.
-         nfc_string& assign(StringView src);
-         nfc_string& assign(String && src);
-
-
-         /// read-only: shares representation
-         operator const String&() const noexcept { return s; }
-
-         /// read-only access to the underlying string, spelled as a named function
-         const String& get() const noexcept { return s; }
-
-         /// read write: copy content
-         operator String() const { return s; }
-
-         // read-only accessors, each forwarding to the same-named member of s
-         const CharT* c_str() const noexcept { return s.c_str(); }
-         const CharT* data()  const noexcept { return s.data(); }
-         std::size_t  size()  const noexcept { return s.size(); }
-         bool         empty() const noexcept { return s.empty(); }
-
-         // capacity management does not touch the content, so no re-normalization is needed
-         std::size_t capacity() const noexcept { return s.capacity(); }
-         void reserve(std::size_t new_capacity) { s.reserve(new_capacity); }
-         void shrink_to_fit() { s.shrink_to_fit(); }
-
-         // element access: const only, indices count code units of s
-         const_reference operator[](std::size_t ofs) const noexcept { return s[ofs]; }
-         const_reference at(std::size_t ofs) const { return s.at(ofs); }
-         const_reference front() const noexcept { return s.front(); }
-         const_reference back()  const noexcept { return s.back(); }
-         operator StringView() const noexcept { return StringView{s}; }
-
-         const_iterator begin()  const noexcept { return s.cbegin(); }
-         const_iterator cbegin() const noexcept { return s.cbegin(); } /// r/o access only
-         const_iterator end()    const noexcept { return s.cend(); }
-         const_iterator cend()   const noexcept { return s.cend(); } /// r/o access only
-
-         void clear() { s.clear(); }
-
-         /// delegates all insert() overloads directly to s and re-normalizes the content afterwards.
-         template<typename... Args>
-         nfc_string& insert(Args&& ...args)
-         {
-             s.insert( std::forward<Args>(args)... );
-             normalize();
-             return *this;
-         }
-
-         /// delegates all erase() overloads to s and re-normalizes the content afterwards.
-         template<typename... Args>
-         nfc_string& erase(Args&& ...args)
-         {
-             s.erase( std::forward<Args>(args)... );
-             normalize();
-             return *this;
-         }
-
-         /// appends one code unit; unlike std::basic_string::push_back() it returns *this.
-         nfc_string& push_back(CharT c);
-
-         /// delegates all append() overloads to s and re-normalizes the content afterwards.
-         template<typename... Args>
-         nfc_string& append(Args&& ...args)
-         {
-             s.append( std::forward<Args>(args)... );
-             normalize();
-             return *this;
-         }
-
-         nfc_string& operator+=(StringView rhs);
-
-         /// optimization possible to avoid re-normalization in most cases.
-         nfc_string& operator+=(const nfc_string& rhs);
-
-         /// optimization possible to avoid re-normalization in most cases.
-         nfc_string& operator+=(CharT c) { push_back(c); return *this; }
-
-         /// delegates all compare() overloads to s
-         template<typename... Args>
-         int compare(Args&& ...args) const
-         {
-             return s.compare( std::forward<Args>(args)... );
-         }
-
-         /// stolen from C++20
-         bool starts_with(StringView prefix) const noexcept;
-
-         /// stolen from C++20
-         bool ends_with(StringView suffix) const noexcept;
-
-         /// delegates all find() overloads to s
-         template<typename... Args>
-         std::size_t find(Args&& ...args) const
-         {
-             return s.find( std::forward<Args>(args)... );
-         }
-
-         /// might throw illegal_utf, if a multi-char sequence is clipped.
-         nfc_string substr(std::size_t pos=0, std::size_t count=npos) const;
-
-     private:
-         /// the wrapped string; every mutator above either calls normalize() or is defined out of line.
-         std::basic_string<CharT> s;
-
-         /// (re-)normalize the content string s.
-         void normalize();
-     };
- };
-
- /// operator+ for two nfc_strings; can be more efficient than the generic operator+() templates below.
- template<class CharT>
- typename
- UTF<CharT>::nfc_string operator+(
- typename UTF<CharT>::nfc_string left,
- const typename UTF<CharT>::nfc_string& right); // NOTE(review): CharT occurs only in non-deduced contexts (typename UTF<CharT>::...), so a plain `a + b` presumably cannot deduce it -- confirm how callers reach this overload
-
- /// operator+ for an nfc_string on the left and any T accepted by nfc_string::operator+=() on the right.
- /// left is taken by value, extended in place and returned. Returning the named parameter
- /// (instead of the lvalue reference yielded by +=) lets the compiler move it instead of copying it.
- /// NOTE(review): CharT occurs only in a non-deduced context, so it presumably has to be
- /// given explicitly -- confirm with callers.
- template<class CharT, class T>
- inline
- typename
- UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string left, const T& right)
- {
-     left += right;
-     return left;
- }
-
- /// operator+ for an rvalue nfc_string on the left: appends right to left in place and
- /// hands the extended string on by move. `return left+=right;` copied the string, because
- /// += yields an lvalue reference; the explicit std::move() avoids that copy.
- /// The caller's object is left in a moved-from state, as usual for rvalue arguments.
- /// NOTE(review): CharT occurs only in a non-deduced context, so it presumably has to be
- /// given explicitly -- confirm with callers.
- template<class CharT, class T>
- inline
- typename
- UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string&& left, const T& right)
- {
-     left += right;
-     return std::move(left);
- }
-
-
- /// operator+ for any T on the left from which an nfc_string can be constructed, and an
- /// nfc_string on the right. An nfc_string is built from left, right is appended to it, and
- /// the local is returned by name so that it can be elided or moved instead of copied.
- /// NOTE(review): CharT occurs only in a non-deduced context, so it presumably has to be
- /// given explicitly -- confirm with callers.
- template<class CharT, class T>
- inline
- typename
- UTF<CharT>::nfc_string operator+(const T& left, const typename UTF<CharT>::nfc_string& right)
- {
-     typename UTF<CharT>::nfc_string result{left};
-     result += right;
-     return result;
- }
-
- /// less-than comparison between an nfc_string and a string view of the same character type.
- /// The nfc_string is viewed explicitly as a std::basic_string_view, so the comparison
- /// resolves to the standard string_view operator. Writing `left<right` here would select
- /// this very template again (exact match on both arguments) and recurse without end.
- template<class CharT>
- inline
- bool operator<(const typename UTF<CharT>::nfc_string& left, std::basic_string_view<CharT> right)
- {
-     const std::basic_string_view<CharT> left_view{ left.get() };
-     return left_view < right;
- }
-
- /// equality comparison between an nfc_string and a string view of the same character type.
- /// The nfc_string is viewed explicitly as a std::basic_string_view, so the comparison
- /// resolves to the standard string_view operator. Writing `left==right` here would select
- /// this very template again (exact match on both arguments) and recurse without end.
- template<class CharT>
- inline
- bool operator==(const typename UTF<CharT>::nfc_string& left, std::basic_string_view<CharT> right)
- {
-     const std::basic_string_view<CharT> left_view{ left.get() };
-     return left_view == right;
- }
-
-
- /// convenient alias names for the two instantiations used with char and char16_t:
- using UTF8 = UTF<char>; // the function set operating on char code units
- using UTF16 = UTF<char16_t>; // the function set operating on char16_t code units
-
- using nfc_string = UTF8::nfc_string; // NFC string class over char
- using nfc_u16string = UTF16::nfc_string; // NFC string class over char16_t
-
- // throws an illegal_utf exception if s is not valid UTF-8
- void assert_utf8(std::string_view s);
-
-
- // converts a string given in NFD into NFC; the input is taken by value and the result is returned as a new std::u32string
- std::u32string createNFC(std::u32string nfd_string);
-
- /*
- // return No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe"
- // might throw illegal_utf exception
- template<class CharT>
- IsNFC isNFC_quick_check(std::basic_string_view<CharT> s);
-
- // runs first quick check and a deep test if quick check returns "Maybe".
- template<class CharT>
- bool isNFC(std::basic_string_view<CharT> s);
-
- // returns true if the sequence is valid UTF-8
- bool isUtf8(const char* begin, const char* end);
-
- // converts a C++ string (in UTF-8) into NFC form
- // s is ''moved'' to the return value if possible so no copy is done here.
- template<class CharT>
- std::basic_string<CharT> toNFC(std::basic_string_view<CharT> s);
- */
-
- // creates a UTF-8-encoded NFC string from the char16_t (presumably UTF-16) input s
- std::string toNFC_8(std::u16string_view s);
-
- // convenience functions to avoid ::strdup(pEp::toNFC<char>(text).c_str());
- // and unnecessary temporary std::string etc.
- char* strdup_NFC(std::string_view s); // NOTE(review): presumably allocated like ::strdup(), so the caller has to free() the result -- confirm in the definition
-
- pEp_identity *identity_dup_NFC(const ::pEp_identity* value); // presumably duplicates value with its strings converted to NFC; ownership of the result is not visible here -- confirm
- ::identity_list* identity_list_dup_NFC(const ::identity_list* value); // list counterpart of identity_dup_NFC() -- same caveats apply
-
-
- } // end of namespace pEp
-
- #endif // LIBPEPDATATYPES_NFC_HH
|