A C++ wrapper for the basic C datatypes defined by the pEpEngine.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

327 lines
9.8 KiB

  1. // This file is under GNU General Public License 3.0
  2. // see LICENSE.txt
  3. #ifndef LIBPEPDATATYPES_NFC_HH
  4. #define LIBPEPDATATYPES_NFC_HH
#include <cstddef>
#include <cstdint>
#include <iosfwd>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <boost/operators.hpp>
#include <pEp/identity_list.h>
  11. namespace pEp {
  /// Tri-state return value of isNFC_quick_check().
  enum class IsNFC
  {
      /// The string contains a character that cannot occur in NFC.
      No = 0,

      /// The string contains a character that is only allowed in certain
      /// positions in NFC.
      Maybe = 1,

      /// The string contains no invalid or partially valid character.
      Yes = 2
  };

  /// Stream output operator for an IsNFC value (defined outside this header).
  std::ostream& operator<<(std::ostream& o, IsNFC is_nfc);
  /// Exception thrown whenever a string is parsed that is not a valid
  /// UTF-8 or UTF-16 sequence.
  class illegal_utf : public std::runtime_error
  {
  public:
      /// Reports an error found in a UTF-8 input.
      /// The first argument is the offending input, `position` locates the
      /// problem within it and `reason` describes it.
      /// NOTE(review): whether `position` counts code units or code points
      /// is not visible from this header -- confirm against the definition.
      illegal_utf(std::string_view, unsigned position, const std::string& reason);

      /// Same as above, for a UTF-16 input.
      illegal_utf(std::u16string_view, unsigned position, const std::string& reason);

      /// Reports an error using a ready-made message.
      explicit illegal_utf(const std::string& message);
  };
  29. /// Common class template to define the same functions for all 3 Unicode Transfer Formats.
  30. template<class CharT>
  31. class UTF
  32. {
  33. public:
  34. /// parses a sequence of input code units into one Unicode code point and updates the input iterator c.
  35. /// \todo change to iterator templates?
  36. static
  37. uint32_t parse(const CharT*& c, const CharT* end);
  38. /// generates a UTF sequence from a given Unicode code point.
  39. template<class OutIter>
  40. static
  41. void generate(const char32_t c, OutIter& out);
  42. /// returns whether the sequence starts with IsNFC==Yes char
  43. static
  44. bool is_safe_NFC_start(std::basic_string_view<CharT> s);
  45. /// returns No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe".
  46. /// use isNFC() for a comprehensive NFC check.
  47. /// Might throw illegal_utf exception
  48. static
  49. IsNFC isNFC_quick_check(std::basic_string_view<CharT> s);
  50. /// runs first quick check and a deep test if quick check returns "Maybe".
  51. static
  52. bool isNFC(std::basic_string_view<CharT> s);
  53. /// returns true if the sequence is valid UTF-8
  54. static
  55. bool isUtf(const CharT* begin, const CharT* end);
  56. /// converts a C++ string (in UTF-8/-16) into NFC form
  57. static
  58. std::basic_string<CharT> toNFC(std::basic_string_view<CharT> s);
  59. /// calculates the number of "code units" in the target Unicode Transfer Format.
  60. static
  61. size_t utf_length(std::u32string_view s);
  62. /// generates a whole u32string at once
  63. static
  64. std::basic_string<CharT> generate(const std::u32string& s);
  65. /// creates an NFD u32string from UTF-8/UTF-16 input string s
  66. static
  67. std::u32string fromUtf_decompose(std::basic_string_view<CharT> s);
  68. /// class holding a NFC-conform Unicode string.
  69. /// content is mostly read-only, because arbitrary modifications might destroy NFC conformacy.
  70. class nfc_string : public boost::totally_ordered2<nfc_string, std::basic_string_view<CharT>>
  71. {
  72. public:
  73. typedef std::basic_string<CharT> String;
  74. typedef std::basic_string_view<CharT> StringView;
  75. /// only const_reference is supported.
  76. typedef typename String::const_reference const_reference;
  77. typedef typename String::const_pointer const_pointer;
  78. /// only forward iterator. Does a backward_iterator make sense in UTF-encoded strings?
  79. typedef typename String::const_iterator const_iterator;
  80. static
  81. constexpr size_t npos = String::npos;
  82. explicit nfc_string(StringView src);
  83. explicit nfc_string(String && src);
  84. /// construct from a NUL-terminated src
  85. explicit nfc_string(const CharT* src)
  86. : nfc_string{ StringView{src} }
  87. {}
  88. nfc_string(const CharT* src, size_t length)
  89. : nfc_string{ StringView{src, length} }
  90. {}
  91. nfc_string() = default;
  92. nfc_string(const nfc_string& src) = default;
  93. nfc_string( nfc_string&& src) = default;
  94. nfc_string& operator=(const nfc_string& src) = default;
  95. nfc_string& operator=( nfc_string&& src) = default;
  96. nfc_string& assign(StringView src);
  97. nfc_string& assign(String && src);
  98. nfc_string& assign(const CharT* src) { return this->assign(StringView{src}); }
  99. nfc_string& operator=(StringView src) { return this->assign(src); }
  100. nfc_string& operator=(String && src) { return this->assign(std::move(src)); }
  101. nfc_string& operator=(const CharT* src) { return this->assign(StringView{src}); }
  102. /// read-only: shares representation
  103. operator const String&() const noexcept { return s; }
  104. /// read-only: shares representation
  105. const String& get() const noexcept { return s;}
  106. /// read write: copy content
  107. operator String() const { return s; }
  108. const CharT* c_str() const noexcept { return s.c_str(); }
  109. const CharT* data() const noexcept { return s.data(); }
  110. std::size_t size() const noexcept { return s.size(); }
  111. bool empty() const noexcept { return s.empty(); }
  112. std::size_t capacity() const noexcept { return s.capacity(); }
  113. void reserve(std::size_t new_capacity) { s.reserve(new_capacity); }
  114. void shrink_to_fit() { s.shrink_to_fit(); }
  115. const_reference operator[](std::size_t ofs) const noexcept { return s[ofs]; }
  116. const_reference at(std::size_t ofs) const { return s.at(ofs); }
  117. const_reference front() const noexcept { return s.front(); }
  118. const_reference back() const noexcept { return s.back(); }
  119. operator StringView() const noexcept { return StringView{s}; }
  120. const_iterator begin() const noexcept { return s.cbegin(); }
  121. const_iterator cbegin() const noexcept { return s.cbegin(); } /// r/o access only
  122. const_iterator end() const noexcept { return s.cend(); }
  123. const_iterator cend() const noexcept { return s.cend(); } /// r/o access only
  124. void clear() { s.clear(); }
  125. /// I am lazy and delegate all the 10 different insert() overloads directly to s.
  126. template<typename... Args>
  127. nfc_string& insert(Args&& ...args)
  128. {
  129. s.insert( std::forward<Args>(args)... );
  130. normalize();
  131. return *this;
  132. }
  133. /// delegates all erase() overloads to s.
  134. template<typename... Args>
  135. nfc_string& erase(Args&& ...args)
  136. {
  137. s.erase( std::forward<Args>(args)... );
  138. normalize();
  139. return *this;
  140. }
  141. nfc_string& push_back(CharT c);
  142. /// delegates all 9 append() overloads to s.
  143. template<typename... Args>
  144. nfc_string& append(Args&& ...args)
  145. {
  146. s.append( std::forward<Args>(args)... );
  147. normalize();
  148. return *this;
  149. }
  150. /// more expensive, because 's' might not be in NFC.
  151. nfc_string& operator+=(StringView s);
  152. /// optimization possible to avoid re-normalization in most cases.
  153. nfc_string& operator+=(const nfc_string& s);
  154. /// optimization possible to avoid re-normalization in most cases.
  155. nfc_string& operator+=(CharT c) { push_back(c); return *this; }
  156. /// delegates all 9 compare() overloads to s
  157. template<typename... Args>
  158. int compare(Args&& ...args) const
  159. {
  160. return s.compare( std::forward<Args>(args)... );
  161. }
  162. /// stolen from C++20
  163. bool starts_with(StringView s) const noexcept;
  164. /// stolen from C++20
  165. bool ends_with(StringView s) const noexcept;
  166. /// delegates all 5 find() overloads to s
  167. template<typename... Args>
  168. std::size_t find(Args&& ...args) const
  169. {
  170. return s.find( std::forward<Args>(args)... );
  171. }
  172. /// might throw illegal_utf, if a multi-char sequence is clipped.
  173. nfc_string substr(std::size_t pos=0, std::size_t count=npos) const;
  174. private:
  175. std::basic_string<CharT> s;
  176. /// (re-)normalize the content string s.
  177. void normalize();
  178. };
  179. };
  180. /// can be more efficient than the operator+() below.
  181. template<class CharT>
  182. typename
  183. UTF<CharT>::nfc_string operator+(
  184. typename UTF<CharT>::nfc_string left,
  185. const typename UTF<CharT>::nfc_string& right);
  186. template<class CharT, class T>
  187. inline
  188. typename
  189. UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string left, const T& right)
  190. {
  191. return left+=right;
  192. }
  193. template<class CharT, class T>
  194. inline
  195. typename
  196. UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string&& left, const T& right)
  197. {
  198. return left+=right;
  199. }
  200. template<class CharT, class T>
  201. inline
  202. typename
  203. UTF<CharT>::nfc_string operator+(const T& left, const typename UTF<CharT>::nfc_string& right)
  204. {
  205. typename UTF<CharT>::nfc_string left_s{left};
  206. return left_s+=right;
  207. }
  208. template<class CharT>
  209. inline
  210. bool operator<(const typename UTF<CharT>::nfc_string& left, std::basic_string_view<CharT> right)
  211. {
  212. return left<right;
  213. }
  214. template<class CharT>
  215. inline
  216. bool operator==(const typename UTF<CharT>::nfc_string& left, std::basic_string_view<CharT> right)
  217. {
  218. return left==right;
  219. }
  220. /// convenient alias names:
  221. using UTF8 = UTF<char>;
  222. using UTF16 = UTF<char16_t>;
  223. using nfc_string = UTF8::nfc_string;
  224. using nfc_u16string = UTF16::nfc_string;
  225. // throws illegal_utf8 exception if s is not valid UTF-8
  226. void assert_utf8(std::string_view s);
  227. // convert NFD to NFC
  228. std::u32string createNFC(std::u32string nfd_string);
  229. /*
  230. // return No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe"
  231. // might throw illegal_utf exception
  232. template<class CharT>
  233. IsNFC isNFC_quick_check(std::basic_string_view<CharT> s);
  234. // runs first quick check and a deep test if quick check returns "Maybe".
  235. template<class CharT>
  236. bool isNFC(std::basic_string_view<CharT> s);
  237. // returns true if the sequence is valid UTF-8
  238. bool isUtf8(const char* begin, const char* end);
  239. // converts a C++ string (in UTF-8) into NFC form
  240. // s is ''moved'' to the return value if possible so no copy is done here.
  241. template<class CharT>
  242. std::basic_string<CharT> toNFC(std::basic_string_view<CharT> s);
  243. */
  244. // creates a UTF-8-encoded NFC string from s
  245. std::string toNFC_8(std::u16string_view s);
  246. // convenience functions to avoid ::strdup(pEp::toNFC<char>(text).c_str());
  247. // and unecessary temporary std::string etc.
  248. char* strdup_NFC(std::string_view s);
  249. pEp_identity *identity_dup_NFC(const ::pEp_identity* value);
  250. ::identity_list* identity_list_dup_NFC(const ::identity_list* value);
  251. } // end of namespace pEp
  252. #endif // LIBPEPDATATYPES_NFC_HH