A C++ wrapper for the basic C datatypes defined by the pEpEngine.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

316 lines
9.1 KiB

  1. // This file is under GNU General Public License 3.0
  2. // see LICENSE.txt
  3. #ifndef LIBPEPDATATYPES_NFC_HH
  4. #define LIBPEPDATATYPES_NFC_HH
#include <cstddef>
#include <cstdint>
#include <iosfwd>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>

#include <boost/operators.hpp>
#include <pEp/identity_list.h>
  11. namespace pEp {
  /// Three-valued answer to the question "is this text in NFC?",
  /// as returned by the quick check functions of this header.
  enum class IsNFC
  {
      No    = 0, ///< contains a character that cannot occur in NFC
      Maybe = 1, ///< contains a character that is only allowed in certain positions in NFC
      Yes   = 2  ///< contains no invalid or partially valid character
  };

  /// Stream output for IsNFC values (declaration only; the definition is not in this header).
  std::ostream& operator<<(std::ostream& o, IsNFC is_nfc);
  /// Exception type named by the UTF functions of this header as what they
  /// "might throw" on bad input. It is a std::runtime_error, so it can be caught as such.
  /// Only the constructors are declared here; their definitions live elsewhere.
  class illegal_utf : public std::runtime_error
  {
  public:
      /// Construct from 8-bit input.
      /// NOTE(review): presumably `position` is the offset of the offending code unit
      /// and `reason` a human-readable explanation — confirm against the implementation.
      illegal_utf(std::string_view, unsigned position, const std::string& reason);

      /// Same as above for 16-bit input.
      illegal_utf(std::u16string_view, unsigned position, const std::string& reason);

      /// Construct from a ready-made message; explicit, so a std::string never
      /// converts to an exception object implicitly.
      explicit illegal_utf(const std::string& message);
  };
  26. /// Common class template to define the same functions for all 3 Unicode Transfer Formats.
  27. template<class CharT>
  28. class UTF
  29. {
  30. public:
  31. /// parses a sequence of input code units into one Unicode code point and updates the input iterator c.
  32. /// \todo change to iterator templates?
  33. static
  34. uint32_t parse(const CharT*& c, const CharT* end);
  35. /// generates a UTF sequence from a given Unicode code point.
  36. template<class OutIter>
  37. static
  38. void generate(const char32_t c, OutIter& out);
  39. /// returns whether the sequence starts with IsNFC==Yes char
  40. static
  41. bool is_safe_NFC_start(std::basic_string_view<CharT> s);
  42. /// returns No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe"
  43. /// might throw illegal_utf exception
  44. static
  45. IsNFC isNFC_quick_check(std::basic_string_view<CharT> s);
  46. /// runs first quick check and a deep test if quick check returns "Maybe".
  47. static
  48. bool isNFC(std::basic_string_view<CharT> s);
  49. /// returns true if the sequence is valid UTF-8
  50. bool isUtf(const CharT* begin, const CharT* end);
  51. /// converts a C++ string (in UTF-8/-16) into NFC form
  52. static
  53. std::basic_string<CharT> toNFC(std::basic_string_view<CharT> s);
  54. /// calculates the number of "code units" in the target Unicode Transfer Format.
  55. static
  56. size_t utf_length(std::u32string_view s);
  57. /// generates a whole u32string at once
  58. static
  59. std::basic_string<CharT> generate(const std::u32string& s);
  60. /// creates an NFD u32string from UTF-8/UTF-16 input string s
  61. static
  62. std::u32string fromUtf_decompose(std::basic_string_view<CharT> s);
  63. /// class holding a NFC-conform Unicode string.
  64. /// content is mostly read-only, because arbitrary modifications might destroy NFC conformacy.
  65. class nfc_string : public boost::totally_ordered2<nfc_string, std::basic_string_view<CharT>>
  66. {
  67. public:
  68. typedef std::basic_string<CharT> String;
  69. typedef std::basic_string_view<CharT> StringView;
  70. /// only const_reference is supported.
  71. typedef typename String::const_reference const_reference;
  72. typedef typename String::const_pointer const_pointer;
  73. /// only forward iterator. Does a backward_iterator make sense in UTF-encoded strings?
  74. typedef typename String::const_iterator const_iterator;
  75. static const size_t npos = String::npos;
  76. explicit nfc_string(StringView src);
  77. explicit nfc_string(String && src);
  78. /// construct from a NUL-terminated src
  79. explicit nfc_string(const CharT* src)
  80. : nfc_string{ StringView{src} }
  81. {}
  82. nfc_string(const CharT* src, size_t length)
  83. : nfc_string{ StringView{src, length} }
  84. {}
  85. nfc_string(const nfc_string& src) = default;
  86. nfc_string( nfc_string&& src) = default;
  87. nfc_string& operator=(const nfc_string& src) = default;
  88. nfc_string& operator=( nfc_string&& src) = default;
  89. nfc_string& assign(StringView src);
  90. nfc_string& assign(String && src);
  91. /// read-only: shares representation
  92. operator const String&() const noexcept { return s; }
  93. const String& get() const noexcept { return s;}
  94. /// read write: copy content
  95. operator String() const { return s; }
  96. const CharT* c_str() const noexcept { return s.c_str(); }
  97. const CharT* data() const noexcept { return s.data(); }
  98. std::size_t size() const noexcept { return s.size(); }
  99. bool empty() const noexcept { return s.empty(); }
  100. std::size_t capacity() const noexcept { return s.capacity(); }
  101. void reserve(std::size_t new_capacity) { s.reserve(new_capacity); }
  102. void shrink_to_fit() { s.shrink_to_fit(); }
  103. const_reference operator[](std::size_t ofs) const noexcept { return s[ofs]; }
  104. const_reference at(std::size_t ofs) const { return s.at(ofs); }
  105. const_reference front() const noexcept { return s.front(); }
  106. const_reference back() const noexcept { return s.back(); }
  107. operator StringView() const noexcept { return StringView{s}; }
  108. const_iterator begin() const noexcept { return s.cbegin(); }
  109. const_iterator cbegin() const noexcept { return s.cbegin(); } /// r/o access only
  110. const_iterator end() const noexcept { return s.cend(); }
  111. const_iterator cend() const noexcept { return s.cend(); } /// r/o access only
  112. void clear() { s.clear(); }
  113. /// I am lazy and delegate all the 10 different insert() overloads directly to s.
  114. template<typename... Args>
  115. nfc_string& insert(Args&& ...args)
  116. {
  117. s.insert( std::forward<Args>(args)... );
  118. normalize();
  119. return *this;
  120. }
  121. /// delegates all erase() overloads to s.
  122. template<typename... Args>
  123. nfc_string& erase(Args&& ...args)
  124. {
  125. s.erase( std::forward<Args>(args)... );
  126. normalize();
  127. return *this;
  128. }
  129. nfc_string& push_back(CharT c);
  130. /// delegates all 9 append() overloads to s.
  131. template<typename... Args>
  132. nfc_string& append(Args&& ...args)
  133. {
  134. s.append( std::forward<Args>(args)... );
  135. normalize();
  136. return *this;
  137. }
  138. nfc_string& operator+=(StringView s);
  139. /// optimization possible to avoid re-normalization in most cases.
  140. nfc_string& operator+=(const nfc_string& s);
  141. /// optimization possible to avoid re-normalization in most cases.
  142. nfc_string& operator+=(CharT c) { push_back(c); return *this; }
  143. /// delegates all 9 compare() overloads to s
  144. template<typename... Args>
  145. int compare(Args&& ...args) const
  146. {
  147. return s.compare( std::forward<Args>(args)... );
  148. }
  149. /// stolen from C++20
  150. bool starts_with(StringView s) const noexcept;
  151. /// stolen from C++20
  152. bool ends_with(StringView s) const noexcept;
  153. /// delegates all 5 find() overloads to s
  154. template<typename... Args>
  155. std::size_t find(Args&& ...args) const
  156. {
  157. return s.find( std::forward<Args>(args)... );
  158. }
  159. /// might throw illegal_utf, if a multi-char sequence is clipped.
  160. nfc_string substr(std::size_t pos=0, std::size_t count=npos) const;
  161. private:
  162. std::basic_string<CharT> s;
  163. /// (re-)normalize the content string s.
  164. void normalize();
  165. };
  166. };
  167. /// can be more efficient than the operator+() below.
  168. template<class CharT>
  169. typename
  170. UTF<CharT>::nfc_string operator+(
  171. typename UTF<CharT>::nfc_string left,
  172. const typename UTF<CharT>::nfc_string& right);
  173. template<class CharT, class T>
  174. inline
  175. typename
  176. UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string left, const T& right)
  177. {
  178. return left+=right;
  179. }
  180. template<class CharT, class T>
  181. inline
  182. typename
  183. UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string&& left, const T& right)
  184. {
  185. return left+=right;
  186. }
  187. template<class CharT, class T>
  188. inline
  189. typename
  190. UTF<CharT>::nfc_string operator+(const T& left, const typename UTF<CharT>::nfc_string& right)
  191. {
  192. typename UTF<CharT>::nfc_string left_s{left};
  193. return left_s+=right;
  194. }
  195. template<class CharT>
  196. inline
  197. bool operator<(const typename UTF<CharT>::nfc_string& left, std::basic_string_view<CharT> right)
  198. {
  199. return left<right;
  200. }
  201. template<class CharT>
  202. inline
  203. bool operator==(const typename UTF<CharT>::nfc_string& left, std::basic_string_view<CharT> right)
  204. {
  205. return left==right;
  206. }
  207. /// convenient alias names:
  208. using UTF8 = UTF<char>;
  209. using UTF16 = UTF<char16_t>;
  210. using nfc_string = UTF8::nfc_string;
  211. using nfc_u16string = UTF16::nfc_string;
  /// Checks that \p s is valid UTF-8.
  /// Throws an illegal_utf exception if it is not.
  void assert_utf8(std::string_view s);
  /// Converts NFD to NFC.
  /// \param nfd_string  the text in NFD, as UTF-32; taken by value
  /// \return the NFC form of the text
  std::u32string createNFC(std::u32string nfd_string);
  216. /*
  217. // return No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe"
  218. // might throw illegal_utf exception
  219. template<class CharT>
  220. IsNFC isNFC_quick_check(std::basic_string_view<CharT> s);
  221. // runs first quick check and a deep test if quick check returns "Maybe".
  222. template<class CharT>
  223. bool isNFC(std::basic_string_view<CharT> s);
  224. // returns true if the sequence is valid UTF-8
  225. bool isUtf8(const char* begin, const char* end);
  226. // converts a C++ string (in UTF-8) into NFC form
  227. // s is ''moved'' to the return value if possible so no copy is done here.
  228. template<class CharT>
  229. std::basic_string<CharT> toNFC(std::basic_string_view<CharT> s);
  230. */
  /// Creates a UTF-8-encoded NFC string from the 16-bit input \p s.
  std::string toNFC_8(std::u16string_view s);
  /// Convenience functions to avoid ::strdup(pEp::toNFC<char>(text).c_str());
  /// and unnecessary temporary std::string etc.
  ///
  /// NOTE(review): going by the strdup() idiom named above, the returned buffer is
  /// presumably owned by the caller — confirm how it has to be released.
  char* strdup_NFC(std::string_view s);
  236. pEp_identity *identity_dup_NFC(const ::pEp_identity* value);
  237. ::identity_list* identity_list_dup_NFC(const ::identity_list* value);
  238. } // end of namespace pEp
  239. #endif // LIBPEPDATATYPES_NFC_HH