A C++ wrapper for the basic C datatypes defined by the pEpEngine.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

887 lines
18 KiB

  1. // This file is under GNU General Public License 3.0
  2. // see LICENSE.txt
  3. // converts a C++ string into NFC form
  4. #include "nfc.hh"
  5. #include <cstdint>
  6. #include <set>
  7. #include <ostream>
  8. #include <algorithm>
  9. #include "nfc_sets.hh"
  10. #include <pEp/pEp_string.h>
  11. namespace
  12. {
  // Formats a Unicode code point as "<U+XXXX>" (upper-case hex, at least 4 digits).
  std::string u2h(unsigned u)
  {
      char text[16] = {0};
      // one byte less than the buffer size, so the zero-initialized tail always terminates
      snprintf(text, sizeof(text) - 1, "<U+%04X>", u);
      return std::string(text);
  }
  // Formats a single octet as "0xNN" (upper-case hex, always 2 digits).
  std::string o2h(uint8_t octet)
  {
      char text[16] = {0};
      snprintf(text, sizeof(text) - 1, "0x%02hhX", octet);
      return std::string(text);
  }
  // Formats a 16-bit code unit as "0xNNNN" (upper-case hex, at least 4 digits).
  std::string hex16(char16_t u)
  {
      char text[16] = {0};
      snprintf(text, sizeof(text) - 1, "0x%04X", u);
      return std::string(text);
  }
  // Base class of all file-internal UTF parsing/generation errors.
  // It stores the offending code unit twice: truncated to 8 bit ("octet", used by
  // the UTF-8 errors) and as full 16-bit value ("value", used by the UTF-16 errors).
  class utf_exception
  {
  public:
      utf_exception(uint16_t unit)
      : octet(unit)   // deliberately narrowed to the low 8 bits
      , value(unit)
      {}

      virtual ~utf_exception() = default;

      // human-readable description of the error
      virtual std::string reason() const = 0;

      uint8_t  octet;
      uint16_t value;
  };
  43. class cont_without_start : public utf_exception
  44. {
  45. public:
  46. cont_without_start(uint8_t u) : utf_exception(u) {}
  47. std::string reason() const override { return "Continuation octet " + o2h(octet) + " without start octet"; }
  48. };
  49. class overlong_sequence : public utf_exception
  50. {
  51. public:
  52. overlong_sequence(uint8_t octet, unsigned u) : utf_exception(octet), unicode(u) {}
  53. std::string reason() const override { return "Overlong sequence for " + u2h(unicode); }
  54. unsigned unicode;
  55. };
  56. class unexpected_end : public utf_exception
  57. {
  58. public:
  59. unexpected_end(uint8_t u) : utf_exception(u) {}
  60. std::string reason() const override { return "Unexpected end of string"; }
  61. };
  62. class surrogate : public utf_exception
  63. {
  64. public:
  65. surrogate(uint8_t u, unsigned s) : utf_exception(u), surr(s) {}
  66. std::string reason() const override { return "UTF-8-encoded UTF-16 surrogate " + u2h(surr) + " detected"; }
  67. private:
  68. unsigned surr;
  69. };
  70. class no_unicode : public utf_exception
  71. {
  72. public:
  73. explicit no_unicode(uint8_t _octet) : utf_exception(_octet) {}
  74. std::string reason() const override { return "Octet " + o2h(octet) + " is illegal in UTF-8"; }
  75. };
  76. class too_big : public utf_exception
  77. {
  78. public:
  79. explicit too_big(uint8_t _octet, unsigned u) : utf_exception(_octet), unicode(u) {}
  80. std::string reason() const override { return "Value " + u2h(unicode) + " is too big for Unicode"; }
  81. unsigned unicode;
  82. };
  83. class unexpected_surrogate : public utf_exception
  84. {
  85. public:
  86. explicit unexpected_surrogate(char16_t c) : utf_exception(c) {}
  87. std::string reason() const override { return "Unexpected surogate " + hex16(value); }
  88. };
  89. class missing_low_surrogate : public utf_exception
  90. {
  91. public:
  92. explicit missing_low_surrogate(char16_t c, char16_t _surr) : utf_exception(c), surr(_surr) {}
  93. std::string reason() const override { return "Non-low surrogate value " + hex16(value) + " is unexpected after high surogate " + hex16(surr); }
  94. private:
  95. char16_t surr;
  96. };
  // Returns a printable version of s: ASCII 32..126 is copied verbatim,
  // every other octet is replaced by its 2-digit lower-case hex value in guillemets.
  std::string escape(std::string_view s)
  {
      std::string escaped;
      escaped.reserve(s.size() + 16);
      for(size_t i = 0; i < s.size(); ++i)
      {
          const uint8_t octet = static_cast<uint8_t>(s[i]);
          const bool printable = (octet >= 32) && (octet <= 126);
          if(printable)
          {
              escaped.push_back(s[i]);
              continue;
          }
          char hex[16];
          snprintf(hex, 15, "«%02x»", octet);
          escaped.append(hex);
      }
      return escaped;
  }
  // Returns a printable version of the UTF-16 string s: code units 32..126 are
  // copied as ASCII, every other unit is replaced by its 4-digit lower-case hex
  // value in guillemets.
  std::string escape(std::u16string_view s)
  {
      std::string escaped;
      escaped.reserve(s.size() + 16);
      for(size_t i = 0; i < s.size(); ++i)
      {
          const char16_t unit = s[i];
          const bool printable = (unit >= 32) && (unit <= 126);
          if(printable)
          {
              escaped.push_back(char(unit));
              continue;
          }
          char hex[16];
          snprintf(hex, 15, "«%04x»", unit);
          escaped.append(hex);
      }
      return escaped;
  }
  130. // returns the "CanonicalCombinincClass" of the given Unicode codpoint u
  131. unsigned canonicalClass(unsigned u)
  132. {
  133. const auto q = NFC_CombiningClass.find(u);
  134. if(q==NFC_CombiningClass.end())
  135. {
  136. return 0; // not found in map.
  137. }else{
  138. return q->second;
  139. }
  140. }
  141. std::pair<int,int> decompose(unsigned u)
  142. {
  143. const auto q = NFC_Decompose.find(u);
  144. if(q==NFC_Decompose.end())
  145. {
  146. return std::make_pair(-1, -1);
  147. }else{
  148. return q->second;
  149. }
  150. }
  151. std::u32string decompose_full(unsigned u)
  152. {
  153. const std::pair<int,int> d = decompose(u);
  154. if(d.first<0)
  155. {
  156. return std::u32string( 1, char32_t(u) );
  157. }else{
  158. if(d.second<0)
  159. {
  160. return decompose_full(d.first);
  161. }
  162. }
  163. return decompose_full(d.first) + decompose_full(d.second);
  164. }
  165. // according to Unicode Standard, clause D108:
  166. bool isReorderablePair(unsigned a, unsigned b)
  167. {
  168. const unsigned cca = canonicalClass(a);
  169. const unsigned ccb = canonicalClass(b);
  170. return (cca > ccb) && (ccb>0);
  171. }
  172. // Unicode standard requires bubble sort, for stability reasons?
  173. void canonicalOrdering(std::u32string& us)
  174. {
  175. if(us.size()<2)
  176. return;
  177. for(unsigned n=us.size(); n>1; --n)
  178. for(unsigned i=0; i<n-1; ++i)
  179. {
  180. char32_t& a = us[i];
  181. char32_t& b = us[i+1];
  182. if( isReorderablePair(a,b) )
  183. {
  184. std::swap(a,b);
  185. }
  186. }
  187. }
  188. } // end of anonymous namespace
  189. namespace pEp {
  190. std::string escape_utf16(std::u16string_view s)
  191. {
  192. return escape(s);
  193. }
  194. std::ostream& operator<<(std::ostream& o, IsNFC is_nfc)
  195. {
  196. switch(is_nfc)
  197. {
  198. case IsNFC::No : return o << "No";
  199. case IsNFC::Maybe : return o << "Maybe";
  200. case IsNFC::Yes : return o << "Yes";
  201. }
  202. throw std::logic_error("Unknown value of IsNFC");
  203. }
  204. uint32_t parseUtf8(const char*& c, const char* end)
  205. {
  206. while(c<end)
  207. {
  208. const uint8_t u = uint8_t(*c);
  209. if (u<=0x7f)
  210. {
  211. return u;
  212. } else if (u<=0xBF)
  213. {
  214. throw cont_without_start(u);
  215. } else if (u<=0xC1) // 0xC0, 0xC1 would form "overlong sequences" and are therefore always illegal in UTF-8
  216. {
  217. throw no_unicode(u);
  218. } else if (u<=0xDF) // 2 octet sequence
  219. {
  220. ++c;
  221. if(c==end) throw unexpected_end(u);
  222. const uint8_t uu = uint8_t(*c);
  223. if((uu & 0xC0) != 0x80)
  224. {
  225. throw unexpected_end(uu);
  226. }
  227. return ((u & 0x1F) << 6) + (uu & 0x3F);
  228. } else if (u<=0xEF) // 3 octet sequence
  229. {
  230. ++c;
  231. if(c==end) throw unexpected_end(u);
  232. const uint8_t uu = uint8_t(*c);
  233. if((uu & 0xC0) != 0x80)
  234. {
  235. throw unexpected_end(uu);
  236. }
  237. ++c;
  238. if(c==end) throw unexpected_end(uu);
  239. const uint8_t uuu = uint8_t(*c);
  240. if((uuu & 0xC0) != 0x80)
  241. {
  242. throw unexpected_end(uuu);
  243. }
  244. const uint32_t ret = ((u & 0xF) << 12) + ((uu & 0x3F)<<6) + (uuu & 0x3F);
  245. if(ret<0x800) throw overlong_sequence(u, ret);
  246. if(ret>=0xD800 && ret<=0xDFFF) throw surrogate(u, ret);
  247. return ret;
  248. } else if (u<=0xF4) // 4 octet sequence
  249. {
  250. ++c;
  251. if(c==end) throw unexpected_end(u);
  252. const uint8_t uu = uint8_t(*c);
  253. if((uu & 0xC0) != 0x80)
  254. {
  255. throw unexpected_end(uu);
  256. }
  257. ++c;
  258. if(c==end) throw unexpected_end(uu);
  259. const uint8_t uuu = uint8_t(*c);
  260. if((uuu & 0xC0) != 0x80)
  261. {
  262. throw unexpected_end(uuu);
  263. }
  264. ++c;
  265. if(c==end) throw unexpected_end(uuu);
  266. const uint8_t uuuu = uint8_t(*c);
  267. if((uuuu & 0xC0) != 0x80)
  268. {
  269. throw unexpected_end(uuuu);
  270. }
  271. const uint32_t ret = ((u & 0xF) << 18) + ((uu & 0x3F)<<12) + ((uuu & 0x3F)<<6) + (uuuu & 0x3F);
  272. if(ret<0x10000) throw overlong_sequence(u, ret);
  273. if(ret>0x10FFFF) throw too_big(u, ret);
  274. return ret;
  275. } else
  276. {
  277. throw no_unicode(u);
  278. }
  279. }
  280. throw unexpected_end(-1);
  281. }
  282. uint32_t parseUtf16(const char16_t*& c, const char16_t* end)
  283. {
  284. while(c<end)
  285. {
  286. const char16_t u = *c;
  287. if(u<0xD800 || u>=0xE000)
  288. {
  289. return u;
  290. }else{
  291. if(u>=0xDC00)
  292. {
  293. throw unexpected_surrogate(u);
  294. }
  295. ++c;
  296. if(c==end) throw unexpected_end(u);
  297. const uint16_t low = *c;
  298. if(low < 0xDC00 || low > 0xDFFF)
  299. {
  300. throw missing_low_surrogate(low, u);
  301. }
  302. return (u-0xD800) * 1024 + (low-0xDC00) + 0x10000;
  303. }
  304. }
  305. throw unexpected_end(-1);
  306. }
  307. template<>
  308. uint32_t UTF<char>::parse(const char*& c, const char* end)
  309. {
  310. return parseUtf8(c,end);
  311. }
  312. template<>
  313. uint32_t UTF<char16_t>::parse(const char16_t*& c, const char16_t* end)
  314. {
  315. return parseUtf16(c,end);
  316. }
  317. template<>
  318. template<class OutIter>
  319. void UTF<char>::generate(const char32_t c, OutIter& out)
  320. {
  321. if(c<=0x7F)
  322. {
  323. *out++ = char(c);
  324. }else if(c<=0x7FF)
  325. {
  326. *out++ = char( 0xC0 + (c>>6) );
  327. *out++ = char( 0x80 + (c & 63));
  328. }else if(c<=0xFFFF)
  329. {
  330. if(c>=0xD800 && c<=0xDFFF)
  331. {
  332. throw unexpected_surrogate(c);
  333. }
  334. *out++ = char( 0xE0 + (c>>12) );
  335. *out++ = char( 0x80 + ((c>>6) & 63));
  336. *out++ = char( 0x80 + (c & 63));
  337. }else if(c<=0x10FFFF)
  338. {
  339. *out++ = char( 0xF0 + (c>>18) );
  340. *out++ = char( 0x80 + ((c>>12) & 63));
  341. *out++ = char( 0x80 + ((c>>6) & 63));
  342. *out++ = char( 0x80 + (c & 63));
  343. }else{
  344. throw too_big(0, c);
  345. }
  346. }
  347. template<>
  348. template<class OutIter>
  349. void UTF<char16_t>::generate(const char32_t c, OutIter& out)
  350. {
  351. if(c <= 0xFFFF)
  352. {
  353. if(c>=0xD800 && c<=0xDFFF)
  354. {
  355. throw unexpected_surrogate(c);
  356. }else{
  357. *out++ = char16_t(c);
  358. }
  359. }else{ // surrogate pair
  360. if(c>0x10FFFF)
  361. {
  362. throw too_big(0, c);
  363. }else{
  364. const uint32_t c_reduced = c - 0x10000;
  365. *out++ = char16_t(0xD800 + (c_reduced >> 10)); // High Surrogate
  366. *out++ = char16_t(0xDC00 + (c_reduced & 0x3FF)); // Low Surrogate
  367. }
  368. }
  369. }
  370. template<class CharT>
  371. std::basic_string<CharT> UTF<CharT>::generate(const std::u32string& u32)
  372. {
  373. std::basic_string<CharT> ret;
  374. auto out = std::back_inserter(ret);
  375. for(char32_t c : u32)
  376. {
  377. generate(c, out);
  378. }
  379. return ret;
  380. }
  381. illegal_utf::illegal_utf( std::string_view s, unsigned position, const std::string& reason)
  382. : std::runtime_error( "Illegal UTF-8 string \"" + escape(s) + "\" at position " + std::to_string(position) + ": " + reason )
  383. {}
  384. illegal_utf::illegal_utf( std::u16string_view s, unsigned position, const std::string& reason)
  385. : std::runtime_error( "Illegal UTF-16 string \"" + escape(s) + "\" at position " + std::to_string(position) + ": " + reason )
  386. {}
  387. illegal_utf::illegal_utf( const std::string& msg )
  388. : std::runtime_error( msg )
  389. {}
  390. void assert_utf8(std::string_view s)
  391. {
  392. const char* begin = s.data();
  393. const char* const end = s.data() + s.size();
  394. try
  395. {
  396. while(begin<end)
  397. {
  398. UTF8::parse(begin, end); // ignore the output
  399. ++begin;
  400. }
  401. }
  402. catch(const utf_exception& e)
  403. {
  404. throw illegal_utf(s, begin - s.data(), e.reason());
  405. }
  406. }
  407. // creates a NFD string from s
  408. template<class CharT>
  409. std::u32string UTF<CharT>::fromUtf_decompose(std::basic_string_view<CharT> s)
  410. {
  411. std::u32string u32s;
  412. u32s.reserve( static_cast<std::size_t>(s.size()*1.25) );
  413. const CharT* begin = s.data();
  414. const CharT* end = s.data() + s.size();
  415. for(; begin<end; ++begin)
  416. {
  417. unsigned u = parse(begin, end);
  418. u32s += decompose_full(u);
  419. }
  420. canonicalOrdering(u32s); // works inplace.
  421. return u32s;
  422. }
  // Tells whether the character at C is blocked from the starter at L
  // (Unicode Standard, clause D115): C is blocked if some character B between
  // L and C has combining class 0, or a combining class >= that of C.
  //
  // combine() marks characters that were already consumed by a composition
  // with the value -1. Those characters no longer exist in the result, so they
  // must not block anything. (Their combining class would be looked up as 0,
  // which would block every character after the first successful composition.)
  template<class Iter>
  bool blocked(Iter L, Iter C)
  {
      const unsigned class_of_C = canonicalClass(*C);
      Iter B = L; ++B;
      for(;B!=C;++B)
      {
          if( int(*B) < 0 )
              continue; // consumed by an earlier composition

          const unsigned class_of_B = canonicalClass(*B);
          if(class_of_B==0 || class_of_B>=class_of_C)
              return true;
      }
      return false;
  }
  434. template<class Iter>
  435. void combine(std::u32string& nfc, Iter starter, Iter next_starter)
  436. {
  437. Iter c = starter; ++c;
  438. for(;c!=next_starter; ++c)
  439. {
  440. if(!blocked(starter, c))
  441. {
  442. const unsigned starter_u = *starter;
  443. const unsigned c_u = *c;
  444. auto q = NFC_Compose.find( std::make_pair(starter_u,c_u) );
  445. if(q!=NFC_Compose.end())
  446. {
  447. *starter = q->second;
  448. *c = -1;
  449. }
  450. }
  451. }
  452. // now add the remaining/changed characters to the NFC string:
  453. for(Iter c = starter; c!=next_starter; ++c)
  454. {
  455. if( int(*c) >= 0)
  456. {
  457. nfc += *c;
  458. }
  459. }
  460. }
  461. // the nfd string is changed during composing process. So it works on a copy or call with std::move().
  462. std::u32string createNFC(std::u32string nfd)
  463. {
  464. if(nfd.size()<=1)
  465. return nfd;
  466. std::u32string nfc;
  467. nfc.reserve(nfd.size());
  468. auto starter = nfd.begin();
  469. while( starter != nfd.end() )
  470. {
  471. if( canonicalClass(*starter)!=0 )
  472. {
  473. nfc += *starter;
  474. ++starter;
  475. }else{
  476. auto next_starter = std::find_if(starter+1, nfd.end(), [](char32_t c){return canonicalClass(c)==0;} );
  477. combine(nfc, starter, next_starter);
  478. starter = next_starter;
  479. }
  480. }
  481. return nfc;
  482. }
  483. template<class CharT>
  484. bool UTF<CharT>::is_safe_NFC_start(std::basic_string_view<CharT> s)
  485. {
  486. if(s.empty() || (s[0] & 0x80)==0 ) // shortcut for empty string or starts with ASCII char
  487. {
  488. return true;
  489. }
  490. const CharT* begin = s.data();
  491. const CharT* const end = s.data() + s.size();
  492. try
  493. {
  494. const uint32_t u = parse(begin, end);
  495. if(NFC_No.count(u)) return false;
  496. if(NFC_Maybe.count(u)) return false;
  497. return true;
  498. }
  499. catch(const utf_exception& ue)
  500. {
  501. throw illegal_utf(s, begin-s.data(), ue.reason());
  502. }
  503. }
  504. template<class CharT>
  505. IsNFC UTF<CharT>::isNFC_quick_check(std::basic_string_view<CharT> s)
  506. {
  507. const CharT* begin = s.data();
  508. const CharT* const end = s.data() + s.size();
  509. try
  510. {
  511. unsigned last_cc = 0;
  512. while(begin<end)
  513. {
  514. const uint32_t u = parse(begin, end);
  515. const unsigned cc = canonicalClass(u);
  516. if( (cc!=0) && (last_cc > cc) )
  517. {
  518. return IsNFC::No;
  519. }
  520. if(NFC_No.count(u)) return IsNFC::No;
  521. if(NFC_Maybe.count(u)) return IsNFC::Maybe;
  522. ++begin;
  523. last_cc = cc;
  524. }
  525. }
  526. catch(const utf_exception& e)
  527. {
  528. throw illegal_utf(s, begin - s.data(), e.reason());
  529. }
  530. return IsNFC::Yes;
  531. }
  532. template<class CharT>
  533. bool UTF<CharT>::isNFC(std::basic_string_view<CharT> s)
  534. {
  535. switch( isNFC_quick_check(s) )
  536. {
  537. case IsNFC::Yes : return true;
  538. case IsNFC::No : return false;
  539. case IsNFC::Maybe:
  540. {
  541. return s == toNFC(s); // very expensive!
  542. }
  543. }
  544. throw -1; // could never happen, but compiler is too dumb to see this.
  545. }
  546. template<>
  547. bool UTF<char>::isUtf(const char* begin, const char* end)
  548. try{
  549. for(; begin<end; ++begin)
  550. {
  551. (void)parse(begin, end);
  552. }
  553. return true;
  554. }catch(const illegal_utf&)
  555. {
  556. return false;
  557. }
  558. // s is ''moved'' to the return value if possible so no copy is done here.
  559. template<class CharT>
  560. std::basic_string<CharT> UTF<CharT>::toNFC(std::basic_string_view<CharT> s)
  561. {
  562. if(isNFC_quick_check(s)==IsNFC::Yes)
  563. return std::basic_string<CharT>{s};
  564. return generate( createNFC( fromUtf_decompose(s) ));
  565. }
  566. template<>
  567. size_t UTF<char>::utf_length(std::u32string_view s)
  568. {
  569. size_t len = 0;
  570. for(const char32_t c : s)
  571. {
  572. if(c <= 0x7f)
  573. {
  574. len += 1;
  575. }else if(c<=0x7ff)
  576. {
  577. len += 2;
  578. }else if(c<=0xffff)
  579. {
  580. if(c>=0xD800 && c<=0xDFFF)
  581. {
  582. throw unexpected_surrogate(c);
  583. }
  584. len += 3;
  585. }else if(c<=0x10ffff)
  586. {
  587. len += 4;
  588. }else{
  589. throw too_big(0, c);
  590. }
  591. }
  592. return len;
  593. }
  594. template<>
  595. size_t UTF<char16_t>::utf_length(std::u32string_view s)
  596. {
  597. size_t len = 0;
  598. for(const char32_t c : s)
  599. {
  600. if(c <= 0xffff)
  601. {
  602. if(c>=0xD800 && c<=0xDFFF)
  603. {
  604. throw unexpected_surrogate(c);
  605. }
  606. len += 1;
  607. }else if(c<=0x10ffff)
  608. {
  609. len += 2;
  610. }else{
  611. throw too_big(0, c);
  612. }
  613. }
  614. return len;
  615. }
  616. template<class CharT>
  617. UTF<CharT>::nfc_string::nfc_string(StringView src)
  618. : s{ UTF<CharT>::toNFC(src) }
  619. {}
  620. template<class CharT>
  621. UTF<CharT>::nfc_string::nfc_string(String&& src)
  622. : s{ isNFC_quick_check(src)==IsNFC::Yes ? std::move(src) : toNFC(src) }
  623. {}
  624. template<class CharT>
  625. typename
  626. UTF<CharT>::nfc_string& UTF<CharT>::nfc_string::assign(StringView src)
  627. {
  628. s = toNFC(src);
  629. return *this;
  630. }
  631. template<class CharT>
  632. typename
  633. UTF<CharT>::nfc_string& UTF<CharT>::nfc_string::assign(String&& src)
  634. {
  635. s = (isNFC_quick_check(src)==IsNFC::Yes) ? std::move(src) : toNFC(src);
  636. return *this;
  637. }
  638. template<class CharT>
  639. typename
  640. UTF<CharT>::nfc_string& UTF<CharT>::nfc_string::push_back(CharT c)
  641. {
  642. s += c;
  643. if( !is_safe_NFC_start(StringView{&c, 1}) )
  644. {
  645. normalize();
  646. }
  647. return *this;
  648. }
  649. template<class CharT>
  650. typename
  651. UTF<CharT>::nfc_string& UTF<CharT>::nfc_string::operator+=(StringView sv)
  652. {
  653. const String& sv_nfc = toNFC(sv);
  654. s += sv_nfc;
  655. if( !is_safe_NFC_start(sv_nfc) )
  656. {
  657. normalize();
  658. }
  659. return *this;
  660. }
  661. template<class CharT>
  662. typename
  663. UTF<CharT>::nfc_string& UTF<CharT>::nfc_string::operator+=(const UTF<CharT>::nfc_string& ns)
  664. {
  665. s += ns.get();
  666. if( !is_safe_NFC_start(ns) )
  667. {
  668. normalize();
  669. }
  670. return *this;
  671. }
  672. template<class CharT>
  673. bool UTF<CharT>::nfc_string::starts_with(StringView sv) const noexcept
  674. {
  675. return (s.size() >= sv.size())
  676. && (StringView{s.data(), sv.size()} == sv);
  677. }
  678. template<class CharT>
  679. bool UTF<CharT>::nfc_string::ends_with(StringView sv) const noexcept
  680. {
  681. return (s.size() >= sv.size())
  682. && (StringView{s.data() + s.size() - sv.size(), sv.size()} == sv);
  683. }
  684. template<class CharT>
  685. typename
  686. UTF<CharT>::nfc_string UTF<CharT>::nfc_string::substr(std::size_t pos, std::size_t count) const
  687. {
  688. return nfc_string{s.substr(pos,count)};
  689. }
  690. template<class CharT>
  691. void UTF<CharT>::nfc_string::normalize()
  692. {
  693. if(isNFC_quick_check(s) != IsNFC::Yes)
  694. {
  695. s = generate( createNFC( fromUtf_decompose(s) ));
  696. }
  697. }
  698. // convenience function to avoid ::strdup(pEp::toNFC<char>(text).c_str());
  699. // and unecessary temporary std::string etc.
  700. char* strdup_NFC(std::string_view s)
  701. {
  702. if(UTF8::isNFC_quick_check(s)==IsNFC::Yes)
  703. return ::new_string(s.data(), s.size());
  704. // implement the hard way more efficient
  705. const std::u32string& u32 = createNFC( UTF8::fromUtf_decompose(s) );
  706. const size_t out_len = UTF8::utf_length(u32);
  707. char* ret = ::new_string(nullptr, out_len );
  708. char* iter{ret};
  709. for(const char32_t c : u32)
  710. {
  711. UTF8::generate(c, iter);
  712. }
  713. if(iter > ret+out_len) // should never happen. ;)
  714. {
  715. throw std::logic_error("internal error: strdup_NFC() exceeded output string size");
  716. }
  717. return ret;
  718. }
  719. pEp_identity *identity_dup_NFC(const ::pEp_identity* value)
  720. {
  721. ::pEp_identity* result = (::pEp_identity*) malloc(sizeof(::pEp_identity));
  722. if (!result)
  723. throw std::bad_alloc();
  724. memcpy(result, value, sizeof(::pEp_identity));
  725. result->address = pEp::strdup_NFC(value->address);
  726. result->fpr = pEp::strdup_NFC(value->fpr);
  727. result->user_id = pEp::strdup_NFC(value->user_id);
  728. result->username = pEp::strdup_NFC(value->username);
  729. return result;
  730. }
  731. ::identity_list* identity_list_dup_NFC(const ::identity_list* value)
  732. {
  733. ::identity_list* result = ::new_identity_list(nullptr);
  734. if (!result)
  735. throw std::bad_alloc();
  736. const ::identity_list* il = value;
  737. ::identity_list* ir = result;
  738. for (; il && il->ident; il = il->next) {
  739. ir = ::identity_list_add(ir, identity_dup_NFC(il->ident));
  740. if (!ir)
  741. throw std::bad_alloc();
  742. }
  743. return result;
  744. }
  745. template class UTF<char>;
  746. template class UTF<char16_t>;
  747. // used only to initialize the NFC Compose mapping:
  748. std::map< std::pair<unsigned, unsigned>, unsigned> generate_nfc_compose()
  749. {
  750. std::map< std::pair<unsigned, unsigned>, unsigned> m;
  751. for(const auto& decomp : NFC_Decompose)
  752. {
  753. if(decomp.second.second >= 0) // skip singleton decompositions
  754. {
  755. m[ decomp.second ] = decomp.first;
  756. }
  757. }
  758. return m;
  759. }
  760. } // end of namespace pEp