p≡p MIME library
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

270 lines
8.6 KiB

2 years ago
2 years ago
2 years ago
2 years ago
  1. // This file is under GNU General Public License 3.0
  2. // see LICENSE.txt
  #include "bodyparser.hh"
  #include "pEpMIME_internal.hh"
  #include "mime_headers.hh"
  #include "message.hh"
  #include "rules.hh"
  #include "base64.hxx"
  #include "headerparser.hh" // for add_opt_field()
  #include "quoted_printable.hxx"
  #include "string_case.hh"
  #include "nfc.hh"
  #include "to_utf8.hh"
  #include <pEp/pEp_string.h>
  #include <boost/spirit/include/qi.hpp>
  #include <boost/spirit/include/phoenix.hpp>
  #include <boost/fusion/include/adapt_struct.hpp>
  #include <algorithm>
  #include <string>
  #include <vector>
  18. namespace qi = boost::spirit::qi;
  19. namespace px = boost::phoenix;
  20. namespace pEpMIME
  21. {
  22. typedef std::vector<Message> MultipartMessage;
  23. MultipartMessage parse_multipart(const BodyLines& body, const sv& boundary)
  24. {
  25. bool is_last = false;
  26. qi::rule<const char*, qi::unused_type()> is_delimiter_parser = qi::lit("--") >> qi::lit(boundary.data())
  27. >> -qi::lit("--")[ px::ref(is_last) = true] >> qi::omit[*qi::char_(" \t")];
  28. MultipartMessage vm;
  29. bool after_preamble = false;
  30. BodyLines part;
  31. LOG << "Parse_Multipart: " << body.size() << " body lines. bounardy=“" << boundary << "”. \n";
  32. for(const auto& line : body)
  33. {
  34. is_last = false;
  35. auto begin = line.cbegin();
  36. const bool is_delimiter = qi::parse( begin, line.cend(), is_delimiter_parser );
  37. if(is_delimiter)
  38. {
  39. LOG << "\t Line “" << line << "” is " << (is_last ? "LAST ":"") << "delimiter!\n";
  40. if(after_preamble)
  41. {
  42. vm.emplace_back( part );
  43. part.clear();
  44. }else{
  45. after_preamble = true;
  46. }
  47. if(is_last == true)
  48. {
  49. break;
  50. }
  51. }else{
  52. if(after_preamble)
  53. {
  54. part.emplace_back(line);
  55. }
  56. }
  57. }
  58. return vm;
  59. }
  60. char* create_string(const BodyLines& body, const sv& charset, MimeHeaders::Decoder decoder)
  61. {
  62. if(body.empty())
  63. return nullptr;
  64. size_t decoded_size = 0;
  65. char* decoded = decoder(body, decoded_size);
  66. LOG << "CREATE_STRING: " << body.size() << " body lines into " << decoded_size << " raw octets (charset=\"" << charset << "\")\n";
  67. if(charset=="UTF-8" || charset=="UTF8")
  68. {
  69. // Move all NUL bytes to the end where they don't hurt.
  70. std::remove(decoded, decoded + decoded_size, '\0' );
  71. return decoded; // fine. :-)
  72. }else{
  73. // Sigh, the hard way. At the moment with a lot of unecessary copying. :-/
  74. // Rule 1: Make it work. Profile. Make it fast. In this order.
  75. std::string converted = to_utf8((charset.empty() ? "us-ascii" : charset), sv(decoded, decoded_size) ); // 1st copy...
  76. // remove any NUL bytes
  77. converted.erase( std::remove(converted.begin(), converted.end(), '\0'), converted.end() );
  78. pEp_free(decoded);
  79. return new_string( converted.data(), converted.size() ); // copy again. :'-(
  80. }
  81. }
  82. void add_attachment(message* msg, const BodyLines& body, const MimeHeaders& mh)
  83. {
  84. size_t decoded_size = 0;
  85. char* decoded = mh.decoder(body, decoded_size);
  86. sv filename = header_value(mh.dparams, "filename");
  87. LOG << "ATTACHMENT filename=“" << filename << "\n";
  88. if(filename.empty()) // no "filename" field in Content-Disposition?
  89. {
  90. filename = header_value(mh.tparams, "name"); // legacy: use "name" field from Content-Type header
  91. LOG << "ATTACHMENT name=“" << filename << "\n";
  92. }
  93. const std::string content_type = mh.mime_type();
  94. if( (msg->attachments==nullptr) && (content_type=="message/rfc822") ) // very special requirement. See MIME-12
  95. {
  96. const sv forwarded = header_value( mh.tparams, "forwarded");
  97. if(forwarded.size())
  98. {
  99. add_opt_field(msg, Pseudo_Header_Forwarded, forwarded);
  100. }
  101. }
  102. bloblist_t* bl = bloblist_add(msg->attachments, decoded, decoded_size, content_type.c_str(), (filename.empty()? nullptr : filename.data()) );
  103. if(msg->attachments==nullptr)
  104. {
  105. msg->attachments = bl;
  106. }
  107. }
  108. struct has_mimetype
  109. {
  110. has_mimetype(const char* _mime_type)
  111. : mt(_mime_type)
  112. {}
  113. bool operator()(const Message& m) const
  114. {
  115. return m.mh.mime_type() == mt;
  116. }
  117. const char* mt;
  118. };
  119. void set_longmsg(message* msg, const MimeHeaders& mh, const BodyLines& body)
  120. {
  121. const sv txt_charset = header_value( mh.tparams, "charset" );
  122. const sv format = header_value( mh.tparams, "format");
  123. const sv delsp = header_value( mh.tparams, "delsp");
  124. msg->longmsg = create_string(body, txt_charset, mh.decoder );
  125. if(format.size())
  126. {
  127. add_opt_field(msg, Pseudo_Header_Format, format);
  128. }
  129. if(delsp.size())
  130. {
  131. add_opt_field(msg, Pseudo_Header_Delsp, delsp);
  132. }
  133. }
  134. void handle_multipart(message* msg, const MimeHeaders& mh, const BodyLines& body, unsigned level = 1)
  135. {
  136. const sv boundary = header_value(mh.tparams, "boundary");
  137. MultipartMessage mm = parse_multipart( body, boundary );
  138. LOG << "MULTIPART/" << mh.subtype << ": " << mm.size() << " parts. Boundary = “" << boundary << "” :\n";
  139. LOG << "MM.size=" << mm.size() << ", level=" << level << ":\n";
  140. for(const auto& m : mm)
  141. {
  142. LOG << "°°M: " << m << "\n";
  143. }
  144. // All "multipart" MimeTypes: handle as "multipart/mixed":
  145. for(const Message& m : mm)
  146. {
  147. if(m.mh.type == "multipart")
  148. {
  149. if(level < MaxMultipartNestingLevel)
  150. {
  151. handle_multipart(msg, m.mh, m.body, level+1);
  152. }else{
  153. add_attachment(msg, m.body, m.mh);
  154. }
  155. continue;
  156. }
  157. if(m.mh.dispo_type == PEP_CONTENT_DISP_INLINE)
  158. {
  159. const auto mime_type = m.mh.mime_type();
  160. if(mime_type=="text/plain" && msg->longmsg==nullptr)
  161. {
  162. // the first "text/plain" part is handeld specially:
  163. set_longmsg(msg, m.mh, m.body);
  164. continue;
  165. }else if(mime_type=="text/html" && msg->longmsg_formatted==nullptr)
  166. {
  167. // first inline "text/html" part goes to longmsg_formatted
  168. const sv mc_charset = header_value( m.mh.tparams, "charset" );
  169. msg->longmsg_formatted = create_string(m.body, mc_charset, m.mh.decoder );
  170. continue;
  171. }
  172. }
  173. add_attachment(msg, m.body, m.mh);
  174. }
  175. }
  176. void handle_mime(message* msg, const MimeHeaders& mh, const BodyLines& body)
  177. {
  178. if(mh.type == "text")
  179. {
  180. const sv charset = header_value( mh.tparams, "charset" );
  181. LOG << "\t Content-Type: " << mh.mime_type() << ", mh: " << mh << "\n";
  182. if(mh.subtype == "plain" && msg->longmsg==nullptr)
  183. {
  184. // put it in msg->longmsg
  185. set_longmsg(msg, mh, body);
  186. }else if(mh.subtype=="html" && msg->longmsg_formatted==nullptr)
  187. {
  188. // put it in msg->longmsg_formatted
  189. msg->longmsg_formatted = create_string(body, charset, mh.decoder);
  190. }else{
  191. // add it as attachment
  192. add_attachment(msg, body, mh);
  193. }
  194. }else if(mh.type == "multipart")
  195. {
  196. handle_multipart(msg, mh, body, 1);
  197. }else if(mh.type == "message")
  198. {
  199. // TODO: What shall I do with this MimeType?
  200. add_attachment(msg, body, mh);
  201. }else{
  202. // all other MIME types
  203. add_attachment(msg, body, mh);
  204. }
  205. }
  206. // parses the header and fill the parts in msg
  207. void parse_body(message* msg, const HeaderSection& headers, const BodyLines& body)
  208. {
  209. LOG << "ParseBody: " << body.size() << " body lines.\n";
  210. // anything that might be a MIME mail I try to parse as a MIME mail:
  211. if( header_value(headers, "mime-version").size() || header_value(headers, "content-type").size() )
  212. {
  213. MimeHeaders mh(headers);
  214. handle_mime(msg, mh, body);
  215. }else{ // Non-MIME mail
  216. LOG << "<<< NO_MIME_MAIL >>> " << body.size() << " body lines.\n";
  217. sv combined_body = combineLines(body);
  218. if(isUtf8(combined_body.begin(), combined_body.end()) )
  219. {
  220. const std::string& nfc_string = toNFC( combined_body ); // FIXME: double copy! :-((
  221. msg->longmsg = new_string(nfc_string.c_str(), nfc_string.size()); // FIXME: 3rd copy! :-(((
  222. }else{
  223. char* pbody = msg->longmsg = new_string(combined_body.data(), combined_body.size());
  224. // no valid UTF-8? Hum, whatever it is, make it 7-bit ASCII for safety.
  225. std::for_each(pbody, pbody+combined_body.size(), [](char& c) { c &= 0x7f; } );
  226. }
  227. }
  228. }
  229. } // end of namespace pEpMIME