diff --git a/src/bodyparser.cc b/src/bodyparser.cc index 1ce64f5..aa94d97 100644 --- a/src/bodyparser.cc +++ b/src/bodyparser.cc @@ -129,13 +129,13 @@ struct has_mimetype }; -void set_longmsg(message* msg, const Message& m) +void set_longmsg(message* msg, const MimeHeaders& mh, const BodyLines& body) { - const sv txt_charset = header_value( m.mh.tparams, "charset" ); - const sv format = header_value( m.mh.tparams, "format"); - const sv delsp = header_value( m.mh.tparams, "delsp"); + const sv txt_charset = header_value( mh.tparams, "charset" ); + const sv format = header_value( mh.tparams, "format"); + const sv delsp = header_value( mh.tparams, "delsp"); - msg->longmsg = create_string(m.body, txt_charset, m.mh.decoder ); + msg->longmsg = create_string(body, txt_charset, mh.decoder ); if(format.size()) { add_opt_field(msg, Pseudo_Header_Format, format); @@ -146,98 +146,51 @@ void set_longmsg(message* msg, const Message& m) } } -// handle multipart/alternative -void handle_multipart_alternative(message* msg, MultipartMessage& vm) + +void handle_multipart(message* msg, const MimeHeaders& mh, const BodyLines& body, unsigned level = 1) { - // only add to msg->longmsg if not already set - auto first_text = msg->longmsg ? vm.cend() : std::find_if(vm.cbegin(), vm.cend(), has_mimetype("text/plain") ); - if(first_text != vm.end()) + const sv boundary = header_value(mh.tparams, "boundary"); + MultipartMessage mm = parse_multipart( body, boundary ); + LOG << "MULTIPART/" << mh.subtype << ": " << mm.size() << " parts. Boundary = “" << boundary << "” :\n"; + + LOG << "MM.size=" << mm.size() << ", level=" << level << ":\n"; + for(const auto& m : mm) { - LOG << "ALT-TEXT: MH" << first_text->mh << "\n"; - set_longmsg(msg, *first_text); + LOG << "°°M: " << m << "\n"; } - // only add to msg->longmsg_formatted if not already set - auto first_html = msg->longmsg_formatted ? 
vm.cend() : std::find_if(vm.cbegin(), vm.cend(), has_mimetype("text/html") ); - if(first_html != vm.end()) + // All "multipart" MimeTypes: handle as "multipart/mixed": + for(const Message& m : mm) { - LOG << "ALT-HTML: MH" << first_html->mh << "\n"; - const sv html_charset = header_value( first_html->mh.tparams, "charset" ); - msg->longmsg_formatted = create_string(first_html->body, html_charset, first_html->mh.decoder ); - }else - { - auto mrel = std::find_if(vm.cbegin(), vm.cend(), has_mimetype("multipart/related") ); - if(mrel != vm.end()) + if(m.mh.type == "multipart") { - MultipartMessage vmr = parse_multipart( mrel->body, mrel->boundary() ); - first_html = msg->longmsg_formatted ? vmr.cend() : std::find_if(vmr.cbegin(), vmr.cend(), has_mimetype("text/html") ); - if(first_html != vmr.cend()) - { - LOG << "ALT-RELATED-HTML: MH" << first_html->mh << "\n"; - const sv html_charset = header_value( first_html->mh.tparams, "charset" ); - msg->longmsg_formatted = create_string(first_html->body, html_charset, first_html->mh.decoder ); - } - - // move the remaining parts to the topmost MIME tree - for(auto m = vmr.cbegin(); m != vmr.cend(); ++m) + if(level < MaxMultipartNestingLevel) { - if(m != first_html) - { - add_attachment(msg, m->body, m->mh); - } + handle_multipart(msg, m.mh, m.body, level+1); + }else{ + add_attachment(msg, m.body, m.mh); } - vm.erase(mrel); // don't handle that part as a remaining attachment - first_html = vm.end(); // to avoid to add it to msg->longmsg_formatted again - } - } - - for(auto m = vm.cbegin(); m != vm.cend(); ++m) - { - if(m != first_html && m!=first_text) - { - add_attachment(msg, m->body, m->mh); + continue; } - } -} - - -void handle_multipart(message* msg, const MimeHeaders& mh, MultipartMessage& mm) -{ - LOG << "MM.size=" << mm.size() << '\n'; - for(const auto& m : mm) - { - LOG << "°°M: " << m << "\n"; - } - if(mh.subtype == "alternative" || mh.subtype == "related" ) - { - handle_multipart_alternative(msg, mm); - }else // All 
other "multipart" MimeTypes: handle as "multipart/mixed": - { - for(auto q=mm.cbegin(); q!=mm.cend(); ++q) + if(m.mh.dispo_type == PEP_CONTENT_DISP_INLINE) { - const auto mime_type = q->mh.mime_type(); - if(mime_type=="multipart/alternative" || mime_type=="multipart/related" ) - { - const sv boundary = header_value(q->mh.tparams, "boundary"); - MultipartMessage mmm = parse_multipart(q->body, boundary ); - LOG << "Alternative in MM: " << mmm.size() << " parts. Boundary = “" << boundary << "” :\n"; - handle_multipart_alternative(msg, mmm); - }else if(mime_type=="text/plain" && msg->longmsg==nullptr) + const auto mime_type = m.mh.mime_type(); + if(mime_type=="text/plain" && msg->longmsg==nullptr) { // the first "text/plain" part is handeld specially: - const sv mc_charset = header_value( q->mh.tparams, "charset" ); - msg->longmsg = create_string(q->body, mc_charset, q->mh.decoder ); + set_longmsg(msg, m.mh, m.body); + continue; }else if(mime_type=="text/html" && msg->longmsg_formatted==nullptr) { - // stange mailer that send HTML body, no plaintext body: - const sv mc_charset = header_value( q->mh.tparams, "charset" ); - msg->longmsg_formatted = create_string(q->body, mc_charset, q->mh.decoder ); - }else - { - add_attachment(msg, q->body, q->mh); + // first inline "text/html" part goes to longmsg_formatted + const sv mc_charset = header_value( m.mh.tparams, "charset" ); + msg->longmsg_formatted = create_string(m.body, mc_charset, m.mh.decoder ); + continue; } } + + add_attachment(msg, m.body, m.mh); } } @@ -251,7 +204,7 @@ void handle_mime(message* msg, const MimeHeaders& mh, const BodyLines& body) if(mh.subtype == "plain" && msg->longmsg==nullptr) { // put it in msg->longmsg - msg->longmsg = create_string(body, charset, mh.decoder); + set_longmsg(msg, mh, body); }else if(mh.subtype=="html" && msg->longmsg_formatted==nullptr) { // put it in msg->longmsg_formatted @@ -262,10 +215,7 @@ void handle_mime(message* msg, const MimeHeaders& mh, const BodyLines& body) } }else 
if(mh.type == "multipart") { - const sv boundary = header_value(mh.tparams, "boundary"); - MultipartMessage mm = parse_multipart( body, boundary ); - LOG << "MULTIPART/" << mh.subtype << ": " << mm.size() << " parts. Boundary = “" << boundary << "” :\n"; - handle_multipart(msg, mh, mm); + handle_multipart(msg, mh, body, 1); }else if(mh.type == "message") { // TODO: What shall I do with this MimeType? @@ -290,7 +240,7 @@ void parse_body(message* msg, const HeaderSection& headers, const BodyLines& bod }else{ // Non-MIME mail LOG << "<<< NO_MIME_MAIL >>> " << body.size() << " body lines.\n"; sv combined_body = combineLines(body); - if(isUtf8(combined_body.data(), combined_body.data()+combined_body.size()) ) + if(isUtf8(combined_body.begin(), combined_body.end()) ) { const std::string& nfc_string = toNFC( combined_body ); // FIXME: double copy! :-(( msg->longmsg = new_string(nfc_string.c_str(), nfc_string.size()); // FIXME: 3rd copy! :-((( diff --git a/src/mime_headers.hh b/src/mime_headers.hh index dc47feb..d989225 100644 --- a/src/mime_headers.hh +++ b/src/mime_headers.hh @@ -32,7 +32,7 @@ namespace pEpMIME { explicit ContentDisposition(sv header_line); - content_disposition_type dispo_type; + content_disposition_type dispo_type = PEP_CONTENT_DISP_INLINE; std::vector dparams; }; diff --git a/src/pEpMIME.hh b/src/pEpMIME.hh index 7f91fd0..c92aefe 100644 --- a/src/pEpMIME.hh +++ b/src/pEpMIME.hh @@ -17,6 +17,14 @@ namespace pEpMIME { +// Multipart messages are parsed recursively up to this maximum nesting level. +// It should be large enough that all real-world mails can be parsed, but small +// enough that no stack overflow occurs on maliciously crafted messages. +// Multipart parts nested deeper than this are added as attachments instead of being parsed. +// 100 seems to be a good default value. +const unsigned MaxMultipartNestingLevel = 100; + + // Parse the given string loosely as an "Internet Message" that aims to be RFC 5322 // and MIME compliant (RFC 2046 etc.) 
// diff --git a/src/unittest_mime.cc b/src/unittest_mime.cc index 7a5a1d4..0fcc837 100644 --- a/src/unittest_mime.cc +++ b/src/unittest_mime.cc @@ -132,6 +132,12 @@ namespace unsigned nr_of_attachments; }; + std::ostream& operator<<(std::ostream& o, const TestEntry& te) + { + return o << "TE{ subject=«" << te.subject << "» } "; + } + + const std::string mail_types_common = "From: Alice \r\n" "To: Bob \r\n"