Merge with default

build-windows
Thomas 4 years ago
commit c641b2ce11

@ -26,6 +26,7 @@ libpEpMIME.a: pEpMIME.o pEpMIME_internal.o rules.o bodyparser.o \
unittests: unittest_mime.o unittest_nfc.o unittest_timestamp.o \
unittest_stringcase.o unittest_toutf8.o unittest_address.o \
unittest_rule.o unittest_subject.o unittest_base64.o \
unittest_qp.o print_message.o \
gtest-all.o gtest_main.o libpEpMIME.a
${CXX} ${LDFLAGS} -L${PREFIX}/lib -o $@ $^ -lpEpAdapter -lpEpEngine -lpthread -liconv

@ -1,5 +1,6 @@
#include "attachment.hh"
#include "pEpMIME_internal.hh"
#include "base64.hh"
#include <cstdio> // for snprintf()
#include <boost/algorithm/string/predicate.hpp>
@ -67,7 +68,11 @@ void Attachment::write_mime_headers(std::string& out) const
if(filename.empty())
return;
if(filename.size()<19)
// Nota bene: Don't support overlong Content-IDs because the RFCs don't allow line splitting here.
if(boost::algorithm::starts_with(filename, "cid://"))
{
out += "Content-ID: <" + filename.substr(6) + ">\r\n";
}else if(filename.size()<19)
{
out += " filename*=utf-8''";
percent_encode(out, filename);
@ -91,26 +96,84 @@ SAttachments parse_attachments(const bloblist_t* b)
SAttachments ret;
while(b)
{
if(b->filename && boost::algorithm::starts_with(b->filename, "cid://"))
ret.emplace_back(b);
b = b->next;
}
return ret;
}
// Tells whether a blob is an inline ("Content-ID") attachment.
// Returns true only if b->filename is set and begins with "cid://".
// 'b' itself is dereferenced unconditionally and must not be null.
bool is_inline(const bloblist_t* b)
{
    const char* const fname = b->filename;
    if(!fname)
    {
        return false;
    }
    return boost::algorithm::starts_with(fname, "cid://");
}
// body needs transport encoding if it is not "NETASCII with max. 78 chars per line".
bool need_transport_encoding(const sv body)
{
unsigned line_length = 0u;
unsigned state = 0u;
for(char b : body)
{
const unsigned char u = b;
if(u>126)
return true;
switch(u)
{
// "Content-ID" / inline image, e.g. for HTML mails
ret.i_att.emplace_back(b);
}else{
// "real" attachments.
ret.a_att.emplace_back(b);
case '\r' : if(state==0)
{
state='\r';
break;
}else{
return true;
}
case '\n' : if(state=='\r')
{
line_length=state=0; break;
}else{
return true;
}
default: {
if(u<' ' && u!='\t') // control characters except TAB
return true;
if(state!=0 || ++line_length > 78)
return true;
}
}
b=b->next;
}
return ret;
return false;
}
std::vector<Attachment> SAttachments::all() const
void generate_attachments(std::string& out, const SAttachments& att, sv delimiter, bool(*filter)(const Attachment&))
{
std::vector<Attachment> v; v.reserve(i_att.size() + a_att.size() );
v.insert(v.end(), i_att.begin(), i_att.end());
v.insert(v.end(), a_att.begin(), a_att.end());
return v;
for(const auto& q : att)
{
if(filter(q)==true)
{
out += "--";
out += delimiter;
out += "\r\n";
q.write_mime_headers(out);
if(q.need_te)
{
out += "Content-Transfer-Encoding: base64\r\n"
"\r\n";
base64::encode(out, q.data, 78, "\r\n");
}else{
out += "\r\n";
out += q.data;
out += "\r\n";
}
}
}
out += "--" + delimiter + "--\r\n";
}
} // end of namespace pEpMIME

@ -7,35 +7,67 @@
namespace pEpMIME
{
// body needs transport encoding if it is not "NETASCII with max. 78 chars per line".
bool need_transport_encoding(const sv body);
// true if b->filename starts with "cid://"
bool is_inline(const bloblist_t* b);
// refers to data in a bloblist_t. Does NOT own any data!
struct Attachment
{
explicit Attachment(const bloblist_t* b)
: data(b->size ? sv{b->value, b->size} : sv{})
, mime_type(b->mime_type)
, filename(exists(b->filename) ? b->filename : sv{})
, dtype(b->disposition)
{}
: data{b->size ? sv{b->value, b->size} : sv{}}
, mime_type{b->mime_type}
, filename{exists(b->filename) ? b->filename : sv{}}
, dtype{b->disposition}
, need_te{need_transport_encoding(data)}
{
if(::pEpMIME::is_inline(b))
{
dtype = PEP_CONTENT_DISP_INLINE;
}
}
Attachment(sv _data, sv _mime_type)
: data{_data}
, mime_type{_mime_type}
, filename{}
, dtype{ PEP_CONTENT_DISP_OTHER }
, need_te{ need_transport_encoding(data) }
{ }
void write_mime_headers(std::string& out) const;
bool is_inline() const
{
return dtype == PEP_CONTENT_DISP_INLINE;
}
sv data;
sv mime_type;
sv filename;
content_disposition_type dtype;
bool need_te; // need transport encoding
};
struct SAttachments
{
std::vector<Attachment> i_att; // inline attachments
std::vector<Attachment> a_att; // non-inline attachments;
// returns a vector containing i_att and a_att at once.
std::vector<Attachment> all() const;
};
typedef std::vector<Attachment> SAttachments;
SAttachments parse_attachments(const bloblist_t* att);
inline
bool all(const Attachment&) { return true; }
inline
bool is_inline(const Attachment& att) { return att.is_inline(); }
// Filter predicate: accepts exactly those attachments that are not inline.
inline
bool is_not_inline(const Attachment& att)
{
    return !att.is_inline();
}
void generate_attachments(std::string& out, const SAttachments& att, sv delimiter, bool(*filter)(const Attachment&) = all);
}
#endif // PEP_MIME_ATTACHMENT_HH

@ -60,29 +60,53 @@ extern const int8_t values[256];
return false;
}
//
// |-+-+-+-+-+-|-+-+-+-+-+-|-+-+-+-+-+-|-+-+-+-+-+-|
// | u0 | u1 | u2 | u3 |
// | | | | |
// |5 4 3 2 1 0|5 4 3 2 1 0|5 4 3 2 1 0|5 4 3 2 1 0|
// |-+-+-+-+-+-|-+-+-+-+-+-|-+-+-+-+-+-|-+-+-+-+-+-|
// |7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0|
// |-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|
// | | | |
// | Byte 0 | Byte 1 | Byte 2 |
// |-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|
//
// decodes base64-encoded 'input', skip whitespaces, throw if illegal character found in string
template<class InIter, class OutIter, class OutIter2>
void decode_iter(InIter begin, InIter end, OutIter& out, OutIter2 out_end)
{
uint8_t u0, u1, u2, u3;
while(begin != end)
{
if(out == out_end) throw OutputOverflow{};
const uint8_t u0 = fetch(begin, end);
if(u0==255)
break; // end of input data
u0 = fetch(begin, end);
u1 = fetch(begin, end);
u2 = fetch(begin, end);
u3 = fetch(begin, end);
const uint8_t u1 = fetch(begin, end);
const uint8_t u2 = fetch(begin, end);
const uint8_t u3 = fetch(begin, end);
if(u0!=255 && u1!=255) { *out = char( (u0 << 2) | (u1 >> 4) ); ++out; }
if(out == out_end) throw OutputOverflow{};
if(u1!=255)
{
if(out == out_end) throw OutputOverflow{};
*out = char( (u0 << 2) | (u1 >> 4) );
++out;
}
if(u2!=255) { *out = char( (u1 << 4) | (u2 >> 2) ); ++out;}
if(out == out_end) throw OutputOverflow{};
if(u2!=255)
{
if(out == out_end) throw OutputOverflow{};
*out = char( (u1 << 4) | (u2 >> 2) );
++out;
}
if(u3!=255) { *out = char( (u2 << 6) | (u3 ) ); ++out;}
if(u3!=255)
{
if(out == out_end) throw OutputOverflow{};
*out = char( (u2 << 6) | (u3 ) );
++out;
}
}
}

@ -24,45 +24,26 @@ bool contains_delimiter(sv haystack, sv delimiter_base)
}
// NUL-terminated body needs transport encoding if it is not "NETASCII with max. 78 chars per line".
bool need_transport_encoding(const sv body)
std::string create_delimiter(const std::vector<Attachment>& a)
{
unsigned line_length = 0u;
unsigned state = 0u;
for(char b : body)
{
const unsigned char u = b;
if(u>126)
return true;
switch(u)
{
case '\r' : if(state==0)
{
state='\r';
break;
}else{
return true;
}
case '\n' : if(state=='\r')
unsigned long long counter=0;
std::string delimiter = generate_delimiter(counter);
while( std::any_of(a.begin(), a.end(),
[&delimiter](const Attachment& att)
{
line_length=state=0; break;
}else{
return true;
return att.need_te==false && contains_delimiter( att.data, delimiter);
}
default: {
if(u<' ' && u!='\t') // control characters except TAB
return true;
if(state!=0 || ++line_length > 78)
return true;
}
}
)
)
{
++counter;
delimiter = generate_delimiter(counter);
}
return false;
return delimiter;
}
void generate_body(std::string& smsg, sv mime_type, sv body, bool transport_encode)
{
smsg += "Content-Type: "s + mime_type + "; charset=UTF-8;\r\n"s;
@ -107,28 +88,10 @@ void generate_ma_body(std::string& smsg, sv plain, sv html, bool transport_encod
void generate_mm_body(std::string& smsg, sv mime_type, sv body, const std::vector<Attachment>& a, bool transport_encode)
{
if(a.empty())
return;
std::vector<Attachment> a2{a};
a2.emplace_back(body, mime_type);
std::set<unsigned> unencoded_attachments;
for(unsigned u=0; u<a.size(); ++u)
{
if(transport_encode==false && !need_transport_encoding(a[u].data))
{
unencoded_attachments.insert(u);
}
}
unsigned long long counter=0;
std::string delimiter = generate_delimiter(counter);
while( std::any_of(unencoded_attachments.begin(), unencoded_attachments.end(),
[&](unsigned u) { return contains_delimiter( a[u].data, delimiter); }
)
)
{
++counter;
delimiter = generate_delimiter(counter);
}
const std::string delimiter = create_delimiter(a2);
smsg += "Content-Type: multipart/mixed; boundary=\"" + delimiter + "\";\r\n"
"\r\n"; // end of header section
@ -139,17 +102,110 @@ void generate_mm_body(std::string& smsg, sv mime_type, sv body, const std::vecto
generate_body(smsg, mime_type, body, transport_encode);
}
for(const auto& q : a)
generate_attachments(smsg, a, delimiter);
}
// complex MIME structures, depending on "det"
// see: https://dev.pep.foundation/libpEpMIME
void generate_complex_body(std::string& smsg, unsigned det, const message* msg, const std::vector<Attachment>& a, bool transport_encode)
{
std::vector<Attachment> a2{a};
if(msg->longmsg)
a2.emplace_back(msg->longmsg, "text/plain");
if(msg->longmsg_formatted)
a2.emplace_back(msg->longmsg_formatted, "text/html");
// basic delimiter:
const std::string delimiter = create_delimiter(a2) + "/" + std::to_string(det) + "/";
switch(det)
{
smsg += "--" + delimiter + "\r\n";
q.write_mime_headers(smsg);
smsg += "Content-Transfer-Encoding: base64\r\n"
"\r\n";
base64::encode(smsg, q.data, 78, "\r\n");
case 6: smsg += "Content-Type: multipart/related; boundary=\"" + delimiter + "\";\r\n"
"\r\n" // end of header section
"--" + delimiter + "\r\n";
generate_body(smsg, "text/html", msg->longmsg_formatted, transport_encode);
generate_attachments(smsg, a, delimiter);
break;
case 7:
{ // m/a{ text/plain, m/rel{ text/html, att } }
const std::string delimiter_A = delimiter + "A=";
smsg += "Content-Type: multipart/alternative; boundary=\"" + delimiter_A + "\";\r\n"
"\r\n"
"--" + delimiter_A + "\r\n";
generate_body(smsg, "text/plain", msg->longmsg, transport_encode);
smsg += "--" + delimiter_A + "\r\n";
const std::string delimiter_R = delimiter + "R=";
smsg += "Content-Type: multipart/related; boundary=\"" + delimiter_R + "\";\r\n"
"\r\n"
"--" + delimiter_R + "\r\n";
generate_body(smsg, "text/html", msg->longmsg_formatted, transport_encode);
generate_attachments(smsg, a, delimiter_R);
smsg += "--" + delimiter_A + "--\r\n";
break;
}
case 11:
{ // m/mix{ m/a{ text/plain, text/html}, att }
const std::string delimiter_M = delimiter + "M=";
const std::string delimiter_A = delimiter + "A=";
smsg += "Content-Type: multipart/mixed; boundary=\"" + delimiter_M + "\";\r\n"
"\r\n"
"--" + delimiter_M + "\r\n";
smsg += "Content-Type: multipart/alternative; boundary=\"" + delimiter_A + "\";\r\n"
"\r\n"
"--" + delimiter_A + "\r\n";
generate_body(smsg, "text/plain", msg->longmsg, transport_encode);
smsg += "--" + delimiter_A + "\r\n";
generate_body(smsg, "text/html", msg->longmsg_formatted, transport_encode);
smsg += "--" + delimiter_A + "--\r\n"
"\r\n";
generate_attachments(smsg, a, delimiter_M);
break;
}
case 14:
{ // m/mix{ m/rel{ html, inline_att}, att }
const std::string delimiter_M = delimiter + "M=";
const std::string delimiter_R = delimiter + "R=";
smsg += "Content-Type: multipart/mixed; boundary=\"" + delimiter_M + "\";\r\n"
"\r\n"
"--" + delimiter_M + "\r\n";
smsg += "Content-Type: multipart/related; boundary=\"" + delimiter_R + "\";\r\n"
"\r\n"
"--" + delimiter_R + "\r\n";
generate_body(smsg, "text/html", msg->longmsg_formatted, transport_encode);
generate_attachments(smsg, a, delimiter_R, &is_inline );
generate_attachments(smsg, a, delimiter_M, &is_not_inline );
break;
}
case 15: // all doodads, bells and whistles
{ // m/mix { m/a{ text, m/rel{ html, inline } }, att }
const std::string delimiter_M = delimiter + "M=";
const std::string delimiter_A = delimiter + "A=";
const std::string delimiter_R = delimiter + "R=";
smsg += "Content-Type: multipart/mixed; boundary=\"" + delimiter_M + "\";\r\n"
"\r\n"
"--" + delimiter_M + "\r\n";
smsg += "Content-Type: multipart/alternative; boundary=\"" + delimiter_A + "\";\r\n"
"\r\n"
"--" + delimiter_A + "\r\n";
generate_body(smsg, "text/plain", msg->longmsg, transport_encode);
smsg += "--" + delimiter_A + "\r\n"
"Content-Type: multipart/related; boundary=\"" + delimiter_R + "\";\r\n"
"\r\n"
"--" + delimiter_R + "\r\n";
generate_body(smsg, "text/html", msg->longmsg_formatted, transport_encode);
generate_attachments(smsg, a, delimiter_R, &is_inline );
// closing of delimiter_R is done in generate_attachments()
smsg += "--" + delimiter_A + "--\r\n";
generate_attachments(smsg, a, delimiter_M, &is_not_inline);
break;
}
default:
throw std::logic_error( "Generate_complex_body() with det=" + std::to_string(det) + " is iffy." );
}
smsg += "--" + delimiter + "--\r\n";
}
} // end of namespace pEpMIME

@ -15,6 +15,10 @@ namespace pEpMIME
// generate "multipart/mixed" body
void generate_mm_body(std::string& smsg, sv mime_type, sv body, const std::vector<Attachment>& a, bool transport_encode);
// complex MIME structures, depending on "det"
// see: https://dev.pep.foundation/libpEpMIME
void generate_complex_body(std::string& smsg, unsigned det, const message* msg, const std::vector<Attachment>& a, bool transport_encode);
} // end of namespace pEpMIME
#endif // PEP_MIME_BODYGENERATOR_HH

@ -25,14 +25,15 @@ namespace px = boost::phoenix;
namespace pEpMIME
{
typedef std::vector<Message> MultipartMessage;
std::vector<Message> parse_multipart(const BodyLines& body, const sv& boundary)
MultipartMessage parse_multipart(const BodyLines& body, const sv& boundary)
{
bool is_last = false;
qi::rule<const char*, qi::unused_type()> is_delimiter_parser = qi::lit("--") >> qi::lit(boundary.data())
>> -qi::lit("--")[ px::ref(is_last) = true] >> qi::omit[*qi::char_(" \t")];
std::vector<Message> vm;
MultipartMessage vm;
bool after_preamble = false;
BodyLines part;
LOG << "Parse_Multipart: " << body.size() << " body lines. bounardy=“" << boundary << "”. \n";
@ -70,8 +71,14 @@ std::vector<Message> parse_multipart(const BodyLines& body, const sv& boundary)
char* create_string(const BodyLines& body, const sv& charset, MimeHeaders::Decoder decoder)
{
if(body.empty())
return nullptr;
size_t decoded_size = 0;
char* decoded = decoder(body, decoded_size);
LOG << "CREATE_STRING: " << body.size() << " body lines into " << decoded_size << " raw octets (charset=\"" << charset << "\")\n";
if(charset=="UTF-8" || charset=="UTF8")
{
return decoded; // fine. :-)
@ -119,85 +126,151 @@ struct has_mimetype
const char* mt;
};
// parses the header and fill the parts in msg
void parse_body(message* msg, const HeaderSection& headers, const BodyLines& body)
// handle multipart/alternative
void handle_multipart_alternative(message* msg, MultipartMessage& vm)
{
const std::string mime_version = header_value(headers, "mime-version").to_string();
MimeHeaders mh(headers);
// only add to msg->longmsg if not already set
auto first_text = msg->longmsg ? vm.cend() : std::find_if(vm.cbegin(), vm.cend(), has_mimetype("text/plain") );
if(first_text != vm.end())
{
LOG << "ALT-TEXT: MH" << first_text->mh << "\n";
const sv txt_charset = header_value( first_text->mh.tparams, "charset" );
msg->longmsg = create_string(first_text->body, txt_charset, first_text->mh.decoder );
}
if( mime_version == "1.0" ) // TODO: According to RFC 2048 there can be comments in the header field value. -.-
// only add to msg->longmsg_formatted if not already set
auto first_html = msg->longmsg_formatted ? vm.cend() : std::find_if(vm.cbegin(), vm.cend(), has_mimetype("text/html") );
if(first_html != vm.end())
{
// TODO: for whatever reason "string_view cts" does not work with qi::parse(). WTF!
if(mh.type == "text")
{
const sv charset = header_value( mh.tparams, "charset" );
if(mh.subtype == "plain")
{
// put it in msg->longmsg
msg->longmsg = create_string(body, charset, mh.decoder);
}else if(mh.subtype=="html")
{
// put it in msg->longmsg_formatted
msg->longmsg_formatted = create_string(body, charset, mh.decoder);
}else{
// add it as attachment
add_attachment(msg, body, mh);
}
}else if(mh.type == "multipart")
LOG << "ALT-HTML: MH" << first_html->mh << "\n";
const sv html_charset = header_value( first_html->mh.tparams, "charset" );
msg->longmsg_formatted = create_string(first_html->body, html_charset, first_html->mh.decoder );
}else
{
auto mrel = std::find_if(vm.cbegin(), vm.cend(), has_mimetype("multipart/related") );
if(mrel != vm.end())
{
const sv boundary = header_value(mh.tparams, "boundary");
std::vector<Message> vm = parse_multipart( body, boundary );
LOG << "MULTIPART: " << vm.size() << " parts. Boundary = “" << boundary << "” :\n";
for(const auto& m : vm)
MultipartMessage vmr = parse_multipart( mrel->body, mrel->boundary() );
first_html = msg->longmsg_formatted ? vmr.cend() : std::find_if(vmr.cbegin(), vmr.cend(), has_mimetype("text/html") );
if(first_html != vmr.cend())
{
LOG << "####\n" << m << "\n";
LOG << "ALT-RELATED-HTML: MH" << first_html->mh << "\n";
const sv html_charset = header_value( first_html->mh.tparams, "charset" );
msg->longmsg_formatted = create_string(first_html->body, html_charset, first_html->mh.decoder );
}
if(mh.subtype == "alternative")
// move the remaining parts to the topmost MIME tree
for(auto m = vmr.cbegin(); m != vmr.cend(); ++m)
{
auto first_text = std::find_if(vm.cbegin(), vm.cend(), has_mimetype("text/plain") );
if(first_text != vm.end())
if(m != first_html)
{
const sv charset = header_value( first_text->mh.tparams, "charset" );
msg->longmsg = create_string(first_text->body, charset, first_text->mh.decoder );
add_attachment(msg, m->body, m->mh);
}
auto first_html = std::find_if(vm.cbegin(), vm.cend(), has_mimetype("text/html") );
if(first_html != vm.end())
{
const sv charset = header_value( first_html->mh.tparams, "charset" );
msg->longmsg_formatted = create_string(first_html->body, charset, first_html->mh.decoder );
}
for(auto m = vm.cbegin(); m != vm.cend(); ++m)
{
if(m != first_text && m!=first_html)
{
add_attachment(msg, m->body, m->mh);
}
}
}else // All other "multipart" MimeTypes: handle as "multipart/mixed":
}
vm.erase(mrel); // don't handle that part as a remaining attachment
first_html = vm.end(); // to avoid to add it to msg->longmsg_formatted again
}
}
for(auto m = vm.cbegin(); m != vm.cend(); ++m)
{
if(m != first_html && m!=first_text)
{
add_attachment(msg, m->body, m->mh);
}
}
}
void handle_multipart(message* msg, const MimeHeaders& mh, MultipartMessage& mm)
{
for(const auto& m : mm)
{
LOG << "####\n" << m << "\n";
}
if(mh.subtype == "alternative")
{
handle_multipart_alternative(msg, mm);
}else // All other "multipart" MimeTypes: handle as "multipart/mixed":
{
auto main_content = msg->longmsg ? mm.cend() : std::find_if(mm.cbegin(), mm.cend(), has_mimetype("text/plain") );
if(main_content != mm.cend())
{
// the first "text/plain" part is handled specially:
const sv mc_charset = header_value( main_content->mh.tparams, "charset" );
msg->longmsg = create_string(main_content->body, mc_charset, main_content->mh.decoder );
}else{
// strange mailer that sends an HTML body but no plaintext body:
main_content = msg->longmsg_formatted ? mm.cend() : std::find_if(mm.cbegin(), mm.cend(), has_mimetype("text/html") );
if(main_content != mm.cend())
{
for(const auto& m : vm)
{
add_attachment(msg, m.body, m.mh);
}
const sv mc_charset = header_value( main_content->mh.tparams, "charset" );
msg->longmsg_formatted = create_string(main_content->body, mc_charset, main_content->mh.decoder );
}
}else if(mh.type == "message")
}
for(auto q=mm.cbegin(); q!=mm.cend(); ++q)
{
// TODO: What shall I do with this MimeType?
add_attachment(msg, body, mh);
if(q != main_content)
add_attachment(msg, q->body, q->mh);
}
}
}
// Dispatches one MIME entity according to its top-level media type:
//  * "multipart": the body is split at its "boundary" parameter and handed to handle_multipart().
//  * "text": "plain" / "html" fill msg->longmsg / msg->longmsg_formatted, but only
//            while the respective field is still unset; any other text part
//            (or a second plain/html part) becomes an attachment.
//  * everything else, including "message": added as an attachment.
void handle_mime(message* msg, const MimeHeaders& mh, const BodyLines& body)
{
    if(mh.type == "multipart")
    {
        const sv boundary = header_value(mh.tparams, "boundary");
        MultipartMessage parts = parse_multipart( body, boundary );
        LOG << "MULTIPART/" << mh.subtype << ": " << parts.size() << " parts. Boundary = “" << boundary << "” :\n";
        handle_multipart(msg, mh, parts);
        return;
    }

    if(!(mh.type == "text"))
    {
        // TODO: What shall I do with the "message" MimeType?
        // For now it is treated like all other MIME types: stored as attachment.
        add_attachment(msg, body, mh);
        return;
    }

    const sv charset = header_value( mh.tparams, "charset" );
    LOG << "\t Content-Type: " << mh.mime_type() << ", mh: " << mh << "\n";

    const bool fill_plain = (msg->longmsg == nullptr) && (mh.subtype == "plain");
    if(fill_plain)
    {
        // put it in msg->longmsg
        msg->longmsg = create_string(body, charset, mh.decoder);
        return;
    }

    const bool fill_html = (msg->longmsg_formatted == nullptr) && (mh.subtype == "html");
    if(fill_html)
    {
        // put it in msg->longmsg_formatted
        msg->longmsg_formatted = create_string(body, charset, mh.decoder);
        return;
    }

    // neither slot could take this text part: add it as attachment
    add_attachment(msg, body, mh);
}
// parses the header and fill the parts in msg
void parse_body(message* msg, const HeaderSection& headers, const BodyLines& body)
{
const std::string mime_version = header_value(headers, "mime-version").to_string();
LOG << "ParseBody: " << body.size() << " body lines.\n";
if( mime_version == "1.0" ) // TODO: According to RFC 2048 there can be comments in the header field value. -.-
{
MimeHeaders mh(headers);
handle_mime(msg, mh, body);
}else{ // Non-MIME mail
LOG << "<<< NO_MIME_MAIL >>> " << body.size() << " body lines.\n";
sv combined_body = combineLines(body);
if(isUtf8(combined_body.data(), combined_body.data()+combined_body.size()) )
{
const std::string& nfc_string = toNFC( std::string(combined_body) ); // FIXME: double copy! :-((
const std::string& nfc_string = toNFC( combined_body ); // FIXME: double copy! :-((
msg->longmsg = new_string(nfc_string.c_str(), nfc_string.size()); // FIXME: 3rd copy! :-(((
}else{
char* pbody = msg->longmsg = new_string(combined_body.data(), combined_body.size());

@ -11,28 +11,75 @@ using namespace std::string_literals;
namespace
{
typedef std::string (*TransportEncoder)(const std::string& name, const std::string& value);
using qp::HeaderType;
std::string dont_encode(const std::string& name, const std::string& value)
typedef std::string (*TransportEncoder)(const Rule& rule, sv name, sv value, HeaderType type);
std::string dont_encode(const Rule&, sv name, sv value, HeaderType type)
{
return name.empty() ? value : (name + ": " + value);
return name.empty() ? std::string(value) : (std::string(name) + ": " + value);
}
// Builds the header line "name: value", folding it at spaces so that no output
// line grows beyond 76 characters.
//
//  name  : header field name (written before the ':').
//  value : header field value; it is split into words at ' ' characters.
//  type  : only forwarded to dont_encode() / qp::encode_header().
//
// Returns:
//  * the result of dont_encode() if the whole header is short enough,
//  * otherwise the folded header, where each continuation line starts with "\r\n ",
//  * or the result of qp::encode_header() if a single word is too long to fit
//    even on a line of its own.
//
// NOTE(review): for an empty 'name' the long-value path still emits a leading ':',
// unlike dont_encode() — confirm whether callers can reach that combination.
std::string just_fold(sv name, sv value, HeaderType type)
{
    if((name.size() + value.size() + 2) < 76)
        return dont_encode(cfws /* just a dummy */, name, value, type);

    std::string ret{name};
    ret += ':';
    size_t line_len = ret.size(); // length of the output line currently being filled
    size_t value_pos = 0;
    while(value_pos<value.size())
    {
        // If no further space exists, next_ws is npos and substr() clamps to the rest of 'value'.
        const size_t next_ws = value.find(' ', value_pos);
        const sv next_word = value.substr(value_pos, next_ws - value_pos);
        if(line_len + next_word.size() < 76)
        {
            ret += ' ';
            line_len += next_word.size() + 1;
        }else{
            ret += "\r\n ";
            if(1 + next_word.size() > 76)
            {
                // Oh noez, toooo long word, so folding is not enough! *sigh*
                // -> QP-encoding can fold even within words. Ugly but better than exceed the line length limit.
                return qp::encode_header(name, value, type);
            }
            // The new line consists of the leading space plus the word appended below.
            // Without counting the word here, the following words would be packed
            // onto this line as if it were still empty and overrun the limit.
            line_len = 1 + next_word.size();
        }

        ret += next_word;
        value_pos = (next_ws == sv::npos ? next_ws : next_ws + 1); // +1 to skip the whitespace
    }
    return ret;
}
std::string encode_if_necessary(const std::string& name, const std::string& value)
std::string encode_if_necessary(const Rule& rule, sv name, sv value, HeaderType type)
{
if(value.empty())
return std::string();
Rule no_enc_needed = dot_atom_text % qi::char_(" ");
auto begin = value.begin();
bool b = qi::parse(begin, value.end(), no_enc_needed );
if(b && begin==value.end())
return match(rule, value) ? just_fold(name, value, type) : qp::encode_header(name, value, type);
}
std::string encode_local_part(sv local_part)
{
// create a "quoted-string" according to RFC 5322 sect.
std::string s;
s.reserve(local_part.size() + 3);
s += '"';
for(char c:local_part)
{
LOG << "IF_NECESS(" << value << "): " << *begin << " is_end=" << (begin==value.end()) << ".\n";
return dont_encode(name, value);
switch(c)
{
case '\"' : s += "\\\""; break;
case '\\' : s += "\\\\"; break;
default : s += c;
}
}
return qp::encode_header(name, value);
s += '"';
return s;
}
static const TransportEncoder encoder[2] = { &dont_encode, &encode_if_necessary };
@ -41,15 +88,31 @@ namespace
}
void generate(std::string& out, const std::string& header_name, const pEp_identity* id, bool transport_encode)
// Appends one identity to 'out' in the form  [display-name ' '] '<' address '>'.
//
//  out              : string the output is appended to.
//  header_name      : forwarded to the selected encoder together with the user name.
//  id               : identity to write; id->username and id->address may be absent.
//  transport_encode : selects the encoder used for the user name (see 'encoder' table).
//
// If the identity has no address, only the (optional) display name is written.
// The local part (everything before the last '@') is copied verbatim when it is
// a dot-atom-text, otherwise it is written as a quoted-string. The domain part is
// always copied verbatim.
void generate(std::string& out, sv header_name, const pEp_identity* id, bool transport_encode)
{
    // Both fields are optional (see the exists() checks below), so never stream a null pointer.
    LOG << "GEN_ID: " << (exists(id->username) ? id->username : "") << " | " << (exists(id->address) ? id->address : "") << std::endl;

    out += exists(id->username) ? encoder[transport_encode](phrase, header_name, id->username, qp::Word) + " " : std::string() ;
    if(!exists(id->address))
        return;

    out += '<';
    const sv address(id->address);
    const size_t last_at = address.rfind('@');
    // Everything before the last '@'. If there is no '@' at all, last_at is npos
    // and substr() yields the whole address.
    const sv local_part = address.substr(0, last_at);
    if( match(dot_atom_text, local_part) )
    {
        out += address;
    }else{
        out += encode_local_part(local_part);
        if(last_at != sv::npos)
        {
            out += address.substr(last_at); // from the last '@' (including) to the end of address. :-)
        }
    }
    out += '>';
}
void generate(std::string& out, const std::string& header_name, const identity_list* il, bool transport_encode)
void generate(std::string& out, sv header_name, const identity_list* il, bool transport_encode)
{
LOG << "GEN_IDList: " << identity_list_length(il) << " entries. " << std::endl;
@ -67,27 +130,30 @@ void generate(std::string& out, const std::string& header_name, const identity_l
}
void generate(std::string& out, const std::string& header_name, const stringlist_t* sl, bool transport_encode)
void generate(std::string& out, const Rule& rule, sv header_name, const stringlist_t* sl, bool transport_encode)
{
if( stringlist_length(sl) == 0)
return;
out += encoder[transport_encode](header_name, sl->value);
out += encoder[transport_encode](rule, header_name, sl->value, qp::Word);
sl = sl->next;
while(sl)
{
out += (transport_encode ? ",\r\n\t" : ", ") + encoder[transport_encode]("", sl->value);
out += (transport_encode ? ",\r\n\t" : ", ") + encoder[transport_encode](rule, "", sl->value, qp::Word);
sl = sl->next;
}
}
void generate_msgids(std::string& out, const std::string& header_name, const stringlist_t* sl, bool transport_encode)
void generate_msgids(std::string& out, sv header_name, const stringlist_t* sl, bool transport_encode)
{
if( stringlist_length(sl) == 0)
return;
out += header_name + ": <" + sl->value + ">";
out += header_name;
out += ": <";
out += sl->value;
out += ">";
sl = sl->next;
while(sl)
{
@ -104,7 +170,7 @@ void generate_header(std::string& smsg, const message* msg, bool transport_encod
LOG << "GEN_HDR: te = " << transport_encode << std::endl;
if(msg->id) smsg += "Message-ID: <"s + msg->id + ">\r\n";
if(msg->shortmsg) smsg += E("Subject", msg->shortmsg) + CRLF;
if(msg->shortmsg) smsg += E(phrase, "Subject", msg->shortmsg, qp::Text) + CRLF;
LOG << "\t smsg so far: " << smsg << std::endl;
@ -121,7 +187,7 @@ void generate_header(std::string& smsg, const message* msg, bool transport_encod
if(msg->reply_to) { generate(smsg, "Reply-To" , msg->reply_to , transport_encode); smsg += CRLF; }
if(msg->in_reply_to) { generate_msgids(smsg, "In-Reply-To", msg->in_reply_to, transport_encode); smsg += CRLF; }
if(msg->references ) { generate_msgids(smsg, "References" , msg->references , transport_encode); smsg += CRLF; }
if(msg->keywords) { generate(smsg, "Keywords" , msg->keywords , transport_encode); smsg += CRLF; }
if(msg->keywords) { generate(smsg, phrase, "Keywords" , msg->keywords , transport_encode); smsg += CRLF; }
const stringpair_list_t* spl = msg->opt_fields;
LOG << "GEN_HDR: " << stringpair_list_length( spl ) << " opt_fields.\n";
@ -132,11 +198,9 @@ void generate_header(std::string& smsg, const message* msg, bool transport_encod
auto q = headers.find(key_low);
if(q == headers.end())
{
// unknown header: just copy it
// unknown header: only encode if contained control characters or non-ASCII characters
LOG << "\t UNKNWON HDR: " << spl->value->key << " :: " << spl->value->value << " <<< \n";
smsg += spl->value->key;
smsg += ": ";
smsg += spl->value->value;
smsg += E( *(vchar | ws), spl->value->key, spl->value->value, qp::Text);
smsg += CRLF;
}else{
LOG << "\t KNWON HDR: " << spl->value->key << " :: low_key: " << q->first << " name(): " << q->second->name() << " <<< \n";

@ -3,13 +3,16 @@
#include "pEpMIME.hh"
#include "pEpMIME_internal.hh"
#include "rules.hh"
namespace pEpMIME
{
void generate(std::string& out, const std::string& header_name, const pEp_identity* id , bool transport_encode);
void generate(std::string& out, const std::string& header_name, const identity_list* il, bool transport_encode);
void generate(std::string& out, const std::string& header_name, const stringlist_t* sl , bool transport_encode);
void generate(std::string& out, sv header_name, const pEp_identity* id , bool transport_encode);
void generate(std::string& out, sv header_name, const identity_list* il, bool transport_encode);
// different header fields must fulfill different syntax rules. :-/
void generate(std::string& out, const Rule& rule, sv header_name, const stringlist_t* sl , bool transport_encode);
void generate_header(std::string& smsg, const message* msg, bool transport_encode);

@ -12,7 +12,6 @@
#include <pEp/stringlist.h>
#include <boost/spirit/include/qi_char.hpp>
#include <boost/spirit/include/qi.hpp> // TODO: find a more specific #include to reduce compile time. qi_omit.hh does not work, yet.
#include <deque>
#include <map>
#include <initializer_list>
@ -27,10 +26,10 @@ typedef std::vector<std::string> DS;
const Rule mchar = +(qi::char_ - qi::char_("<>\\()")); // accept more char values than RFC requires.
const Rule message_id = qi::omit[ *qi::char_("\t\n\r ")] >> qi::lit('<') >> mchar >> qi::lit('>');
const qi::rule<std::string::const_iterator, DS() > message_id_list = *message_id;
const qi::rule<sv::const_iterator, DS() > message_id_list = *message_id;
std::string robust_to_utf8(const std::string& s)
std::string robust_to_utf8(sv s)
{
std::string ret;
try{
@ -39,14 +38,17 @@ std::string robust_to_utf8(const std::string& s)
{
ret = to_utf8("ISO-8859-1", s);
}
// NUL bytes confuse C code, especially the Engine.
ret.erase( std::remove(ret.begin(), ret.end(), '\0'), ret.end() );
return ret;
}
static void add_opt_field(message* msg, const sv& name, const std::string& value)
static void add_opt_field(message* msg, const sv& name, sv value)
{
const std::string nfc_name = robust_to_utf8(std::string{name}); // TODO: use views/ranges to avoid copying
const std::string nfc_value = robust_to_utf8(std::string{value}); // TODO: use views/ranges to avoid copying
const std::string nfc_name = robust_to_utf8(name); // TODO: use views/ranges to avoid copying
const std::string nfc_value = robust_to_utf8(value); // TODO: use views/ranges to avoid copying
auto sp = new_stringpair( nfc_name.c_str(), nfc_value.c_str() );
auto f = stringpair_list_add( msg->opt_fields, sp);
@ -83,7 +85,7 @@ struct Discard : public HeaderBase
{
Discard() : HeaderBase(sv{}) {}
virtual void assign(message* msg, const std::string& s) override
virtual void assign(message* msg, sv s) override
{
// Do nothing, intentionally. So this header is discarded.
}
@ -118,14 +120,14 @@ template<class TM>
struct SimpleHeader : public OutputHeader<TM>
{
typedef OutputHeader<TM> Base;
TM (*in_fn)(const std::string&);
TM (*in_fn)(sv);
SimpleHeader(sv name, TM message::* m, TM(*in)(const std::string&), void(*out)(std::string&, const TM&, bool))
SimpleHeader(sv name, TM message::* m, TM(*in)(sv), void(*out)(std::string&, const TM&, bool))
: Base(name, m, out)
, in_fn{in}
{}
virtual void assign(message* msg, const std::string& s) override
virtual void assign(message* msg, sv s) override
{
msg->*Base::member = (*in_fn)(s);
}
@ -137,7 +139,7 @@ struct SimpleHeader : public OutputHeader<TM>
template<class TM, class TP>
struct RuleHeader : public OutputHeader<TM>
{
typedef qi::rule<std::string::const_iterator, TP()> rule_t;
typedef qi::rule<sv::const_iterator, TP()> rule_t;
typedef OutputHeader<TM> Base;
TM (*in_fn)(const TP&);
@ -149,12 +151,13 @@ struct RuleHeader : public OutputHeader<TM>
, rule{r}
{}
virtual void assign(message* msg, const std::string& s) override
virtual void assign(message* msg, sv s) override
{
std::string::const_iterator begin=s.begin();
sv ss(s);
sv::const_iterator begin=ss.begin();
TP t1;
// LOG << "<TRY TO PARSE \"" << s << "\" as " << typeid(T1).name() << ">\n";
if( qi::parse(begin, s.end(), rule, t1) )
if( qi::parse(begin, ss.end(), rule, t1) )
{
// LOG << "<ASSIGN OK>\n";
msg->*Base::member = (*in_fn)(t1);
@ -175,7 +178,7 @@ void just_copy(std::string& out, const std::string& value, bool transport_encode
// Headers known by RFC or convention, but stored only in msg->opt_fields
struct AuxHeader : public HeaderBase
{
typedef qi::rule<std::string::const_iterator, std::string()> rule_t;
// typedef qi::rule<sv::const_iterator, std::string()> rule_t;
typedef void (*out_fn_t)(std::string&, const std::string& value, bool transport_encode);
out_fn_t out_fn;
@ -185,7 +188,7 @@ struct AuxHeader : public HeaderBase
, out_fn{out}
{}
virtual void assign(message* msg, const std::string& s) override
virtual void assign(message* msg, sv s) override
{
add_opt_field(msg, HeaderBase::name(), s);
}
@ -209,13 +212,13 @@ struct AuxHeader : public HeaderBase
// Trampoline function template, because we have no template parameter type deduction for classes until C++17. :-/
//
// Builds one (key, handler) entry for the header map:
//   key     : ascii_tolower(name)
//   handler : a newly allocated SimpleHeader<TM>, which assigns the result of
//             `func` (called with the raw header value) to `message::*member`.
// No output function is supplied (the last constructor argument is nullptr).
// NOTE(review): the returned HeaderBase* is a raw pointer obtained from `new`;
// who releases it is not visible here -- confirm.
template<class TM>
std::pair<const std::string, HeaderBase*> P(sv name, TM message::* member, TM(*func)(const std::string&))
std::pair<const std::string, HeaderBase*> P(sv name, TM message::* member, TM(*func)(sv))
{
return std::make_pair(ascii_tolower(name), new SimpleHeader<TM>(name, member, func, nullptr));
}
// Overload of the trampoline for headers that are parsed by a Spirit.Qi rule.
//
// Builds one (key, handler) entry for the header map:
//   key     : ascii_tolower(name)
//   handler : a newly allocated RuleHeader<TM, TP>, which parses the header value
//             with `rule` into a TP and, on success, assigns `func(TP)` to
//             `message::*member`.
// No output function is supplied (the last constructor argument is nullptr).
// NOTE(review): the returned HeaderBase* is a raw pointer obtained from `new`;
// who releases it is not visible here -- confirm.
template<class TM, class TP>
std::pair<const std::string, HeaderBase*> P(sv name, TM message::* member, const qi::rule<std::string::const_iterator, TP()>& rule, TM (*func)(const TP&))
std::pair<const std::string, HeaderBase*> P(sv name, TM message::* member, const qi::rule<sv::const_iterator, TP()>& rule, TM (*func)(const TP&))
{
return std::make_pair(ascii_tolower(name), new RuleHeader<TM, TP>(name, member, rule, func, nullptr));
}
@ -230,6 +233,7 @@ std::pair<const std::string, HeaderBase*> PAUX( HeaderBase* hb)
// make sure the C string is allocated on the correct heap, especially on MS Windows. *sigh*
//
// Logs the input, runs it through robust_to_utf8() and returns a copy created by
// new_string() with an explicit length.
// NOTE(review): the caller presumably takes ownership of the returned buffer and
// must release it with the deallocator matching new_string() -- confirm.
char* copy_string(const std::string& s)
{
LOG << "COPY_STRING: “" << s << "”.\n";
// robust_to_utf8() also strips NUL bytes, so c_str() and size() describe the same text.
const std::string nfc = robust_to_utf8(s); // TODO: use views/ranges to avoid copying
return new_string(nfc.c_str(), nfc.size());
}
@ -239,7 +243,7 @@ const HeadersMap headers
{
P("Date", &message::recv , &parse_timestamp) ,
P("Message-Id", &message::id , message_id, &copy_string),
P("Subject", &message::shortmsg, phrase , &copy_string),
P("Subject", &message::shortmsg, unstructured , &copy_string),
P("From", &message::from, &parse_address ),
P("To", &message::to, &parse_address_list),
P("Cc", &message::cc, &parse_address_list),
@ -254,6 +258,7 @@ const HeadersMap headers
PAUX( new AuxHeader("Content-Type", nullptr) ),
PAUX( new AuxHeader("Content-Disposition", nullptr) ),
PAUX( new AuxHeader("Content-Transfer-Encoding", nullptr) ),
};

@ -14,7 +14,7 @@ namespace pEpMIME
{
HeaderBase(sv _name) : m_name(_name) {}
virtual ~HeaderBase() = default;
virtual void assign(message* msg, const std::string&) = 0;
virtual void assign(message* msg, sv) = 0;
virtual void output(std::string& out, const message* msg, bool transport_encode) = 0;
sv name() const { return m_name; }

@ -98,6 +98,12 @@ Message::Message(const BodyLines& lines)
}
// Returns the value of the "boundary" parameter, looked up in the content-type
// parameters (mh.tparams) via header_value().
// NOTE(review): the result is a non-owning view; it presumably refers to storage
// held by `mh`, so it must not outlive this Message -- confirm. What is returned
// when no "boundary" parameter exists depends on header_value(), not visible here.
sv Message::boundary() const
{
return header_value(mh.tparams, "boundary");
}
std::ostream& operator<<(std::ostream& o, const Message& m)
{
o << "Message: " << m.headers.size() << " header lines:\n";

@ -16,6 +16,9 @@ namespace pEpMIME
HeaderSection headers;
MimeHeaders mh;
BodyLines body;
// only set for multipart bodies
sv boundary() const;
};
std::ostream& operator<<(std::ostream&, const Message& m);

@ -84,10 +84,10 @@ void ContentType::tolower()
// token: one or more `vchar` characters, excluding the special characters
// listed in the char_ set.
Rule token = +( vchar.alias() - qi::char_("]()<>@,;:\\\"/?=["));
// parameter: token '=' value, where the value is either a token or a quoted_string.
TRule<NameValue> parameter = token >> '=' >> (token | quoted_string.alias());
// content_type: type '/' subtype, followed by any number of parameters.
// Optional cfws around the separator is matched but its attribute is dropped (qi::omit).
// The first definition requires a ';' before every parameter; the second one
// makes the ';' optional via -qi::lit(';').
TRule<ContentType> content_type = token >> '/' >> token >> *( qi::omit[*cfws] >> ';' >> qi::omit[*cfws] >> parameter);
TRule<ContentType> content_type = token >> '/' >> token >> *( qi::omit[*cfws] >> -qi::lit(';') >> qi::omit[*cfws] >> parameter);
// Symbol table for the Content-Disposition type keyword:
//   "attachment" -> PEP_CONTENT_DISP_ATTACHMENT
//   "inline"     -> PEP_CONTENT_DISP_INLINE
// The two vectors are matched by position; the trailing string is the name
// given to the symbol table.
const qi::symbols<char, content_disposition_type> disposition_type(
std::vector<std::string>{"attachment", "inline"},
std::vector<sv>{"attachment", "inline"},
std::vector<content_disposition_type>{ PEP_CONTENT_DISP_ATTACHMENT, PEP_CONTENT_DISP_INLINE}, "disposition_type"
);
@ -115,10 +115,10 @@ TRule<Rfc2231ParamValue> param_value =
>> ( +(ext_octet | attrib_char))[ &(_val)->*&Rfc2231ParamValue::value <<= _1 ];
static std::string convert(std::string& charset, const std::string& input)
static std::string convert(std::string& charset, sv input)
{
Rfc2231ParamValue pv;
std::string::const_iterator begin = input.begin();
sv::const_iterator begin = input.begin();
if(qi::parse(begin, input.end(), param_value, pv))
{
if(pv.charset.size())
@ -141,8 +141,9 @@ static void unwrap(std::vector<NameValue>& params)
{
LOG << "UW: " << p << " : ";
Rfc2231ParamName pn;
std::string::const_iterator begin = p.name.cbegin();
if(qi::parse(begin, p.name.cend(), param_name, pn))
sv pname{ p.name };
sv::const_iterator begin = pname.cbegin();
if(qi::parse(begin, pname.cend(), param_name, pn))
{
ascii_tolower(pn.name);
LOG << " RFC2231. ext_value=" << pn.ext_value << ", count=" << pn.count << ".\n";
@ -200,12 +201,20 @@ std::ostream& operator<<(std::ostream& o, const ContentType& ct)
return o << "CT:{" << ct.type << "/" << ct.subtype << ". params=" << ct.tparams << " } ";
}
// Debug output for a ContentDisposition: writes its disposition type and its
// parameters (dparams) in the form "CD:{<type>. params=<params> } ".
// Returns the stream to allow chaining.
std::ostream& operator<<(std::ostream& o, const ContentDisposition& cd)
{
return o << "CD:{" << cd.dispo_type << ". params=" << cd.dparams << " } ";
}
// Debug output for MimeHeaders: prints, on separate lines, its ContentType view,
// its ContentDisposition view and the transfer_encoding member, wrapped in "MH { ... }".
// Returns the stream to allow chaining.
std::ostream& operator<<(std::ostream& o, const MimeHeaders& mh)
{
// The casts select the ContentType / ContentDisposition stream operators for the same object.
return o << "MH { " << static_cast<const ContentType&>(mh) << "\n"
"\t " << static_cast<const ContentDisposition&>(mh) << "\n"
"\t transfer_encoding: " << mh.transfer_encoding << "\n"
"}";
}
// for "7bit", "8bit" or "binary"
static char* identity_decode(const BodyLines& bl, size_t& output_size)
@ -253,7 +262,7 @@ static char* qp_decode(const BodyLines& bl, size_t& output_size)
}
MimeHeaders::Decoder getDecoder(const std::string& transfer_encoding)
MimeHeaders::Decoder getDecoder(sv transfer_encoding)
{
if(transfer_encoding == "base64")
{
@ -269,12 +278,11 @@ MimeHeaders::Decoder getDecoder(const std::string& transfer_encoding)
ContentType::ContentType(sv header_line)
{
const std::string cts = header_line.to_string();
auto begin = cts.cbegin();
const bool okay = qi::parse(begin, cts.