|
|
@ -1,5 +1,6 @@ |
|
|
|
#include "bodyparser.hh"
|
|
|
|
#include "pEpMIME_internal.hh"
|
|
|
|
#include "mime_headers.hh"
|
|
|
|
#include "rules.hh"
|
|
|
|
#include "base64.hxx"
|
|
|
|
#include "quoted_printable.hxx"
|
|
|
@ -19,261 +20,12 @@ namespace px = boost::phoenix; |
|
|
|
using qi::_val; |
|
|
|
using qi::_1; |
|
|
|
|
|
|
|
struct ContentType |
|
|
|
{ |
|
|
|
std::string type; |
|
|
|
std::string subtype; |
|
|
|
std::vector<pEpMIME::NameValue> params; |
|
|
|
void tolower(); // only for ASCII chars, but that's sufficient here.
|
|
|
|
void unwrap(); // reverses the wrapping of overlong (andtherefore split) parameter values.
|
|
|
|
void sanitize() |
|
|
|
{ |
|
|
|
tolower(); |
|
|
|
if(type.empty()) { type = "text"; subtype="plain"; } |
|
|
|
unwrap(); |
|
|
|
} |
|
|
|
}; |
|
|
|
|
|
|
|
struct ContentDisposition |
|
|
|
{ |
|
|
|
content_disposition_type type; |
|
|
|
std::string filename; |
|
|
|
}; |
|
|
|
|
|
|
|
struct Content |
|
|
|
{ |
|
|
|
Content(const HeaderSection& headers); |
|
|
|
|
|
|
|
ContentType type; |
|
|
|
ContentDisposition dispo; |
|
|
|
std::string transfer_encoding; |
|
|
|
}; |
|
|
|
|
|
|
|
// Decomposed parameter name as produced by the param_name rule.
struct Rfc2231ParamName
{
    // the name part in front of the first '*'
    std::string name;

    // numeric section counter; stays -1 when the name carries none
    int count = -1;

    // extended value: charset'language'encoded_value
    bool ext_value = false;
};
|
|
|
|
|
|
|
// Decomposed extended parameter value as produced by the param_value rule.
struct Rfc2231ParamValue
{
    // charset prefix of the value; empty if the value carries none
    std::string charset;

    // (the language part is ignored and therefore not stored)

    // the value itself
    std::string value;
};
|
|
|
|
|
|
|
// Adapt ContentType as a Boost.Fusion sequence (type, subtype, params)
// so that the content_type rule can fill it as its attribute.
BOOST_FUSION_ADAPT_STRUCT(
    ContentType,
    (std::string, type)
    (std::string, subtype)
    (std::vector<pEpMIME::NameValue>, params)
)
|
|
|
|
|
|
|
/*
|
|
|
|
BOOST_FUSION_ADAPT_STRUCT( |
|
|
|
Rfc2231ParamName, |
|
|
|
(std::string, name) |
|
|
|
(unsigned, count) |
|
|
|
(bool, with_charset) |
|
|
|
) |
|
|
|
*/ |
|
|
|
|
|
|
|
// that boost::fusion magic seems to work only in the actual TU
|
|
|
|
// so it has to be defined here, instead of at the end of pEpMIME_internal.cc *sigh*
|
|
|
|
BOOST_FUSION_ADAPT_STRUCT( |
|
|
|
pEpMIME::NameValue, |
|
|
|
(std::string, name) |
|
|
|
(std::string, value) |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
void ContentType::tolower() |
|
|
|
{ |
|
|
|
pEpMIME::ascii_tolower(type); |
|
|
|
pEpMIME::ascii_tolower(subtype); |
|
|
|
} |
|
|
|
|
|
|
|
qi::uint_parser<unsigned char, 16,2,2> hex_octet; |
|
|
|
|
|
|
|
pEpMIME::TRule<char> ext_octet = qi::lit('%') >> hex_octet; |
|
|
|
pEpMIME::TRule<char> attrib_char = qi::ascii::print - qi::char_("]*'%()<>@,:\\\"/?=["); |
|
|
|
|
|
|
|
// Splits a parameter name into its parts:
//   name            everything up to the first '*'
//   '*' number      optional section counter
//   '*'             optional trailing marker for an extended value
pEpMIME::TRule<Rfc2231ParamName> param_name =
       (+(qi::char_ - '*')) [ &(_val)->*&Rfc2231ParamName::name <<= _1 ]
    >> -(qi::lit('*') >> qi::uint_[ &(_val)->*&Rfc2231ParamName::count = _1] )
    >> -(qi::lit('*')[ &(_val)->*&Rfc2231ParamName::ext_value = true] );
|
|
|
|
|
|
|
|
|
|
|
// Parses an extended parameter value of the form  charset'language'encoded_value.
pEpMIME::TRule<Rfc2231ParamValue> param_value =
    // The charset & language prefix is optional and normally only present in the 1st part.
    -qi::hold[
           (+qi::char_("A-Za-z0-9_./-"))[ &(_val)->*&Rfc2231ParamValue::charset <<= _1 ]
        >> '\''
        >> qi::omit[ *(qi::char_ - '\'') ]   // the language is parsed but ignored
        >> '\''
    ]
    // The value itself: a non-empty sequence of %XX octets and attribute chars.
    >> ( +(ext_octet | attrib_char))[ &(_val)->*&Rfc2231ParamValue::value <<= _1 ];
|
|
|
|
|
|
|
std::string convert(std::string& charset, const std::string& input) |
|
|
|
{ |
|
|
|
Rfc2231ParamValue pv; |
|
|
|
std::string::const_iterator begin = input.begin(); |
|
|
|
if(qi::parse(begin, input.end(), param_value, pv)) |
|
|
|
{ |
|
|
|
if(pv.charset.size()) |
|
|
|
{ |
|
|
|
charset = pv.charset; |
|
|
|
} |
|
|
|
return to_utf8(charset, pv.value); |
|
|
|
} |
|
|
|
return to_utf8(charset, input); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void ContentType::unwrap() |
|
|
|
{ |
|
|
|
std::vector<pEpMIME::NameValue> new_params; |
|
|
|
std::string ml_name, ml_value; // multiline parameters
|
|
|
|
std::string charset = "UTF-8"; |
|
|
|
int old_count = -1; |
|
|
|
for(auto& p:params) |
|
|
|
{ |
|
|
|
Rfc2231ParamName pn; |
|
|
|
std::string::const_iterator begin = p.name.cbegin(); |
|
|
|
if(qi::parse(begin, p.name.cend(), param_name, pn)) |
|
|
|
{ |
|
|
|
const std::string& value = pn.ext_value ? convert(charset, p.value ) : p.value; |
|
|
|
switch(pn.count) |
|
|
|
{ |
|
|
|
case -1 : // has charset but no multi-line value
|
|
|
|
new_params.emplace_back( pn.name, value ); |
|
|
|
break; |
|
|
|
case 0 : // start of a multi-line value
|
|
|
|
if(!ml_name.empty()) |
|
|
|
{ |
|
|
|
new_params.emplace_back( ml_name, ml_value); |
|
|
|
} |
|
|
|
ml_name = pn.name; |
|
|
|
ml_value = value; |
|
|
|
old_count = 0; |
|
|
|
break; |
|
|
|
default: |
|
|
|
if(pn.name == ml_name && pn.count == old_count+1) |
|
|
|
{ |
|
|
|
ml_value += value; |
|
|
|
old_count = pn.count; |
|
|
|
}else{ |
|
|
|
// non-contiguous counter -> discard it.
|
|
|
|
} |
|
|
|
break; |
|
|
|
} |
|
|
|
}else{ |
|
|
|
if(!ml_name.empty()) |
|
|
|
{ |
|
|
|
new_params.emplace_back( ml_name, ml_value); |
|
|
|
ml_name.clear(); ml_value.clear(); |
|
|
|
} |
|
|
|
// "legacy" parameter:
|
|
|
|
new_params.emplace_back( std::move(p) ); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
params.swap(new_params); |
|
|
|
} |
|
|
|
|
|
|
|
std::ostream& operator<<(std::ostream& o, const ContentType& ct) |
|
|
|
{ |
|
|
|
return o << "CT:{" << ct.type << "/" << ct.subtype << ". params=" << ct.params << " } "; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Builds the Content description from a header section.
//
// The "content-type" header value is parsed with the content_type rule into the
// `type` member; a parse failure is only logged. The result is then sanitized
// (see ContentType::sanitize()).
//
// The signature now matches the declaration in struct Content
// (const HeaderSection&), and the parsed value is stored in the `type` member
// instead of the undeclared names `t` / `ct`.
Content::Content(const HeaderSection& headers)
{
    // qi::parse() is fed from a std::string, hence the to_string() copy.
    const std::string cts = header_value(headers, "content-type").to_string();
    auto begin = cts.cbegin();
    const bool okay = qi::parse(begin, cts.cend(), content_type, type);
    if(!okay)
    {
        LOG << "Cannot parse \"" + cts + "\" as ContentType.\n";
    }

    LOG << "<<< CT raw: " << type << ">>>\n";
    type.sanitize();
    LOG << "<<< CT san: " << type << ">>>\n";

    // TODO: the Content-Disposition value is fetched but not evaluated yet;
    //       `dispo` and `transfer_encoding` are left untouched by this constructor.
    const std::string cds = header_value(headers, "content-disposition").to_string();
}
|
|
|
|
|
|
|
|
|
|
|
namespace pEpMIME |
|
|
|
{ |
|
|
|
|
|
|
|
// Signature shared by all body decoders: takes the body lines, reports the
// decoded size through the reference parameter and returns the decoded data.
using Decoder = char* (*)(const BodyLines&, size_t&);
|
|
|
|
|
|
|
// for "7bit", "8bit" or "binary"
|
|
|
|
char* identity_decode(const BodyLines& bl, size_t& output_size) |
|
|
|
{ |
|
|
|
const sv body = combineLines(bl); |
|
|
|
output_size = body.size(); |
|
|
|
return new_string(body.data(), body.size()); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
char* base64_decode(const BodyLines& bl, size_t& output_size) |
|
|
|
{ |
|
|
|
size_t out_size = 0; |
|
|
|
for(const auto& line : bl) |
|
|
|
{ |
|
|
|
out_size += (line.size()+3)/4 * 3; |
|
|
|
} |
|
|
|
|
|
|
|
const sv body = combineLines(bl); |
|
|
|
|
|
|
|
char* out_string = new_string(nullptr, out_size); |
|
|
|
char* out_begin = out_string; |
|
|
|
char* out_end = out_string + out_size; |
|
|
|
|
|
|
|
base64::decode_iter( body.begin(), body.end(), out_begin, out_end); |
|
|
|
output_size = out_begin - out_string; |
|
|
|
return out_string; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
char* qp_decode(const BodyLines& bl, size_t& output_size) |
|
|
|
{ |
|
|
|
const sv body = combineLines(bl); |
|
|
|
|
|
|
|
char* out_string = new_string(nullptr, body.size()); |
|
|
|
char* out_begin = out_string; |
|
|
|
char* out_end = out_string + body.size(); |
|
|
|
|
|
|
|
qp::decode_iter( body.begin(), body.end(), out_begin, out_end); |
|
|
|
output_size = out_begin - out_string; |
|
|
|
return out_string; |
|
|
|
} |
|
|
|
|
|
|
|
// Selects the decoder function for a Content-Transfer-Encoding value.
//
// transfer_encoding  the header value, e.g. "base64" or "Quoted-Printable"
// returns            &base64_decode, &qp_decode, or &identity_decode for
//                    everything else ("7bit", "8bit", "binary", unknown, empty)
//
// RFC 2045 section 6.1 declares the encoding names case-insensitive, so the
// value is lower-cased (ASCII) before it is compared. Previously "Base64" or
// "BASE64" silently selected the identity decoder.
Decoder getDecoder(const sv transfer_encoding)
{
    std::string encoding(transfer_encoding.begin(), transfer_encoding.end());
    pEpMIME::ascii_tolower(encoding);

    if(encoding == "base64")
    {
        return &base64_decode;
    }

    if(encoding == "quoted-printable")
    {
        return &qp_decode;
    }

    return &identity_decode;
}
|
|
|
|
|
|
|
// Tokens from RFC 2045
|
|
|
|
Rule token = +( vchar.alias() - qi::char_("]()<>@,;:\\\"/?=[")); |
|
|
|
|
|
|
|
TRule<NameValue> parameter = token >> '=' >> (token | quoted_string.alias()); |
|
|
|
TRule<ContentType> content_type = token >> '/' >> token >> *( qi::omit[*cfws] >> ';' >> qi::omit[*cfws] >> parameter); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
char* create_string(const BodyLines& body, const sv& charset, Decoder decoder) |
|
|
|
char* create_string(const BodyLines& body, const sv& charset, MimeHeaders::Decoder decoder) |
|
|
|
{ |
|
|
|
size_t decoded_size = 0; |
|
|
|
char* decoded = decoder(body, decoded_size); |
|
|
@ -289,11 +41,18 @@ char* create_string(const BodyLines& body, const sv& charset, Decoder decoder) |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void add_attachment(message* msg, const BodyLines& body, const ContentType& ct, Decoder decoder) |
|
|
|
void add_attachment(message* msg, const BodyLines& body, const MimeHeaders& mh) |
|
|
|
{ |
|
|
|
size_t decoded_size = 0; |
|
|
|
char* decoded = decoder(body, decoded_size); |
|
|
|
bloblist_t* bl = bloblist_add(msg->attachments, decoded, decoded_size, (ct.type + '/' + ct.subtype).c_str(), "dummy.bin"); |
|
|
|
char* decoded = mh.decoder(body, decoded_size); |
|
|
|
sv filename = header_value(mh.dparams, "filename"); |
|
|
|
if(filename.empty()) // no "filename" field in Content-Disposition?
|
|
|
|
{ |
|
|
|
filename = header_value(mh.tparams, "name"); // legacy: use "name" field from Content-Type header
|
|
|
|
} |
|
|
|
|
|
|
|
const std::string content_type = mh.type + '/' + mh.subtype; |
|
|
|
bloblist_t* bl = bloblist_add(msg->attachments, decoded, decoded_size, content_type.c_str(), (filename.empty()? nullptr : filename.data()) ); |
|
|
|
if(msg->attachments==nullptr) |
|
|
|
{ |
|
|
|
msg->attachments = bl; |
|
|
@ -305,27 +64,26 @@ void add_attachment(message* msg, const BodyLines& body, const ContentType& ct, |
|
|
|
void parse_body(message* msg, const HeaderSection& headers, const BodyLines& body) |
|
|
|
{ |
|
|
|
const std::string mime_version = header_value(headers, "mime-version").to_string(); |
|
|
|
Content c(headers); |
|
|
|
MimeHeaders mh(headers); |
|
|
|
|
|
|
|
if( mime_version == "1.0" ) // TODO: According to RFC 2048 there can be comments in the header field value. -.-
|
|
|
|
{ |
|
|
|
// TODO: for whatever reason "string_view cts" does not work with qi::parse(). WTF!
|
|
|
|
|
|
|
|
if(c.type.type == "text") |
|
|
|
if(mh.type == "text") |
|
|
|
{ |
|
|
|
const sv charset = header_value( ct.params, "charset" ); |
|
|
|
Decoder decoder = getDecoder( header_value( headers, "content-transfer-encoding" ) ); |
|
|
|
if(c.type.subtype == "plain") |
|
|
|
const sv charset = header_value( mh.tparams, "charset" ); |
|
|
|
if(mh.subtype == "plain") |
|
|
|
{ |
|
|
|
// put it in msg->longmsg
|
|
|
|
msg->longmsg = create_string(body, charset, decoder); |
|
|
|
}else if(ct.subtype=="html") |
|
|
|
msg->longmsg = create_string(body, charset, mh.decoder); |
|
|
|
}else if(mh.subtype=="html") |
|
|
|
{ |
|
|
|
// put it in msg->longmsg_formatted
|
|
|
|
msg->longmsg_formatted = create_string(body, charset, decoder); |
|
|
|
msg->longmsg_formatted = create_string(body, charset, mh.decoder); |
|
|
|
}else{ |
|
|
|
// add it as attachment
|
|
|
|
add_attachment(msg, body, ct, decoder); |
|
|
|
add_attachment(msg, body, mh); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|