split bodyparser.cc into separate translation unit: mime_headers.{hh,cc}

afl-fuzzing
Roker 4 years ago
parent 680e8a1ab4
commit 05947792c4

@ -17,7 +17,7 @@ all: libpEpMIME.a unittests fuzz
libpEpMIME.a: pEpMIME.o pEpMIME_internal.o rules.o bodyparser.o \
headerparser.o parse_timestamp.o parse_address.o nulllogger.o \
base64.o nfc.o nfc_sets.o to_utf8.o quoted_printable.o
base64.o nfc.o mime_headers.o nfc_sets.o to_utf8.o quoted_printable.o
${AR} rcs $@ $^
unittests: unittest_mime.o unittest_nfc.o unittest_timestamp.o \

@ -1,5 +1,6 @@
#include "bodyparser.hh"
#include "pEpMIME_internal.hh"
#include "mime_headers.hh"
#include "rules.hh"
#include "base64.hxx"
#include "quoted_printable.hxx"
@ -19,261 +20,12 @@ namespace px = boost::phoenix;
using qi::_val;
using qi::_1;
// Parsed value of a Content-Type header field: media type, subtype and parameters.
struct ContentType
{
std::string type;
std::string subtype;
std::vector<pEpMIME::NameValue> params;
void tolower(); // only for ASCII chars, but that's sufficient here.
void unwrap(); // reverses the wrapping of overlong (and therefore split) parameter values.
// Normalises a freshly parsed value: lowercases type and subtype, falls back
// to "text/plain" when no type was parsed, then unwraps the parameters.
void sanitize()
{
tolower();
if(type.empty()) { type = "text"; subtype="plain"; }
unwrap();
}
};
// Value of a Content-Disposition header field: disposition kind plus file name.
struct ContentDisposition
{
content_disposition_type type; // no default initializer: indeterminate until assigned
std::string filename;
};
// Bundles the MIME-related header values of one entity.
// Constructed from the entity's header section.
struct Content
{
Content(const HeaderSection& headers);
ContentType type;
ContentDisposition dispo;
std::string transfer_encoding;
};
// Decomposed parameter name in RFC 2231 notation: name[*count][*]
struct Rfc2231ParamName
{
std::string name; // the part before the first '*'
int count = -1; // section number of a split value; -1 means "no counter present"
bool ext_value = false; // extended value: charset'language'encoded_value
};
// Decomposed extended parameter value: charset'language'value
struct Rfc2231ParamValue
{
std::string charset; // stays empty when the value carries no charset prefix
// language is ignored
std::string value;
};
// Expose ContentType as a Fusion sequence so that a Spirit rule can fill its
// members in declaration order.
BOOST_FUSION_ADAPT_STRUCT(
ContentType,
(std::string, type)
(std::string, subtype)
(std::vector<pEpMIME::NameValue>, params)
)
// Disabled adaptation. NOTE(review): the listed members (unsigned count,
// with_charset) no longer match Rfc2231ParamName (int count, ext_value).
/*
BOOST_FUSION_ADAPT_STRUCT(
Rfc2231ParamName,
(std::string, name)
(unsigned, count)
(bool, with_charset)
)
*/
// that boost::fusion magic seems to work only in the actual TU
// so it has to be defined here, instead of at the end of pEpMIME_internal.cc *sigh*
BOOST_FUSION_ADAPT_STRUCT(
pEpMIME::NameValue,
(std::string, name)
(std::string, value)
)
// Lowercases media type and subtype in place (ASCII only, which suffices here).
// The two conversions are independent of each other.
void ContentType::tolower()
{
    pEpMIME::ascii_tolower(subtype);
    pEpMIME::ascii_tolower(type);
}
// Exactly two hexadecimal digits, yielding one octet.
qi::uint_parser<unsigned char, 16,2,2> hex_octet;
// Percent-encoded octet: '%' followed by two hex digits.
pEpMIME::TRule<char> ext_octet = qi::lit('%') >> hex_octet;
// Printable ASCII character that may appear unencoded in an extended value.
pEpMIME::TRule<char> attrib_char = qi::ascii::print - qi::char_("]*'%()<>@,:\\\"/?=[");
// Parameter name: everything up to the first '*', then an optional "*<number>"
// section counter, then an optional trailing '*' marking an extended value.
pEpMIME::TRule<Rfc2231ParamName> param_name =
(+(qi::char_ - '*')) [ &(_val)->*&Rfc2231ParamName::name <<= _1 ]
>> -(qi::lit('*') >> qi::uint_[ &(_val)->*&Rfc2231ParamName::count = _1] )
>> -(qi::lit('*')[ &(_val)->*&Rfc2231ParamName::ext_value = true] );
// Extended parameter value: optional charset'language' prefix followed by a
// run of percent-encoded octets and plain attribute characters.
pEpMIME::TRule<Rfc2231ParamValue> param_value =
-qi::hold[
(+qi::char_("A-Za-z0-9_./-"))[ &(_val)->*&Rfc2231ParamValue::charset <<= _1 ]
>> '\''
>> qi::omit[ *(qi::char_ - '\'') ] // language is ignored
>> '\''
] // charset & language is optional and normally only present in the 1st part
>> ( +(ext_octet | attrib_char))[ &(_val)->*&Rfc2231ParamValue::value <<= _1 ];
std::string convert(std::string& charset, const std::string& input)
{
Rfc2231ParamValue pv;
std::string::const_iterator begin = input.begin();
if(qi::parse(begin, input.end(), param_value, pv))
{
if(pv.charset.size())
{
charset = pv.charset;
}
return to_utf8(charset, pv.value);
}
return to_utf8(charset, input);
}
// Reverses the RFC 2231 wrapping of overlong (and therefore split) parameter
// values: sections "name*0", "name*1", ... are concatenated into one "name"
// parameter, and extended values (trailing '*') are converted to UTF-8.
//
// Fix: a multi-line parameter that is still being accumulated when the loop
// ends is now emitted. Previously it was silently dropped, so a split
// parameter in last position got lost.
void ContentType::unwrap()
{
    std::vector<pEpMIME::NameValue> new_params;
    std::string ml_name, ml_value; // pending multi-line parameter (name, accumulated value)
    std::string charset = "UTF-8"; // used until an extended value announces another charset
    int old_count = -1;            // section number of the last accepted section

    // Emits the pending multi-line parameter, if any, and resets the accumulator.
    const auto flush_pending = [&]()
    {
        if(!ml_name.empty())
        {
            new_params.emplace_back( ml_name, ml_value );
            ml_name.clear();
            ml_value.clear();
        }
    };

    for(auto& p : params)
    {
        Rfc2231ParamName pn;
        std::string::const_iterator begin = p.name.cbegin();
        if(qi::parse(begin, p.name.cend(), param_name, pn))
        {
            const std::string& value = pn.ext_value ? convert(charset, p.value ) : p.value;
            switch(pn.count)
            {
                case -1 : // no section counter: single-line value (possibly with charset)
                    new_params.emplace_back( pn.name, value );
                    break;
                case  0 : // start of a multi-line value
                    flush_pending();
                    ml_name   = pn.name;
                    ml_value  = value;
                    old_count = 0;
                    break;
                default:
                    if(pn.name == ml_name && pn.count == old_count+1)
                    {
                        ml_value += value;
                        old_count = pn.count;
                    }else{
                        // non-contiguous counter -> discard it.
                    }
                    break;
            }
        }else{
            flush_pending();
            // "legacy" parameter:
            new_params.emplace_back( std::move(p) );
        }
    }

    // Do not lose a multi-line parameter that ends the parameter list.
    flush_pending();

    params.swap(new_params);
}
// Debug output of a ContentType: "CT:{type/subtype. params=... } ".
std::ostream& operator<<(std::ostream& o, const ContentType& ct)
{
    o << "CT:{" << ct.type << "/" << ct.subtype;
    return o << ". params=" << ct.params << " } ";
}
// Parses the Content-Type header of `headers`, logs the raw and the sanitized
// result, and fetches the Content-Disposition header value.
// NOTE(review): the parameter type here (HeaderSections&) differs from the
// declaration inside struct Content (const HeaderSection&) — confirm.
// NOTE(review): `t` and `ct` are not declared in this block, and `cds` is never
// used; this looks like unfinished code — confirm before reuse.
Content::Content(HeaderSections& headers)
{
const std::string cts = header_value(headers, "content-type").to_string();
auto begin = cts.cbegin();
const bool okay = qi::parse(begin, cts.cend(), content_type, t);
if(!okay)
{
LOG << "Cannot parse \"" + std::string{cts} + "\" as ContentType.\n";
}
LOG << "<<< CT raw: " << ct << ">>>\n";
ct.sanitize();
LOG << "<<< CT san: " << ct << ">>>\n";
const std::string cds = header_value(headers, "content-disposition").to_string();
}
namespace pEpMIME
{
typedef char* (*Decoder)(const BodyLines&, size_t&);
// for "7bit", "8bit" or "binary"
char* identity_decode(const BodyLines& bl, size_t& output_size)
{
const sv body = combineLines(bl);
output_size = body.size();
return new_string(body.data(), body.size());
}
char* base64_decode(const BodyLines& bl, size_t& output_size)
{
size_t out_size = 0;
for(const auto& line : bl)
{
out_size += (line.size()+3)/4 * 3;
}
const sv body = combineLines(bl);
char* out_string = new_string(nullptr, out_size);
char* out_begin = out_string;
char* out_end = out_string + out_size;
base64::decode_iter( body.begin(), body.end(), out_begin, out_end);
output_size = out_begin - out_string;
return out_string;
}
char* qp_decode(const BodyLines& bl, size_t& output_size)
{
const sv body = combineLines(bl);
char* out_string = new_string(nullptr, body.size());
char* out_begin = out_string;
char* out_end = out_string + body.size();
qp::decode_iter( body.begin(), body.end(), out_begin, out_end);
output_size = out_begin - out_string;
return out_string;
}
// Selects the body decoder for a Content-Transfer-Encoding value.
//
// The value is matched case-insensitively because RFC 2045 section 6.1 declares
// these values as not case sensitive ("Base64" and "BASE64" are valid).
// Anything other than base64 / quoted-printable is passed through unchanged.
Decoder getDecoder(const sv transfer_encoding)
{
    std::string encoding = transfer_encoding.to_string();
    for(char& c : encoding)
    {
        // ASCII-only lowercasing; independent of the current locale.
        if(c >= 'A' && c <= 'Z')
        {
            c = static_cast<char>(c - 'A' + 'a');
        }
    }

    if(encoding == "base64")
    {
        return &base64_decode;
    }else if(encoding == "quoted-printable")
    {
        return &qp_decode;
    }
    return &identity_decode;
}
// Tokens from RFC 2045
// token: one or more visible characters, excluding the listed special characters.
Rule token = +( vchar.alias() - qi::char_("]()<>@,;:\\\"/?=["));
// parameter: name '=' value, where the value is a token or a quoted string.
TRule<NameValue> parameter = token >> '=' >> (token | quoted_string.alias());
// content_type: type '/' subtype, followed by any number of ';'-separated parameters.
TRule<ContentType> content_type = token >> '/' >> token >> *( qi::omit[*cfws] >> ';' >> qi::omit[*cfws] >> parameter);
char* create_string(const BodyLines& body, const sv& charset, Decoder decoder)
char* create_string(const BodyLines& body, const sv& charset, MimeHeaders::Decoder decoder)
{
size_t decoded_size = 0;
char* decoded = decoder(body, decoded_size);
@ -289,11 +41,18 @@ char* create_string(const BodyLines& body, const sv& charset, Decoder decoder)
}
void add_attachment(message* msg, const BodyLines& body, const ContentType& ct, Decoder decoder)
void add_attachment(message* msg, const BodyLines& body, const MimeHeaders& mh)
{
size_t decoded_size = 0;
char* decoded = decoder(body, decoded_size);
bloblist_t* bl = bloblist_add(msg->attachments, decoded, decoded_size, (ct.type + '/' + ct.subtype).c_str(), "dummy.bin");
char* decoded = mh.decoder(body, decoded_size);
sv filename = header_value(mh.dparams, "filename");
if(filename.empty()) // no "filename" field in Content-Disposition?
{
filename = header_value(mh.tparams, "name"); // legacy: use "name" field from Content-Type header
}
const std::string content_type = mh.type + '/' + mh.subtype;
bloblist_t* bl = bloblist_add(msg->attachments, decoded, decoded_size, content_type.c_str(), (filename.empty()? nullptr : filename.data()) );
if(msg->attachments==nullptr)
{
msg->attachments = bl;
@ -305,27 +64,26 @@ void add_attachment(message* msg, const BodyLines& body, const ContentType& ct,
void parse_body(message* msg, const HeaderSection& headers, const BodyLines& body)
{
const std::string mime_version = header_value(headers, "mime-version").to_string();
Content c(headers);
MimeHeaders mh(headers);
if( mime_version == "1.0" ) // TODO: According to RFC 2048 there can be comments in the header field value. -.-
{
// TODO: for whatever reason "string_view cts" does not work with qi::parse(). WTF!
if(c.type.type == "text")
if(mh.type == "text")
{
const sv charset = header_value( ct.params, "charset" );
Decoder decoder = getDecoder( header_value( headers, "content-transfer-encoding" ) );
if(c.type.subtype == "plain")
const sv charset = header_value( mh.tparams, "charset" );
if(mh.subtype == "plain")
{
// put it in msg->longmsg
msg->longmsg = create_string(body, charset, decoder);
}else if(ct.subtype=="html")
msg->longmsg = create_string(body, charset, mh.decoder);
}else if(mh.subtype=="html")
{
// put it in msg->longmsg_formatted
msg->longmsg_formatted = create_string(body, charset, decoder);
msg->longmsg_formatted = create_string(body, charset, mh.decoder);
}else{
// add it as attachment
add_attachment(msg, body, ct, decoder);
add_attachment(msg, body, mh);
}
}

@ -0,0 +1,291 @@
#include "mime_headers.hh"
#include "rules.hh"
#include "base64.hxx"
#include "quoted_printable.hxx"
#include "string_case.hh"
#include "nfc.hh"
#include "to_utf8.hh"
#include <pEp/pEp_string.h>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
using qi::_val;
using qi::_1;
namespace pEpMIME
{
// Decomposed parameter name in RFC 2231 notation: name[*count][*]
struct Rfc2231ParamName
{
std::string name; // the part before the first '*'
int count = -1; // section number of a split value; -1 means "no counter present"
bool ext_value = false; // extended value: charset'language'encoded_value
};
// Decomposed extended parameter value: charset'language'value
struct Rfc2231ParamValue
{
std::string charset; // stays empty when the value carries no charset prefix
// language is ignored
std::string value;
};
}
// Expose ContentType as a Fusion sequence so that the content_type rule can
// fill its members in the listed order.
BOOST_FUSION_ADAPT_STRUCT(
pEpMIME::ContentType,
(std::string, type)
(std::string, subtype)
(std::vector<pEpMIME::NameValue>, tparams)
)
// Same for ContentDisposition and the content_disposition rule.
BOOST_FUSION_ADAPT_STRUCT(
pEpMIME::ContentDisposition,
(content_disposition_type, dispo_type)
(std::vector<pEpMIME::NameValue>, dparams)
)
// Disabled adaptation. NOTE(review): the listed members (unsigned count,
// with_charset) no longer match Rfc2231ParamName (int count, ext_value).
/*
BOOST_FUSION_ADAPT_STRUCT(
Rfc2231ParamName,
(std::string, name)
(unsigned, count)
(bool, with_charset)
)
*/
// that boost::fusion magic seems to work only in the actual TU
// so it has to be defined here, instead of at the end of pEpMIME_internal.cc *sigh*
BOOST_FUSION_ADAPT_STRUCT(
pEpMIME::NameValue,
(std::string, name)
(std::string, value)
)
namespace pEpMIME
{
// Lowercases media type and subtype in place (ASCII only, which suffices here).
// The two conversions are independent of each other.
void ContentType::tolower()
{
    ascii_tolower(subtype);
    ascii_tolower(type);
}
// Tokens from RFC 2045
// token: one or more visible characters, excluding the listed special characters.
Rule token = +( vchar.alias() - qi::char_("]()<>@,;:\\\"/?=["));
// parameter: name '=' value, where the value is a token or a quoted string.
TRule<NameValue> parameter = token >> '=' >> (token | quoted_string.alias());
// content_type: type '/' subtype, followed by any number of ';'-separated parameters.
TRule<ContentType> content_type = token >> '/' >> token >> *( qi::omit[*cfws] >> ';' >> qi::omit[*cfws] >> parameter);
// Maps the disposition keywords onto the corresponding enum values.
// NOTE(review): the keywords are listed in lowercase only; presumably input
// such as "Attachment" will not match — confirm whether that is intended.
const qi::symbols<char, content_disposition_type> disposition_type(
std::vector<std::string>{"attachment", "inline"},
std::vector<content_disposition_type>{ PEP_CONTENT_DISP_ATTACHMENT, PEP_CONTENT_DISP_INLINE}, "disposition_type"
);
// content_disposition: disposition keyword followed by any number of ';'-separated parameters.
TRule<ContentDisposition> content_disposition = disposition_type >> *( qi::omit[*cfws] >> ';' >> qi::omit[*cfws] >> parameter);
// Exactly two hexadecimal digits, yielding one octet.
qi::uint_parser<unsigned char, 16,2,2> hex_octet;
// Percent-encoded octet: '%' followed by two hex digits.
TRule<char> ext_octet = qi::lit('%') >> hex_octet;
// Printable ASCII character that may appear unencoded in an extended value.
TRule<char> attrib_char = qi::ascii::print - qi::char_("]*'%()<>@,:\\\"/?=[");
// Parameter name: everything up to the first '*', then an optional "*<number>"
// section counter, then an optional trailing '*' marking an extended value.
TRule<Rfc2231ParamName> param_name =
(+(qi::char_ - '*')) [ &(_val)->*&Rfc2231ParamName::name <<= _1 ]
>> -(qi::lit('*') >> qi::uint_[ &(_val)->*&Rfc2231ParamName::count = _1] )
>> -(qi::lit('*')[ &(_val)->*&Rfc2231ParamName::ext_value = true] );
// Extended parameter value: optional charset'language' prefix followed by a
// run of percent-encoded octets and plain attribute characters.
TRule<Rfc2231ParamValue> param_value =
-qi::hold[
(+qi::char_("A-Za-z0-9_./-"))[ &(_val)->*&Rfc2231ParamValue::charset <<= _1 ]
>> '\''
>> qi::omit[ *(qi::char_ - '\'') ] // language is ignored
>> '\''
] // charset & language is optional and normally only present in the 1st part
>> ( +(ext_octet | attrib_char))[ &(_val)->*&Rfc2231ParamValue::value <<= _1 ];
std::string convert(std::string& charset, const std::string& input)
{
Rfc2231ParamValue pv;
std::string::const_iterator begin = input.begin();
if(qi::parse(begin, input.end(), param_value, pv))
{
if(pv.charset.size())
{
charset = pv.charset;
}
return to_utf8(charset, pv.value);
}
return to_utf8(charset, input);
}
// Reverses the RFC 2231 wrapping of overlong (and therefore split) parameter
// values in `dparams`: sections "name*0", "name*1", ... are concatenated into
// one "name" parameter, and extended values (trailing '*') are converted to
// UTF-8.
//
// Fix: a multi-line parameter that is still being accumulated when the loop
// ends is now emitted. Previously it was silently dropped, so a split
// parameter in last position (e.g. "filename*0", "filename*1") got lost.
void ContentDisposition::unwrap()
{
    std::vector<NameValue> new_params;
    std::string ml_name, ml_value; // pending multi-line parameter (name, accumulated value)
    std::string charset = "UTF-8"; // used until an extended value announces another charset
    int old_count = -1;            // section number of the last accepted section

    // Emits the pending multi-line parameter, if any, and resets the accumulator.
    const auto flush_pending = [&]()
    {
        if(!ml_name.empty())
        {
            new_params.emplace_back( ml_name, ml_value );
            ml_name.clear();
            ml_value.clear();
        }
    };

    for(auto& p : dparams)
    {
        Rfc2231ParamName pn;
        std::string::const_iterator begin = p.name.cbegin();
        if(qi::parse(begin, p.name.cend(), param_name, pn))
        {
            const std::string& value = pn.ext_value ? convert(charset, p.value ) : p.value;
            switch(pn.count)
            {
                case -1 : // no section counter: single-line value (possibly with charset)
                    new_params.emplace_back( pn.name, value );
                    break;
                case  0 : // start of a multi-line value
                    flush_pending();
                    ml_name   = pn.name;
                    ml_value  = value;
                    old_count = 0;
                    break;
                default:
                    if(pn.name == ml_name && pn.count == old_count+1)
                    {
                        ml_value += value;
                        old_count = pn.count;
                    }else{
                        // non-contiguous counter -> discard it.
                    }
                    break;
            }
        }else{
            flush_pending();
            // "legacy" parameter:
            new_params.emplace_back( std::move(p) );
        }
    }

    // Do not lose a multi-line parameter that ends the parameter list.
    flush_pending();

    dparams.swap(new_params);
}
// Debug output of a ContentType: "CT:{type/subtype. params=... } ".
std::ostream& operator<<(std::ostream& o, const ContentType& ct)
{
    o << "CT:{" << ct.type << "/" << ct.subtype;
    return o << ". params=" << ct.tparams << " } ";
}
// Debug output of a ContentDisposition: "CD:{dispo_type. params=... } ".
std::ostream& operator<<(std::ostream& o, const ContentDisposition& cd)
{
    o << "CD:{" << cd.dispo_type;
    return o << ". params=" << cd.dparams << " } ";
}
// for "7bit", "8bit" or "binary"
char* identity_decode(const BodyLines& bl, size_t& output_size)
{
const sv body = combineLines(bl);
output_size = body.size();
return new_string(body.data(), body.size());
}
char* base64_decode(const BodyLines& bl, size_t& output_size)
{
size_t out_size = 0;
for(const auto& line : bl)
{
out_size += (line.size()+3)/4 * 3;
}
const sv body = combineLines(bl);
char* out_string = new_string(nullptr, out_size);
char* out_begin = out_string;
char* out_end = out_string + out_size;
base64::decode_iter( body.begin(), body.end(), out_begin, out_end);
output_size = out_begin - out_string;
return out_string;
}
char* qp_decode(const BodyLines& bl, size_t& output_size)
{
const sv body = combineLines(bl);
char* out_string = new_string(nullptr, body.size());
char* out_begin = out_string;
char* out_end = out_string + body.size();
qp::decode_iter( body.begin(), body.end(), out_begin, out_end);
output_size = out_begin - out_string;
return out_string;
}
// Selects the body decoder for a Content-Transfer-Encoding value.
//
// The value is matched case-insensitively because RFC 2045 section 6.1 declares
// these values as not case sensitive ("Base64" and "BASE64" are valid).
// Anything other than base64 / quoted-printable is passed through unchanged.
MimeHeaders::Decoder getDecoder(const std::string& transfer_encoding)
{
    std::string encoding = transfer_encoding;
    for(char& c : encoding)
    {
        // ASCII-only lowercasing; independent of the current locale.
        if(c >= 'A' && c <= 'Z')
        {
            c = static_cast<char>(c - 'A' + 'a');
        }
    }

    if(encoding == "base64")
    {
        return &base64_decode;
    }else if(encoding == "quoted-printable")
    {
        return &qp_decode;
    }
    return &identity_decode;
}
// Parses a Content-Type header value into this object and sanitizes it.
// A value that cannot be parsed is only logged; sanitize() still runs.
ContentType::ContentType(sv header_line)
{
    // qi::parse() is fed from an owning std::string copy of the header value.
    const std::string raw = header_line.to_string();
    auto pos = raw.cbegin();
    if(!qi::parse(pos, raw.cend(), content_type, *this))
    {
        LOG << "Cannot parse \"" + std::string{raw} + "\" as ContentType.\n";
    }

    LOG << "<<< CT raw: " << *this << ">>>\n";
    sanitize();
    LOG << "<<< CT san: " << *this << ">>>\n";
}
// Parses a Content-Disposition header value into this object and unwraps
// split parameter values. A value that cannot be parsed is only logged;
// unwrap() still runs.
ContentDisposition::ContentDisposition(sv header_line)
{
    // qi::parse() is fed from an owning std::string copy of the header value.
    const std::string raw = header_line.to_string();
    auto pos = raw.cbegin();
    if(!qi::parse(pos, raw.cend(), content_disposition, *this))
    {
        LOG << "Cannot parse \"" + std::string{raw} + "\" as ContentDisposition.\n";
    }

    LOG << "<<< CD raw: " << *this << ">>>\n";
    unwrap();
    LOG << "<<< CD uw: " << *this << ">>>\n";
}
// Builds the combined MIME header view of one entity from its header section:
// the Content-Type and Content-Disposition values initialise the two base
// classes, the Content-Transfer-Encoding value is stored, and the decoder
// matching that encoding is selected.
MimeHeaders::MimeHeaders(const HeaderSection& headers)
: ContentType{ header_value(headers, "content-type") }
, ContentDisposition{ header_value(headers, "content-disposition" ) }
, transfer_encoding{ header_value(headers, "content-transfer-encoding") }
, decoder{ getDecoder( transfer_encoding ) } // uses the member initialised just above
{
}
} // end of namespace pEpMIME

@ -0,0 +1,46 @@
#ifndef PEP_MIME_MIME_HEADERS_HH
#define PEP_MIME_MIME_HEADERS_HH
#include "pEpMIME_internal.hh"
namespace pEpMIME
{
// Parsed value of a Content-Type header field: media type, subtype and parameters.
struct ContentType
{
//ContentType() = default;
// Constructs the object from the value of a Content-Type header field.
ContentType(sv header_line);
std::string type;
std::string subtype;
std::vector<NameValue> tparams; // parameters of the Content-Type field
void tolower(); // only for ASCII chars, but that's sufficient here.
// Normalises a freshly parsed value: lowercases type and subtype and falls
// back to "text/plain" when no type was parsed.
void sanitize()
{
tolower();
if(type.empty()) { type = "text"; subtype="plain"; }
}
};
// Parsed value of a Content-Disposition header field: disposition kind and
// parameters.
struct ContentDisposition
{
    // Constructs the object from the value of a Content-Disposition header field.
    ContentDisposition(sv header_line);

    // Value-initialised so the member never holds an indeterminate value when
    // the header is absent or cannot be parsed (the constructor streams the
    // object to the log in either case).
    // NOTE(review): presumably zero corresponds to PEP_CONTENT_DISP_ATTACHMENT —
    // confirm that this is the desired fallback.
    content_disposition_type dispo_type{};

    std::vector<NameValue> dparams; // parameters of the Content-Disposition field

    void unwrap(); // reverses the wrapping of overlong (and therefore split) parameter values.
};
// All MIME-related header information of one entity: Content-Type and
// Content-Disposition (as base classes) plus the transfer encoding and the
// decoder function selected for it.
struct MimeHeaders : public ContentType, public ContentDisposition
{
    // Decoder signature: takes the body lines, returns the decoded data and
    // stores its size in the second argument.
    using Decoder = char* (*)(const BodyLines&, size_t&);

    MimeHeaders(const HeaderSection& headers);

    std::string transfer_encoding; // declared before `decoder`, so it is initialised first
    Decoder     decoder;
};
} // end of namespace pEpMIME
#endif // PEP_MIME_MIME_HEADERS_HH
Loading…
Cancel
Save