Browse Source

start to implement MIME body parsers and implement RFC 2231...

afl-fuzzing
Roker 3 years ago
parent
commit
076418ad22
3 changed files with 154 additions and 73 deletions
  1. +141
    -12
      src/bodyparser.cc
  2. +7
    -61
      src/pEpMIME_internal.hh
  3. +6
    -0
      src/rules.hh

+ 141
- 12
src/bodyparser.cc View File

@ -3,13 +3,20 @@
#include "rules.hh"
#include "base64.hxx"
#include "quoted_printable.hxx"
#include "string_case.hh"
#include "to_utf8.hh"
#include <pEp/pEp_string.h>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
using qi::_val;
using qi::_1;
struct ContentType
{
@ -17,6 +24,19 @@ namespace qi = boost::spirit::qi;
std::string subtype;
std::vector<pEpMIME::NameValue> params;
void tolower(); // only for ASCII chars, but that's sufficient here.
void unwrap(); // reverses the wrapping of overlong (and therefore split) parameter values.
// Normalizes this ContentType in place: calls tolower() first, then unwrap().
void sanitize()
{
tolower();
unwrap();
}
};
// A parameter name split into the pieces of the RFC 2231 notation
//     name [ '*' section-number ] [ '*' ]
// e.g. "filename*0*"  ->  name = "filename", count = 0, with_charset = true.
//
// The numeric and the flag member carry default initializers because the
// grammar filling this struct only assigns them when the corresponding
// optional part is present in the input.
struct Rfc2231ParamName
{
    std::string name;           // the part in front of the first '*'
    unsigned count = 0;         // section number following the first '*', if any
    bool with_charset = false;  // true if the name ends with an additional '*'
};
BOOST_FUSION_ADAPT_STRUCT(
@ -26,6 +46,15 @@ namespace qi = boost::spirit::qi;
(std::vector<pEpMIME::NameValue>, params)
)
/*
BOOST_FUSION_ADAPT_STRUCT(
Rfc2231ParamName,
(std::string, name)
(unsigned, count)
(bool, with_charset)
)
*/
// that boost::fusion magic seems to work only in the actual TU
// so it has to be defined here, instead of at the end of pEpMIME_internal.cc *sigh*
BOOST_FUSION_ADAPT_STRUCT(
@ -52,6 +81,30 @@ void ContentType::tolower()
ascii_tolower(subtype);
}
// Grammar for one parameter name in RFC 2231 notation:
//     name [ '*' unsigned-number ] [ '*' ]
// Each semantic action writes one piece into the rule's Rfc2231ParamName attribute (_val).
// The count and with_charset members are only assigned when their optional part matches.
pEpMIME::TRule<Rfc2231ParamName> param_name =
(+(qi::char_ - '*')) [ &(_val)->*&Rfc2231ParamName::name <<= _1 ] // one or more chars other than '*' -> name
>> -(qi::lit('*') >> qi::uint_[ &(_val)->*&Rfc2231ParamName::count = _1] ) // optional "*<number>" -> count
>> -(qi::lit('*')[ &(_val)->*&Rfc2231ParamName::with_charset = true] ); // optional trailing '*' -> with_charset
void ContentType::unwrap()
{
std::vector<pEpMIME::NameValue> params;
std::string name, value;
for(const auto& p:params)
{
Rfc2231ParamName pn;
auto begin = p.name.begin();
if(qi::parse(begin, p.name.end(), param_name, pn))
{
if(pn.name == name)
{
value += p.value; // TODO: decode encoded values!
}
}
}
}
std::ostream& operator<<(std::ostream& o, const ContentType& ct)
{
return o << "CT:{" << ct.type << "/" << ct.subtype << ". params=" << ct.params << " } ";
@ -61,6 +114,17 @@ std::ostream& operator<<(std::ostream& o, const ContentType& ct)
namespace pEpMIME
{
// Signature shared by all body decoders: takes the body lines, stores the
// decoded size in the second argument and returns the decoded buffer.
using Decoder = char* (*)(const BodyLines&, size_t&);
// for "7bit", "8bit" or "binary"
char* identity_decode(const BodyLines& bl, size_t& output_size)
{
const sv body = combineLines(bl);
output_size = body.size();
return new_string(body.data(), body.size());
}
char* base64_decode(const BodyLines& bl, size_t& output_size)
{
size_t out_size = 0;
@ -69,33 +133,43 @@ char* base64_decode(const BodyLines& bl, size_t& output_size)
out_size += (line.size()+3)/4 * 3;
}
const sv body = combineLines(bl);
char* out_string = new_string(nullptr, out_size);
char* out_begin = out_string;
char* out_end = out_string + out_size;
base64::decode_iter( BodyIterator{bl}, BodyIterator{}, out_begin, out_end);
base64::decode_iter( body.begin(), body.end(), out_begin, out_end);
output_size = out_begin - out_string;
return out_string;
}
// Decoder for the "quoted-printable" transfer encoding.
// Allocates an output buffer via new_string(), lets qp::decode_iter() fill it,
// stores the number of bytes between the buffer start and out_begin in
// output_size and returns the buffer.
//
// NOTE(review): this listing appears to interleave the pre-change and the
// post-change lines of the diff (out_string / out_end are declared twice and
// decode_iter is called twice) -- confirm which variant is the current one.
char* qp_decode(const BodyLines& bl, size_t& output_size)
{
// variant sizing the buffer by summing up the line sizes
size_t out_size = 0;
for(const auto& line : bl)
{
out_size += line.size();
}
// variant sizing the buffer by the size of the combined body
const sv body = combineLines(bl);
char* out_string = new_string(nullptr, out_size);
char* out_string = new_string(nullptr, body.size());
char* out_begin = out_string;
char* out_end = out_string + out_size;
char* out_end = out_string + body.size();
qp::decode_iter( BodyIterator{bl}, BodyIterator{}, out_begin, out_end);
qp::decode_iter( body.begin(), body.end(), out_begin, out_end);
// presumably decode_iter advances out_begin past the last byte written -- TODO confirm
output_size = out_begin - out_string;
return out_string;
}
Decoder getDecoder(const sv transfer_encoding)
{
if(transfer_encoding == "base64")
{
return &base64_decode;
}else if(transfer_encoding == "quoted-printable")
{
return &qp_decode;
}
return &identity_decode;
}
// Tokens from RFC 2045:
// one or more `vchar` characters, except the tspecials  ( ) < > @ , ; : \ " / [ ] ? =  listed in the set below.
Rule token = +( vchar - qi::char_("]()<>@,;:\\\"/?=["));
@ -104,8 +178,31 @@ TRule<NameValue> parameter = token >> '=' >> (token | quoted_string.alias());
// Content-Type value:  token '/' token, followed by zero or more parameters.
// Each parameter is introduced by ';'; any `cfws` around the ';' is matched but omitted from the attribute.
TRule<ContentType> content_type = token >> '/' >> token >> *( qi::omit[*cfws] >> ';' >> qi::omit[*cfws] >> parameter);
char* create_string(const BodyLines& body, const sv& charset, Decoder decoder)
{
size_t decoded_size = 0;
char* decoded = decoder(body, decoded_size);
if(charset=="UTF-8" || charset=="UTF8")
{
return decoded; // fine. :-)
}else{
// Sigh, the hard way. At the moment with a lot of unecessary copying. :-/
// Rule1: Make it work. Profile. Make it fast. In this order.
const std::string converted = to_utf8(charset, std::string(decoded, decoded+decoded_size) ); // 1st copy...
return new_string( converted.data(), converted.size() ); // copy again. :'-(
}
}
// Meant to add `body` as an attachment to `msg` (used for text subtypes other
// than "plain" and "html").  Not implemented yet: always throws.
//
// Throws std::runtime_error, like the other error paths of this file, so that
// handlers catching std::exception see it; a bare string literal would not be
// caught by them.
void add_attachment(message* msg, const BodyLines& body, const ContentType& ct, Decoder decoder)
{
    throw std::runtime_error("Unimplemented!");
}
// parses the header and fill the parts in msg
void parse_body(message* msg, const HeaderSection& headers, const std::deque<sv>& body)
void parse_body(message* msg, const HeaderSection& headers, const BodyLines& body)
{
if( header_value(headers, "MIME-Version") == "1.0" ) // TODO: According to RFC 2048 there can be comments in the header field value. -.-
{
@ -119,11 +216,43 @@ void parse_body(message* msg, const HeaderSection& headers, const std::deque<sv>
{
throw std::runtime_error( "Cannot parse \"" + std::string{cts} + "\" as ContentType");
}
ct.tolower();
ct.sanitize();
std::cerr << ct << std::endl;
if(ct.type == "text")
{
const sv charset = header_value( ct.params, "charset" );
Decoder decoder = getDecoder( header_value( headers, "Content-Transfer-Encoding" ) );
if(ct.subtype == "plain")
{
// put it in msg->longmsg
msg->longmsg = create_string(body, charset, decoder);
}else if(ct.subtype=="html")
{
// put it in msg->longmsg_formatted
msg->longmsg_formatted = create_string(body, charset, decoder);
}else{
// add it as attachment
add_attachment(msg, body, ct, decoder);
}
}
}else{ // Non-MIME mail
std::cerr << "<<< NO_MIME_MAIL >>>\n";
size_t body_size = 0;
for(const auto& line : body)
{
body_size += line.size() + 2;
}
char* pbody = msg->shortmsg = new_string(nullptr, body_size);
for(const auto& line : body)
{
memcpy(pbody, line.data(), line.size());
pbody += line.size();
memcpy(pbody, "\r\n", 2);
pbody += 2;
}
}
}


+ 7
- 61
src/pEpMIME_internal.hh View File

@ -39,73 +39,19 @@ namespace pEpMIME
typedef std::deque<sv> BodyLines;
// Iterator that walks character by character through all lines of a BodyLines
// container, one line after the other.  A null `body` pointer marks the "end" state.
struct BodyIterator
{
// "end" iterator
BodyIterator()
: body{nullptr}
{}
// "begin" iterator
// NOTE(review): dereferences body->cbegin() unconditionally, so an empty
// BodyLines container does not look like a supported input -- confirm with the callers.
BodyIterator(const BodyLines& _body)
: body{&_body}
, line_iter{body->cbegin()}
, char_iter{line_iter->cbegin()}
{}
// Current character; '\0' when in the "end" state.
const char operator*() const
{
return body ? *char_iter : '\0';
}
// Advances by one character.  When the end of a line is reached it moves on to
// the beginning of the next line; after the last line it switches to the "end" state.
// Does nothing when already in the "end" state.
BodyIterator& operator++()
{
if(body)
{
++char_iter;
if(char_iter == line_iter->end())
{
++line_iter;
if(line_iter == body->end())
{
body = nullptr;
}else{
char_iter = line_iter->begin();
}
}
}
return *this;
}
const BodyLines* body; // container being iterated; nullptr = "end" state
BodyLines::const_iterator line_iter; // current line
sv::const_iterator char_iter; // current position within the current line
};
// NOTE(review): this span appears to interleave the lines of two BodyIterator
// comparison operators (operator== / operator!=) with the lines of
// combineLines() from the same diff -- confirm which lines are current.
inline
bool operator==(const BodyIterator& a, const BodyIterator& b)
sv combineLines(const BodyLines& body)
{
if(a.body == b.body)
if(body.empty())
{
if(a.body == nullptr)
{
return true;
}else{
return a.line_iter == b.line_iter && a.char_iter == b.char_iter;
}
return sv{};
}
return false;
}
inline
bool operator!=(const BodyIterator& a, const BodyIterator& b)
{
return !(a==b);
// combineLines: one view from the first character of the first line up to the end of the last line.
// assumes all lines are views into one contiguous buffer, in ascending order -- TODO confirm
const char* begin = body.front().begin();
const char* end = body.back().end();
return sv{begin, static_cast<size_t>(end-begin)};
}
}
} // end of namespace pEpMIME
//template<class T>
//std::ostream& operator<<(std::ostream&, const std::vector<T>& v);


+ 6
- 0
src/rules.hh View File

@ -5,6 +5,12 @@
namespace qi = boost::spirit::qi;
// px::_a = px::_1 does not work from vector<char> to string. So this helps:
// an operator<<= taking a string on the left and a vector<char> on the right,
// declared in namespace std; only the declaration is given here.
// NOTE(review): the C++ standard does not permit adding such declarations to
// namespace std ([namespace.std]) -- consider whether a wrapper outside of std can be used instead.
namespace std
{
string& operator<<=(string& s, const vector<char>& v);
}
namespace pEpMIME
{


Loading…
Cancel
Save