From 2fd28dfadc69621b8e04205a75bb0e1c826585b5 Mon Sep 17 00:00:00 2001 From: roker Date: Tue, 27 Apr 2021 17:18:18 +0200 Subject: [PATCH] MIME-15: remove NUL bytes from text bodies, control chars from header values. --- src/bodyparser.cc | 9 +++++++-- src/headerparser.cc | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/bodyparser.cc b/src/bodyparser.cc index 33af654..a838d1d 100644 --- a/src/bodyparser.cc +++ b/src/bodyparser.cc @@ -82,11 +82,16 @@ char* create_string(const BodyLines& body, const sv& charset, MimeHeaders::Decod if(charset=="UTF-8" || charset=="UTF8") { + // Drop all NUL bytes in place, then NUL-fill the vacated tail: std::remove only compacts and leaves stale bytes behind its returned end. + std::fill( std::remove(decoded, decoded + decoded_size, '\0'), decoded + decoded_size, '\0' ); return decoded; // fine. :-) }else{ // Sigh, the hard way. At the moment with a lot of unecessary copying. :-/ - // Rule1: Make it work. Profile. Make it fast. In this order. - const std::string converted = to_utf8((charset.empty() ? "us-ascii" : charset), sv(decoded, decoded_size) ); // 1st copy... + // Rule 1: Make it work. Profile. Make it fast. In this order. + std::string converted = to_utf8((charset.empty() ? "us-ascii" : charset), sv(decoded, decoded_size) ); // 1st copy... + + // remove any NUL bytes + converted.erase( std::remove(converted.begin(), converted.end(), '\0'), converted.end() ); pEp_free(decoded); return new_string( converted.data(), converted.size() ); // copy again. :'-( } diff --git a/src/headerparser.cc b/src/headerparser.cc index f2ac76c..790d1ba 100644 --- a/src/headerparser.cc +++ b/src/headerparser.cc @@ -40,7 +40,8 @@ std::string robust_to_utf8(sv s) } // NUL bytes confuse C code, especially the Engine. - ret.erase( std::remove(ret.begin(), ret.end(), '\0'), ret.end() ); + // MIME-15: remove all C0 control characters. + ret.erase( std::remove_if(ret.begin(), ret.end(), [](char c){ return uint8_t(c) < ' '; } ), ret.end() ); return ret; }