IOSAD-218
Krista Bennett 5 years ago
parent 544c863c43
commit deaf46dae9

@ -189,6 +189,141 @@ mailmime_composite_type_parse(const char * message, size_t length,
return res;
}
/*
  hex_to_byte() decodes the two hexadecimal digits at hex_bytes[0] and
  hex_bytes[1] (upper or lower case) into a single octet.

  @param retval_byte  receives the decoded octet; set to 0 when either
                      character is not a hexadecimal digit
  @param hex_bytes    points to at least two readable characters, unless the
                      first one is not a hex digit (the second is then not
                      read)

  A result of 0 is therefore ambiguous between "invalid input" and the
  literal escape "00"; the callers in this file treat 0 as an error.
*/
static void
hex_to_byte(char* retval_byte, const char* hex_bytes) {
  unsigned int value = 0;
  int i;

  if (!retval_byte)
    return;
  *retval_byte = 0;
  if (!hex_bytes)
    return;

  for (i = 0; i < 2; i++) {
    /* Work on unsigned char: the input may contain octets >= 0x80, which
       must never reach signed-char arithmetic or the <ctype.h> functions. */
    unsigned char c = (unsigned char) hex_bytes[i];
    unsigned int nibble;

    if (c >= '0' && c <= '9')
      nibble = (unsigned int) (c - '0');
    else if (c >= 'a' && c <= 'f')
      nibble = (unsigned int) (c - 'a') + 10;
    else if (c >= 'A' && c <= 'F')
      nibble = (unsigned int) (c - 'A') + 10;
    else
      return; /* *retval_byte is still 0 */

    value = (value << 4) | nibble;
  }

  *retval_byte = (char) value;
}
/*
  byte_to_hex() converts one octet into its two upper-case hexadecimal
  digits.

  @param upper_hex_value  receives the digit for the high nibble
  @param lower_hex_value  receives the digit for the low nibble
  @param byte             octet to convert

  If either output pointer is NULL nothing is written.
*/
static void
byte_to_hex(char* upper_hex_value, char* lower_hex_value, char byte) {
  static const char hex_digits[] = "0123456789ABCDEF";
  /* Convert to unsigned first: with a signed char, octets >= 0x80 (every
     non-ASCII UTF-8 octet) would be sign-extended by the right shift. */
  unsigned char octet = (unsigned char) byte;

  if (!upper_hex_value || !lower_hex_value)
    return;

  *upper_hex_value = hex_digits[(octet >> 4) & 0x0F];
  *lower_hex_value = hex_digits[octet & 0x0F];
}
// Required by RFC2231 - src is always a utf-8 string in our case.
/*
  mailmime_parm_value_escape() builds the RFC 2231 extended form of a
  parameter value: the prefix "utf-8''" followed by every octet of src
  written as "%XX" (upper-case hex).

  @param dst  receives a newly allocated, NUL-terminated string that the
              caller must free(); set to NULL when src is NULL or empty,
              or when the buffer cannot be allocated
  @param src  NUL-terminated input string

  The result is one unbroken string; splitting it into continuation
  segments is the caller's responsibility.
*/
LIBETPAN_EXPORT
void mailmime_parm_value_escape(char** dst, const char* src) {
  static const char ESCAPED_ENCODING_PREFIX[] = "utf-8''";
  const size_t prefix_len = sizeof(ESCAPED_ENCODING_PREFIX) - 1;
  size_t number_of_octets;
  size_t retval_len;
  char* unbroken_string;
  char* curr_dst_ptr;
  const char* curr_src_ptr;
  const char* srcend;

  if (!dst)
    return;
  *dst = NULL;
  if (!src)
    return;

  number_of_octets = strlen(src);
  if (number_of_octets < 1)
    return;

  /* Each octet expands to three characters; refuse sizes that would wrap. */
  if (number_of_octets > ((size_t) -1 - prefix_len - 1) / 3)
    return;
  retval_len = prefix_len + (number_of_octets * 3);

  unbroken_string = calloc(retval_len + 1, 1); /* + 1 for the terminator */
  if (!unbroken_string)
    return;

  memcpy(unbroken_string, ESCAPED_ENCODING_PREFIX, prefix_len);

  srcend = src + number_of_octets;
  curr_dst_ptr = unbroken_string + prefix_len;
  for (curr_src_ptr = src; curr_src_ptr < srcend; curr_src_ptr++) {
    char upper = 0;
    char lower = 0;

    byte_to_hex(&upper, &lower, *curr_src_ptr);
    *curr_dst_ptr++ = '%';
    *curr_dst_ptr++ = upper;
    *curr_dst_ptr++ = lower;
  }

  *dst = unbroken_string; // splitting is the caller's responsibility
}
// Required by RFC2231
/*
  mailmime_parm_value_unescape() decodes the "%XX" escapes of an RFC 2231
  extended parameter value.

  @param dst  receives a newly allocated, NUL-terminated decoded string that
              the caller must free(). It is set to NULL when
                - src is NULL,
                - src contains no '%' at all (nothing to decode),
                - an escape is truncated or is not two hex digits,
                - an escape decodes to the NUL octet, or
                - memory cannot be allocated.
  @param src  NUL-terminated input string
*/
LIBETPAN_EXPORT
void mailmime_parm_value_unescape(char** dst, const char* src) {
  size_t srclen;
  const char* srcpointer;
  const char* end;
  char* retstr;
  char* dstpointer;

  if (!dst)
    return;
  *dst = NULL;
  if (!src)
    return;

  if (strchr(src, '%') == NULL)
    return;

  srclen = strlen(src);
  end = src + srclen;

  /* Decoding only ever shrinks the string (3 characters become 1 octet),
     so the source length is a sufficient upper bound. */
  retstr = calloc(srclen + 1, 1);
  if (!retstr)
    return;

  dstpointer = retstr;
  srcpointer = src;
  while (srcpointer < end) {
    if (*srcpointer != '%') {
      *dstpointer++ = *srcpointer++;
      continue;
    }

    srcpointer++; /* skip '%' */
    if ((size_t) (end - srcpointer) < 2) {
      // Badness! Truncated escape.
      free(retstr);
      return;
    }

    hex_to_byte(dstpointer, srcpointer);
    if (*dstpointer == 0) {
      /* invalid hex digits, or an embedded NUL: reject the whole value */
      free(retstr);
      return;
    }
    dstpointer++;
    srcpointer += 2;
  }

  *dst = retstr;
}
/*
x content := "Content-Type" ":" type "/" subtype
*(";" parameter)
@ -1291,6 +1426,69 @@ static int mailmime_type_parse(const char * message, size_t length,
return res;
}
/*
x extended-initial-value := [charset] "'" [language] "'"
x extended-other-values
*/
LIBETPAN_EXPORT
int mailmime_extended_initial_value_parse(const char * message, size_t length,
size_t * indx, char ** result, char** charset, char** language)
{
int r;
char* value = NULL;
size_t value_length = 0;
size_t cur_token = * indx;
r = mailimf_atom_parse(message, length, &cur_token, &value);
if (r != MAILIMF_NO_ERROR)
return r;
if (value)
value_length = strlen(value);
// ok, let's see what happens here...
char* end_charset = strstr(value, "'");
if (end_charset == NULL || (value + value_length <= end_charset + 1)) {
free(value);
return MAILIMF_ERROR_PARSE;
}
char* end_lang = strstr(end_charset + 1, "'");
if (end_lang == NULL || (value + value_length < end_lang)) { // could be empty after
free(value);
return MAILIMF_ERROR_PARSE;
}
size_t charset_len = end_charset - value;
size_t lang_len = end_lang - (end_charset + 1);
size_t retval_len = strlen(value) - (charset_len + lang_len + 2);
char* _charset = calloc(charset_len + 1, 1);
char* _lang = calloc(lang_len + 1, 1);
char* _value = calloc(retval_len + 1, 1);
if (charset_len > 0) {
strncpy(_charset, value, charset_len);
}
if (lang_len > 0) {
strncpy(_lang, end_charset + 1, lang_len);
}
if (retval_len > 0) {
strncpy(_value, end_lang + 1, retval_len);
}
free(value);
* result = _value;
* charset = _charset;
* language = _lang;
* indx = cur_token;
return MAILIMF_NO_ERROR;
}
/*
x value := token / quoted-string
*/

@ -111,6 +111,10 @@ int mailmime_language_parse(const char * message, size_t length,
size_t * indx,
struct mailmime_language ** result);
/*
  mailmime_extended_initial_value_parse() parses the initial value of an
  RFC 2231 extended parameter:

    extended-initial-value := [charset] "'" [language] "'"
                              extended-other-values

  @param message   buffer being parsed
  @param length    size of message
  @param indx      in: position to start parsing at; out: position after the
                   parsed value (updated on success)
  @param result    receives the text following the second quote
  @param charset   receives the text preceding the first quote
  @param language  receives the text between the two quotes

  The three output strings are allocated by the function; the caller is
  responsible for freeing them.

  @return MAILIMF_NO_ERROR on success, a MAILIMF_ERROR_XXX code otherwise
*/
LIBETPAN_EXPORT
int mailmime_extended_initial_value_parse(const char * message, size_t length,
size_t * indx, char ** result, char** charset, char** language);
#ifdef __cplusplus
}
#endif

@ -39,6 +39,7 @@
#include "mailmime_disposition.h"
#include "mailmime.h"
#include "charconv.h"
#include <ctype.h>
#include <stdlib.h>
@ -422,6 +423,223 @@ mailmime_disposition_parm_parse(const char * message, size_t length,
return res;
}
/*
  RFC 2231 extended parameter, for a given parameter name <key>:

    <key>*=charset'language'encoded-value           (encoded, single segment)
    <key>*0=value; <key>*1=value; ...               (continuations)
    <key>*0*=charset'language'encoded; <key>*1*=... (encoded continuations)

  mailmime_extended_parm_parse() parses such a parameter starting at * indx,
  concatenates all continuation segments and, when the first segment is
  encoded and names a charset, percent-decodes the value and converts it
  to UTF-8.

  @param message  buffer being parsed
  @param length   size of message
  @param key      parameter name to match (case-insensitive), e.g. "filename"
  @param indx     in: start position; out: position after the last segment
                  consumed (only updated on success)
  @param result   receives the newly allocated value; caller must free()

  @return MAILIMF_NO_ERROR on success. MAILIMF_ERROR_PARSE when the input is
          not an extended parameter for key (callers may then fall back to
          the plain "key=value" form), MAILIMF_ERROR_MEMORY on allocation
          failure, or the error reported by the underlying parser.

  NOTE(review): whether a segment other than the first carries the '*'
  encoding flag is ignored; the whole value is either decoded or not,
  depending on the first segment (see RFC 2231, section 4.1). Segment
  numbers are parsed but not checked; segments are assumed to be in order.
*/
static int
mailmime_extended_parm_parse(const char * message, size_t length,
    char * key, size_t * indx, char ** result)
{
  int r;
  int res;
  size_t cur_token;
  char * built_str = NULL;
  size_t built_len = 0;
  char * part_str = NULL;
  char * _charset = NULL;
  char * _lang = NULL;
  int encoded = 0;
  int extended = 0;

  cur_token = * indx;

  r = mailimf_token_case_insensitive_parse(message, length,
      &cur_token, key);
  if (r != MAILIMF_NO_ERROR)
    return r;

  /* Without a '*' right after the key this is not an extended parameter. */
  r = mailimf_char_parse(message, length, &cur_token, '*');
  if (r != MAILIMF_NO_ERROR)
    return r;

  r = mailimf_char_parse(message, length, &cur_token, '0');
  if (r == MAILIMF_NO_ERROR) {
    /* key*0 ... : first of possibly several segments */
    extended = 1;
    r = mailimf_char_parse(message, length, &cur_token, '*');
    if (r == MAILIMF_NO_ERROR)
      encoded = 1;
    else if (r != MAILIMF_ERROR_PARSE)
      return r;
  }
  else if (r == MAILIMF_ERROR_PARSE) {
    /* key*= ... : single encoded value without continuations */
    encoded = 1;
  }
  else
    return r;

  r = mailimf_unstrict_char_parse(message, length, &cur_token, '=');
  if (r != MAILIMF_NO_ERROR)
    return r;

  r = mailimf_cfws_parse(message, length, &cur_token);
  if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE))
    return r;

  /* First (or only) segment. Nothing is owned by us yet if this fails. */
  if (encoded)
    r = mailmime_extended_initial_value_parse(message, length, &cur_token,
        &built_str, &_charset, &_lang);
  else
    r = mailmime_value_parse(message, length, &cur_token, &built_str);
  if (r != MAILIMF_NO_ERROR)
    return r;

  /* Collect the continuation segments, if any. */
  if (extended && built_str) {
    built_len = strlen(built_str);

    while (1) {
      /* Parse the next segment on a scratch position so that a failed
         lookahead does not swallow the ';' that introduces the next,
         unrelated parameter. */
      size_t part_token = cur_token;
      uint32_t part_num = 0;
      size_t part_size;
      char * new_str;

      r = mailimf_unstrict_char_parse(message, length, &part_token, ';');
      if (r != MAILIMF_NO_ERROR && r != MAILIMF_ERROR_PARSE) {
        res = r;
        goto err;
      }

      r = mailimf_cfws_parse(message, length, &part_token);
      if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) {
        res = r;
        goto err;
      }

      r = mailimf_token_case_insensitive_parse(message, length,
          &part_token, key);
      if (r == MAILIMF_ERROR_PARSE)
        break; /* no further segment; cur_token is left untouched */
      if (r != MAILIMF_NO_ERROR) {
        res = r;
        goto err;
      }

      r = mailimf_char_parse(message, length, &part_token, '*');
      if (r == MAILIMF_NO_ERROR) {
        r = mailimf_number_parse(message, length, &part_token, &part_num);
        if (r == MAILIMF_NO_ERROR) {
          /* Optional per-segment encoding flag: consumed if present, its
             absence is not an error (see NOTE(review) above). */
          (void) mailimf_char_parse(message, length, &part_token, '*');
        }
        else if (r != MAILIMF_ERROR_PARSE) {
          res = r;
          goto err;
        }
      }
      else if (r != MAILIMF_ERROR_PARSE) {
        res = r;
        goto err;
      }

      r = mailimf_unstrict_char_parse(message, length, &part_token, '=');
      if (r != MAILIMF_NO_ERROR) {
        res = r;
        goto err;
      }

      r = mailimf_cfws_parse(message, length, &part_token);
      if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) {
        res = r;
        goto err;
      }

      r = mailmime_value_parse(message, length, &part_token, &part_str);
      if (r != MAILIMF_NO_ERROR) {
        res = r;
        goto err;
      }

      part_size = strlen(part_str);
      new_str = realloc(built_str, built_len + part_size + 1);
      if (new_str == NULL) {
        /* built_str is still valid here and is released at err */
        res = MAILIMF_ERROR_MEMORY;
        goto err;
      }
      memcpy(new_str + built_len, part_str, part_size + 1);
      built_str = new_str;
      built_len += part_size;

      free(part_str);
      part_str = NULL;

      cur_token = part_token; /* commit this segment */
    }
  }

  /* Decode: percent-unescape, then convert to UTF-8 if necessary. */
  if (encoded && built_str && _charset && _charset[0] != '\0') {
    char * replace_str = NULL;

    mailmime_parm_value_unescape(&replace_str, built_str);
    if (replace_str) {
      free(built_str);
      built_str = replace_str;
      replace_str = NULL;
    }

    if (strcasecmp(_charset, "utf-8") != 0 &&
        strcasecmp(_charset, "utf8") != 0) {
      // best effort
      r = charconv("utf-8", _charset, built_str,
          strlen(built_str), &replace_str);
      if (r == MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET) {
        r = charconv("utf-8", "iso-8859-1", built_str,
            strlen(built_str), &replace_str);
      }
      if (r == MAIL_CHARCONV_ERROR_MEMORY) {
        res = MAILIMF_ERROR_MEMORY;
        goto err;
      }
      if (r == MAIL_CHARCONV_ERROR_CONV) {
        res = MAILIMF_ERROR_PARSE;
        goto err;
      }
      if (replace_str) {
        free(built_str);
        built_str = replace_str;
        replace_str = NULL;
      }
    }
  }

  free(_charset);
  free(_lang);

  * indx = cur_token;
  * result = built_str;

  return MAILIMF_NO_ERROR;

 err:
  free(part_str);
  free(built_str);
  free(_charset);
  free(_lang);
  return res;
}
/*
filename-parm := "filename" "=" value
*/
@ -435,23 +653,29 @@ mailmime_filename_parm_parse(const char * message, size_t length,
size_t cur_token;
cur_token = * indx;
r = mailimf_token_case_insensitive_parse(message, length,
&cur_token, "filename");
if (r != MAILIMF_NO_ERROR)
return r;
r = mailimf_unstrict_char_parse(message, length, &cur_token, '=');
if (r != MAILIMF_NO_ERROR)
return r;
r = mailimf_cfws_parse(message, length, &cur_token);
if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE))
return r;
r = mailmime_extended_parm_parse(message, length, "filename", &cur_token, &value);
r = mailmime_value_parse(message, length, &cur_token, &value);
if (r != MAILIMF_NO_ERROR)
return r;
if (r != MAILIMF_NO_ERROR) {
r = mailimf_token_case_insensitive_parse(message, length,
&cur_token, "filename");
if (r != MAILIMF_NO_ERROR)
return r;
r = mailimf_unstrict_char_parse(message, length, &cur_token, '=');
if (r != MAILIMF_NO_ERROR)
return r;
r = mailimf_cfws_parse(message, length, &cur_token);
if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE))
return r;
r = mailmime_value_parse(message, length, &cur_token, &value);
if (r != MAILIMF_NO_ERROR)
return r;
}
* indx = cur_token;
* result = value;

@ -428,6 +428,95 @@ static int mailmime_disposition_write_driver(int (* do_write)(void *, const char
return MAILIMF_NO_ERROR;
}
/*
  break_filename() splits a filename parameter value into RFC 2231
  continuation segments, one per output line:

    not encoded:   filename*0="chunk";CRLF  filename*1="chunk"CRLF
    encoded:       filename*0*=chunk;CRLF   filename*1*=chunk CRLF

  Every line starts with a single space; every line but the last ends with
  ";\r\n", the last one with "\r\n".

  @param extended_filename  receives a newly allocated, NUL-terminated string
                            holding all lines; caller must free(). Set to
                            NULL on failure (see below). An empty
                            filename_str yields an empty string.
  @param filename_str       value to split. When is_encoded is set this is
                            expected to be the output of
                            mailmime_parm_value_escape(), i.e. "utf-8''"
                            followed by "%XX" escapes.
  @param length             maximum length of one output line, including the
                            segment name and the trailing ";\r\n"
  @param is_encoded         non-zero if filename_str is percent-encoded

  For encoded values a "%XX" escape is never cut in two, and, where the line
  budget allows, the octets of one multi-octet UTF-8 character are kept in
  the same segment.

  Fails (NULL result) when length leaves no room for any payload, when more
  than 10000 segments would be needed, or when memory runs out.

  NOTE(review): unencoded values are copied verbatim between the quotes;
  '"' and '\\' inside filename_str are not escaped here.
*/
static void
break_filename(char** extended_filename, const char* filename_str,
               size_t length, int is_encoded) {
  const char* equals_str = (is_encoded ? "*=" : "=\"");
  const char* end_str = (is_encoded ? ";\r\n" : "\";\r\n");
  const char* last_end_str = (is_encoded ? "\r\n" : "\"\r\n");
  const size_t end_len = strlen(end_str);
  const char* src;
  size_t remaining;
  char* out;
  size_t out_len = 0;
  size_t out_cap;
  unsigned int line_no;

  if (!extended_filename)
    return;
  *extended_filename = NULL;
  if (!filename_str || length < 1)
    return;

  src = filename_str;
  remaining = strlen(filename_str);

  /* Initial guess; grown on demand below. */
  out_cap = remaining + 64;
  out = calloc(out_cap, 1);
  if (!out)
    return;

  for (line_no = 0; remaining > 0; line_no++) {
    char key_buf[32];
    int key_len;
    size_t budget;
    size_t chunk;
    size_t tail_len;
    size_t need;
    const char* tail;

    if (line_no > 9999)
      goto fail;

    key_len = snprintf(key_buf, sizeof key_buf, " filename*%u%s",
                       line_no, equals_str);
    if (key_len < 0 || (size_t) key_len >= sizeof key_buf)
      goto fail;

    /* Room left on this line for payload characters. */
    if (length <= (size_t) key_len + end_len)
      goto fail;
    budget = length - (size_t) key_len - end_len;
    chunk = (remaining < budget) ? remaining : budget;

    if (is_encoded && chunk < remaining) {
      size_t aligned;
      int backoff;

      /* Never cut through a "%XX" triplet. */
      if (chunk >= 1 && src[chunk - 1] == '%')
        chunk -= 1;
      else if (chunk >= 2 && src[chunk - 2] == '%')
        chunk -= 2;
      aligned = chunk;

      /* Best effort: if the next escape is a UTF-8 continuation octet
         (10xxxxxx, i.e. first hex digit 8..B), move the break in front of
         the character it belongs to (at most 3 octets back). */
      for (backoff = 0; backoff < 3 && chunk >= 3; backoff++) {
        char digit;

        if (src[chunk] != '%' || src[chunk - 3] != '%')
          break;
        digit = src[chunk + 1];
        if (digit != '8' && digit != '9' &&
            digit != 'A' && digit != 'B' &&
            digit != 'a' && digit != 'b')
          break;
        chunk -= 3;
      }
      if (chunk == 0)
        chunk = aligned; /* line too short to keep the character whole */
      if (chunk == 0)
        goto fail;       /* line too short for even one escape */
    }

    tail = (chunk == remaining) ? last_end_str : end_str;
    tail_len = strlen(tail);

    need = out_len + (size_t) key_len + chunk + tail_len + 1;
    if (need > out_cap) {
      size_t new_cap = out_cap * 2;
      char* grown;

      if (new_cap < need)
        new_cap = need;
      grown = realloc(out, new_cap);
      if (!grown)
        goto fail;
      out = grown;
      out_cap = new_cap;
    }

    memcpy(out + out_len, key_buf, (size_t) key_len);
    out_len += (size_t) key_len;
    memcpy(out + out_len, src, chunk);
    out_len += chunk;
    memcpy(out + out_len, tail, tail_len);
    out_len += tail_len;
    out[out_len] = '\0';

    src += chunk;
    remaining -= chunk;
  }

  *extended_filename = out;
  return;

 fail:
  free(out);
}
static int
mailmime_disposition_param_write_driver(int (* do_write)(void *, const char *, size_t), void * data, int * col,
struct mailmime_disposition_parm * param)
@ -435,10 +524,35 @@ mailmime_disposition_param_write_driver(int (* do_write)(void *, const char *, s
size_t len;
char sizestr[20];
int r;
int has_extended_filename = 0;
int has_encoded_filename = 0;
char* extended_filename = NULL;
switch (param->pa_type) {
case MAILMIME_DISPOSITION_PARM_FILENAME:
len = strlen("filename=") + strlen(param->pa_data.pa_filename);
char* fname = param->pa_data.pa_filename;
const int _MIME_LINE_LENGTH = 72;
const int _QUOTES_PLUS_SPACE_LEN = 3;
size_t filename_strlen = strlen(fname);
size_t filename_key_len = strlen("filename=");
if (strstr(fname, "utf-8''") == fname) {
// we're in for some fun here...
has_encoded_filename = true;
filename_key_len++;
}
if ((filename_strlen + filename_keylen + _QUOTES_PLUS_SPACE_LEN) > _MIME_LINE_LENGTH)
has_extended_filename = 1;
if (!has_extended_filename) {
if (has_encoded_filename)
len = strlen("filename=") + strlen(fname);
else
len = strlen("filename*=") + strlen(fname);
}
else {
extended_filename = break_filename(&extended_filename, fname, _MIME_LINE_LENGTH);
// This one contains all of the
}
break;
case MAILMIME_DISPOSITION_PARM_CREATION_DATE:

Loading…
Cancel
Save