/*
 * Hex helpers for RFC 2231 percent-encoding of MIME parameter values.
 */

/* Returns the numeric value (0..15) of one hexadecimal digit, or -1 when
   c is not a hexadecimal digit.  Both letter cases are accepted. */
static int
hex_digit_value(unsigned char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  return -1;
}

/*
 * Decodes the two hexadecimal digits hex_bytes[0] and hex_bytes[1] into a
 * single octet stored in *retval_byte.
 *
 * *retval_byte is set to 0 when either character is not a hexadecimal digit
 * (or when hex_bytes is NULL), so callers cannot tell "00" apart from invalid
 * input.  hex_bytes[1] is only read when hex_bytes[0] is a valid digit, which
 * keeps the read inside a NUL-terminated string.
 */
static void
hex_to_byte(char* retval_byte, const char* hex_bytes)
{
  int high;
  int low;

  if (retval_byte == NULL)
    return;

  *retval_byte = 0;

  if (hex_bytes == NULL)
    return;

  /* Work on unsigned char: plain char may be signed, and negative values
     must not take part in the digit tests. */
  high = hex_digit_value((unsigned char) hex_bytes[0]);
  if (high < 0)
    return;

  low = hex_digit_value((unsigned char) hex_bytes[1]);
  if (low < 0)
    return;

  *retval_byte = (char) (unsigned char) ((high << 4) | low);
}

/*
 * Encodes one octet as two upper-case hexadecimal digits:
 * *upper_hex_value receives the high nibble, *lower_hex_value the low nibble.
 * Nothing is written when either output pointer is NULL.
 */
static void
byte_to_hex(char* upper_hex_value, char* lower_hex_value, char byte)
{
  static const char hex_digits[] = "0123456789ABCDEF";
  /* Convert to unsigned first: shifting a negative char would sign-extend
     and index outside the digit table for octets >= 0x80. */
  unsigned char octet = (unsigned char) byte;

  if (upper_hex_value == NULL || lower_hex_value == NULL)
    return;

  *upper_hex_value = hex_digits[octet >> 4];
  *lower_hex_value = hex_digits[octet & 0x0F];
}

// Required by RFC2231 - src is always a utf-8 string in our case.
+LIBETPAN_EXPORT +void mailmime_parm_value_escape(char** dst, const char* src) { + if (!src || !dst) + return; + + *dst = NULL; + + int number_of_octets = strlen(src); + if (number_of_octets < 1) + return; + + const char* ESCAPED_ENCODING_PREFIX = "utf-8''"; + const int ESCAPED_ENCODING_PREFIX_LENGTH = 7; + size_t retval_len = ESCAPED_ENCODING_PREFIX_LENGTH + (number_of_octets * 3); + + char* unbroken_string = calloc(retval_len + 1); // 8 = utf-8'' + \0 + strncpy(unbroken_string, ESCAPED_ENCODING_PREFIX, retval_len); + + char* srcend = src + number_of_octets; + char* curr_src_ptr = src; + char* curr_dst_ptr = unbroken_string + ESCAPED_ENCODING_PREFIX_LENGTH; + + while (curr_src_ptr < srcend) { + char upper = 0; + char lower = 0; + byte_to_hex(&upper, &lower, *curr_src_ptr); + // detect FF? Is FF even possible? Leave it for now. + *curr_dst_ptr++ = '%'; + *curr_dst_ptr++ = upper; + *curr_dst_ptr++ = lower; + curr_src_ptr++; + } + *dst = unbroken_string; // splitting is the caller's responsibility +} + +// Required by RFC2231 +LIBETPAN_EXPORT +void mailmime_parm_value_unescape(char** dst, const char* src) { + *dst = NULL; + int percent_count = 0; + size_t srclen = strlen(src); + const char* srcpointer = src; + const char* end = src + srclen; + while (srcpointer && srcpointer < end) { + srcpointer = (strstr(srcpointer, "%")); + if (srcpointer) { + percent_count++; + srcpointer++; + } + } + if (percent_count) { + size_t new_len = srclen + percent_count; // - 1 byte for %, + 2 bytes for 2nd hex digit + char* retstr = (char*)calloc(new_len + 1, 1); + char* dstpointer = retstr; + srcpointer = src; + while (*srcpointer && srcpointer < end) { + if (*srcpointer != '%') { + *dstpointer = *srcpointer; + dstpointer++; + srcpointer++; + } + else { + srcpointer++; + if (!(*srcpointer) || (srcpointer + 1) >= end) { + // Badness! Stop! 
+ free(retstr); + return; + } + hex_to_byte(dstpointer, srcpointer); + if (*dstpointer == 0) { + free(retstr); + return; + } + dstpointer++; + srcpointer += 2; + } + } + *dst = retstr; + } +} + + + + /* x content := "Content-Type" ":" type "/" subtype *(";" parameter) @@ -1291,6 +1426,69 @@ static int mailmime_type_parse(const char * message, size_t length, return res; } +/* +x extended-initial-value := [charset] "'" [language] "'" +x extended-other-values +*/ + +LIBETPAN_EXPORT +int mailmime_extended_initial_value_parse(const char * message, size_t length, + size_t * indx, char ** result, char** charset, char** language) +{ + int r; + char* value = NULL; + size_t value_length = 0; + size_t cur_token = * indx; + + r = mailimf_atom_parse(message, length, &cur_token, &value); + + if (r != MAILIMF_NO_ERROR) + return r; + + if (value) + value_length = strlen(value); + + // ok, let's see what happens here... + char* end_charset = strstr(value, "'"); + if (end_charset == NULL || (value + value_length <= end_charset + 1)) { + free(value); + return MAILIMF_ERROR_PARSE; + } + char* end_lang = strstr(end_charset + 1, "'"); + if (end_lang == NULL || (value + value_length < end_lang)) { // could be empty after + free(value); + return MAILIMF_ERROR_PARSE; + } + + size_t charset_len = end_charset - value; + size_t lang_len = end_lang - (end_charset + 1); + size_t retval_len = strlen(value) - (charset_len + lang_len + 2); + + char* _charset = calloc(charset_len + 1, 1); + char* _lang = calloc(lang_len + 1, 1); + char* _value = calloc(retval_len + 1, 1); + + if (charset_len > 0) { + strncpy(_charset, value, charset_len); + } + if (lang_len > 0) { + strncpy(_lang, end_charset + 1, lang_len); + } + if (retval_len > 0) { + strncpy(_value, end_lang + 1, retval_len); + } + + free(value); + + * result = _value; + * charset = _charset; + * language = _lang; + * indx = cur_token; + + return MAILIMF_NO_ERROR; +} + + /* x value := token / quoted-string */ diff --git 
a/src/low-level/mime/mailmime.h b/src/low-level/mime/mailmime.h index b86f0a7..236fa55 100644 --- a/src/low-level/mime/mailmime.h +++ b/src/low-level/mime/mailmime.h @@ -111,6 +111,10 @@ int mailmime_language_parse(const char * message, size_t length, size_t * indx, struct mailmime_language ** result); +LIBETPAN_EXPORT +int mailmime_extended_initial_value_parse(const char * message, size_t length, + size_t * indx, char ** result, char** charset, char** language); + #ifdef __cplusplus } #endif diff --git a/src/low-level/mime/mailmime_disposition.c b/src/low-level/mime/mailmime_disposition.c index 2ddc60a..3d64a04 100644 --- a/src/low-level/mime/mailmime_disposition.c +++ b/src/low-level/mime/mailmime_disposition.c @@ -39,6 +39,7 @@ #include "mailmime_disposition.h" #include "mailmime.h" +#include "charconv.h" #include #include @@ -422,6 +423,223 @@ mailmime_disposition_parm_parse(const char * message, size_t length, return res; } +/* +// filename-parm := "filename" "=" value +*/ + +static int +mailmime_extended_parm_parse(const char * message, size_t length, + char * key, size_t * indx, char ** result) +{ + int r; + size_t cur_token; + char* built_str = NULL; + size_t built_len = 0; + + cur_token = * indx; + + r = mailimf_token_case_insensitive_parse(message, length, + &cur_token, key); + if (r != MAILIMF_NO_ERROR) + return r; + + // Ok, we know it's of this type. 
+ // So let's see if it's encoded or extended or both + + int encoded = 0; + int extended = 0; + + // Find out if message is extended, encoded, or both + r = mailimf_char_parse(message, length, &cur_token, '*'); + + if (r == MAILIMF_NO_ERROR) { + r = mailimf_char_parse(message, length, &cur_token, '0'); + + if (r == MAILIMF_NO_ERROR) { + extended = 1; + r = mailimf_char_parse(message, length, &cur_token, '*'); + if (r == MAILIMF_NO_ERROR) + encoded = 1; + else if (r != MAILIMF_ERROR_PARSE) + return r; + } + else if (r != MAILIMF_ERROR_PARSE) + return r; + } + else //if (r != MAILIMF_ERROR_PARSE) + return r; + + r = mailimf_unstrict_char_parse(message, length, &cur_token, '='); + if (r != MAILIMF_NO_ERROR) + return r; + + r = mailimf_cfws_parse(message, length, &cur_token); + if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) + return r; + + // Ok, let's go. + if (encoded || extended) { + char* _charset = NULL; + char* _lang = NULL; + + + // Get the first of either + if (encoded) { + r = mailmime_extended_initial_value_parse(message, length, &cur_token, &built_str, &_charset, + &_lang); + if (r != MAILIMF_NO_ERROR) + return r; + } + else if (extended) { + r = mailmime_value_parse(message, length, &cur_token, &built_str); + if (r != MAILIMF_NO_ERROR) + return r; + } + // Ok, we have an initial string and know it's extended, so let's roll. + if (extended && built_str) { + built_len = strlen(built_str); + + while (1) { + + r = mailimf_unstrict_char_parse(message, length, &cur_token, ';'); + if (r != MAILIMF_NO_ERROR && r != MAILIMF_ERROR_PARSE) + return r; + + r = mailimf_cfws_parse(message, length, &cur_token); + if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) + return r; + + // FIXME: this is where we have to check and see what really happens... + r = mailimf_token_case_insensitive_parse(message, length, + &cur_token, key); + if (r == MAILIMF_ERROR_PARSE) + break; + + if (r != MAILIMF_NO_ERROR) + return r; + + // Ok, we know it's of this type. 
+ // So let's see if it's encoded or extended or both + + // int part_encoded = 0; + // int part_extended = 0; + + // Find out if message part is extended, encoded, or both + r = mailimf_char_parse(message, length, &cur_token, '*'); + + if (r == MAILIMF_NO_ERROR) { + uint32_t part_num = 0; + r = mailimf_number_parse(message, length, &cur_token, &part_num); + + if (r == MAILIMF_NO_ERROR) { +// part_extended = 1; + r = mailimf_char_parse(message, length, &cur_token, '*'); + // See RFC2231, Section 4.1. FIXME - it's possible to have unencoded parts interspersed + // with encoded post per RFC, so this may not be smart. Depends on if decoding is an issue with + // interspersed ASCII segments. + // However, at this point, we know that the first part of the parameter either contained encoding information, + // or it shouldn't be encoded. Also, it seems very doubtful most clients would go to the trouble of mixing encoded + // and non-encoded information when splitting the string. + // The fix right now is to ignore the encoding flag at this point, as we will either decode the whole string, + // or not at all. + } + else if (r != MAILIMF_ERROR_PARSE) + return r; + } + else if (r != MAILIMF_ERROR_PARSE) + return r; + + r = mailimf_unstrict_char_parse(message, length, &cur_token, '='); + if (r != MAILIMF_NO_ERROR) + return r; + + r = mailimf_cfws_parse(message, length, &cur_token); + if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) + return r; + + // Ok, let's go. +// if (part_encoded || part_extended) { + + char* part_str = NULL; + + // See RFC2231, Section 4.1. FIXME - it's possible to have unencoded parts interspersed + // with encoded post per RFC, so this may not be smart. Depends on if decoding is an issue with + // interspersed ASCII segments. 
+ r = mailmime_value_parse(message, length, &cur_token, &part_str); + if (r != MAILIMF_NO_ERROR) + return r; + + size_t part_size = strlen(part_str); + size_t new_size = built_len + part_size + 1; + + char* new_str = NULL; + new_str = realloc((void*)built_str, new_size); + if (new_str) { + strncat(new_str, part_str, part_size); + built_str = new_str; + free(part_str); + part_str = NULL; + } + else { + free(built_str); + return MAILIMF_ERROR_MEMORY; + } + +// } + built_len = strlen(built_str); + } + } + + if (encoded && built_str && _charset && _charset[0] != '\0') { + char* replace_str = NULL; + mailmime_parm_value_unescape(&replace_str, built_str); + + if (replace_str) { + free(built_str); + built_str = replace_str; + replace_str = NULL; + } + + if (strcasecmp(_charset, "utf-8") != 0 && + strcasecmp(_charset, "utf8") != 0) { + + // best effort + r = charconv("utf-8", _charset, built_str, + strlen(built_str), &replace_str); + + switch(r) { + case MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET: + r = charconv("utf-8", "iso-8859-1", built_str, + strlen(built_str), &replace_str); + break; + case MAIL_CHARCONV_ERROR_MEMORY: + return MAILIMF_ERROR_MEMORY; + case MAIL_CHARCONV_ERROR_CONV: + return MAILIMF_ERROR_PARSE; + } + switch (r) { + case MAIL_CHARCONV_ERROR_MEMORY: + return MAILIMF_ERROR_MEMORY; + case MAIL_CHARCONV_ERROR_CONV: + return MAILIMF_ERROR_PARSE; + } + } + + if (replace_str) { + built_str = replace_str; + replace_str = NULL; + } + } + } + + * indx = cur_token; + * result = built_str; + + return MAILIMF_NO_ERROR; +} + + + /* filename-parm := "filename" "=" value */ @@ -435,23 +653,29 @@ mailmime_filename_parm_parse(const char * message, size_t length, size_t cur_token; cur_token = * indx; - - r = mailimf_token_case_insensitive_parse(message, length, - &cur_token, "filename"); - if (r != MAILIMF_NO_ERROR) - return r; - - r = mailimf_unstrict_char_parse(message, length, &cur_token, '='); - if (r != MAILIMF_NO_ERROR) - return r; - r = mailimf_cfws_parse(message, 
enum {
  BREAK_FILENAME_MAX_PARTS = 10000, /* sections are numbered 0..9999 */
  BREAK_FILENAME_KEY_SIZE = 32      /* room for " filename*9999*=" and NUL */
};

/* Returns non-zero when p points at a "%XX" escape whose value is a UTF-8
   continuation octet (0x80..0xBF, i.e. first hex digit 8, 9, A or B). */
static int
break_filename_is_continuation(const char* p)
{
  if (p[0] != '%')
    return 0;
  return p[1] == '8' || p[1] == '9' ||
    p[1] == 'A' || p[1] == 'a' || p[1] == 'B' || p[1] == 'b';
}

/* Chooses how many characters of src (which has `remaining` characters left)
   go into a section with room for `available` characters.  For an encoded
   value a "%XX" escape is never split, and where possible the cut is moved
   back so that the escapes of one UTF-8 character stay together. */
static size_t
break_filename_chunk_len(const char* src, size_t remaining, size_t available,
    int is_encoded)
{
  size_t cut;
  size_t aligned;
  int i;

  if (remaining <= available)
    return remaining;

  cut = available;
  if (!is_encoded)
    return cut;

  /* Back up to the '%' when the cut falls inside an escape. */
  if (cut >= 1 && src[cut - 1] == '%')
    cut -= 1;
  else if (cut >= 2 && src[cut - 2] == '%')
    cut -= 2;
  aligned = cut;

  /* A UTF-8 character has at most three continuation octets. */
  for (i = 0; i < 3; i++) {
    if (!break_filename_is_continuation(src + cut))
      break;
    if (cut < 3 || src[cut - 3] != '%')
      break;
    cut -= 3;
  }
  if (cut == 0 || break_filename_is_continuation(src + cut))
    cut = aligned; /* no character boundary in reach: keep escapes whole */

  return cut;
}

/* Renders all sections of src.  When out is NULL nothing is written and only
   the size is computed.  Returns the number of bytes rendered (without a
   terminating NUL), or 0 when the value cannot be rendered within `length`
   characters per line or needs too many sections. */
static size_t
break_filename_render(char* out, const char* src, size_t length,
    int is_encoded)
{
  const char* equals_str = is_encoded ? "*=" : "=\"";
  const char* more_end = is_encoded ? ";\r\n" : "\";\r\n";
  const char* last_end = is_encoded ? "\r\n" : "\"\r\n";
  const size_t more_end_len = strlen(more_end);
  const size_t min_chunk = is_encoded ? 3 : 1; /* one whole "%XX" escape */
  size_t remaining = strlen(src);
  size_t total = 0;
  unsigned int part;

  for (part = 0; remaining > 0; part++) {
    char key[BREAK_FILENAME_KEY_SIZE];
    int key_len;
    size_t available;
    size_t chunk;
    size_t end_len;
    const char* end_str;

    if (part >= BREAK_FILENAME_MAX_PARTS)
      return 0;

    key_len = snprintf(key, sizeof(key), " filename*%u%s", part, equals_str);
    if (key_len < 0 || (size_t) key_len >= sizeof(key))
      return 0;

    /* Every line must hold its key, the section terminator and at least
       one unit of payload, otherwise no progress is possible. */
    if ((size_t) key_len + more_end_len + min_chunk > length)
      return 0;
    available = length - (size_t) key_len - more_end_len;

    chunk = break_filename_chunk_len(src, remaining, available, is_encoded);
    end_str = (chunk == remaining) ? last_end : more_end;
    end_len = strlen(end_str);

    if (out != NULL) {
      memcpy(out + total, key, (size_t) key_len);
      memcpy(out + total + (size_t) key_len, src, chunk);
      memcpy(out + total + (size_t) key_len + chunk, end_str, end_len);
    }

    total += (size_t) key_len + chunk + end_len;
    src += chunk;
    remaining -= chunk;
  }

  return total;
}

/*
 * Splits a filename parameter value into RFC 2231 continuation sections.
 *
 * extended_filename: receives a newly allocated NUL-terminated string that
 *                    the caller must free(); set to NULL on failure.
 * filename_str:      the value to split.  When is_encoded is non-zero it is
 *                    expected to be percent-encoded already (e.g. the output
 *                    of mailmime_parm_value_escape()).
 * length:            maximum number of characters per output line, counting
 *                    the leading space and the line terminator.
 * is_encoded:        non-zero to emit "filename*N*=" sections, zero to emit
 *                    quoted "filename*N=" sections.
 *
 * Each section is written on its own line:
 *   encoded:    " filename*N*=<chunk>;\r\n"    (last line: "<chunk>\r\n")
 *   unencoded:  " filename*N=\"<chunk>\";\r\n" (last line: "<chunk>\"\r\n")
 *
 * Fails (leaving *extended_filename NULL) when filename_str is NULL or empty,
 * when length is too small to hold a key plus payload, when more than 10000
 * sections would be needed, or when allocation fails.
 *
 * NOTE(review): unencoded chunks are copied verbatim between the quotes; a
 * filename containing '"' or '\\' would need quoted-pair escaping first.
 */
static void
break_filename(char** extended_filename, const char* filename_str,
    size_t length, int is_encoded)
{
  size_t needed;
  char* rendered;

  if (extended_filename == NULL)
    return;

  *extended_filename = NULL;

  if (filename_str == NULL || length < 1)
    return;

  /* First pass only measures, second pass writes. */
  needed = break_filename_render(NULL, filename_str, length, is_encoded);
  if (needed == 0)
    return;

  rendered = malloc(needed + 1);
  if (rendered == NULL)
    return;

  break_filename_render(rendered, filename_str, length, is_encoded);
  rendered[needed] = '\0';

  *extended_filename = rendered;
}