/*
|
|
* libEtPan! -- a mail stuff library
|
|
*
|
|
* Copyright (C) 2001, 2005 - DINH Viet Hoa
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the name of the libEtPan! project nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* $Id: mailmime_decode.c,v 1.37 2010/11/16 20:52:28 hoa Exp $
|
|
*/
|
|
|
|
/*
|
|
RFC 2047 : MIME (Multipurpose Internet Mail Extensions) Part Three:
|
|
Message Header Extensions for Non-ASCII Text
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
# include <config.h>
|
|
#endif
|
|
|
|
#include "mailmime_decode.h"
|
|
|
|
#include <ctype.h>
|
|
#ifdef HAVE_UNISTD_H
|
|
# include <unistd.h>
|
|
#endif
|
|
#ifdef HAVE_SYS_MMAN_H
|
|
# include <sys/mman.h>
|
|
#endif
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "mailmime_content.h"
|
|
|
|
#include "charconv.h"
|
|
#include "mmapstring.h"
|
|
#include "mailimf.h"
|
|
|
|
#ifndef TRUE
|
|
#define TRUE 1
|
|
#endif
|
|
|
|
#ifndef FALSE
|
|
#define FALSE 0
|
|
#endif
|
|
|
|
static int mailmime_charset_parse(const char * message, size_t length,
|
|
size_t * indx, char ** charset);
|
|
|
|
static int detect_CRCR(const char * message, size_t length,
|
|
size_t * indx);
|
|
|
|
enum {
|
|
MAILMIME_ENCODING_B,
|
|
MAILMIME_ENCODING_Q
|
|
};
|
|
|
|
static int mailmime_encoding_parse(const char * message, size_t length,
|
|
size_t * indx, int * result);
|
|
|
|
static int mailmime_etoken_parse(const char * message, size_t length,
|
|
size_t * indx, char ** result);
|
|
|
|
static int
|
|
mailmime_non_encoded_word_parse(const char * message, size_t length,
|
|
size_t * indx,
|
|
char ** result, int * p_has_fwd);
|
|
|
|
|
|
|
|
enum {
|
|
TYPE_ERROR,
|
|
TYPE_WORD,
|
|
TYPE_ENCODED_WORD
|
|
};
|
|
|
|
LIBETPAN_EXPORT
|
|
int mailmime_encoded_phrase_parse(const char * default_fromcode,
|
|
const char * message, size_t length,
|
|
size_t * indx, const char * tocode,
|
|
char ** result)
|
|
{
|
|
MMAPString * gphrase;
|
|
struct mailmime_encoded_word * word;
|
|
int first;
|
|
size_t cur_token;
|
|
int r;
|
|
int res;
|
|
char * str;
|
|
char * wordutf8;
|
|
int type;
|
|
int missing_closing_quote;
|
|
|
|
cur_token = * indx;
|
|
|
|
gphrase = mmap_string_new("");
|
|
if (gphrase == NULL) {
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto err;
|
|
}
|
|
|
|
first = TRUE;
|
|
|
|
type = TYPE_ERROR; /* XXX - removes a gcc warning */
|
|
|
|
// Start parsing
|
|
while (1) {
|
|
int has_fwd;
|
|
|
|
word = NULL;
|
|
|
|
// Try to parse this part of the message as mime-encoded
|
|
r = mailmime_encoded_word_parse(message, length, &cur_token, &word, &has_fwd, &missing_closing_quote);
|
|
if (r == MAILIMF_NO_ERROR) {
|
|
// Either it was mime-encoded or there was no error.
|
|
if ((!first) && has_fwd) {
|
|
if (type != TYPE_ENCODED_WORD) {
|
|
if (mmap_string_append_c(gphrase, ' ') == NULL) {
|
|
mailmime_encoded_word_free(word);
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
}
|
|
}
|
|
}
|
|
type = TYPE_ENCODED_WORD;
|
|
wordutf8 = NULL;
|
|
r = charconv(tocode, word->wd_charset, word->wd_text,
|
|
strlen(word->wd_text), &wordutf8);
|
|
switch (r) {
|
|
case MAIL_CHARCONV_ERROR_MEMORY:
|
|
mailmime_encoded_word_free(word);
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
|
|
case MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET:
|
|
r = charconv(tocode, "iso-8859-1", word->wd_text,
|
|
strlen(word->wd_text), &wordutf8);
|
|
break;
|
|
case MAIL_CHARCONV_ERROR_CONV:
|
|
mailmime_encoded_word_free(word);
|
|
res = MAILIMF_ERROR_PARSE;
|
|
goto free;
|
|
}
|
|
|
|
switch (r) {
|
|
case MAIL_CHARCONV_ERROR_MEMORY:
|
|
mailmime_encoded_word_free(word);
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
case MAIL_CHARCONV_ERROR_CONV:
|
|
mailmime_encoded_word_free(word);
|
|
res = MAILIMF_ERROR_PARSE;
|
|
goto free;
|
|
}
|
|
|
|
if (wordutf8 != NULL) {
|
|
// append the word (converted to the proper charset)
|
|
if (mmap_string_append(gphrase, wordutf8) == NULL) {
|
|
mailmime_encoded_word_free(word);
|
|
free(wordutf8);
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
}
|
|
free(wordutf8);
|
|
}
|
|
mailmime_encoded_word_free(word);
|
|
first = FALSE;
|
|
}
|
|
else if (r == MAILIMF_ERROR_PARSE) {
|
|
// Wasn't mime-encoded
|
|
/* do nothing */
|
|
}
|
|
else {
|
|
// Some error condition we didn't expect
|
|
res = r;
|
|
goto free;
|
|
}
|
|
|
|
if (r == MAILIMF_ERROR_PARSE) {
|
|
// Not mime-encoded, so parse as if it isn't
|
|
char * raw_word;
|
|
|
|
raw_word = NULL;
|
|
|
|
// Check for special case of word=^CRCR.
|
|
// We need to be sure we advanced the cur_token
|
|
// past it. Usually with an empty string.
|
|
if (first) {
|
|
r = detect_CRCR(message, length, &cur_token);
|
|
if (r == MAILIMF_NO_ERROR) {
|
|
// We've advanced the token and pretend
|
|
// the first CR isn't there. Let the algorithm
|
|
// take care of the legit CRLF.
|
|
if (mmap_string_append_c(gphrase, ' ') == NULL) {
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
}
|
|
first = FALSE;
|
|
break;
|
|
}
|
|
}
|
|
r = mailmime_non_encoded_word_parse(message, length,
|
|
&cur_token, &raw_word, &has_fwd);
|
|
if (r == MAILIMF_NO_ERROR) {
|
|
if ((!first) && has_fwd) {
|
|
if (mmap_string_append_c(gphrase, ' ') == NULL) {
|
|
free(raw_word);
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
}
|
|
}
|
|
type = TYPE_WORD;
|
|
|
|
wordutf8 = NULL;
|
|
r = charconv(tocode, default_fromcode, raw_word,
|
|
strlen(raw_word), &wordutf8);
|
|
|
|
switch (r) {
|
|
case MAIL_CHARCONV_ERROR_MEMORY:
|
|
free(raw_word);
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
|
|
case MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET:
|
|
case MAIL_CHARCONV_ERROR_CONV:
|
|
free(raw_word);
|
|
res = MAILIMF_ERROR_PARSE;
|
|
goto free;
|
|
}
|
|
|
|
if (mmap_string_append(gphrase, wordutf8) == NULL) {
|
|
free(wordutf8);
|
|
free(raw_word);
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
}
|
|
|
|
free(wordutf8);
|
|
free(raw_word);
|
|
first = FALSE;
|
|
}
|
|
else if (r == MAILIMF_ERROR_PARSE) {
|
|
r = mailimf_fws_parse(message, length, &cur_token);
|
|
if (r != MAILIMF_NO_ERROR) {
|
|
break;
|
|
}
|
|
|
|
if (mmap_string_append_c(gphrase, ' ') == NULL) {
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
}
|
|
first = FALSE;
|
|
break;
|
|
}
|
|
else {
|
|
res = r;
|
|
goto free;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (first) {
|
|
if (cur_token != length) {
|
|
res = MAILIMF_ERROR_PARSE;
|
|
goto free;
|
|
}
|
|
}
|
|
|
|
str = strdup(gphrase->str);
|
|
if (str == NULL) {
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free;
|
|
}
|
|
mmap_string_free(gphrase);
|
|
|
|
* result = str;
|
|
* indx = cur_token;
|
|
|
|
return MAILIMF_NO_ERROR;
|
|
|
|
free:
|
|
mmap_string_free(gphrase);
|
|
err:
|
|
return res;
|
|
}
|
|
|
|
static int
|
|
mailmime_non_encoded_word_parse(const char * message, size_t length,
|
|
size_t * indx,
|
|
char ** result, int * p_has_fwd)
|
|
{
|
|
int end;
|
|
size_t cur_token;
|
|
int res;
|
|
char * text;
|
|
int r;
|
|
size_t begin;
|
|
int state;
|
|
int has_fwd;
|
|
|
|
cur_token = * indx;
|
|
|
|
has_fwd = 0;
|
|
|
|
// Check to see if it starts with folding whitespace
|
|
r = mailimf_fws_parse(message, length, &cur_token);
|
|
if (r == MAILIMF_NO_ERROR) { // it does
|
|
has_fwd = 1;
|
|
}
|
|
if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) {
|
|
// legit error
|
|
res = r;
|
|
goto err;
|
|
}
|
|
|
|
begin = cur_token;
|
|
|
|
// Get the word up to the next =? or whitespace
|
|
state = 0;
|
|
end = FALSE;
|
|
while (1) {
|
|
if (cur_token >= length)
|
|
break;
|
|
|
|
switch (message[cur_token]) {
|
|
case ' ':
|
|
case '\t':
|
|
case '\r':
|
|
case '\n':
|
|
state = 0;
|
|
end = TRUE;
|
|
break;
|
|
case '=':
|
|
state = 1;
|
|
break;
|
|
case '?':
|
|
if (state == 1) { // begin of mime-encoding?
|
|
cur_token --;
|
|
end = TRUE;
|
|
}
|
|
default:
|
|
state = 0;
|
|
break;
|
|
}
|
|
|
|
if (end)
|
|
break;
|
|
|
|
cur_token ++;
|
|
}
|
|
|
|
if (cur_token - begin == 0) { // we processed nothing, bail
|
|
res = MAILIMF_ERROR_PARSE;
|
|
goto err;
|
|
}
|
|
|
|
text = malloc(cur_token - begin + 1);
|
|
if (text == NULL) {
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto err;
|
|
}
|
|
|
|
memcpy(text, message + begin, cur_token - begin);
|
|
text[cur_token - begin] = '\0';
|
|
|
|
* indx = cur_token;
|
|
* result = text;
|
|
* p_has_fwd = has_fwd;
|
|
|
|
return MAILIMF_NO_ERROR;
|
|
|
|
err:
|
|
return res;
|
|
}
|
|
|
|
int mailmime_encoded_word_parse(const char * message, size_t length,
|
|
size_t * indx,
|
|
struct mailmime_encoded_word ** result,
|
|
int * p_has_fwd, int * p_missing_closing_quote)
|
|
{
|
|
size_t cur_token;
|
|
char * charset;
|
|
int encoding;
|
|
char * text;
|
|
size_t end_encoding;
|
|
char * decoded;
|
|
size_t decoded_len;
|
|
struct mailmime_encoded_word * ew;
|
|
int r;
|
|
int res;
|
|
int opening_quote;
|
|
int end;
|
|
int has_fwd;
|
|
int missing_closing_quote;
|
|
|
|
cur_token = * indx;
|
|
|
|
missing_closing_quote = 0;
|
|
has_fwd = 0;
|
|
r = mailimf_fws_parse(message, length, &cur_token);
|
|
if (r == MAILIMF_NO_ERROR) { // there was folding whitespace, now consumed
|
|
has_fwd = 1;
|
|
}
|
|
if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) { // actual error
|
|
res = r;
|
|
goto err;
|
|
}
|
|
|
|
// check for opening quote, consume if so
|
|
opening_quote = FALSE;
|
|
r = mailimf_char_parse(message, length, &cur_token, '\"');
|
|
if (r == MAILIMF_NO_ERROR) {
|
|
opening_quote = TRUE;
|
|
}
|
|
else if (r == MAILIMF_ERROR_PARSE) {
|
|
/* do nothing */
|
|
}
|
|
else {
|
|
res = r;
|
|
goto err;
|
|
}
|
|
|
|
// Check for MIME encoded-word syntax
|
|
// =?charset?encoding?encoded text?=
|
|
r = mailimf_token_case_insensitive_parse(message, length, &cur_token, "=?");
|
|
if (r != MAILIMF_NO_ERROR) {
|
|
res = r;
|
|
goto err;
|
|
}
|
|
|
|
// get charset
|
|
r = mailmime_charset_parse(message, length, &cur_token, &charset);
|
|
if (r != MAILIMF_NO_ERROR) {
|
|
res = r;
|
|
goto err;
|
|
}
|
|
|
|
// charset terminator
|
|
r = mailimf_char_parse(message, length, &cur_token, '?');
|
|
if (r != MAILIMF_NO_ERROR) {
|
|
res = r;
|
|
goto free_charset;
|
|
}
|
|
|
|
// get encoding
|
|
r = mailmime_encoding_parse(message, length, &cur_token, &encoding);
|
|
if (r != MAILIMF_NO_ERROR) {
|
|
res = r;
|
|
goto free_charset;
|
|
}
|
|
|
|
// encoding terminator
|
|
r = mailimf_char_parse(message, length, &cur_token, '?');
|
|
if (r != MAILIMF_NO_ERROR) {
|
|
res = r;
|
|
goto free_charset;
|
|
}
|
|
|
|
// get encoded text
|
|
end = FALSE;
|
|
end_encoding = cur_token;
|
|
while (1) {
|
|
if (end_encoding >= length)
|
|
break;
|
|
|
|
// are we done?
|
|
if (end_encoding + 1 < length) {
|
|
if ((message[end_encoding] == '?') && (message[end_encoding + 1] == '=')) {
|
|
end = TRUE;
|
|
}
|
|
}
|
|
|
|
if (end)
|
|
break;
|
|
|
|
end_encoding ++;
|
|
}
|
|
|
|
// decode text
|
|
decoded_len = 0;
|
|
decoded = NULL;
|
|
switch (encoding) {
|
|
case MAILMIME_ENCODING_B:
|
|
r = mailmime_base64_body_parse(message, end_encoding,
|
|
&cur_token, &decoded,
|
|
&decoded_len);
|
|
|
|
if (r != MAILIMF_NO_ERROR) {
|
|
res = r;
|
|
goto free_charset;
|
|
}
|
|
break;
|
|
case MAILMIME_ENCODING_Q:
|
|
r = mailmime_quoted_printable_body_parse(message, end_encoding,
|
|
&cur_token, &decoded,
|
|
&decoded_len, TRUE);
|
|
|
|
if (r != MAILIMF_NO_ERROR) {
|
|
res = r;
|
|
goto free_charset;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
text = malloc(decoded_len + 1);
|
|
if (text == NULL) {
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free_charset;
|
|
}
|
|
|
|
// Copy decoded text
|
|
if (decoded_len > 0)
|
|
memcpy(text, decoded, decoded_len);
|
|
text[decoded_len] = '\0';
|
|
|
|
mailmime_decoded_part_free(decoded);
|
|
|
|
// Detect if we stopped parsing the *encoded* text (before we sent it off to
|
|
// be decoded because we hit the terminator, or because we hit the end
|
|
// of the specified length
|
|
r = mailimf_token_case_insensitive_parse(message, length, &cur_token, "?=");
|
|
#if 0
|
|
if (r != MAILIMF_NO_ERROR) {
|
|
res = r;
|
|
goto free_encoded_text;
|
|
}
|
|
#endif
|
|
|
|
if (opening_quote) {
|
|
r = mailimf_char_parse(message, length, &cur_token, '\"');
|
|
#if 0
|
|
if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) {
|
|
res = r;
|
|
goto free_encoded_text;
|
|
}
|
|
#endif
|
|
if (r == MAILIMF_ERROR_PARSE) {
|
|
missing_closing_quote = 1;
|
|
}
|
|
}
|
|
|
|
/* fix charset */
|
|
if (strcasecmp(charset, "utf8") == 0) {
|
|
free(charset);
|
|
charset = strdup("utf-8");
|
|
}
|
|
ew = mailmime_encoded_word_new(charset, text);
|
|
if (ew == NULL) {
|
|
res = MAILIMF_ERROR_MEMORY;
|
|
goto free_encoded_text;
|
|
}
|
|
|
|
* result = ew;
|
|
* indx = cur_token;
|
|
* p_has_fwd = has_fwd;
|
|
* p_missing_closing_quote = missing_closing_quote;
|
|
|
|
return MAILIMF_NO_ERROR;
|
|
|
|
free_encoded_text:
|
|
mailmime_encoded_text_free(text);
|
|
free_charset:
|
|
mailmime_charset_free(charset);
|
|
err:
|
|
return res;
|
|
}
|
|
|
|
static int mailmime_charset_parse(const char * message, size_t length,
|
|
size_t * indx, char ** charset)
|
|
{
|
|
return mailmime_etoken_parse(message, length, indx, charset);
|
|
}
|
|
|
|
static int mailmime_encoding_parse(const char * message, size_t length,
|
|
size_t * indx, int * result)
|
|
{
|
|
size_t cur_token;
|
|
int encoding;
|
|
|
|
cur_token = * indx;
|
|
|
|
if (cur_token >= length)
|
|
return MAILIMF_ERROR_PARSE;
|
|
|
|
switch ((char) toupper((unsigned char) message[cur_token])) {
|
|
case 'Q':
|
|
encoding = MAILMIME_ENCODING_Q;
|
|
break;
|
|
case 'B':
|
|
encoding = MAILMIME_ENCODING_B;
|
|
break;
|
|
default:
|
|
return MAILIMF_ERROR_INVAL;
|
|
}
|
|
|
|
cur_token ++;
|
|
|
|
* result = encoding;
|
|
* indx = cur_token;
|
|
|
|
return MAILIMF_NO_ERROR;
|
|
}
|
|
|
|
int is_etoken_char(char ch)
|
|
{
|
|
unsigned char uch = ch;
|
|
|
|
if (uch < 31)
|
|
return FALSE;
|
|
|
|
switch (uch) {
|
|
case ' ':
|
|
case '(':
|
|
case ')':
|
|
case '<':
|
|
case '>':
|
|
case '@':
|
|
case ',':
|
|
case ';':
|
|
case ':':
|
|
case '"':
|
|
case '/':
|
|
case '[':
|
|
case ']':
|
|
case '?':
|
|
#if 0
|
|
case '.':
|
|
#endif
|
|
case '=':
|
|
return FALSE;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static int mailmime_etoken_parse(const char * message, size_t length,
|
|
size_t * indx, char ** result)
|
|
{
|
|
return mailimf_custom_string_parse(message, length,
|
|
indx, result,
|
|
is_etoken_char);
|
|
}
|
|
|
|
static int detect_CRCR(const char * message, size_t length,
|
|
size_t * indx) {
|
|
size_t cur_token = *indx;
|
|
int r = mailimf_char_parse(message, length, &cur_token, '\r');
|
|
if (r == MAILIMF_NO_ERROR) {
|
|
r = mailimf_char_parse(message, length, &cur_token, '\r');
|
|
if (r == MAILIMF_NO_ERROR) {
|
|
// Yup, there was a CRCR here.
|
|
// Advance token past the first \r
|
|
*indx = (*indx) + 1;
|
|
return r;
|
|
}
|
|
}
|
|
// Leave indx alone and move on.
|
|
return MAILIMF_ERROR_PARSE; // not actual error. Usual behaviour.
|
|
}
|