libetpan - fdik
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

684 lines
16 KiB

/*
* libEtPan! -- a mail stuff library
*
* Copyright (C) 2001, 2005 - DINH Viet Hoa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the libEtPan! project nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Id: mailmime_decode.c,v 1.37 2010/11/16 20:52:28 hoa Exp $
*/
/*
RFC 2047 : MIME (Multipurpose Internet Mail Extensions) Part Three:
Message Header Extensions for Non-ASCII Text
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include "mailmime_decode.h"
#include <ctype.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#ifdef HAVE_SYS_MMAN_H
# include <sys/mman.h>
#endif
#include <string.h>
#include <stdlib.h>
#include "mailmime_content.h"
#include "charconv.h"
#include "mmapstring.h"
#include "mailimf.h"
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/* Forward declarations of the file-local parsers defined further down. */

/* Parses the charset field of an encoded-word (delegates to the etoken parser). */
static int mailmime_charset_parse(const char * message, size_t length,
size_t * indx, char ** charset);
/* Detects two consecutive '\r' characters at *indx. */
static int detect_CRCR(const char * message, size_t length,
size_t * indx);
/*
 * Encoding selected by the single-letter encoding field of an encoded-word
 * (=?charset?encoding?encoded text?=): 'B' selects the base64 body parser,
 * 'Q' the quoted-printable body parser.
 */
enum {
MAILMIME_ENCODING_B,
MAILMIME_ENCODING_Q
};
/* Parses the encoding letter and stores one of the MAILMIME_ENCODING_* values. */
static int mailmime_encoding_parse(const char * message, size_t length,
size_t * indx, int * result);
/* Parses a run of characters accepted by is_etoken_char(). */
static int mailmime_etoken_parse(const char * message, size_t length,
size_t * indx, char ** result);
/* Parses a run of plain text up to whitespace or the start of "=?". */
static int
mailmime_non_encoded_word_parse(const char * message, size_t length,
size_t * indx,
char ** result, int * p_has_fwd);
/*
 * Kind of the token most recently appended by
 * mailmime_encoded_phrase_parse(); TYPE_ERROR is only its initial value.
 */
enum {
TYPE_ERROR,
TYPE_WORD,
TYPE_ENCODED_WORD
};
/*
 * Decode a phrase that may mix MIME encoded-words (=?charset?enc?text?=)
 * with plain text, converting every piece to the charset `tocode`.
 *
 * default_fromcode  source charset handed to charconv() for plain
 *                   (non encoded-word) text
 * message, length   input buffer and its size
 * indx              in: offset at which to start; out: offset reached
 *                   (written only on success)
 * tocode            target charset handed to charconv()
 * result            out: strdup()-allocated, NUL-terminated converted
 *                   string (written only on success)
 *
 * Returns MAILIMF_NO_ERROR on success, MAILIMF_ERROR_MEMORY when an
 * allocation fails, MAILIMF_ERROR_PARSE when a conversion fails or when
 * nothing was parsed and the input was not fully consumed; any other
 * error reported by the word parsers is returned unchanged.
 */
LIBETPAN_EXPORT
int mailmime_encoded_phrase_parse(const char * default_fromcode,
const char * message, size_t length,
size_t * indx, const char * tocode,
char ** result)
{
MMAPString * gphrase;
struct mailmime_encoded_word * word;
int first;
size_t cur_token;
int r;
int res;
char * str;
char * wordutf8;
int type;
// filled in by mailmime_encoded_word_parse(); not otherwise used here
int missing_closing_quote;
cur_token = * indx;
// accumulator for the converted output
gphrase = mmap_string_new("");
if (gphrase == NULL) {
res = MAILIMF_ERROR_MEMORY;
goto err;
}
// first stays TRUE until something has been appended to gphrase
first = TRUE;
type = TYPE_ERROR; /* XXX - removes a gcc warning */
// Start parsing
while (1) {
int has_fwd;
word = NULL;
// Try to parse this part of the message as mime-encoded
r = mailmime_encoded_word_parse(message, length, &cur_token, &word, &has_fwd, &missing_closing_quote);
if (r == MAILIMF_NO_ERROR) {
// An encoded-word was parsed.
// Emit a separating space only when folding whitespace preceded it and
// the previously appended token was not itself an encoded-word.
if ((!first) && has_fwd) {
if (type != TYPE_ENCODED_WORD) {
if (mmap_string_append_c(gphrase, ' ') == NULL) {
mailmime_encoded_word_free(word);
res = MAILIMF_ERROR_MEMORY;
goto free;
}
}
}
type = TYPE_ENCODED_WORD;
wordutf8 = NULL;
// convert the decoded bytes from the word's declared charset to tocode
r = charconv(tocode, word->wd_charset, word->wd_text,
strlen(word->wd_text), &wordutf8);
switch (r) {
case MAIL_CHARCONV_ERROR_MEMORY:
mailmime_encoded_word_free(word);
res = MAILIMF_ERROR_MEMORY;
goto free;
case MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET:
// unknown charset: retry the conversion treating the text as iso-8859-1
r = charconv(tocode, "iso-8859-1", word->wd_text,
strlen(word->wd_text), &wordutf8);
break;
case MAIL_CHARCONV_ERROR_CONV:
mailmime_encoded_word_free(word);
res = MAILIMF_ERROR_PARSE;
goto free;
}
// second check covers the result of the iso-8859-1 retry above
switch (r) {
case MAIL_CHARCONV_ERROR_MEMORY:
mailmime_encoded_word_free(word);
res = MAILIMF_ERROR_MEMORY;
goto free;
case MAIL_CHARCONV_ERROR_CONV:
mailmime_encoded_word_free(word);
res = MAILIMF_ERROR_PARSE;
goto free;
}
// wordutf8 is still NULL if no conversion produced output; the word is
// then skipped without reporting an error
if (wordutf8 != NULL) {
// append the word (converted to the proper charset)
if (mmap_string_append(gphrase, wordutf8) == NULL) {
mailmime_encoded_word_free(word);
free(wordutf8);
res = MAILIMF_ERROR_MEMORY;
goto free;
}
free(wordutf8);
}
mailmime_encoded_word_free(word);
first = FALSE;
}
else if (r == MAILIMF_ERROR_PARSE) {
// Wasn't mime-encoded
/* do nothing */
}
else {
// Some error condition we didn't expect
res = r;
goto free;
}
// r is MAILIMF_ERROR_PARSE here only when the encoded-word parse itself
// failed; after a parsed encoded-word, r holds the charconv() result
if (r == MAILIMF_ERROR_PARSE) {
// Not mime-encoded, so parse as if it isn't
char * raw_word;
raw_word = NULL;
// Check for special case of word=^CRCR.
// We need to be sure we advanced the cur_token
// past it. Usually with an empty string.
if (first) {
r = detect_CRCR(message, length, &cur_token);
if (r == MAILIMF_NO_ERROR) {
// We've advanced the token and pretend
// the first CR isn't there. Let the algorithm
// take care of the legit CRLF.
if (mmap_string_append_c(gphrase, ' ') == NULL) {
res = MAILIMF_ERROR_MEMORY;
goto free;
}
first = FALSE;
// leaves the while loop: parsing of the phrase stops here
break;
}
}
r = mailmime_non_encoded_word_parse(message, length,
&cur_token, &raw_word, &has_fwd);
if (r == MAILIMF_NO_ERROR) {
// plain word: a preceding folding whitespace always becomes one space
if ((!first) && has_fwd) {
if (mmap_string_append_c(gphrase, ' ') == NULL) {
free(raw_word);
res = MAILIMF_ERROR_MEMORY;
goto free;
}
}
type = TYPE_WORD;
wordutf8 = NULL;
// plain text is assumed to be in default_fromcode; unlike the
// encoded-word branch, an unknown charset is an error here
r = charconv(tocode, default_fromcode, raw_word,
strlen(raw_word), &wordutf8);
switch (r) {
case MAIL_CHARCONV_ERROR_MEMORY:
free(raw_word);
res = MAILIMF_ERROR_MEMORY;
goto free;
case MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET:
case MAIL_CHARCONV_ERROR_CONV:
free(raw_word);
res = MAILIMF_ERROR_PARSE;
goto free;
}
if (mmap_string_append(gphrase, wordutf8) == NULL) {
free(wordutf8);
free(raw_word);
res = MAILIMF_ERROR_MEMORY;
goto free;
}
free(wordutf8);
free(raw_word);
first = FALSE;
}
else if (r == MAILIMF_ERROR_PARSE) {
// Neither an encoded-word nor a plain word: consume trailing folding
// whitespace if there is any, then stop. Both breaks leave the while loop.
r = mailimf_fws_parse(message, length, &cur_token);
if (r != MAILIMF_NO_ERROR) {
break;
}
if (mmap_string_append_c(gphrase, ' ') == NULL) {
res = MAILIMF_ERROR_MEMORY;
goto free;
}
first = FALSE;
break;
}
else {
res = r;
goto free;
}
}
}
// Nothing was appended at all: only acceptable when the whole input
// was consumed
if (first) {
if (cur_token != length) {
res = MAILIMF_ERROR_PARSE;
goto free;
}
}
// hand back an independent copy so the accumulator can be released
str = strdup(gphrase->str);
if (str == NULL) {
res = MAILIMF_ERROR_MEMORY;
goto free;
}
mmap_string_free(gphrase);
* result = str;
* indx = cur_token;
return MAILIMF_NO_ERROR;
// error exits: release the accumulator, outputs are left untouched
free:
mmap_string_free(gphrase);
err:
return res;
}
/*
 * Extract one plain (non encoded-word) word starting at *indx.
 *
 * Leading folding whitespace is skipped first. The word then extends up
 * to, but not including, the next space, tab, CR or LF, the two-character
 * sequence "=?" (possible start of an encoded-word), or the end of the
 * buffer, whichever comes first.
 *
 * On success *result receives a malloc()-allocated NUL-terminated copy of
 * the word, *indx is moved just past it and *p_has_fwd is 1 when folding
 * whitespace was skipped, 0 otherwise. Outputs are untouched on failure.
 *
 * Returns MAILIMF_NO_ERROR, MAILIMF_ERROR_PARSE when the word would be
 * empty, MAILIMF_ERROR_MEMORY on allocation failure, or the error reported
 * by mailimf_fws_parse().
 */
static int
mailmime_non_encoded_word_parse(const char * message, size_t length,
                                size_t * indx,
                                char ** result, int * p_has_fwd)
{
  size_t pos;
  size_t start;
  size_t word_len;
  char * word;
  int saw_fws;
  int prev_is_equal;
  int r;

  pos = * indx;

  /* optional leading folding whitespace; a plain parse failure just means
     there was none */
  r = mailimf_fws_parse(message, length, &pos);
  if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE))
    return r;
  saw_fws = (r == MAILIMF_NO_ERROR);

  start = pos;

  /* scan forward; prev_is_equal remembers whether the previous character
     of this word was '=' so that "=?" can be recognised */
  prev_is_equal = 0;
  while (pos < length) {
    char c = message[pos];

    if ((c == ' ') || (c == '\t') || (c == '\r') || (c == '\n'))
      break;

    if ((c == '?') && prev_is_equal) {
      /* step back onto the '=' so that "=?" is left for the caller */
      pos --;
      break;
    }

    prev_is_equal = (c == '=');
    pos ++;
  }

  word_len = pos - start;
  if (word_len == 0)
    return MAILIMF_ERROR_PARSE;

  word = malloc(word_len + 1);
  if (word == NULL)
    return MAILIMF_ERROR_MEMORY;

  memcpy(word, message + start, word_len);
  word[word_len] = '\0';

  * indx = pos;
  * result = word;
  * p_has_fwd = saw_fws;

  return MAILIMF_NO_ERROR;
}
int mailmime_encoded_word_parse(const char * message, size_t length,
size_t * indx,
struct mailmime_encoded_word ** result,
int * p_has_fwd, int * p_missing_closing_quote)
{
size_t cur_token;
char * charset;
int encoding;
char * text;
size_t end_encoding;
char * decoded;
size_t decoded_len;
struct mailmime_encoded_word * ew;
int r;
int res;
int opening_quote;
int end;
int has_fwd;
int missing_closing_quote;
cur_token = * indx;
missing_closing_quote = 0;
has_fwd = 0;
r = mailimf_fws_parse(message, length, &cur_token);
if (r == MAILIMF_NO_ERROR) { // there was folding whitespace, now consumed
has_fwd = 1;
}
if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) { // actual error
res = r;
goto err;
}
// check for opening quote, consume if so
opening_quote = FALSE;
r = mailimf_char_parse(message, length, &cur_token, '\"');
if (r == MAILIMF_NO_ERROR) {
opening_quote = TRUE;
}
else if (r == MAILIMF_ERROR_PARSE) {
/* do nothing */
}
else {
res = r;
goto err;
}
// Check for MIME encoded-word syntax
// =?charset?encoding?encoded text?=
r = mailimf_token_case_insensitive_parse(message, length, &cur_token, "=?");
if (r != MAILIMF_NO_ERROR) {
res = r;
goto err;
}
// get charset
r = mailmime_charset_parse(message, length, &cur_token, &charset);
if (r != MAILIMF_NO_ERROR) {
res = r;
goto err;
}
// charset terminator
r = mailimf_char_parse(message, length, &cur_token, '?');
if (r != MAILIMF_NO_ERROR) {
res = r;
goto free_charset;
}
// get encoding
r = mailmime_encoding_parse(message, length, &cur_token, &encoding);
if (r != MAILIMF_NO_ERROR) {
res = r;
goto free_charset;
}
// encoding terminator
r = mailimf_char_parse(message, length, &cur_token, '?');
if (r != MAILIMF_NO_ERROR) {
res = r;
goto free_charset;
}
// get encoded text
end = FALSE;
end_encoding = cur_token;
while (1) {
if (end_encoding >= length)
break;
// are we done?
if (end_encoding + 1 < length) {
if ((message[end_encoding] == '?') && (message[end_encoding + 1] == '=')) {
end = TRUE;
}
}
if (end)
break;
end_encoding ++;
}
// decode text
decoded_len = 0;
decoded = NULL;
switch (encoding) {
case MAILMIME_ENCODING_B:
r = mailmime_base64_body_parse(message, end_encoding,
&cur_token, &decoded,
&decoded_len);
if (r != MAILIMF_NO_ERROR) {
res = r;
goto free_charset;
}
break;
case MAILMIME_ENCODING_Q:
r = mailmime_quoted_printable_body_parse(message, end_encoding,
&cur_token, &decoded,
&decoded_len, TRUE);
if (r != MAILIMF_NO_ERROR) {
res = r;
goto free_charset;
}
break;
}
text = malloc(decoded_len + 1);
if (text == NULL) {
res = MAILIMF_ERROR_MEMORY;
goto free_charset;
}
// Copy decoded text
if (decoded_len > 0)
memcpy(text, decoded, decoded_len);
text[decoded_len] = '\0';
mailmime_decoded_part_free(decoded);
// Detect if we stopped parsing the *encoded* text (before we sent it off to
// be decoded because we hit the terminator, or because we hit the end
// of the specified length
r = mailimf_token_case_insensitive_parse(message, length, &cur_token, "?=");
#if 0
if (r != MAILIMF_NO_ERROR) {
res = r;
goto free_encoded_text;
}
#endif
if (opening_quote) {
r = mailimf_char_parse(message, length, &cur_token, '\"');
#if 0
if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) {
res = r;
goto free_encoded_text;
}
#endif
if (r == MAILIMF_ERROR_PARSE) {
missing_closing_quote = 1;
}
}
/* fix charset */
if (strcasecmp(charset, "utf8") == 0) {
free(charset);
charset = strdup("utf-8");
}
ew = mailmime_encoded_word_new(charset, text);
if (ew == NULL) {
res = MAILIMF_ERROR_MEMORY;
goto free_encoded_text;
}
* result = ew;
* indx = cur_token;
* p_has_fwd = has_fwd;
* p_missing_closing_quote = missing_closing_quote;
return MAILIMF_NO_ERROR;
free_encoded_text:
mailmime_encoded_text_free(text);
free_charset:
mailmime_charset_free(charset);
err:
return res;
}
/*
 * Parse the charset field of an encoded-word.
 *
 * The charset is parsed as an etoken: this simply forwards to
 * mailmime_etoken_parse() and returns its result unchanged; *charset and
 * *indx are written by that parser.
 */
static int mailmime_charset_parse(const char * message, size_t length,
                                  size_t * indx, char ** charset)
{
  int r;

  r = mailmime_etoken_parse(message, length, indx, charset);

  return r;
}
/*
 * Parse the one-letter encoding field of an encoded-word.
 *
 * The character at *indx is compared case-insensitively: 'Q' yields
 * MAILMIME_ENCODING_Q and 'B' yields MAILMIME_ENCODING_B. On success the
 * value is stored in *result and *indx moves past the letter; on failure
 * neither output is modified.
 *
 * Returns MAILIMF_NO_ERROR, MAILIMF_ERROR_PARSE when *indx is at or past
 * the end of the buffer, or MAILIMF_ERROR_INVAL for any other letter.
 */
static int mailmime_encoding_parse(const char * message, size_t length,
                                   size_t * indx, int * result)
{
  size_t pos;
  char letter;

  pos = * indx;
  if (pos >= length)
    return MAILIMF_ERROR_PARSE;

  /* cast through unsigned char so toupper() never sees a negative value */
  letter = (char) toupper((unsigned char) message[pos]);

  if (letter == 'Q')
    * result = MAILMIME_ENCODING_Q;
  else if (letter == 'B')
    * result = MAILMIME_ENCODING_B;
  else
    return MAILIMF_ERROR_INVAL;

  * indx = pos + 1;

  return MAILIMF_NO_ERROR;
}
/*
 * Tell whether ch may appear in an encoded-word token (charset field).
 *
 * Rejected: every control character (0x00-0x1F and DEL, 0x7F), the space
 * character and the characters ( ) < > @ , ; : " / [ ] ? =
 * The period '.' is deliberately accepted, and bytes >= 0x80 are accepted
 * as well.
 *
 * Returns 1 when the character is allowed, 0 otherwise.
 */
int is_etoken_char(char ch)
{
  /* characters that terminate a token, besides control characters */
  static const char excluded[] = " ()<>@,;:\"/[]?=";
  unsigned char uch = (unsigned char) ch;

  /* control characters, including 0x1F and DEL; this also keeps '\0'
     away from strchr(), which would match the string terminator */
  if ((uch < 0x20) || (uch == 0x7F))
    return 0;

  return strchr(excluded, uch) == NULL;
}
/*
 * Parse an encoded-word token.
 *
 * Forwards to mailimf_custom_string_parse() with is_etoken_char() as the
 * character predicate and returns its result unchanged; *result and *indx
 * are written by that parser.
 */
static int mailmime_etoken_parse(const char * message, size_t length,
                                 size_t * indx, char ** result)
{
  int r;

  r = mailimf_custom_string_parse(message, length, indx, result,
                                  is_etoken_char);

  return r;
}
/*
 * Detect two consecutive carriage returns at *indx.
 *
 * When both characters are '\r', *indx is advanced by one so that it
 * points at the second '\r', and MAILIMF_NO_ERROR is returned. In every
 * other case *indx is left alone and MAILIMF_ERROR_PARSE is returned,
 * which is the usual outcome rather than a real error.
 */
static int detect_CRCR(const char * message, size_t length,
                       size_t * indx)
{
  /* scratch offset so that a failed probe does not move *indx */
  size_t probe = * indx;

  if (mailimf_char_parse(message, length, &probe, '\r') != MAILIMF_NO_ERROR)
    return MAILIMF_ERROR_PARSE;

  if (mailimf_char_parse(message, length, &probe, '\r') != MAILIMF_NO_ERROR)
    return MAILIMF_ERROR_PARSE;

  /* skip only the first of the two carriage returns */
  * indx += 1;

  return MAILIMF_NO_ERROR;
}