unmime.c
上传用户:xxcykj
上传日期:2007-01-04
资源大小:727k
文件大小:19k
/*
 * MIME mail decoding.
 *
 * This module contains decoding routines for converting
 * quoted-printable data into pure 8-bit data, in MIME
 * formatted messages.
 *
 * By Henrik Storner <storner@image.dk>
 *
 * Configuration file support for fetchmail 4.3.8 by
 * Frank Damgaard <frda@post3.tele.dk>
 *
 */
#include "config.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include "fetchmail.h"
/*
 * Convert one ASCII hexadecimal digit to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'.  Every other byte
 * yields 0xFF, a value outside 0..15, so that a caller testing
 * "result > 15" reliably detects invalid input.  Returning the input
 * byte itself would let control characters whose codes are 0..15
 * (NUL, TAB, LF, CR, ...) be mistaken for valid digits.
 */
static unsigned char unhex(unsigned char c)
{
    if ((c >= '0') && (c <= '9'))
        return (unsigned char)(c - '0');
    if ((c >= 'A') && (c <= 'F'))
        return (unsigned char)(c - 'A' + 10);
    if ((c >= 'a') && (c <= 'f'))
        return (unsigned char)(c - 'a' + 10);

    return 0xFF;    /* not a hex digit */
}
/*
 * Decode one quoted-printable octet from its two hex digits.
 *
 * c1    : high-order hex digit (the character following '=')
 * c2    : low-order hex digit
 * c_out : receives the decoded byte; left untouched on failure
 *
 * Returns 0 on success, 1 if either character is not a hex digit.
 *
 * Both characters are validated with isxdigit() before conversion, so
 * bytes such as NUL or TAB are always rejected, whatever unhex()
 * returns for non-digits.
 */
static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
{
    if (!isxdigit(c1) || !isxdigit(c2))
        return 1;

    *c_out = (unsigned char)(16 * unhex(c1) + unhex(c2));
    return 0;
}
/*
 * Routines to decode MIME QP-encoded headers, as per RFC 2047.
 */

/* States of the header decoding state machine */
#define S_COPY_PLAIN        0   /* Just copy, but watch for the QP flag */
#define S_SKIP_MIMEINIT     1   /* Get the encoding, and skip header */
#define S_COPY_MIME         2   /* Decode a sequence of coded characters */

/* Delimiters that bracket an encoded sequence */
static const char MIMEHDR_INIT[] = "=?";    /* Start of coded sequence */
static const char MIMEHDR_END[]  = "?=";    /* End of coded sequence */
- void UnMimeHeader(unsigned char *hdr)
- {
- /* Decode a buffer containing data encoded according to RFC
- * 2047. This only handles content-transfer-encoding; conversion
- * between character sets is not implemented. In other words: We
- * assume the charsets used can be displayed by your mail program
- * without problems.
- */
- /* Note: Decoding is done "in-situ", i.e. without using an
- * additional buffer for temp. storage. This is possible, since the
- * decoded string will always be shorter than the encoded string,
- * due to the en- coding scheme.
- */
- int state = S_COPY_PLAIN;
- unsigned char *p_in, *p_out, *p;
- unsigned char enc = ' '; /* initialization pacifies -Wall */
- int i;
- /* Speed up in case this is not a MIME-encoded header */
- p = strstr(hdr, MIMEHDR_INIT);
- if (p == NULL)
- return; /* No MIME header */
- /* Loop through the buffer.
- * p_in : Next char to be processed.
- * p_out: Where to put the next processed char
- * enc : Encoding used (usually, 'q' = quoted-printable)
- */
- for (p_out = p_in = hdr; (*p_in); ) {
- switch (state) {
- case S_COPY_PLAIN:
- p = strstr(p_in, MIMEHDR_INIT);
- if (p == NULL) {
- /*
- * No more coded data in buffer,
- * just move remainder into place.
- */
- i = strlen(p_in); /* How much left */
- memmove(p_out, p_in, i);
- p_in += i; p_out += i;
- }
- else {
- /* MIME header init found at location p */
- if (p > p_in) {
- /* There are some uncoded chars at the beginning. */
- i = (p - p_in);
- memmove(p_out, p_in, i);
- p_out += i;
- }
- p_in = (p + 2);
- state = S_SKIP_MIMEINIT;
- }
- break;
- case S_SKIP_MIMEINIT:
- /* Mime type definition: "charset?encoding?" */
- p = strchr(p_in, '?');
- if (p != NULL) {
- /* p_in .. (p-1) holds the charset */
- /* *(p+1) is the transfer encoding, *(p+2) must be a '?' */
- if (*(p+2) == '?') {
- enc = tolower(*(p+1));
- p_in = p+3;
- state = S_COPY_MIME;
- }
- else
- state = S_COPY_PLAIN;
- }
- else
- state = S_COPY_PLAIN; /* Invalid data */
- break;
- case S_COPY_MIME:
- p = strstr(p_in, MIMEHDR_END); /* Find end of coded data */
- if (p == NULL) p = p_in + strlen(p_in);
- for (; (p_in < p); ) {
- /* Decode all encoded data */
- if (enc == 'q') {
- if (*p_in == '=') {
- /* Decode one char qp-coded at (p_in+1) and (p_in+2) */
- if (qp_char(*(p_in+1), *(p_in+2), p_out) == 0)
- p_in += 3;
- else {
- /* Invalid QP data - pass through unchanged. */
- *p_out = *p_in;
- p_in++;
- }
- }
- else if (*p_in == '_') {
- /*
- * RFC 2047: '_' inside encoded word represents 0x20.
- * NOT a space - always the value 0x20.
- */
- *p_out = 0x20;
- p_in++;
- }
- else {
- /* Copy unchanged */
- *p_out = *p_in;
- p_in++;
- }
- p_out++;
- }
- else if (enc == 'b') {
- /* Decode base64 encoded data */
- char delimsave;
- int decoded_count;
- delimsave = *p; *p = 'r';
- decoded_count = from64tobits(p_out, p_in);
- *p = delimsave;
- if (decoded_count > 0)
- p_out += decoded_count;
- p_in = p;
- }
- else {
- /* Copy unchanged */
- *p_out = *p_in;
- p_in++;
- p_out++;
- }
- }
- if (*p_in)
- p_in += 2; /* Skip the MIMEHDR_END delimiter */
- /*
- * We've completed decoding one encoded sequence. But another
- * may follow immediately, in which case whitespace before the
- * new MIMEHDR_INIT delimiter must be discarded.
- * See if that is the case
- */
- p = strstr(p_in, MIMEHDR_INIT);
- state = S_COPY_PLAIN;
- if (p != NULL) {
- /*
- * There is more MIME data later on. Is there
- * whitespace only before the delimiter?
- */
- unsigned char *q;
- int wsp_only = 1;
- for (q=p_in; (wsp_only && (q < p)); q++)
- wsp_only = isspace(*q);
- if (wsp_only) {
- /*
- * Whitespace-only before the MIME delimiter. OK,
- * just advance p_in to past the new MIMEHDR_INIT,
- * and prepare to process the new MIME charset/encoding
- * header.
- */
- p_in = p + sizeof(MIMEHDR_INIT) - 1;
- state = S_SKIP_MIMEINIT;
- }
- }
- break;
- }
- }
- *p_out = ' ';
- }
/*
 * Routines for decoding body-parts of a message.
 *
 * Since the "fetch" part of fetchmail gets a message body
 * one line at a time, we need to maintain some state variables
 * across multiple invocations of the UnMimeBodyline() routine.
 * The driver routine should call MimeBodyType() when all
 * headers have been received, and then UnMimeBodyline() for
 * every line in the message body.
 *
 */
#define S_BODY_DATA 0   /* Processing the data of a message/body-part */
#define S_BODY_HDR  1   /* Processing the headers of a body-part */

/*
 * Flag indicating if we are currently processing
 * the headers or the body of a (multipart) message.
 */
static int BodyState = S_BODY_DATA;

/*
 * Flag indicating if we are in the process of decoding
 * a quoted-printable body part.
 */
static int CurrEncodingIsQP = 0;

/*
 * Flag indicating if the current content-type is one of those
 * for which quoted-printable decoding is performed.
 */
static int CurrTypeNeedsDecode = 0;

/*
 * Delimiter for multipart messages. RFC 2046 states that this must
 * NEVER be longer than 70 characters. Add 3 for the two hyphens
 * at the beginning, and a terminating null.
 * An empty string means that no delimiter is in effect.
 */
#define MAX_DELIM_LEN 70
static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3];
/* This string replaces the "Content-Transfer-Encoding: quoted-printable"
 * string in all headers, including those in body-parts. The replacement
 * must be no longer than the original string.
 */
static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit";

/*
 * Overwrite the header starting at XferEncOfs with ENC8BIT.
 *
 * XferEncOfs : start of a "Content-Transfer-Encoding:" header inside a
 *              writable buffer, or NULL (in which case nothing is done).
 *
 * Whatever remains of the old header line after the replacement text
 * is blanked out with spaces, up to (not including) the first byte
 * below 0x20 -- i.e. the line's CR/LF or the terminating NUL -- so the
 * length of the line is unchanged.
 */
static void SetEncoding8bit(unsigned char *XferEncOfs)
{
    unsigned char *p;

    if (XferEncOfs == NULL)
        return;

    memcpy(XferEncOfs, ENC8BIT, sizeof(ENC8BIT) - 1);

    /* If anything left, in this header, replace with whitespace */
    for (p = XferEncOfs + sizeof(ENC8BIT) - 1; (*p >= ' '); p++)
        *p = ' ';
}
/*
 * Extract the value of the "boundary" parameter from a Content-Type
 * header value.
 *
 * CntType : writable, NUL-terminated Content-Type value.  It is
 *           modified: a NUL is stored directly after the boundary
 *           string.
 *
 * Returns a pointer into CntType at the start of the boundary string,
 * or NULL if there is no usable "boundary=" parameter (parameter
 * missing, no '=', unterminated quote, or empty value).
 */
static char *GetBoundary(char *CntType)
{
    char *p1, *p2;
    int flag;

    if (CntType == NULL)
        return NULL;

    /* Find the "boundary" delimiter. It must be preceded with a ';'
     * and optionally some whitespace.
     */
    p1 = CntType;
    do {
        p2 = strchr(p1, ';');
        if (p2)
            for (p2++; isspace((unsigned char)*p2); p2++)
                ;
        p1 = p2;
    } while ((p1) && (strncasecmp(p1, "boundary", 8) != 0));

    if (p1 == NULL)
        /* No boundary delimiter */
        return NULL;

    /* Skip "boundary", whitespace and '='; check that we do have a '=' */
    for (p1 += 8, flag = 0; (isspace((unsigned char)*p1) || (*p1 == '=')); p1++)
        flag |= (*p1 == '=');
    if (!flag)
        return NULL;

    /* Find end of boundary delimiter string and zero-terminate it */
    if (*p1 == '"') {
        /* The delimiter is inside quotes */
        p1++;
        p2 = strchr(p1, '"');
        if (p2 == NULL)
            return NULL;    /* No closing '"' !?! */
        *p2 = '\0';
    }
    else {
        /*
         * Unquoted value: it ends at the next ';' (more parameters may
         * follow) or at whitespace such as the CR that ends the header
         * line.  Safe - a delimiter containing ';' or whitespace must
         * be in quotes.
         */
        p1[strcspn(p1, "; \t\r\n")] = '\0';
    }

    return (*p1 != '\0') ? p1 : NULL;
}
/*
 * Tell whether a Content-Type value names a type for which
 * quoted-printable decoding will be done, if requested.
 *
 * CntType : Content-Type value (leading whitespace allowed), or NULL.
 *
 * Returns 1 if the value starts (case-insensitively) with one of the
 * known decodable types, 0 otherwise -- including when CntType is NULL.
 */
int CheckContentType(char *CntType)
{
    /*
     * Static array of Content-Type's for which we will do
     * quoted-printable decoding, if requested.
     * It is probably wise to do this only on known text-only types;
     * be really careful if you change this.
     */
    static const char *const DecodedTypes[] = {
        "text/",        /* Will match ALL content-type's starting with 'text/' */
        "message/rfc822",
        NULL
    };
    const char *p;
    int i;

    /* If no Content-Type header, it isn't MIME - don't touch it */
    if (CntType == NULL)
        return 0;

    /* Skip whitespace, if any */
    for (p = CntType; isspace((unsigned char)*p); p++)
        ;

    for (i = 0; DecodedTypes[i] != NULL; i++) {
        if (strncasecmp(p, DecodedTypes[i], strlen(DecodedTypes[i])) == 0)
            return 1;
    }

    return 0;
}
- /*
- * This routine does three things:
- * 1) It determines - based on the message headers - whether the
- * message body is a MIME message that may hold 8 bit data.
- * - A message that has a "quoted-printable" or "8bit" transfer
- * encoding is assumed to contain 8-bit data (when decoded).
- * - A multipart message is assumed to contain 8-bit data
- * when decoded (there might be quoted-printable body-parts).
- * - All other messages are assumed NOT to include 8-bit data.
- * 2) It determines the delimiter-string used in multi-part message
- * bodies.
- * 3) It sets the initial values of the CurrEncodingIsQP,
- * CurrTypeNeedsDecode, and BodyState variables, from the header
- * contents.
- *
- * The return value is a bitmask.
- */
- int MimeBodyType(unsigned char *hdrs, int WantDecode)
- {
- unsigned char *NxtHdr = hdrs;
- unsigned char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p;
- int HdrsFound = 0; /* We only look for three headers */
- int BodyType; /* Return value */
- /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */
- MultipartDelimiter[0] = ' ';
- CurrEncodingIsQP = CurrTypeNeedsDecode = 0;
- BodyState = S_BODY_DATA;
- BodyType = 0;
- /* Just in case ... */
- if (hdrs == NULL)
- return BodyType;
- XferEnc = XferEncOfs = CntType = MimeVer = NULL;
- do {
- if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) {
- XferEncOfs = NxtHdr;
- p = nxtaddr(NxtHdr);
- if (p != NULL) {
- xalloca(XferEnc, char *, strlen(p) + 1);
- strcpy(XferEnc, p);
- HdrsFound++;
- }
- }
- else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) {
- /*
- * This one is difficult. We cannot use the standard
- * nxtaddr() routine, since the boundary-delimiter is
- * (probably) enclosed in quotes - and thus appears
- * as an rfc822 comment, and nxtaddr() "eats" up any
- * spaces in the delimiter. So, we have to do this
- * by hand.
- */
- /* Skip the "Content-Type:" part and whitespace after it */
- for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == 't')); NxtHdr++);
- /*
- * Get the full value of the Content-Type header;
- * it might span multiple lines. So search for
- * a newline char, but ignore those that have a
- * have a TAB or space just after the NL (continued
- * lines).
- */
- p = NxtHdr-1;
- do {
- p=strchr((p+1),'n');
- } while ( (p != NULL) && ((*(p+1) == 't') || (*(p+1) == ' ')) );
- if (p == NULL) p = NxtHdr + strlen(NxtHdr);
- xalloca(CntType, char *, p-NxtHdr+2);
- strncpy(CntType, NxtHdr, (p-NxtHdr));
- *(CntType+(p-NxtHdr)) = ' ';
- HdrsFound++;
- }
- else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0) {
- p = nxtaddr(NxtHdr);
- if (p != NULL) {
- xalloca(MimeVer, char *, strlen(p) + 1);
- strcpy(MimeVer, p);
- HdrsFound++;
- }
- }
- NxtHdr = (strchr(NxtHdr, 'n'));
- if (NxtHdr != NULL) NxtHdr++;
- } while ((NxtHdr != NULL) && (*NxtHdr) && (HdrsFound != 3));
- /* Done looking through the headers, now check what they say */
- if ((MimeVer != NULL) && (strcmp(MimeVer, "1.0") == 0)) {
- CurrTypeNeedsDecode = CheckContentType(CntType);
- /* Check Content-Type to see if this is a multipart message */
- if ( (CntType != NULL) &&
- ((strncasecmp(CntType, "multipart/mixed", 16) == 0) ||
- (strncasecmp(CntType, "message/", 8) == 0)) ) {
- char *p1 = GetBoundary(CntType);
- if (p1 != NULL) {
- /* The actual delimiter is "--" followed by
- the boundary string */
- strcpy(MultipartDelimiter, "--");
- strncat(MultipartDelimiter, p1, MAX_DELIM_LEN);
- BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
- }
- }
- /*
- * Check Content-Transfer-Encoding, but
- * ONLY for non-multipart messages (BodyType == 0).
- */
- if ((XferEnc != NULL) && (BodyType == 0)) {
- if (strcasecmp(XferEnc, "quoted-printable") == 0) {
- CurrEncodingIsQP = 1;
- BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
- if (WantDecode && CurrTypeNeedsDecode) {
- SetEncoding8bit(XferEncOfs);
- }
- }
- else if (strcasecmp(XferEnc, "7bit") == 0) {
- CurrEncodingIsQP = 0;
- BodyType = (MSG_IS_7BIT);
- }
- else if (strcasecmp(XferEnc, "8bit") == 0) {
- CurrEncodingIsQP = 0;
- BodyType = (MSG_IS_8BIT);
- }
- }
- }
- return BodyType;
- }
- /*
- * Decode one line of data containing QP data.
- * Return flag set if this line ends with a soft line-break.
- * 'bufp' is modified to point to the end of the output buffer.
- */
- static int DoOneQPLine(unsigned char **bufp, flag delimited, flag issoftline)
- {
- unsigned char *buf = *bufp;
- unsigned char *p_in, *p_out, *p;
- int n;
- int ret = 0;
- /*
- * Special case: line consists of a single =2E and messages are
- * dot-terminated. Line has to be dot-stuffed after decoding.
- */
- if (delimited && !issoftline && buf[0]=='=' && !strncmp(*bufp, "=2Ern", 5))
- {
- strcpy(buf, "..rn");
- *bufp += 5;
- return(FALSE);
- }
- p_in = buf;
- if (delimited && issoftline && (strncmp(buf, "..", 2) == 0))
- p_in++;
- for (p_out = buf; (*p_in); ) {
- p = strchr(p_in, '=');
- if (p == NULL) {
- /* No more QP data, just move remainder into place */
- n = strlen(p_in);
- memmove(p_out, p_in, n);
- p_in += n; p_out += n;
- }
- else {
- if (p > p_in) {
- /* There are some uncoded chars at the beginning. */
- n = (p - p_in);
- memmove(p_out, p_in, n);
- p_out += n;
- }
-
- switch (*(p+1)) {
- case ' ': case 'r': case 'n':
- /* Soft line break, skip '=' */
- p_in = p+1;
- if (*p_in == 'r') p_in++;
- if (*p_in == 'n') p_in++;
- ret = 1;
- break;
- default:
- /* There is a QP encoded byte */
- if (qp_char(*(p+1), *(p+2), p_out) == 0) {
- p_in = p+3;
- }
- else {
- /* Invalid QP data - pass through unchanged. */
- *p_out = '=';
- p_in = p+1;
- }
- p_out++;
- break;
- }
- }
- }
- *p_out = ' ';
- *bufp = p_out;
- return ret;
- }
- /* This is called once per line in the message body. We need to scan
- * all lines in the message body for the multipart delimiter string,
- * and handle any body-part headers in such messages (these can toggle
- * qp-decoding on and off).
- *
- * Note: Messages that are NOT multipart-messages go through this
- * routine quickly, since BodyState will always be S_BODY_DATA,
- * and MultipartDelimiter is NULL.
- *
- * Return flag set if this line ends with a soft line-break.
- * 'bufp' is modified to point to the end of the output buffer.
- */
- int UnMimeBodyline(unsigned char **bufp, flag delimited, flag softline)
- {
- unsigned char *buf = *bufp;
- int ret = 0;
- switch (BodyState) {
- case S_BODY_HDR:
- UnMimeHeader(buf); /* Headers in body-parts can be encoded, too! */
- if ((*buf == ' ') || (*buf == 'n') || (strcmp(buf, "rn") == 0)) {
- BodyState = S_BODY_DATA;
- }
- else if (strncasecmp("Content-Transfer-Encoding:", buf, 26) == 0) {
- char *XferEnc;
- XferEnc = nxtaddr(buf);
- if ((XferEnc != NULL) && (strcasecmp(XferEnc, "quoted-printable") == 0)) {
- CurrEncodingIsQP = 1;
- /*
- * Hmm ... we cannot be really sure that CurrTypeNeedsDecode
- * has been set - we may not have seen the Content-Type header
- * yet. But *usually* the Content-Type header comes first, so
- * this will work. And there is really no way of doing it
- * "right" as long as we stick with the line-by-line processing.
- */
- if (CurrTypeNeedsDecode)
- SetEncoding8bit(buf);
- }
- }
- else if (strncasecmp("Content-Type:", buf, 13) == 0) {
- CurrTypeNeedsDecode = CheckContentType(nxtaddr(buf));
- }
- *bufp = (buf + strlen(buf));
- break;
- case S_BODY_DATA:
- if ((*MultipartDelimiter) &&
- (strncmp(buf, MultipartDelimiter, strlen(MultipartDelimiter)) == 0)) {
- BodyState = S_BODY_HDR;
- CurrEncodingIsQP = CurrTypeNeedsDecode = 0;
- }
- if (CurrEncodingIsQP && CurrTypeNeedsDecode)
- ret = DoOneQPLine(bufp, delimited, softline);
- else
- *bufp = (buf + strlen(buf));
- break;
- }
- return ret;
- }
#ifdef STANDALONE
#include <stdio.h>
#include <unistd.h>

char *program_name = "unmime";
int outlevel = 0;

#define BUFSIZE_INCREMENT 4096

/* With DEBUG, copies of the input and output are written to files */
#ifdef DEBUG
#define DBG_FWRITE(B,L,BS,FD) fwrite(B, L, BS, FD)
#else
#define DBG_FWRITE(B,L,BS,FD)
#endif

/*
 * Stand-alone filter: reads a message from stdin, decodes its headers
 * and - where MimeBodyType() says so - its body, and writes the result
 * to stdout.
 *
 * The header block (everything up to and including the first empty
 * line) is collected in a buffer that grows as needed; the body is
 * then processed one line at a time, re-using the same buffer.
 */
int main(int argc, char *argv[])
{
    unsigned int BufSize;
    unsigned char *buffer, *buf_p;
    int nl_count, bodytype;
    int softline = 0;
    size_t got, len;
#ifdef DEBUG
    pid_t pid;
    FILE *fd_orig, *fd_conv;
    char fnam[100];

    pid = getpid();
    snprintf(fnam, sizeof(fnam), "/tmp/i_unmime.%x", (unsigned int)pid);
    fd_orig = fopen(fnam, "w");
    snprintf(fnam, sizeof(fnam), "/tmp/o_unmime.%x", (unsigned int)pid);
    fd_conv = fopen(fnam, "w");
    if ((fd_orig == NULL) || (fd_conv == NULL)) {
        perror(fnam);
        return 1;
    }
#endif

    (void)argc;
    (void)argv;

    BufSize = BUFSIZE_INCREMENT;    /* Initial size of buffer */
    buf_p = buffer = (unsigned char *) xmalloc(BufSize);
    nl_count = 0;

    /* Read the headers: stop after two consecutive newlines (CRs ignored) */
    do {
        got = fread(buf_p, 1, 1, stdin);
        if (got > 0) {
            switch (*buf_p) {
            case '\n':
                nl_count++;
                break;

            case '\r':
                break;

            default:
                nl_count = 0;
                break;
            }

            buf_p++;
            if ((size_t)(buf_p - buffer) == BufSize) {
                /* Buffer is full! Get more room. */
                buffer = (unsigned char *) xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
                buf_p = buffer + BufSize;
                BufSize += BUFSIZE_INCREMENT;
            }
        }
    } while ((got > 0) && (nl_count < 2));

    *buf_p = '\0';
    DBG_FWRITE(buffer, strlen((char *)buffer), 1, fd_orig);

    UnMimeHeader(buffer);
    bodytype = MimeBodyType(buffer, 1);

    len = strlen((char *)buffer);
    fwrite(buffer, len, 1, stdout);
    DBG_FWRITE(buffer, len, 1, fd_conv);

    /* Process the body, one line at a time, until nothing more is read */
    do {
        len = 0;
        while (fread(buffer + len, 1, 1, stdin) == 1) {
            len++;
            if (len == BufSize) {
                /* Keep room for the terminating NUL of a long line */
                buffer = (unsigned char *) xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
                BufSize += BUFSIZE_INCREMENT;
            }
            if (buffer[len - 1] == '\n')
                break;
        }
        buffer[len] = '\0';
        buf_p = buffer + len;
        DBG_FWRITE(buffer, len, 1, fd_orig);

        if (len > 0) {
            if (bodytype & MSG_NEEDS_DECODE) {
                buf_p = buffer;
                softline = UnMimeBodyline(&buf_p, 0, softline);
            }
            fwrite(buffer, (size_t)(buf_p - buffer), 1, stdout);
            DBG_FWRITE(buffer, (size_t)(buf_p - buffer), 1, fd_conv);
        }
    } while (len > 0);

    free(buffer);
    fflush(stdout);

#ifdef DEBUG
    fclose(fd_orig);
    fclose(fd_conv);
#endif

    return 0;
}
#endif