Email客户端

开发平台：
Unix_Linux

unmime.c：源码内容
							/*
 * MIME mail decoding.
 *
 * This module contains decoding routines for converting
 * quoted-printable data into pure 8-bit data, in MIME
 * formatted messages.
 *
 * By Henrik Storner <storner@image.dk>
 *
 * Configuration file support for fetchmail 4.3.8 by 
 * Frank Damgaard <frda@post3.tele.dk>
 * 
 */
#include "config.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include "fetchmail.h"
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'.  Any other input
 * yields 16, a value that can never be mistaken for a valid digit.
 * (Returning the input byte unchanged would make raw bytes 0..15,
 * e.g. NUL or TAB, look like valid hex digits to the caller.)
 */
static unsigned char unhex(unsigned char c)
{
  if ((c >= '0') && (c <= '9'))
    return (c - '0');
  else if ((c >= 'A') && (c <= 'F'))
    return (c - 'A' + 10);
  else if ((c >= 'a') && (c <= 'f'))
    return (c - 'a' + 10);
  else
    return 16;   /* invalid hex character */
}
/*
 * Combine two hex digit characters into the byte they encode.
 *
 * c1 is the high nibble, c2 the low nibble.  On success the decoded
 * byte is stored through c_out and 0 is returned; if either character
 * does not convert to a value in 0..15, *c_out is left untouched and
 * 1 is returned.
 */
static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
{
  const unsigned char hi = unhex(c1);
  const unsigned char lo = unhex(c2);

  if ((hi <= 15) && (lo <= 15)) {
    *c_out = (unsigned char)((hi << 4) | lo);
    return 0;
  }
  return 1;
}
/*
 * Routines to decode MIME QP-encoded headers, as per RFC 2047.
 */
/*
 * States of the header-decoding state machine in UnMimeHeader(),
 * followed by the two-character markers that open and close an
 * encoded sequence.
 */
#define S_COPY_PLAIN        0	/* Just copy, but watch for the QP flag */
#define S_SKIP_MIMEINIT     1	/* Get the encoding, and skip header */
#define S_COPY_MIME         2	/* Decode a sequence of coded characters */
static const char MIMEHDR_INIT[]  = "=?";	/* Start of coded sequence */
static const char MIMEHDR_END[]   = "?=";	/* End of coded sequence */
void UnMimeHeader(unsigned char *hdr)
{
  /* Decode a buffer containing data encoded according to RFC
   * 2047. This only handles content-transfer-encoding; conversion
   * between character sets is not implemented.  In other words: We
   * assume the charsets used can be displayed by your mail program
   * without problems. 
   */
  /* Note: Decoding is done "in-situ", i.e. without using an
   * additional buffer for temp. storage. This is possible, since the
   * decoded string will always be shorter than the encoded string,
   * due to the en- coding scheme.
   */
  int  state = S_COPY_PLAIN;
  unsigned char *p_in, *p_out, *p;
  unsigned char enc = '';		/* initialization pacifies -Wall */
  int  i;
  /* Speed up in case this is not a MIME-encoded header */
  p = strstr(hdr, MIMEHDR_INIT);
  if (p == NULL)
    return;   /* No MIME header */
  /* Loop through the buffer.
   *  p_in : Next char to be processed.
   *  p_out: Where to put the next processed char
   *  enc  : Encoding used (usually, 'q' = quoted-printable)
   */
  for (p_out = p_in = hdr; (*p_in); ) {
    switch (state) {
    case S_COPY_PLAIN:
      p = strstr(p_in, MIMEHDR_INIT);
      if (p == NULL) {
	/* 
	 * No more coded data in buffer, 
         * just move remainder into place. 
	 */
        i = strlen(p_in);   /* How much left */
	memmove(p_out, p_in, i);
	p_in += i; p_out += i;
      }
      else {
	/* MIME header init found at location p */
	if (p > p_in) {
          /* There are some uncoded chars at the beginning. */
          i = (p - p_in);
	  memmove(p_out, p_in, i);
	  p_out += i;
	}
	p_in = (p + 2);
	state = S_SKIP_MIMEINIT;
      }
      break;
    case S_SKIP_MIMEINIT:
      /* Mime type definition: "charset?encoding?" */
      p = strchr(p_in, '?');
      if (p != NULL) {
	/* p_in .. (p-1) holds the charset */
	/* *(p+1) is the transfer encoding, *(p+2) must be a '?' */
	if (*(p+2) == '?') {
	  enc = tolower(*(p+1));
	  p_in = p+3;
	  state = S_COPY_MIME;
	}
	else
	  state = S_COPY_PLAIN;
      }
      else
	state = S_COPY_PLAIN;   /* Invalid data */
      break;
    case S_COPY_MIME:
      p = strstr(p_in, MIMEHDR_END);  /* Find end of coded data */
      if (p == NULL) p = p_in + strlen(p_in);
      for (; (p_in < p); ) {
	/* Decode all encoded data */
	if (enc == 'q') {
	  if (*p_in == '=') {
	    /* Decode one char qp-coded at (p_in+1) and (p_in+2) */
	    if (qp_char(*(p_in+1), *(p_in+2), p_out) == 0)
	      p_in += 3;
	    else {
	      /* Invalid QP data - pass through unchanged. */
	      *p_out = *p_in;
	      p_in++;
	    }
	  }
	  else if (*p_in == '_') {
	    /* 
             * RFC 2047: '_' inside encoded word represents 0x20.
             * NOT a space - always the value 0x20.
             */
	    *p_out = 0x20;
	    p_in++;
	  }
	  else {
	    /* Copy unchanged */
	    *p_out = *p_in;
	    p_in++;
	  }
	  p_out++;
	}
	else if (enc == 'b') {
	  /* Decode base64 encoded data */
	  char delimsave;
	  int decoded_count;
	  delimsave = *p; *p = 'r';
	  decoded_count = from64tobits(p_out, p_in);
	  *p = delimsave;
	  if (decoded_count > 0) 
	    p_out += decoded_count;            
	  p_in = p;
	}
	else {
	  /* Copy unchanged */
	  *p_out = *p_in;
	  p_in++;
	  p_out++;
	}
      }
      if (*p_in)
	p_in += 2;   /* Skip the MIMEHDR_END delimiter */
      /* 
       * We've completed decoding one encoded sequence. But another
       * may follow immediately, in which case whitespace before the
       * new MIMEHDR_INIT delimiter must be discarded.
       * See if that is the case 
       */
      p = strstr(p_in, MIMEHDR_INIT);
      state = S_COPY_PLAIN;
      if (p != NULL) {
	/*
	 * There is more MIME data later on. Is there
         * whitespace  only before the delimiter? 
	 */
        unsigned char *q;
        int  wsp_only = 1;
        for (q=p_in; (wsp_only && (q < p)); q++)
          wsp_only = isspace(*q);
        if (wsp_only) {
	  /* 
	   * Whitespace-only before the MIME delimiter. OK,
           * just advance p_in to past the new MIMEHDR_INIT,
           * and prepare to process the new MIME charset/encoding
	   * header.
	   */
	  p_in = p + sizeof(MIMEHDR_INIT) - 1;
	  state = S_SKIP_MIMEINIT;
        }
      }
      break;
    }
  }
  *p_out = '';
}
/*
 * Routines for decoding body-parts of a message.
 *
 * Since the "fetch" part of fetchmail gets a message body
 * one line at a time, we need to maintain some state variables
 * across multiple invocations of the UnMimeBodyline() routine.
 * The driver routine should call MimeBodyType() when all
 * headers have been received, and then UnMimeBodyline() for
 * every line in the message body.
 *
 */
/* Values taken by BodyState */
#define S_BODY_DATA 0
#define S_BODY_HDR  1
/* 
 * Flag indicating if we are currently processing 
 * the headers or the body of a (multipart) message.
 */
static int  BodyState = S_BODY_DATA;
/* 
 * Flag indicating if we are in the process of decoding
 * a quoted-printable body part.
 */
static int  CurrEncodingIsQP = 0;
/*
 * Flag holding the result of CheckContentType() for the current
 * message or body part; body lines are decoded only while both this
 * flag and CurrEncodingIsQP are set.
 */
static int  CurrTypeNeedsDecode = 0;
/* 
 * Delimiter for multipart messages. RFC 2046 states that this must
 * NEVER be longer than 70 characters. Add 3 for the two hyphens
 * at the beginning, and a terminating null.
 * An empty string means that no delimiter is in effect.
 */
#define MAX_DELIM_LEN 70
static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3];
/* This string replaces the "Content-Transfer-Encoding: quoted-printable"
 * string in all headers, including those in body-parts. The replacement
 * must be no longer than the original string, since it is written over
 * the original header in place.
 */
static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit";
/*
 * Overwrite the header starting at XferEncOfs with the ENC8BIT text.
 *
 * The replacement is written in place (without a terminating NUL);
 * whatever follows it is blanked with spaces up to, but not including,
 * the first byte below ' ' (a control character or the NUL).
 * A NULL pointer is ignored.
 */
static void SetEncoding8bit(unsigned char *XferEncOfs)
{
  const size_t repl_len = sizeof(ENC8BIT) - 1;
  unsigned char *tail;

  if (XferEncOfs == NULL)
    return;

  memcpy(XferEncOfs, ENC8BIT, repl_len);

  /* If anything is left in this header, replace it with whitespace */
  tail = XferEncOfs + repl_len;
  while (*tail >= ' ') {
    *tail = ' ';
    tail++;
  }
}
/*
 * Locate the value of the "boundary" parameter in a Content-Type value.
 *
 * CntType is modified: the boundary value is NUL-terminated in place
 * (at the closing quote, or at the next ';' for an unquoted value).
 *
 * Returns a pointer into CntType at the start of the boundary string,
 * or NULL when there is no "boundary" parameter, no '=' after it,
 * no closing quote, or the value is empty.
 */
static char *GetBoundary(char *CntType)
{
  char *p1, *p2;
  int seen_eq;

  /* Find the "boundary" delimiter. It must be preceded with a ';'
   * and optionally some whitespace.
   */
  p1 = CntType;
  do {
    p2 = strchr(p1, ';');
    if (p2)
      for (p2++; isspace((unsigned char)*p2); p2++);
    p1 = p2;
  } while ((p1) && (strncasecmp(p1, "boundary", 8) != 0));

  if (p1 == NULL)
    /* No boundary delimiter */
    return NULL;

  /* Skip "boundary", whitespace and '='; check that we do have a '=' */
  for (p1+=8, seen_eq=0; (isspace((unsigned char)*p1) || (*p1 == '=')); p1++)
    seen_eq |= (*p1 == '=');
  if (!seen_eq)
    return NULL;

  /* Find end of boundary delimiter string */
  if (*p1 == '"') {
    /* The delimiter is inside quotes */
    p1++;
    p2 = strchr(p1, '"');
    if (p2 == NULL)
      return NULL;  /* No closing '"' !?! */
  }
  else {
    /* There might be more text after the "boundary" string. */
    p2 = strchr(p1, ';');  /* Safe - delimiter with ';' must be in quotes */
  }

  /* Zero-terminate the boundary string */
  if (p2 != NULL)
    *p2 = '\0';

  return (*p1 != '\0') ? p1 : NULL;
}
/*
 * Tell whether a Content-Type value names a type for which we are
 * willing to do quoted-printable decoding.
 *
 * CntType may be NULL (no Content-Type header: not MIME, don't touch
 * it).  Leading whitespace is skipped and the comparison is a
 * case-insensitive prefix match.
 *
 * Returns 1 when the type is in the list below, 0 otherwise.
 */
int CheckContentType(char *CntType)
{
  /*
   * Static array of Content-Type's for which we will do
   * quoted-printable decoding, if requested.
   * It is probably wise to do this only on known text-only types;
   * be really careful if you change this.
   */
  static const char *const DecodedTypes[] = {
    "text/",        /* Will match ALL content-type's starting with 'text/' */
    "message/rfc822",
    NULL
  };
  const char *p = CntType;
  int i;

  /* If no Content-Type header, it isn't MIME - don't touch it */
  if (CntType == NULL)
    return 0;

  /*
   * Skip whitespace, if any.  The cast is required: passing a negative
   * plain char to isspace() is undefined behavior.
   */
  while (isspace((unsigned char)*p))
    p++;

  for (i = 0; DecodedTypes[i] != NULL; i++) {
    if (strncasecmp(p, DecodedTypes[i], strlen(DecodedTypes[i])) == 0)
      return 1;
  }
  return 0;
}
/*
 * This routine does three things:
 * 1) It determines - based on the message headers - whether the
 *    message body is a MIME message that may hold 8 bit data.
 *    - A message that has a "quoted-printable" or "8bit" transfer 
 *      encoding is assumed to contain 8-bit data (when decoded).
 *    - A multipart message is assumed to contain 8-bit data
 *      when decoded (there might be quoted-printable body-parts).
 *    - All other messages are assumed NOT to include 8-bit data.
 * 2) It determines the delimiter-string used in multi-part message
 *    bodies.
 * 3) It sets the initial values of the CurrEncodingIsQP, 
 *    CurrTypeNeedsDecode, and BodyState variables, from the header 
 *    contents.
 *
 * The return value is a bitmask.
 */
int MimeBodyType(unsigned char *hdrs, int WantDecode)
{
  unsigned char *NxtHdr = hdrs;
  unsigned char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p;
  int  HdrsFound = 0;     /* We only look for three headers */
  int  BodyType;          /* Return value */ 
  /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */
  MultipartDelimiter[0] = '';
  CurrEncodingIsQP = CurrTypeNeedsDecode = 0;
  BodyState = S_BODY_DATA;
  BodyType = 0;
  /* Just in case ... */
  if (hdrs == NULL)
    return BodyType;
  XferEnc = XferEncOfs = CntType = MimeVer = NULL;
  do {
    if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) {
      XferEncOfs = NxtHdr;
      p = nxtaddr(NxtHdr);
      if (p != NULL) {
	xalloca(XferEnc, char *, strlen(p) + 1);
	strcpy(XferEnc, p);
	HdrsFound++;
      }
    }
    else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) {
      /*
       * This one is difficult. We cannot use the standard
       * nxtaddr() routine, since the boundary-delimiter is
       * (probably) enclosed in quotes - and thus appears
       * as an rfc822 comment, and nxtaddr() "eats" up any
       * spaces in the delimiter. So, we have to do this
       * by hand.
       */
      /* Skip the "Content-Type:" part and whitespace after it */
      for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == 't')); NxtHdr++);
      /* 
       * Get the full value of the Content-Type header;
       * it might span multiple lines. So search for
       * a newline char, but ignore those that have a
       * have a TAB or space just after the NL (continued
       * lines).
       */
      p = NxtHdr-1;
      do {
        p=strchr((p+1),'n'); 
      } while ( (p != NULL) && ((*(p+1) == 't') || (*(p+1) == ' ')) );
      if (p == NULL) p = NxtHdr + strlen(NxtHdr);
      xalloca(CntType, char *, p-NxtHdr+2);
      strncpy(CntType, NxtHdr, (p-NxtHdr));
      *(CntType+(p-NxtHdr)) = '';
      HdrsFound++;
    }
    else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0) {
      p = nxtaddr(NxtHdr);
      if (p != NULL) {
	xalloca(MimeVer, char *, strlen(p) + 1);
	strcpy(MimeVer, p);
	HdrsFound++;
      }
    }
    NxtHdr = (strchr(NxtHdr, 'n'));
    if (NxtHdr != NULL) NxtHdr++;
  } while ((NxtHdr != NULL) && (*NxtHdr) && (HdrsFound != 3));
  /* Done looking through the headers, now check what they say */
  if ((MimeVer != NULL) && (strcmp(MimeVer, "1.0") == 0)) {
    CurrTypeNeedsDecode = CheckContentType(CntType);
    /* Check Content-Type to see if this is a multipart message */
    if ( (CntType != NULL) &&
         ((strncasecmp(CntType, "multipart/mixed", 16) == 0) ||
	  (strncasecmp(CntType, "message/", 8) == 0)) ) {
      char *p1 = GetBoundary(CntType);
      if (p1 != NULL) {
	/* The actual delimiter is "--" followed by 
	   the boundary string */
	strcpy(MultipartDelimiter, "--");
	strncat(MultipartDelimiter, p1, MAX_DELIM_LEN);
	BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
      }
    }
    /* 
     * Check Content-Transfer-Encoding, but
     * ONLY for non-multipart messages (BodyType == 0).
     */
    if ((XferEnc != NULL) && (BodyType == 0)) {
      if (strcasecmp(XferEnc, "quoted-printable") == 0) {
	CurrEncodingIsQP = 1;
	BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
	if (WantDecode && CurrTypeNeedsDecode) {
           SetEncoding8bit(XferEncOfs);
        }
      }
      else if (strcasecmp(XferEnc, "7bit") == 0) {
	CurrEncodingIsQP = 0;
	BodyType = (MSG_IS_7BIT);
      }
      else if (strcasecmp(XferEnc, "8bit") == 0) {
	CurrEncodingIsQP = 0;
	BodyType = (MSG_IS_8BIT);
      }
    }
  }
  return BodyType;
}
/*
 * Decode, in place, one line of quoted-printable data.
 *
 * On entry *bufp points at the NUL-terminated line; on return *bufp
 * points at the terminating NUL of the decoded output.
 *
 * 'delimited' says that the message is dot-terminated, so leading
 * dots are stuffed; 'issoftline' says that the previous line ended in
 * a soft line-break, i.e. this line continues it.
 *
 * Returns 1 if this line ends with a soft line-break, 0 otherwise.
 */
static int DoOneQPLine(unsigned char **bufp, flag delimited, flag issoftline)
{
  unsigned char *buf = *bufp;
  unsigned char *p_in, *p_out, *p;
  int n;
  int ret = 0;

  /*
   * Special case: line consists of a single =2E and messages are 
   * dot-terminated.  Line has to be dot-stuffed after decoding.
   * The result is four characters long, so the output ends at buf+4.
   */
  if (delimited && !issoftline && buf[0]=='=' &&
      !strncmp((char *)buf, "=2E\r\n", 5))
  {
      strcpy((char *)buf, "..\r\n");
      *bufp += 4;
      return 0;
  }

  p_in = buf;
  /* Continuation of a soft-broken line: undo the dot-stuffing */
  if (delimited && issoftline && (strncmp((char *)buf, "..", 2) == 0))
    p_in++;

  for (p_out = buf; (*p_in); ) {
    p = (unsigned char *)strchr((char *)p_in, '=');
    if (p == NULL) {
      /* No more QP data, just move remainder into place */
      n = strlen((char *)p_in);
      memmove(p_out, p_in, n);
      p_in += n; p_out += n;
    }
    else {
      if (p > p_in) {
        /* There are some uncoded chars at the beginning. */
        n = (p - p_in);
        memmove(p_out, p_in, n);
        p_out += n;
      }

      switch (*(p+1)) {
      case '\0': case '\r': case '\n':
        /* Soft line break, skip '=' */
        p_in = p+1; 
        if (*p_in == '\r') p_in++;
        if (*p_in == '\n') p_in++;
        ret = 1;
        break;

      default:
        /*
         * There is a QP encoded byte.  Never decode across the
         * terminating NUL: that would move p_in past the end of
         * the line.
         */
        if ((*(p+2) != '\0') &&
            (qp_char(*(p+1), *(p+2), p_out) == 0)) {
          p_in = p+3;
        }
        else {
          /* Invalid QP data - pass through unchanged. */
          *p_out = '=';
          p_in = p+1;
        }
        p_out++;
        break;
      }
    }
  }

  *p_out = '\0';
  *bufp = p_out;
  return ret;
}
/* This is called once per line in the message body.  We need to scan
 * all lines in the message body for the multipart delimiter string,
 * and handle any body-part headers in such messages (these can toggle
 * qp-decoding on and off).
 *
 * Note: Messages that are NOT multipart-messages go through this
 * routine quickly, since BodyState will always be S_BODY_DATA,
 * and MultipartDelimiter is an empty string.
 *
 * 'delimited' and 'softline' are passed on to DoOneQPLine() when the
 * line has to be decoded.
 *
 * Return flag set if this line ends with a soft line-break.
 * 'bufp' is modified to point to the end of the output buffer.
 */
int UnMimeBodyline(unsigned char **bufp, flag delimited, flag softline)
{
  unsigned char *buf = *bufp;
  int ret = 0;

  switch (BodyState) {
  case S_BODY_HDR:
    UnMimeHeader(buf);   /* Headers in body-parts can be encoded, too! */
    if ((*buf == '\0') || (*buf == '\n') ||
        (strcmp((char *)buf, "\r\n") == 0)) {
      /* An empty line ends the body-part headers */
      BodyState = S_BODY_DATA;
    } 
    else if (strncasecmp("Content-Transfer-Encoding:", (char *)buf, 26) == 0) {
      char *XferEnc;

      XferEnc = nxtaddr(buf);
      if ((XferEnc != NULL) && (strcasecmp(XferEnc, "quoted-printable") == 0)) {
        CurrEncodingIsQP = 1;
        /*
         * Hmm ... we cannot be really sure that CurrTypeNeedsDecode
         * has been set - we may not have seen the Content-Type header
         * yet. But *usually* the Content-Type header comes first, so
         * this will work. And there is really no way of doing it 
         * "right" as long as we stick with the line-by-line processing.
         */
        if (CurrTypeNeedsDecode)
            SetEncoding8bit(buf);
      }
    }
    else if (strncasecmp("Content-Type:", (char *)buf, 13) == 0) {
      CurrTypeNeedsDecode = CheckContentType(nxtaddr(buf));
    }

    *bufp = (buf + strlen((char *)buf));
    break;

  case S_BODY_DATA:
    if ((*MultipartDelimiter) && 
        (strncmp((char *)buf, (char *)MultipartDelimiter,
                 strlen((char *)MultipartDelimiter)) == 0)) {
      /* A new body part starts: its headers follow, decoding is off */
      BodyState = S_BODY_HDR;
      CurrEncodingIsQP = CurrTypeNeedsDecode = 0;
    }

    if (CurrEncodingIsQP && CurrTypeNeedsDecode) 
      ret = DoOneQPLine(bufp, delimited, softline);
    else
      *bufp = (buf + strlen((char *)buf));
    break;
  }

  return ret;
}
#ifdef STANDALONE
#include <stdio.h>
#include <unistd.h>
char *program_name = "unmime";
int outlevel = 0;
#define BUFSIZE_INCREMENT 4096
#ifdef DEBUG
#define DBG_FWRITE(B,L,BS,FD) fwrite(B, L, BS, FD)
#else
#define DBG_FWRITE(B,L,BS,FD)
#endif
int main(int argc, char *argv[])
{
  unsigned int BufSize;
  unsigned char *buffer, *buf_p;
  int nl_count, i, bodytype;
#ifdef DEBUG
  pid_t pid;
  FILE *fd_orig, *fd_conv;
  char fnam[100];
  pid = getpid();
  sprintf(fnam, "/tmp/i_unmime.%x", pid);
  fd_orig = fopen(fnam, "w");
  sprintf(fnam, "/tmp/o_unmime.%x", pid);
  fd_conv = fopen(fnam, "w");
#endif
  BufSize = BUFSIZE_INCREMENT;    /* Initial size of buffer */
  buf_p = buffer = (unsigned char *) xmalloc(BufSize);
  nl_count = 0;
  do {
    i = fread(buf_p, 1, 1, stdin);
    switch (*buf_p) {
     case 'n':
       nl_count++;
       break;
     case 'r':
       break;
     default:
       nl_count = 0;
       break;
    }
    buf_p++;
    if ((buf_p - buffer) == BufSize) {
       /* Buffer is full! Get more room. */
       buffer = xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
       buf_p = buffer + BufSize;
       BufSize += BUFSIZE_INCREMENT;
    }
  } while ((i > 0) && (nl_count < 2));
  *buf_p = '';
  DBG_FWRITE(buffer, strlen(buffer), 1, fd_orig);
  UnMimeHeader(buffer);
  bodytype = MimeBodyType(buffer, 1);
  i = strlen(buffer);
  fwrite(buffer, i, 1, stdout);
  DBG_FWRITE(buffer, i, 1, fd_conv);
  
  do {
     buf_p = (buffer - 1);
     do {
        buf_p++;
        i = fread(buf_p, 1, 1, stdin);
     } while ((i == 1) && (*buf_p != 'n'));
     if (i == 1) buf_p++;
     *buf_p = '';
     DBG_FWRITE(buf, (buf_p - buffer), 1, fd_orig);
     if (buf_p > buffer) {
        if (bodytype & MSG_NEEDS_DECODE) {
           buf_p = buffer;
           UnMimeBodyline(&buf_p, 0);
        }
        fwrite(buffer, (buf_p - buffer), 1, stdout);
        DBG_FWRITE(buffer, (buf_p - buffer), 1, fd_conv);
     }
  } while (buf_p > buffer);
  free(buffer);
  fflush(stdout);
#ifdef DEBUG
  fclose(fd_orig);
  fclose(fd_conv);
#endif
  return 0;
}
#endif