iconvert.c
资源名称:tcpmp.rar [点击查看]
上传用户:wstnjxml
上传日期:2014-04-03
资源大小:7248k
文件大小:6k
源码类别:
Windows CE
开发平台:
C/C++
- /*
- * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
- #ifdef HAVE_CONFIG_H
- #include <config.h>
- #endif
- #ifdef HAVE_ICONV
- #include <assert.h>
- #include <errno.h>
- #include <iconv.h>
- #include <stdlib.h>
- #include <string.h>
- /*
- * Convert data from one encoding to another. Return:
- *
- * -2 : memory allocation failed
- * -1 : unknown encoding
- * 0 : data was converted exactly
- * 1 : data was converted inexactly
- * 2 : data was invalid (but still converted)
- *
- * We convert in two steps, via UTF-8, as this is the only
- * reliable way of distinguishing between invalid input
- * and valid input which iconv refuses to transliterate.
- * We convert from UTF-8 twice, because we have no way of
- * knowing whether the conversion was exact if iconv returns
- * E2BIG (due to a bug in the specification of iconv).
- * An alternative approach is to assume that the output of
- * iconv is never more than 4 times as long as the input,
- * but I prefer to avoid that assumption if possible.
- */
- int iconvert(const char *fromcode, const char *tocode,
- const char *from, size_t fromlen,
- char **to, size_t *tolen)
- {
- int ret = 0;
- iconv_t cd1, cd2;
- char *ib;
- char *ob;
- char *utfbuf = 0, *outbuf, *newbuf;
- size_t utflen, outlen, ibl, obl, k;
- char tbuf[2048];
- cd1 = iconv_open("UTF-8", fromcode);
- if (cd1 == (iconv_t)(-1))
- return -1;
- cd2 = (iconv_t)(-1);
- /* Don't use strcasecmp() as it's locale-dependent. */
- if (!strchr("Uu", tocode[0]) ||
- !strchr("Tt", tocode[1]) ||
- !strchr("Ff", tocode[2]) ||
- tocode[3] != '-' ||
- tocode[4] != '8' ||
- tocode[5] != ' ') {
- char *tocode1;
- /*
- * Try using this non-standard feature of glibc and libiconv.
- * This is deliberately not a config option as people often
- * change their iconv library without rebuilding applications.
- */
- tocode1 = (char *)malloc(strlen(tocode) + 11);
- if (!tocode1)
- goto fail;
- strcpy(tocode1, tocode);
- strcat(tocode1, "//TRANSLIT");
- cd2 = iconv_open(tocode1, "UTF-8");
- free(tocode1);
- if (cd2 == (iconv_t)(-1))
- cd2 = iconv_open(tocode, fromcode);
- if (cd2 == (iconv_t)(-1)) {
- iconv_close(cd1);
- return -1;
- }
- }
- utflen = 1; /*fromlen * 2 + 1; XXX */
- utfbuf = (char *)malloc(utflen);
- if (!utfbuf)
- goto fail;
- /* Convert to UTF-8 */
- ib = (char *)from;
- ibl = fromlen;
- ob = utfbuf;
- obl = utflen;
- for (;;) {
- k = iconv(cd1, &ib, &ibl, &ob, &obl);
- assert((!k && !ibl) ||
- (k == (size_t)(-1) && errno == E2BIG && ibl && obl < 6) ||
- (k == (size_t)(-1) &&
- (errno == EILSEQ || errno == EINVAL) && ibl));
- if (!ibl)
- break;
- if (obl < 6) {
- /* Enlarge the buffer */
- utflen *= 2;
- newbuf = (char *)realloc(utfbuf, utflen);
- if (!newbuf)
- goto fail;
- ob = (ob - utfbuf) + newbuf;
- obl = utflen - (ob - newbuf);
- utfbuf = newbuf;
- }
- else {
- /* Invalid input */
- ib++, ibl--;
- *ob++ = '#', obl--;
- ret = 2;
- iconv(cd1, 0, 0, 0, 0);
- }
- }
- if (cd2 == (iconv_t)(-1)) {
- /* The target encoding was UTF-8 */
- if (tolen)
- *tolen = ob - utfbuf;
- if (!to) {
- free(utfbuf);
- iconv_close(cd1);
- return ret;
- }
- newbuf = (char *)realloc(utfbuf, (ob - utfbuf) + 1);
- if (!newbuf)
- goto fail;
- ob = (ob - utfbuf) + newbuf;
- *ob = ' ';
- *to = newbuf;
- iconv_close(cd1);
- return ret;
- }
- /* Truncate the buffer to be tidy */
- utflen = ob - utfbuf;
- newbuf = (char *)realloc(utfbuf, utflen);
- if (!newbuf)
- goto fail;
- utfbuf = newbuf;
- /* Convert from UTF-8 to discover how long the output is */
- outlen = 0;
- ib = utfbuf;
- ibl = utflen;
- while (ibl) {
- ob = tbuf;
- obl = sizeof(tbuf);
- k = iconv(cd2, &ib, &ibl, &ob, &obl);
- assert((k != (size_t)(-1) && !ibl) ||
- (k == (size_t)(-1) && errno == E2BIG && ibl) ||
- (k == (size_t)(-1) && errno == EILSEQ && ibl));
- if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
- /* Replace one character */
- char *tb = "?";
- size_t tbl = 1;
- outlen += ob - tbuf;
- ob = tbuf;
- obl = sizeof(tbuf);
- k = iconv(cd2, &tb, &tbl, &ob, &obl);
- assert((!k && !tbl) ||
- (k == (size_t)(-1) && errno == EILSEQ && tbl));
- for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
- ;
- }
- outlen += ob - tbuf;
- }
- ob = tbuf;
- obl = sizeof(tbuf);
- k = iconv(cd2, 0, 0, &ob, &obl);
- assert(!k);
- outlen += ob - tbuf;
- /* Convert from UTF-8 for real */
- outbuf = (char *)malloc(outlen + 1);
- if (!outbuf)
- goto fail;
- ib = utfbuf;
- ibl = utflen;
- ob = outbuf;
- obl = outlen;
- while (ibl) {
- k = iconv(cd2, &ib, &ibl, &ob, &obl);
- assert((k != (size_t)(-1) && !ibl) ||
- (k == (size_t)(-1) && errno == EILSEQ && ibl));
- if (k && !ret)
- ret = 1;
- if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
- /* Replace one character */
- char *tb = "?";
- size_t tbl = 1;
- k = iconv(cd2, &tb, &tbl, &ob, &obl);
- assert((!k && !tbl) ||
- (k == (size_t)(-1) && errno == EILSEQ && tbl));
- for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
- ;
- }
- }
- k = iconv(cd2, 0, 0, &ob, &obl);
- assert(!k);
- assert(!obl);
- *ob = ' ';
- free(utfbuf);
- iconv_close(cd1);
- iconv_close(cd2);
- if (tolen)
- *tolen = outlen;
- if (!to) {
- free(outbuf);
- return ret;
- }
- *to = outbuf;
- return ret;
- fail:
- if(0 != utfbuf)
- free(utfbuf);
- iconv_close(cd1);
- if (cd2 != (iconv_t)(-1))
- iconv_close(cd2);
- return -2;
- }
- #endif /* HAVE_ICONV */