uniconv.cpp
上传用户:zhuqijet
上传日期:2013-06-25
资源大小:10074k
文件大小:14k
- /*
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2002 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation, and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.ibm.com . For more information
- * on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
- /*
- * $Id: uniconv.cpp,v 1.1 2002/11/22 14:57:06 tng Exp $
- */
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <errno.h>
- #include <ctype.h>
- #include <cunhc.h>
- #include "ccsid.h"
- #include "uniconv.h"
- XERCES_CPP_NAMESPACE_BEGIN
// Size in bytes of the work buffer appended to every character conversion
// handle.  Parenthesised so the macro expands to a single value in any
// expression context (for example next to '%', '/' or a cast).
#define WORK_BUFFER_SIZE (16*1024)
// Size in bytes of the DDA buffer placed between the parameter area and the
// work buffer of a character conversion handle.
#define DDA_NEEDED CUNBCPRM_DDA_REQ
// Upper bound on how many times a Unicode Services call is re-issued while
// it keeps reporting a missing or invalid handle.
#define RETRY_THRESHOLD 10000
// Utility routine which copies the name s into d while dropping every
// '-', '_' and space and upper-casing everything else.
//
//   s    NUL-terminated source name.
//   d    destination buffer; must have room for max characters plus the
//        terminating NUL that is always written.
//   max  maximum number of characters to store in d (terminator excluded).
//
// Returns the number of source characters consumed (the length of s) when
// the whole name was processed, or -1 when d filled up before the end of s
// was reached.  d is NUL-terminated in both cases.
static int stripNameCopy(const char *s,char *d,int max)
{
    int si=0;
    int di=0;

    // Stop at the end of the source string or when the destination is full.
    while ( (s[si] != '\0') && (di < max) ) {
        const char c = s[si++];
        if ( (c == ' ') || (c == '_') || (c == '-') )
            continue;
        // <ctype.h> functions require a value representable as unsigned char.
        d[di++] = (char) toupper((unsigned char) c);
    }
    d[di] = '\0';

    // Unprocessed source characters mean the name did not fit.
    if (s[si] != '\0')
        return -1;
    return si;
}
- // This takes a name and does a lookup into the ccsid table (from ccsid.h)
- // to find the corresponding ccsid. It also checks if the string ends in s390
- // and returns that information to the caller.
- // The lookup into the table is done via a binary search since we know that the
- // table was nicely sorted for us.
- static int getccsid(const char *s,int * is390)
- {
- char tmpstr[_AE_MAX_CODESET_NAME_LENGTH];
- int start;
- int limit;
- int index;
- int result;
- int thelen;
- // Clean up the name....
- if (s == NULL)
- return -1;
- if ((thelen = stripNameCopy(s,tmpstr,_AE_MAX_CODESET_NAME_LENGTH-1)) == -1)
- return -1;
- // Check for the S390 string in the name
- *is390 = 0;
- if ( (strstr((char *)tmpstr, "S390")) != NULL )
- *is390 = 1;
- // Now lookup the name via a binary search
- start = 0;
- limit = _AE_NUM_OF_CODESETS;
- index = limit/2;
- while ( ((result=strcoll(tmpstr, CCSID_MAPPING[index].NAME)) != 0) &&
- (start < limit-1) ) {
- if (result < 0)
- limit = index;
- else
- start = index;
- index = (start+limit)/2;
- }
- if (result != 0 && start >= limit-1)
- return -1;
- return CCSID_MAPPING[index].CCSID;
- }
- // **********************************************************************
- // These are the character conversion services
- // **********************************************************************
- // "Open" the conversion. Allocate memory to hold the handle which
- // unicode services requires. Call unicode services with a 0 length
- // so that it can initialize it's handle.
- // Note that unicode services must always be called in a loop since
- // it could be busy reloading its tables.
- uniconv_t uniconv_open(const char *destenc, const char *srcenc) {
- CUNBCPRM defparms = {CUNBCPRM_DEFAULT};
- CUNBCPRM * tmpp;
- void * handle_area;
- char *cptr;
- int srcis390;
- int destis390;
- errno = 0;
- handle_area = malloc (sizeof(CUNBCPRM)+DDA_NEEDED+WORK_BUFFER_SIZE);
- tmpp = (CUNBCPRM *) handle_area;
- if (tmpp==NULL)
- return (uniconv_t)-1;
- // initialize the parm area with defaults, then start filling it
- // in with our values.
- memcpy(tmpp,&defparms,sizeof(defparms));
- tmpp->Src_Buf_Len= 0;
- // get the ccsids.
- if ( ((tmpp->Src_CCSID=getccsid(srcenc,&srcis390)) == -1) ||
- ((tmpp->Targ_CCSID=getccsid(destenc,&destis390)) == -1) ) {
- errno=ENOENT;
- free(handle_area);
- return (uniconv_t)-1;
- }
- tmpp->Wrk_Buf_Ptr=(void*) (((unsigned int) handle_area) + sizeof(CUNBCPRM)+DDA_NEEDED);
- tmpp->Wrk_Buf_Len=WORK_BUFFER_SIZE;
- tmpp->DDA_Buf_Ptr=(void*) ((unsigned int) handle_area + sizeof(CUNBCPRM));
- tmpp->DDA_Buf_Len=DDA_NEEDED;
- // This flag tells the services to automatically refresh the handle if it
- // becomes invalid.
- tmpp->Flag1|=CUNBCPRM_REFRESH_AT_INV_HANDLE_START;
- tmpp->Flag1|=CUNBCPRM_SUB_ACTION_SUBSTITUTE;
- /* Determine which technique to use */
- if ( (srcis390) || (destis390) )
- // This technique causes it to swap LF and NL.
- memcpy(tmpp->Technique,"L ",8);
- else
- memcpy(tmpp->Technique," ",8);
- // Retry if the services are busy reloading their tables.
- int retry_count = 0;
- while (retry_count < RETRY_THRESHOLD) {
- CUNLCNV(tmpp);
- if (tmpp->Return_Code == CUN_RC_OK)
- break;
- else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
- ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
- (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
- (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )
- // Let it loop around again
- retry_count++;
- else
- break;
- }
- if (tmpp->Return_Code != CUN_RC_OK) {
- // printf("uniconv_open() Error!!! rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code); // remove this after function test
- free(handle_area);
- errno=EINVAL;
- handle_area = (uniconv_t)-1;
- }
- return handle_area;
- }
- // All that is required for close is to free the handle buffer.
- int uniconv_close(uniconv_t handle_area) {
- errno = 0;
- if (((int)handle_area) <= 0) {
- errno=EBADF;
- return -1;
- }
- free(handle_area);
- return 0;
- }
- // This does the real conversion.
- // Note that unicode services must always be called in a loop since
- // it could be busy reloading its tables.
- int uniconv(uniconv_t cd, char **inbuf, size_t *inbytesleft,
- char **outbuf, size_t *outbytesleft) {
- CUNBCPRM * tmpp;
- size_t startinlen = *inbytesleft;
- size_t startoutlen = *outbytesleft;
- errno = 0;
- if (((int)cd) <= 0) {
- errno=EBADF;
- return -1;
- }
- // Fill in the parameter area with current values
- tmpp = (CUNBCPRM *) cd;
- tmpp->Src_Buf_Ptr = *inbuf;
- tmpp->Src_Buf_Len = *inbytesleft;
- tmpp->Targ_Buf_Ptr = *outbuf;
- tmpp->Targ_Buf_Len = *outbytesleft;
- // Retry if the services are busy reloading their tables.
- int retry_count = 0;
- while (retry_count < RETRY_THRESHOLD) {
- CUNLCNV(tmpp);
- if (tmpp->Return_Code == CUN_RC_OK)
- break;
- else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
- ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
- (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
- (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )
- // Let it loop around again
- retry_count++;
- else
- break;
- }
- *inbuf = (char *)tmpp->Src_Buf_Ptr;
- *inbytesleft = tmpp->Src_Buf_Len;
- *outbuf = (char *)tmpp->Targ_Buf_Ptr;
- *outbytesleft = tmpp->Targ_Buf_Len;
- if (tmpp->Return_Code != CUN_RC_OK) {
- if (tmpp->Reason_Code == CUN_RS_TRG_EXH)
- errno=E2BIG;
- else if (tmpp->Reason_Code == CUN_RS_MBC_INCOMPLETE)
- errno=EINVAL;
- else {
- printf("uniconv() Error!!! rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code); // remove after function test
- errno=EBADF;
- return -1;
- }
- }
- return (startinlen-*inbytesleft);
- }
- // **********************************************************************
- // These are the case conversion services.
- // **********************************************************************
- // This "opens" the case conversion. It allocates the parameter area
- // then does a dummy call to unicode services so that it can set up
- // the handle.
- // Note that unicode services must always be called in a loop since
- // it could be busy reloading its tables.
- static inline uniconv_t uniconv_case_open(unsigned char direction) {
- CUNBAPRM defparms = {CUNBAPRM_DEFAULT};
- CUNBAPRM * tmpp;
- void * handle_area;
- errno = 0;
- handle_area = malloc (sizeof(CUNBAPRM)+CUNBAPRM_DDA_REQ);
- tmpp = (CUNBAPRM *) handle_area;
- if (tmpp==NULL)
- return (uniconv_t)-1;
- // initialize the parm area with defaults, then start filling it
- // in with our values.
- memcpy(tmpp,&defparms,sizeof(defparms));
- tmpp->DDA_Buf_Ptr=(void*) ((unsigned int) handle_area + sizeof(CUNBAPRM));
- tmpp->DDA_Buf_Len=CUNBAPRM_DDA_REQ;
- // This flag tells the services to automatically refresh the handle if it
- // becomes invalid.
- tmpp->Flag1|=CUNBAPRM_REFRESH_AT_INV_HANDLE_START;
- unichar_t inchar = 0x61;
- unichar_t outchar;
- tmpp->Src_Buf_Ptr=&inchar;
- tmpp->Targ_Buf_Ptr=&outchar;
- tmpp->Targ_Buf_Len=sizeof(unichar_t);
- tmpp->Src_Buf_Len=sizeof(unichar_t);
- tmpp->Conv_Type=direction;
- // Retry if the services are busy reloading their tables.
- int retry_count = 0;
- while (true) {
- CUNLASE ( tmpp );
- if (tmpp->Return_Code == CUN_RC_OK) {
- break;
- } else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
- ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
- (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
- (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) {
- // Let it loop around again
- retry_count++;
- if (retry_count > RETRY_THRESHOLD) {
- errno = ENOSYS;
- break;
- }
- } else {
- // printf("CUNLASE: Unicode Services is a Failure!n");
- // printf("CUNLASE rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code);
- errno = ENOSYS;
- break;
- }
- }
- if (tmpp->Return_Code != CUN_RC_OK) {
- // printf("uniconv_case_open() Error!!! rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code); // remove after function test.
- free(handle_area);
- errno=EINVAL;
- handle_area = (uniconv_t)-1;
- }
- return handle_area;
- }
- // These are the actual external interfaces for the open function
- uniconv_t uniconv_toupper_open() {
- return uniconv_case_open(CUNBAPRM_TO_UPPER);
- }
- uniconv_t uniconv_tolower_open() {
- return uniconv_case_open(CUNBAPRM_TO_LOWER);
- }
- // This closes the case conversion. All it does is free the handle buffer.
- int _uniconv_case_close(uniconv_t handle_area) {
- errno = 0;
- if (((int)handle_area) <= 0) {
- errno=EBADF;
- return -1;
- }
- free(handle_area);
- return 0;
- }
- // This does the actual case conversion. The direction is already
- // stored in the handle buffer.
- // Note that unicode services must always be called in a loop since
- // it could be busy reloading its tables.
- unichar_t uniconv_caseit (uniconv_t cd,unichar_t inchar) {
- unichar_t outchar;
- CUNBAPRM * tmpp;
- errno = 0;
- if (((int)cd) <= 0) {
- errno=EBADF;
- return -1;
- }
- tmpp = (CUNBAPRM *) cd;
- tmpp->Src_Buf_Ptr=&inchar;
- tmpp->Targ_Buf_Ptr=&outchar;
- tmpp->Targ_Buf_Len=sizeof(unichar_t);
- tmpp->Src_Buf_Len=sizeof(unichar_t);
- // Retry if the services are busy reloading their tables.
- int retry_count = 0;
- while (true) {
- CUNLASE ( tmpp );
- if (tmpp->Return_Code == CUN_RC_OK) {
- break;
- }
- else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
- ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
- (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
- (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) {
- // Let it loop around again
- retry_count++;
- if (retry_count > RETRY_THRESHOLD) {
- errno = ENOSYS;
- break;
- }
- } else {
- // printf("CUNLASE: Unicode Services is a Failure!n");
- // printf("CUNLASE rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code);
- errno = ENOSYS;
- break;
- }
- }
- return outchar;
- }
- XERCES_CPP_NAMESPACE_END