词法分析

开发平台：
Visual C++

uniconv.cpp：源码内容
							/*
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2002 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation, and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.ibm.com .  For more information
 * on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
/*
 * $Id: uniconv.cpp,v 1.1 2002/11/22 14:57:06 tng Exp $
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include <cunhc.h>
#include "ccsid.h"
#include "uniconv.h"
XERCES_CPP_NAMESPACE_BEGIN
// Size in bytes of the work buffer given to the character conversion
// service (stored in Wrk_Buf_Len). Parenthesized so that the macro expands
// correctly inside larger expressions (e.g. "x % WORK_BUFFER_SIZE").
#define WORK_BUFFER_SIZE (16*1024)
// Length of the DDA buffer given to the character conversion service
// (stored in DDA_Buf_Len).
#define DDA_NEEDED CUNBCPRM_DDA_REQ
// Upper bound on how often a service call is retried while the service
// keeps reporting a missing/invalid handle.
#define RETRY_THRESHOLD 10000
// Utility routine that copies the name s into d with every ' ', '_' and '-'
// removed and all remaining characters upper-cased; d is always
// NUL-terminated.
//   s   - NUL-terminated source name
//   d   - destination buffer; must have room for max+1 bytes
//   max - maximum number of characters stored in d (excluding the NUL)
// Returns the number of source characters consumed (the length of s), or -1
// if s still had characters left after max characters had been stored.
static int stripNameCopy(const char *s,char *d,int max)
{
   int si=0;
   int di=0;
   while ( (s[si] != '\0') && (di < max) ) {
      if ( (s[si] == ' ') || (s[si] == '_') || (s[si] == '-') )
         si++;
      else {
         // toupper() is only defined for values representable as
         // unsigned char (or EOF), so convert before calling it.
         d[di] = (char)toupper((unsigned char)s[si]);
         si++;di++;
      }
   }
   d[di] = 0;
   // Anything left in s means the name did not fit into max characters.
   if (s[si] != '\0')
      return -1;
   return si;
}
// This takes a name and does a lookup into the ccsid table (from ccsid.h)
// to find the corresponding ccsid. It also checks if the string ends in s390
// and returns that information to the caller.
// The lookup into the table is done via a binary search since we know that the
// table was nicely sorted for us.
static int getccsid(const char *s,int * is390)
{
   char tmpstr[_AE_MAX_CODESET_NAME_LENGTH];
   int start;
   int limit;
   int index;
   int result;
   int thelen;
   // Clean up the name....
   if (s == NULL)
      return -1;
   if ((thelen = stripNameCopy(s,tmpstr,_AE_MAX_CODESET_NAME_LENGTH-1)) == -1)
      return -1;
   // Check for the S390 string in the name
   *is390 = 0;
   if ( (strstr((char *)tmpstr, "S390")) != NULL )
      *is390 = 1;
   // Now lookup the name via a binary search
   start = 0;
   limit = _AE_NUM_OF_CODESETS;
   index = limit/2;
   while ( ((result=strcoll(tmpstr, CCSID_MAPPING[index].NAME)) != 0) &&
            (start < limit-1) ) {
      if (result < 0)
         limit = index;
      else
          start = index;
      index = (start+limit)/2;
   }
   if (result != 0 && start >= limit-1)
      return -1;
   return CCSID_MAPPING[index].CCSID;
}
// **********************************************************************
// These are the character conversion services
// **********************************************************************
// "Open" the conversion. Allocate memory to hold the handle which
// unicode services requires. Call unicode services with a 0 length
// so that it can initialize it's handle.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
uniconv_t uniconv_open(const char *destenc, const char *srcenc) {
   CUNBCPRM  defparms = {CUNBCPRM_DEFAULT};
   CUNBCPRM * tmpp;
   void * handle_area;
   char *cptr;
   int srcis390;
   int destis390;
   errno = 0;
   handle_area = malloc (sizeof(CUNBCPRM)+DDA_NEEDED+WORK_BUFFER_SIZE);
   tmpp = (CUNBCPRM *) handle_area;
   if (tmpp==NULL)
      return (uniconv_t)-1;
   // initialize the parm area with defaults, then start filling it
   // in with our values.
   memcpy(tmpp,&defparms,sizeof(defparms));
   tmpp->Src_Buf_Len= 0;
   // get the ccsids.
   if ( ((tmpp->Src_CCSID=getccsid(srcenc,&srcis390)) == -1) ||
        ((tmpp->Targ_CCSID=getccsid(destenc,&destis390)) == -1) ) {
      errno=ENOENT;
      free(handle_area);
      return (uniconv_t)-1;
   }
   tmpp->Wrk_Buf_Ptr=(void*) (((unsigned int) handle_area) + sizeof(CUNBCPRM)+DDA_NEEDED);
   tmpp->Wrk_Buf_Len=WORK_BUFFER_SIZE;
   tmpp->DDA_Buf_Ptr=(void*) ((unsigned int) handle_area + sizeof(CUNBCPRM));
   tmpp->DDA_Buf_Len=DDA_NEEDED;
   // This flag tells the services to automatically refresh the handle if it
   // becomes invalid.
   tmpp->Flag1|=CUNBCPRM_REFRESH_AT_INV_HANDLE_START;
   tmpp->Flag1|=CUNBCPRM_SUB_ACTION_SUBSTITUTE;
   /* Determine which technique to use */
   if ( (srcis390) || (destis390) )
      // This technique causes it to swap LF and NL.
      memcpy(tmpp->Technique,"L       ",8);
   else
      memcpy(tmpp->Technique,"        ",8);
   // Retry if the services are busy reloading their tables.
   int retry_count = 0;
   while (retry_count < RETRY_THRESHOLD) {
      CUNLCNV(tmpp);
      if (tmpp->Return_Code == CUN_RC_OK)
         break;
      else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )
         // Let it loop around again
         retry_count++;
      else
         break;
   }
   if (tmpp->Return_Code != CUN_RC_OK) {
// printf("uniconv_open() Error!!! rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code);   // remove this after function test
      free(handle_area);
      errno=EINVAL;
      handle_area = (uniconv_t)-1;
   }
   return handle_area;
}
// All that is required for close is to free the handle buffer.
int uniconv_close(uniconv_t handle_area) {
   errno = 0;
   if (((int)handle_area) <= 0) {
      errno=EBADF;
      return -1;
   }
   free(handle_area);
   return 0;
}
// This does the real conversion.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
int uniconv(uniconv_t cd, char **inbuf,  size_t *inbytesleft,
                          char **outbuf, size_t *outbytesleft) {
   CUNBCPRM * tmpp;
   size_t startinlen = *inbytesleft;
   size_t startoutlen = *outbytesleft;
   errno = 0;
   if (((int)cd) <= 0) {
      errno=EBADF;
      return -1;
   }
   // Fill in the parameter area with current values
   tmpp = (CUNBCPRM *) cd;
   tmpp->Src_Buf_Ptr = *inbuf;
   tmpp->Src_Buf_Len = *inbytesleft;
   tmpp->Targ_Buf_Ptr = *outbuf;
   tmpp->Targ_Buf_Len = *outbytesleft;
   // Retry if the services are busy reloading their tables.
   int retry_count = 0;
   while (retry_count < RETRY_THRESHOLD) {
      CUNLCNV(tmpp);
      if (tmpp->Return_Code == CUN_RC_OK)
         break;
      else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )
         // Let it loop around again
         retry_count++;
      else
         break;
   }
   *inbuf        = (char *)tmpp->Src_Buf_Ptr;
   *inbytesleft  = tmpp->Src_Buf_Len;
   *outbuf       = (char *)tmpp->Targ_Buf_Ptr;
   *outbytesleft = tmpp->Targ_Buf_Len;
   if (tmpp->Return_Code != CUN_RC_OK) {
      if (tmpp->Reason_Code == CUN_RS_TRG_EXH)
         errno=E2BIG;
      else if (tmpp->Reason_Code == CUN_RS_MBC_INCOMPLETE)
         errno=EINVAL;
      else {
 printf("uniconv() Error!!! rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code); // remove after function test
         errno=EBADF;
         return -1;
      }
   }
   return (startinlen-*inbytesleft);
}
// **********************************************************************
// These are the case conversion services.
// **********************************************************************
// This "opens" the case conversion. It allocates the parameter area
// then does a dummy call to unicode services so that it can set up
// the handle.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
static inline uniconv_t uniconv_case_open(unsigned char direction) {
CUNBAPRM  defparms = {CUNBAPRM_DEFAULT};
CUNBAPRM * tmpp;
void * handle_area;
   errno = 0;
   handle_area = malloc (sizeof(CUNBAPRM)+CUNBAPRM_DDA_REQ);
   tmpp = (CUNBAPRM *) handle_area;
   if (tmpp==NULL)
      return (uniconv_t)-1;
   // initialize the parm area with defaults, then start filling it
   // in with our values.
   memcpy(tmpp,&defparms,sizeof(defparms));
   tmpp->DDA_Buf_Ptr=(void*) ((unsigned int) handle_area + sizeof(CUNBAPRM));
   tmpp->DDA_Buf_Len=CUNBAPRM_DDA_REQ;
   // This flag tells the services to automatically refresh the handle if it
   // becomes invalid.
   tmpp->Flag1|=CUNBAPRM_REFRESH_AT_INV_HANDLE_START;
   unichar_t inchar = 0x61;
   unichar_t outchar;
   tmpp->Src_Buf_Ptr=&inchar;
   tmpp->Targ_Buf_Ptr=&outchar;
   tmpp->Targ_Buf_Len=sizeof(unichar_t);
   tmpp->Src_Buf_Len=sizeof(unichar_t);
   tmpp->Conv_Type=direction;
   // Retry if the services are busy reloading their tables.
   int retry_count = 0;
   while (true) {
      CUNLASE ( tmpp );
      if (tmpp->Return_Code == CUN_RC_OK) {
         break;
      } else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                  ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) {
         // Let it loop around again
         retry_count++;
         if (retry_count > RETRY_THRESHOLD) {
            errno = ENOSYS;
            break;
         }
      } else {
// printf("CUNLASE: Unicode Services is a Failure!n");
// printf("CUNLASE rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code);
         errno = ENOSYS;
         break;
      }
   }
   if (tmpp->Return_Code != CUN_RC_OK) {
// printf("uniconv_case_open() Error!!! rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code); // remove after function test.
      free(handle_area);
      errno=EINVAL;
      handle_area = (uniconv_t)-1;
   }
   return handle_area;
}
// These are the actual external interfaces for the open function
uniconv_t uniconv_toupper_open() {
   return uniconv_case_open(CUNBAPRM_TO_UPPER);
}
uniconv_t uniconv_tolower_open() {
   return uniconv_case_open(CUNBAPRM_TO_LOWER);
}
// This closes the case conversion. All it does is free the handle buffer.
int _uniconv_case_close(uniconv_t handle_area) {
   errno = 0;
   if (((int)handle_area) <= 0) {
      errno=EBADF;
      return -1;
   }
   free(handle_area);
   return 0;
}
// This does the actual case conversion. The direction is already
// stored in the handle buffer.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
unichar_t uniconv_caseit (uniconv_t cd,unichar_t inchar) {
   unichar_t outchar;
   CUNBAPRM * tmpp;
   errno = 0;
   if (((int)cd) <= 0) {
      errno=EBADF;
      return -1;
   }
   tmpp = (CUNBAPRM *) cd;
   tmpp->Src_Buf_Ptr=&inchar;
   tmpp->Targ_Buf_Ptr=&outchar;
   tmpp->Targ_Buf_Len=sizeof(unichar_t);
   tmpp->Src_Buf_Len=sizeof(unichar_t);
   // Retry if the services are busy reloading their tables.
   int retry_count = 0;
   while (true) {
      CUNLASE ( tmpp );
      if (tmpp->Return_Code == CUN_RC_OK) {
         break;
      }
      else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) {
         // Let it loop around again
         retry_count++;
         if (retry_count > RETRY_THRESHOLD) {
            errno = ENOSYS;
            break;
         }
      } else {
// printf("CUNLASE: Unicode Services is a Failure!n");
// printf("CUNLASE rc=%d rs=%dn",tmpp->Return_Code,tmpp->Reason_Code);
         errno = ENOSYS;
         break;
      }
   }
   return outchar;
}
XERCES_CPP_NAMESPACE_END