utf8.c
上传用户:lyxiangda
上传日期:2007-01-12
资源大小:3042k
文件大小:18k
- /*
- * The contents of this file are subject to the Mozilla Public
- * License Version 1.1 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of
- * the License at http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS
- * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * rights and limitations under the License.
- *
- * The Original Code is the Netscape security libraries.
- *
- * The Initial Developer of the Original Code is Netscape
- * Communications Corporation. Portions created by Netscape are
- * Copyright (C) 1994-2000 Netscape Communications Corporation. All
- * Rights Reserved.
- *
- * Contributor(s):
- *
- * Alternatively, the contents of this file may be used under the
- * terms of the GNU General Public License Version 2 or later (the
- * "GPL"), in which case the provisions of the GPL are applicable
- * instead of those above. If you wish to allow use of your
- * version of this file only under the terms of the GPL and not to
- * allow others to use your version of this file under the MPL,
- * indicate your decision by deleting the provisions above and
- * replace them with the notice and other provisions required by
- * the GPL. If you do not delete the provisions above, a recipient
- * may use your version of this file under either the MPL or the
- * GPL.
- */
- #ifdef DEBUG
- static const char CVS_ID[] = "@(#) $RCSfile: utf8.c,v $ $Revision: 1.3 $ $Date: 2000/05/12 18:43:28 $ $Name: NSS_3_1_1_RTM $";
- #endif /* DEBUG */
- /*
- * utf8.c
- *
- * This file contains some additional utility routines required for
- * handling UTF8 strings.
- */
- #ifndef BASE_H
- #include "base.h"
- #endif /* BASE_H */
- #include "plstr.h"
- /*
- * NOTES:
- *
- * There's an "is hex string" function in pki1/atav.c. If we need
- * it in more places, pull that one out.
- */
- /*
- * nssUTF8_CaseIgnoreMatch
- *
- * Returns true if the two UTF8-encoded strings pointed to by the
- * two specified NSSUTF8 pointers differ only in typcase.
- *
- * The error may be one of the following values:
- * NSS_ERROR_INVALID_POINTER
- *
- * Return value:
- * PR_TRUE if the strings match, ignoring case
- * PR_FALSE if they don't
- * PR_FALSE upon error
- */
- NSS_IMPLEMENT PRBool
- nssUTF8_CaseIgnoreMatch
- (
- const NSSUTF8 *a,
- const NSSUTF8 *b,
- PRStatus *statusOpt
- )
- {
- #ifdef NSSDEBUG
- if( ((const NSSUTF8 *)NULL == a) ||
- ((const NSSUTF8 *)NULL == b) ) {
- nss_SetError(NSS_ERROR_INVALID_POINTER);
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_FAILURE;
- }
- return PR_FALSE;
- }
- #endif /* NSSDEBUG */
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_SUCCESS;
- }
- /*
- * XXX fgmr
- *
- * This is, like, so wrong!
- */
- if( 0 == PL_strcasecmp((const char *)a, (const char *)b) ) {
- return PR_TRUE;
- } else {
- return PR_FALSE;
- }
- }
- /*
- * nssUTF8_PrintableMatch
- *
- * Returns true if the two Printable strings pointed to by the
- * two specified NSSUTF8 pointers match when compared with the
- * rules for Printable String (leading and trailing spaces are
- * disregarded, extents of whitespace match irregardless of length,
- * and case is not significant), then PR_TRUE will be returned.
- * Otherwise, PR_FALSE will be returned. Upon failure, PR_FALSE
- * will be returned. If the optional statusOpt argument is not
- * NULL, then PR_SUCCESS or PR_FAILURE will be stored in that
- * location.
- *
- * The error may be one of the following values:
- * NSS_ERROR_INVALID_POINTER
- *
- * Return value:
- * PR_TRUE if the strings match, ignoring case
- * PR_FALSE if they don't
- * PR_FALSE upon error
- */
- NSS_IMPLEMENT PRBool
- nssUTF8_PrintableMatch
- (
- const NSSUTF8 *a,
- const NSSUTF8 *b,
- PRStatus *statusOpt
- )
- {
- PRUint8 *c;
- PRUint8 *d;
- #ifdef NSSDEBUG
- if( ((const NSSUTF8 *)NULL == a) ||
- ((const NSSUTF8 *)NULL == b) ) {
- nss_SetError(NSS_ERROR_INVALID_POINTER);
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_FAILURE;
- }
- return PR_FALSE;
- }
- #endif /* NSSDEBUG */
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_SUCCESS;
- }
- c = (PRUint8 *)a;
- d = (PRUint8 *)b;
- while( ' ' == *c ) {
- c++;
- }
- while( ' ' == *d ) {
- d++;
- }
- while( (' ' != *c) && (' ' != *d) ) {
- PRUint8 e, f;
- e = *c;
- f = *d;
-
- if( ('a' <= e) && (e <= 'z') ) {
- e -= ('a' - 'A');
- }
- if( ('a' <= f) && (f <= 'z') ) {
- f -= ('a' - 'A');
- }
- if( e != f ) {
- return PR_FALSE;
- }
- c++;
- d++;
- if( ' ' == *c ) {
- while( ' ' == *c ) {
- c++;
- }
- c--;
- }
- if( ' ' == *d ) {
- while( ' ' == *d ) {
- d++;
- }
- d--;
- }
- }
- while( ' ' == *c ) {
- c++;
- }
- while( ' ' == *d ) {
- d++;
- }
- if( *c == *d ) {
- /* And both ' ', btw */
- return PR_TRUE;
- } else {
- return PR_FALSE;
- }
- }
- /*
- * nssUTF8_Duplicate
- *
- * This routine duplicates the UTF8-encoded string pointed to by the
- * specified NSSUTF8 pointer. If the optional arenaOpt argument is
- * not null, the memory required will be obtained from that arena;
- * otherwise, the memory required will be obtained from the heap.
- * A pointer to the new string will be returned. In case of error,
- * an error will be placed on the error stack and NULL will be
- * returned.
- *
- * The error may be one of the following values:
- * NSS_ERROR_INVALID_POINTER
- * NSS_ERROR_INVALID_ARENA
- * NSS_ERROR_NO_MEMORY
- */
- NSS_IMPLEMENT NSSUTF8 *
- nssUTF8_Duplicate
- (
- const NSSUTF8 *s,
- NSSArena *arenaOpt
- )
- {
- NSSUTF8 *rv;
- PRUint32 len;
- #ifdef NSSDEBUG
- if( (const NSSUTF8 *)NULL == s ) {
- nss_SetError(NSS_ERROR_INVALID_POINTER);
- return (NSSUTF8 *)NULL;
- }
- if( (NSSArena *)NULL != arenaOpt ) {
- if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
- return (NSSUTF8 *)NULL;
- }
- }
- #endif /* NSSDEBUG */
- len = PL_strlen((const char *)s);
- #ifdef PEDANTIC
- if( ' ' != ((const char *)s)[ len ] ) {
- /* must have wrapped, e.g., too big for PRUint32 */
- nss_SetError(NSS_ERROR_NO_MEMORY);
- return (NSSUTF8 *)NULL;
- }
- #endif /* PEDANTIC */
- len++; /* zero termination */
- rv = nss_ZAlloc(arenaOpt, len);
- if( (void *)NULL == rv ) {
- return (NSSUTF8 *)NULL;
- }
- (void)nsslibc_memcpy(rv, s, len);
- return rv;
- }
- /*
- * nssUTF8_Size
- *
- * This routine returns the length in bytes (including the terminating
- * null) of the UTF8-encoded string pointed to by the specified
- * NSSUTF8 pointer. Zero is returned on error.
- *
- * The error may be one of the following values:
- * NSS_ERROR_INVALID_POINTER
- * NSS_ERROR_VALUE_TOO_LARGE
- *
- * Return value:
- * 0 on error
- * nonzero length of the string.
- */
- NSS_IMPLEMENT PRUint32
- nssUTF8_Size
- (
- const NSSUTF8 *s,
- PRStatus *statusOpt
- )
- {
- PRUint32 sv;
- #ifdef NSSDEBUG
- if( (const NSSUTF8 *)NULL == s ) {
- nss_SetError(NSS_ERROR_INVALID_POINTER);
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_FAILURE;
- }
- return 0;
- }
- #endif /* NSSDEBUG */
- sv = PL_strlen((const char *)s) + 1;
- #ifdef PEDANTIC
- if( ' ' != ((const char *)s)[ sv-1 ] ) {
- /* wrapped */
- nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_FAILURE;
- }
- return 0;
- }
- #endif /* PEDANTIC */
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_SUCCESS;
- }
- return sv;
- }
- /*
- * nssUTF8_Length
- *
- * This routine returns the length in characters (not including the
- * terminating null) of the UTF8-encoded string pointed to by the
- * specified NSSUTF8 pointer.
- *
- * The error may be one of the following values:
- * NSS_ERROR_INVALID_POINTER
- * NSS_ERROR_VALUE_TOO_LARGE
- * NSS_ERROR_INVALID_STRING
- *
- * Return value:
- * length of the string (which may be zero)
- * 0 on error
- */
- NSS_IMPLEMENT PRUint32
- nssUTF8_Length
- (
- const NSSUTF8 *s,
- PRStatus *statusOpt
- )
- {
- PRUint32 l = 0;
- const PRUint8 *c = (const PRUint8 *)s;
- #ifdef NSSDEBUG
- if( (const NSSUTF8 *)NULL == s ) {
- nss_SetError(NSS_ERROR_INVALID_POINTER);
- goto loser;
- }
- #endif /* NSSDEBUG */
- /*
- * From RFC 2044:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
- */
- while( 0 != *c ) {
- PRUint32 incr;
- if( (*c & 0x80) == 0 ) {
- incr = 1;
- } else if( (*c & 0xE0) == 0xC0 ) {
- incr = 2;
- } else if( (*c & 0xF0) == 0xE0 ) {
- incr = 3;
- } else if( (*c & 0xF8) == 0xF0 ) {
- incr = 4;
- } else if( (*c & 0xFC) == 0xF8 ) {
- incr = 5;
- } else if( (*c & 0xFE) == 0xFC ) {
- incr = 6;
- } else {
- nss_SetError(NSS_ERROR_INVALID_STRING);
- goto loser;
- }
- l += incr;
- #ifdef PEDANTIC
- if( l < incr ) {
- /* Wrapped-- too big */
- nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
- goto loser;
- }
- {
- PRUint8 *d;
- for( d = &c[1]; d < &c[incr]; d++ ) {
- if( (*d & 0xC0) != 0xF0 ) {
- nss_SetError(NSS_ERROR_INVALID_STRING);
- goto loser;
- }
- }
- }
- #endif /* PEDANTIC */
- c += incr;
- }
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_SUCCESS;
- }
- return l;
- loser:
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_FAILURE;
- }
- return 0;
- }
- /*
- * nssUTF8_Create
- *
- * This routine creates a UTF8 string from a string in some other
- * format. Some types of string may include embedded null characters,
- * so for them the length parameter must be used. For string types
- * that are null-terminated, the length parameter is optional; if it
- * is zero, it will be ignored. If the optional arena argument is
- * non-null, the memory used for the new string will be obtained from
- * that arena, otherwise it will be obtained from the heap. This
- * routine may return NULL upon error, in which case it will have
- * placed an error on the error stack.
- *
- * The error may be one of the following:
- * NSS_ERROR_INVALID_POINTER
- * NSS_ERROR_NO_MEMORY
- * NSS_ERROR_UNSUPPORTED_TYPE
- *
- * Return value:
- * NULL upon error
- * A non-null pointer to a new UTF8 string otherwise
- */
- extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */
- NSS_IMPLEMENT NSSUTF8 *
- nssUTF8_Create
- (
- NSSArena *arenaOpt,
- nssStringType type,
- const void *inputString,
- PRUint32 size /* in bytes, not characters */
- )
- {
- NSSUTF8 *rv = NULL;
- #ifdef NSSDEBUG
- if( (NSSArena *)NULL != arenaOpt ) {
- if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
- return (NSSUTF8 *)NULL;
- }
- }
- if( (const void *)NULL == inputString ) {
- nss_SetError(NSS_ERROR_INVALID_POINTER);
- return (NSSUTF8 *)NULL;
- }
- #endif /* NSSDEBUG */
- switch( type ) {
- case nssStringType_DirectoryString:
- /* This is a composite type requiring BER */
- nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
- break;
- case nssStringType_TeletexString:
- /*
- * draft-ietf-pkix-ipki-part1-11 says in part:
- *
- * In addition, many legacy implementations support names encoded
- * in the ISO 8859-1 character set (Latin1String) but tag them as
- * TeletexString. The Latin1String includes characters used in
- * Western European countries which are not part of the
- * TeletexString charcter set. Implementations that process
- * TeletexString SHOULD be prepared to handle the entire ISO
- * 8859-1 character set.[ISO 8859-1].
- */
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- case nssStringType_PrintableString:
- /*
- * PrintableString consists of A-Za-z0-9 ,()+,-./:=?
- * This is a subset of ASCII, which is a subset of UTF8.
- * So we can just duplicate the string over.
- */
- if( 0 == size ) {
- rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
- } else {
- rv = nss_ZAlloc(arenaOpt, size+1);
- if( (NSSUTF8 *)NULL == rv ) {
- return (NSSUTF8 *)NULL;
- }
- (void)nsslibc_memcpy(rv, inputString, size);
- }
- break;
- case nssStringType_UniversalString:
- /* 4-byte unicode */
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- case nssStringType_BMPString:
- /* Base Multilingual Plane of Unicode */
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- case nssStringType_UTF8String:
- if( 0 == size ) {
- rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
- } else {
- rv = nss_ZAlloc(arenaOpt, size+1);
- if( (NSSUTF8 *)NULL == rv ) {
- return (NSSUTF8 *)NULL;
- }
- (void)nsslibc_memcpy(rv, inputString, size);
- }
- break;
- case nssStringType_PHGString:
- /*
- * PHGString is an IA5String (with case-insensitive comparisons).
- * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has
- * currency symbol.
- */
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- case nssStringType_GeneralString:
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- default:
- nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
- break;
- }
- return rv;
- }
- NSS_IMPLEMENT NSSItem *
- nssUTF8_GetEncoding
- (
- NSSArena *arenaOpt,
- NSSItem *rvOpt,
- nssStringType type,
- NSSUTF8 *string
- )
- {
- NSSItem *rv = (NSSItem *)NULL;
- PRStatus status = PR_SUCCESS;
- #ifdef NSSDEBUG
- if( (NSSArena *)NULL != arenaOpt ) {
- if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
- return (NSSItem *)NULL;
- }
- }
- if( (NSSUTF8 *)NULL == string ) {
- nss_SetError(NSS_ERROR_INVALID_POINTER);
- return (NSSItem *)NULL;
- }
- #endif /* NSSDEBUG */
- switch( type ) {
- case nssStringType_DirectoryString:
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- case nssStringType_TeletexString:
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- case nssStringType_PrintableString:
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- case nssStringType_UniversalString:
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- case nssStringType_BMPString:
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- case nssStringType_UTF8String:
- {
- NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt);
- if( (NSSUTF8 *)NULL == dup ) {
- return (NSSItem *)NULL;
- }
- if( (NSSItem *)NULL == rvOpt ) {
- rv = nss_ZNEW(arenaOpt, NSSItem);
- if( (NSSItem *)NULL == rv ) {
- (void)nss_ZFreeIf(dup);
- return (NSSItem *)NULL;
- }
- } else {
- rv = rvOpt;
- }
- rv->data = dup;
- dup = (NSSUTF8 *)NULL;
- rv->size = nssUTF8_Size(rv->data, &status);
- if( (0 == rv->size) && (PR_SUCCESS != status) ) {
- if( (NSSItem *)NULL == rvOpt ) {
- (void)nss_ZFreeIf(rv);
- }
- return (NSSItem *)NULL;
- }
- }
- break;
- case nssStringType_PHGString:
- nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
- break;
- default:
- nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
- break;
- }
- return rv;
- }
- /*
- * nssUTF8_CopyIntoFixedBuffer
- *
- * This will copy a UTF8 string into a fixed-length buffer, making
- * sure that the all characters are valid. Any remaining space will
- * be padded with the specified ASCII character, typically either
- * null or space.
- *
- * Blah, blah, blah.
- */
- NSS_IMPLEMENT PRStatus
- nssUTF8_CopyIntoFixedBuffer
- (
- NSSUTF8 *string,
- char *buffer,
- PRUint32 bufferSize,
- char pad
- )
- {
- PRUint32 stringSize = 0;
- #ifdef NSSDEBUG
- if( (char *)NULL == buffer ) {
- nss_SetError(NSS_ERROR_INVALID_POINTER);
- return PR_FALSE;
- }
- if( 0 == bufferSize ) {
- nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
- return PR_FALSE;
- }
- if( (pad & 0x80) != 0x00 ) {
- nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
- return PR_FALSE;
- }
- #endif /* NSSDEBUG */
- if( (NSSUTF8 *)NULL == string ) {
- string = (unsigned char*) "";
- }
- stringSize = nssUTF8_Size(string, (PRStatus *)NULL);
- stringSize--; /* don't count the trailing null */
- if( stringSize > bufferSize ) {
- PRUint32 bs = bufferSize;
- (void)nsslibc_memcpy(buffer, string, bufferSize);
-
- if( ( ((buffer[ bs-1 ] & 0x80) == 0x00)) ||
- ((bs > 1) && ((buffer[ bs-2 ] & 0xE0) == 0xC0)) ||
- ((bs > 2) && ((buffer[ bs-3 ] & 0xF0) == 0xE0)) ||
- ((bs > 3) && ((buffer[ bs-4 ] & 0xF8) == 0xF0)) ||
- ((bs > 4) && ((buffer[ bs-5 ] & 0xFC) == 0xF8)) ||
- ((bs > 5) && ((buffer[ bs-6 ] & 0xFE) == 0xFC)) ) {
- /* It fit exactly */
- return PR_SUCCESS;
- }
- /* Too long. We have to trim the last character */
- for( bs; bs > 0; bs-- ) {
- if( (buffer[bs-1] & 0xC0) != 0x80 ) {
- buffer[bs-1] = pad;
- break;
- } else {
- buffer[bs-1] = pad;
- }
- }
- } else {
- (void)nsslibc_memset(buffer, pad, bufferSize);
- (void)nsslibc_memcpy(buffer, string, stringSize);
- }
- return PR_SUCCESS;
- }
- /*
- * nssUTF8_Equal
- *
- */
- NSS_IMPLEMENT PRBool
- nssUTF8_Equal
- (
- const NSSUTF8 *a,
- const NSSUTF8 *b,
- PRStatus *statusOpt
- )
- {
- PRUint32 la, lb;
- #ifdef NSSDEBUG
- if( ((const NSSUTF8 *)NULL == a) ||
- ((const NSSUTF8 *)NULL == b) ) {
- nss_SetError(NSS_ERROR_INVALID_POINTER);
- if( (PRStatus *)NULL != statusOpt ) {
- *statusOpt = PR_FAILURE;
- }
- return PR_FALSE;
- }
- #endif /* NSSDEBUG */
- la = nssUTF8_Size(a, statusOpt);
- if( 0 == la ) {
- return PR_FALSE;
- }
- lb = nssUTF8_Size(b, statusOpt);
- if( 0 == lb ) {
- return PR_FALSE;
- }
- if( la != lb ) {
- return PR_FALSE;
- }
- return nsslibc_memequal(a, b, la, statusOpt);
- }