/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
- #include <my_global.h>
- #include <m_ctype.h>
- #include <my_xml.h>
- #ifndef SCO
- #include <m_string.h>
- #endif
/*
  This file implements routines which parse XML based
  character set and collation description files.

  Unicode collations are encoded according to

    Unicode Technical Standard #35
    Locale Data Markup Language (LDML)
    http://www.unicode.org/reports/tr35/

  and converted into an ICU string according to

    Collation Customization
    http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
*/
/*
  Copy a length-delimited string into a buffer and NUL-terminate it.

  SYNOPSIS
    mstr()
    str   destination buffer; must have room for min(l1,l2)+1 bytes
    src   source bytes (need not be NUL-terminated)
    l1    number of bytes available in src
    l2    maximum number of bytes to copy (callers pass buffer size - 1)

  RETURN
    str
*/
static char *mstr(char *str,const char *src,uint l1,uint l2)
{
  /* Never copy more than the destination can hold. */
  l1= l1<l2 ? l1 : l2;
  memcpy(str,src,l1);
  str[l1]='\0';
  return str;
}
/*
  One row of the section lookup table: maps the dotted path of an
  XML element or attribute to the parser state used by the handlers.
*/
struct my_cs_file_section_st
{
  int        state;   /* one of the _CS_* constants */
  const char *str;    /* dotted path, e.g. "charsets.charset.name" */
};
/*
  Parser states attached to the entries of the section table.
  The numeric values are unchanged.  _CS_RESET.._CS_DIFF3 must stay
  consecutive and in this order: cs_value() indexes its command
  table with (state - _CS_RESET).
*/
enum
{
  _CS_MISC=        1,
  _CS_ID=          2,
  _CS_CSNAME=      3,
  _CS_FAMILY=      4,
  _CS_ORDER=       5,
  _CS_COLNAME=     6,
  _CS_FLAG=        7,
  _CS_CHARSET=     8,
  _CS_COLLATION=   9,
  _CS_UPPERMAP=   10,
  _CS_LOWERMAP=   11,
  _CS_UNIMAP=     12,
  _CS_COLLMAP=    13,
  _CS_CTYPEMAP=   14,
  _CS_PRIMARY_ID= 15,
  _CS_BINARY_ID=  16,
  _CS_CSDESCRIPT= 17,
  _CS_RESET=      18,
  _CS_DIFF1=      19,
  _CS_DIFF2=      20,
  _CS_DIFF3=      21
};
- static struct my_cs_file_section_st sec[] =
- {
- {_CS_MISC, "xml"},
- {_CS_MISC, "xml.version"},
- {_CS_MISC, "xml.encoding"},
- {_CS_MISC, "charsets"},
- {_CS_MISC, "charsets.max-id"},
- {_CS_CHARSET, "charsets.charset"},
- {_CS_PRIMARY_ID, "charsets.charset.primary-id"},
- {_CS_BINARY_ID, "charsets.charset.binary-id"},
- {_CS_CSNAME, "charsets.charset.name"},
- {_CS_FAMILY, "charsets.charset.family"},
- {_CS_CSDESCRIPT, "charsets.charset.description"},
- {_CS_MISC, "charsets.charset.alias"},
- {_CS_MISC, "charsets.charset.ctype"},
- {_CS_CTYPEMAP, "charsets.charset.ctype.map"},
- {_CS_MISC, "charsets.charset.upper"},
- {_CS_UPPERMAP, "charsets.charset.upper.map"},
- {_CS_MISC, "charsets.charset.lower"},
- {_CS_LOWERMAP, "charsets.charset.lower.map"},
- {_CS_MISC, "charsets.charset.unicode"},
- {_CS_UNIMAP, "charsets.charset.unicode.map"},
- {_CS_COLLATION, "charsets.charset.collation"},
- {_CS_COLNAME, "charsets.charset.collation.name"},
- {_CS_ID, "charsets.charset.collation.id"},
- {_CS_ORDER, "charsets.charset.collation.order"},
- {_CS_FLAG, "charsets.charset.collation.flag"},
- {_CS_COLLMAP, "charsets.charset.collation.map"},
- {_CS_RESET, "charsets.charset.collation.rules.reset"},
- {_CS_DIFF1, "charsets.charset.collation.rules.p"},
- {_CS_DIFF2, "charsets.charset.collation.rules.s"},
- {_CS_DIFF3, "charsets.charset.collation.rules.t"},
- {0, NULL}
- };
- static struct my_cs_file_section_st * cs_file_sec(const char *attr, uint len)
- {
- struct my_cs_file_section_st *s;
- for (s=sec; s->str; s++)
- {
- if (!strncmp(attr,s->str,len))
- return s;
- }
- return NULL;
- }
- #define MY_CS_CSDESCR_SIZE 64
- #define MY_CS_TAILORING_SIZE 128
- typedef struct my_cs_file_info
- {
- char csname[MY_CS_NAME_SIZE];
- char name[MY_CS_NAME_SIZE];
- uchar ctype[MY_CS_CTYPE_TABLE_SIZE];
- uchar to_lower[MY_CS_TO_LOWER_TABLE_SIZE];
- uchar to_upper[MY_CS_TO_UPPER_TABLE_SIZE];
- uchar sort_order[MY_CS_SORT_ORDER_TABLE_SIZE];
- uint16 tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE];
- char comment[MY_CS_CSDESCR_SIZE];
- char tailoring[MY_CS_TAILORING_SIZE];
- size_t tailoring_length;
- int (*add_collation)(CHARSET_INFO *cs);
/*
  Parse whitespace-separated hexadecimal numbers into a byte table.

  SYNOPSIS
    fill_uchar()
    a     destination table
    size  number of elements in a
    str   text to parse
    len   length of str in bytes

  NOTES
    Tokens are separated by space, tab, CR or LF.  Parsing stops at
    the end of the text or once 'size' values have been stored;
    further tokens are ignored so that the table is never overrun.
    Each value is truncated to a byte.
    NOTE(review): strtoul() reads from the token start until the first
    character that is not part of a number, so the byte after the last
    token must not be a hex digit - confirm the XML parser guarantees it.

  RETURN
    0 always
*/
static int fill_uchar(uchar *a,uint size,const char *str, uint len)
{
  uint i= 0;
  const char *s, *b, *e=str+len;

  for (s=str ; s < e ; i++)
  {
    /* Skip separators in front of the token. */
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b=s;
    /* Advance to the end of the token. */
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    /* Stop on an empty token (end of text) or when the table is full. */
    if (s == b || i >= size)
      break;
    a[i]= (uchar) strtoul(b,NULL,16);
  }
  return 0;
}
/*
  Parse whitespace-separated hexadecimal numbers into a uint16 table.

  SYNOPSIS
    fill_uint16()
    a     destination table
    size  number of elements in a
    str   text to parse
    len   length of str in bytes

  NOTES
    Same tokenising rules as fill_uchar(): tokens are separated by
    space, tab, CR or LF, and parsing stops at the end of the text or
    once 'size' values have been stored.  Each value is truncated to
    16 bits.

  RETURN
    0 always
*/
static int fill_uint16(uint16 *a,uint size,const char *str, uint len)
{
  uint i= 0;
  const char *s, *b, *e=str+len;

  for (s=str ; s < e ; i++)
  {
    /* Skip separators in front of the token. */
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b=s;
    /* Advance to the end of the token. */
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    /* Stop on an empty token (end of text) or when the table is full. */
    if (s == b || i >= size)
      break;
    a[i]= (uint16) strtoul(b,NULL,16);
  }
  return 0;
}
- static int cs_enter(MY_XML_PARSER *st,const char *attr, uint len)
- {
- struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
- struct my_cs_file_section_st *s= cs_file_sec(attr,len);
- if ( s && (s->state == _CS_CHARSET))
- bzero(&i->cs,sizeof(i->cs));
- if (s && (s->state == _CS_COLLATION))
- i->tailoring_length= 0;
- return MY_XML_OK;
- }
- static int cs_leave(MY_XML_PARSER *st,const char *attr, uint len)
- {
- struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
- struct my_cs_file_section_st *s= cs_file_sec(attr,len);
- int state= s ? s->state : 0;
- int rc;
- switch(state){
- rc= i->add_collation ? i->add_collation(&i->cs) : MY_XML_OK;
- break;
- default:
- rc=MY_XML_OK;
- }
- return rc;
- }
- static int cs_value(MY_XML_PARSER *st,const char *attr, uint len)
- {
- struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
- struct my_cs_file_section_st *s;
- int state= (s=cs_file_sec(st->attr,strlen(st->attr))) ? s->state : 0;
- #ifndef DBUG_OFF
- if(0){
- char str[1024];
- mstr(str,attr,len,sizeof(str)-1);
- printf("VALUE %d %s='%s'n",state,st->attr,str);
- }
- #endif
- switch (state) {
- case _CS_ID:
- i->cs.number= strtol(attr,(char**)NULL,10);
- break;
- case _CS_BINARY_ID:
- i->cs.binary_number= strtol(attr,(char**)NULL,10);
- break;
- case _CS_PRIMARY_ID:
- i->cs.primary_number= strtol(attr,(char**)NULL,10);
- break;
- case _CS_COLNAME:
- i->cs.name=mstr(i->name,attr,len,MY_CS_NAME_SIZE-1);
- break;
- case _CS_CSNAME:
- i->cs.csname=mstr(i->csname,attr,len,MY_CS_NAME_SIZE-1);
- break;
- i->cs.comment=mstr(i->comment,attr,len,MY_CS_CSDESCR_SIZE-1);
- break;
- case _CS_FLAG:
- if (!strncmp("primary",attr,len))
- i->cs.state|= MY_CS_PRIMARY;
- else if (!strncmp("binary",attr,len))
- i->cs.state|= MY_CS_BINSORT;
- else if (!strncmp("compiled",attr,len))
- i->cs.state|= MY_CS_COMPILED;
- break;
- case _CS_UPPERMAP:
- fill_uchar(i->to_upper,MY_CS_TO_UPPER_TABLE_SIZE,attr,len);
- i->cs.to_upper=i->to_upper;
- break;
- case _CS_LOWERMAP:
- fill_uchar(i->to_lower,MY_CS_TO_LOWER_TABLE_SIZE,attr,len);
- i->cs.to_lower=i->to_lower;
- break;
- case _CS_UNIMAP:
- fill_uint16(i->tab_to_uni,MY_CS_TO_UNI_TABLE_SIZE,attr,len);
- i->cs.tab_to_uni=i->tab_to_uni;
- break;
- case _CS_COLLMAP:
- fill_uchar(i->sort_order,MY_CS_SORT_ORDER_TABLE_SIZE,attr,len);
- i->cs.sort_order=i->sort_order;
- break;
- case _CS_CTYPEMAP:
- fill_uchar(i->ctype,MY_CS_CTYPE_TABLE_SIZE,attr,len);
- i->cs.ctype=i->ctype;
- break;
- case _CS_RESET:
- case _CS_DIFF1:
- case _CS_DIFF2:
- case _CS_DIFF3:
- {
- /*
- Convert collation description from
- Locale Data Markup Language (LDML)
- into ICU Collation Customization expression.
- */
- char arg[16];
- const char *cmd[]= {"&","<","<<","<<<"};
- i->cs.tailoring= i->tailoring;
- mstr(arg,attr,len,sizeof(arg)-1);
- if (i->tailoring_length + 20 < sizeof(i->tailoring))
- {
- char *dst= i->tailoring_length + i->tailoring;
- i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg);
- }
- }
- }
- return MY_XML_OK;
- }
- my_bool my_parse_charset_xml(const char *buf, uint len,
- int (*add_collation)(CHARSET_INFO *cs))
- {
- struct my_cs_file_info i;
- my_bool rc;
- my_xml_parser_create(&p);
- my_xml_set_enter_handler(&p,cs_enter);
- my_xml_set_value_handler(&p,cs_value);
- my_xml_set_leave_handler(&p,cs_leave);
- i.add_collation= add_collation;
- my_xml_set_user_data(&p,(void*)&i);
- rc= (my_xml_parse(&p,buf,len) == MY_XML_OK) ? FALSE : TRUE;
- my_xml_parser_free(&p);
- return rc;
- }