mkdict.c
上传用户:hmc_gdtv
上传日期:2013-08-04
资源大小:798k
文件大小:23k
#include <windows.h>
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <tchar.h>
#include <wchar.h>
#include "zlib.h"
- #define BLOCK 65536
- // global stuff
- const TCHAR *progname;
- int src_codepage_num;
- int src_locale_num;
- int dest_codepage_num;
- UINT src_cp;
- LCID src_lcid;
- UINT dest_cp;
- int decode;
- // memory allocation
- void nomem(void) {
- _ftprintf(stderr,_T("%s: Out of memory!n"),progname);
- exit(1);
- }
// malloc() that never returns NULL: calls nomem() (which exits) on failure.
// A zero-byte request is rounded up to one byte, because malloc(0) may
// legitimately return NULL and that must not be reported as out of memory.
void *xmalloc(size_t size) {
    void *p = malloc(size ? size : 1);
    if (p == NULL)
        nomem();
    return p;
}
// realloc() wrapper: calls nomem() (which exits) when a non-empty request
// fails.  A NULL result for size 0 is passed through to the caller.
void *xrealloc(void *m, size_t size) {
    void *p = realloc(m, size);
    if (p == NULL && size > 0)
        nomem();
    return p;
}
- TCHAR *xstrdup(const TCHAR *s) {
- TCHAR *n=_tcsdup(s);
- if (n==NULL)
- nomem();
- return n;
- }
- // error handling
- void syserror(const TCHAR *msg) {
- DWORD code=GetLastError();
- LPTSTR *errmsg;
- if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
- NULL,code,0,(LPTSTR)&errmsg,0,NULL))
- _ftprintf(stderr,_T("%s: %s: %sn"),progname,msg,errmsg);
- else
- _ftprintf(stderr,_T("%s: %s: %sn"),progname,msg,_T("Unknown error"));
- exit(1);
- }
- void liberr(const TCHAR *msg) {
- _ftprintf(stderr,
- #ifdef UNICODE
- _T("%s: %s: %S"),
- #else
- _T("%s: %s: %s"),
- #endif
- progname,msg,strerror(errno));
- exit(1);
- }
- TCHAR *tchar(const char *s) {
- #ifdef UNICODE
- int srclen=strlen(s);
- int wclen;
- TCHAR *ws;
- if (!srclen)
- return xstrdup(_T(""));
- wclen=MultiByteToWideChar(CP_OEMCP,0,s,srclen,NULL,0);
- if (wclen==0)
- syserror(_T("Can't convert string"));
- ws=xmalloc((wclen+1)*sizeof(TCHAR));
- MultiByteToWideChar(CP_OEMCP,0,s,srclen,ws,wclen);
- ws[wclen]=' ';
- return ws;
- #else
- return xstrdup(s);
- #endif
- }
- // custom getopt
- int xgetopt(int *argc,char ***argv,const char *ospec,
- const char **state,const char **arg)
- {
- const char *cp;
- char opt;
- if (!*state || !(*state)[0]) { // look a the next arg
- if (!*argc || !(*argv)[0] || (*argv)[0][0]!='-') // no more options
- return 0;
- if (!(*argv)[0][1]) // a lone '-', treat as an end of list
- return 0;
- if ((*argv)[0][1]=='-') { // '--', ignore rest of text and stop
- --*argc; ++*argv;
- return 0;
- }
- *state=(*argv)[0]+1;
- --*argc;
- ++*argv;
- }
- // we are in a middle of an arg
- opt=*(*state)++;
- for (cp=ospec;*cp;++cp) {
- if (*cp==opt)
- goto found;
- if (cp[1]==':')
- ++cp;
- }
- _ftprintf(stderr,_T("%s: Invalid option: '%c'n"),progname,opt);
- exit(1);
- found:
- if (cp[1]==':') { // option requires an argument
- if (**state) { // use rest of string
- *arg=*state;
- *state=NULL;
- return (unsigned char)opt;
- }
- // use next arg if available
- if (*argc) {
- *arg=(*argv)[0];
- --*argc;
- ++*argv;
- return (unsigned char)opt;
- }
- // barf about missing args
- _ftprintf(stderr,_T("%s: Option '%c' requires an argumentn"),progname,opt);
- exit(1);
- }
- // just return current option
- return (unsigned char)opt;
- }
// hexdump

// Print the code units of a wide string on stdout as 4-digit hex values,
// eight per line.
void hexdump(const wchar_t *str) {
    size_t len = wcslen(str);
    int i;

    while (len > 0) {
        printf(" ");
        // the column counter must count up, otherwise the row never wraps
        for (i = 0; i < 8 && len > 0; ++i, --len, ++str)
            printf("%04x ", (unsigned)*str);
        printf("\n");
    }
}
// generic arrays support

// Enlarge the array *arr, whose capacity is *maxitems items of itemsize
// bytes each.  The capacity starts at 32, doubles while it is at most 8192
// and grows by 8192 items per call after that.  *arr and *maxitems are
// updated; failure to allocate ends the program inside xrealloc().
void growarray(void **arr, int itemsize, int *maxitems) {
    int cap = *maxitems;

    if (cap == 0)
        cap = 32;
    else
        cap += cap > 8192 ? 8192 : cap;
    // compute the byte count in size_t so the product cannot overflow int
    *arr = xrealloc(*arr, (size_t)cap * (size_t)itemsize);
    *maxitems = cap;
}
// Make sure there is room for one more item: when the item count cur has
// reached the capacity max, grow the array ptr (max is updated in place).
#define CHECKADD(ptr,cur,max) do { \
        if ((cur)>=(max)) \
            growarray((void **)&(ptr), \
                      sizeof((ptr)[0]),&(max)); \
    } while (0)
- // languages support
- struct lang {
- LCID lcid;
- TCHAR *country;
- TCHAR *lang;
- };
- struct lang *languages;
- int curlang,maxlang;
- void addlang(LCID lcid,const TCHAR *country,const TCHAR *lang) {
- CHECKADD(languages,curlang,maxlang);
- languages[curlang].lcid=lcid;
- languages[curlang].country=xstrdup(country);
- languages[curlang].lang=xstrdup(lang);
- ++curlang;
- }
- BOOL CALLBACK EnumLocalesProc(LPTSTR name) {
- LCID lcid;
- TCHAR country[1024],lang[1024];
- int ret;
- if (_stscanf(name,_T("%x"),&lcid)==1) {
- ret=GetLocaleInfo(lcid,LOCALE_SENGCOUNTRY,country,sizeof(country)/sizeof(TCHAR)-1);
- if (ret==0)
- return TRUE;
- country[ret]=' ';
- ret=GetLocaleInfo(lcid,LOCALE_SENGLANGUAGE,lang,sizeof(lang)/sizeof(TCHAR));
- lang[ret]=' ';
- addlang(lcid,country,lang);
- }
- return TRUE;
- }
- int langcmp(const void *v1,const void *v2) {
- const struct lang *l1=v1;
- const struct lang *l2=v2;
- int val;
- val=_tcsicmp(l1->lang,l2->lang);
- if (val==0)
- val=_tcsicmp(l1->country,l2->country);
- return val;
- }
- int langcmp_lcid(const void *v1,const void *v2) {
- const struct lang *l1=v1;
- const struct lang *l2=v2;
- int val;
- val=_tcsicmp(l1->lang,l2->lang);
- if (val==0)
- val=l1->lcid<l2->lcid ? -1 : l1->lcid==l2->lcid ? 0 : 1;
- return val;
- }
- #define LANGFMT1 " %6s %-30s %-30sn"
- #define LANGFMT2 " %6x %-30s %-30sn"
- void showlocales(void) {
- int i;
- LCID deflcid;
-
- qsort(languages,curlang,sizeof(struct lang),langcmp);
- _tprintf(_T("Installed locales:n"));
- _tprintf(_T(LANGFMT1),_T("LCID"),_T("Language"),_T("Country"));
- for (i=0;i<70;++i)
- putc('-',stdout);
- putc('n',stdout);
- deflcid=GetUserDefaultLCID();
- for (i=0;i<curlang;++i)
- _tprintf(_T(LANGFMT2),
- languages[i].lcid,
- languages[i].lang,
- languages[i].country);
- exit(0);
- }
- int find_locale(const char *l) {
- TCHAR *wl=tchar(l);
- LCID id;
- int i;
- TCHAR *country;
-
- country=_tcschr(wl,_T('.'));
- if (country)
- *country++=' ';
- for (i=0;i<curlang;++i)
- if (!_tcsicmp(wl,languages[i].lang) &&
- (!country || !_tcsicmp(country,languages[i].country)))
- goto found;
- if (_stscanf(wl,_T("%x"),&id)==1)
- for (i=0;i<curlang;++i)
- if (languages[i].lcid==id)
- goto found;
- _ftprintf(stderr,_T("%s: Language '%s' not found.n"),progname,wl);
- exit(1);
- found:
- free(wl);
- return i;
- }
- // code pages support
- struct codepage {
- UINT cp;
- const TCHAR *name;
- };
- struct codepage *codepages;
- int curcodepage,maxcodepage;
- void addcodepage(UINT id,const TCHAR *name) {
- CHECKADD(codepages,curcodepage,maxcodepage);
- codepages[curcodepage].cp=id;
- codepages[curcodepage].name=xstrdup(name);
- ++curcodepage;
- }
- BOOL CALLBACK EnumCodePagesProc(LPTSTR name) {
- UINT id;
- CPINFOEX iex;
- if (_stscanf(name,_T("%d"),&id)==1 && GetCPInfoEx(id,0,&iex)) {
- TCHAR *rbr,*lbr=_tcschr(iex.CodePageName,_T('('));
- if (lbr) {
- ++lbr;
- rbr=_tcschr(lbr,_T(')'));
- if (rbr)
- *rbr=' ';
- else
- lbr=iex.CodePageName;
- }
- addcodepage(iex.CodePage,lbr);
- }
- return TRUE;
- }
- int codepagecmp(const void *v1,const void *v2) {
- const struct codepage *c1=v1;
- const struct codepage *c2=v2;
- return c1->cp<c2->cp ? -1 : c1->cp>c2->cp ? 1 : 0;
- }
- void showcodepages(void) {
- int i;
- qsort(codepages,curcodepage,sizeof(struct codepage),codepagecmp);
- _tprintf(_T("Installed code pages:n"));
- for (i=0;i<curcodepage;++i)
- _tprintf(_T(" %5d %sn"),codepages[i].cp,codepages[i].name);
- exit(1);
- }
- int find_codepage(const char *cp) {
- TCHAR *wcp=tchar(cp);
- UINT ucp;
- int i;
- if (sscanf(cp,"%d",&ucp)==1) { // looks like a numeric codepage
- for (i=0;i<curcodepage;++i)
- if (codepages[i].cp==ucp)
- goto found;
- } else {
- for (i=0;i<curcodepage;++i)
- if (!_tcsicmp(wcp,codepages[i].name))
- goto found;
- }
- _ftprintf(stderr,_T("%s: Codepage '%s' not found.n"),progname,wcp);
- exit(1);
- found:
- free(wcp);
- return i;
- }
- // usage
- void usage(void) {
- _tprintf(_T("Usage: %s [options] source destinationn")
- _T(" Options:n")
- _T(" -L list available languagesn")
- _T(" -C list available code pagesn")
- _T(" -l <language> specify the language for dictionary keysn")
- _T(" -c <code page> specify source code pagen")
- _T(" -o <code page> specify output code pagen")
- _T(" -d unpack a compiled dictionaryn"),
- progname
- );
- exit(1);
- }
// word entry

// one dictionary entry
struct word {
    const char *entry;  // entry text converted to the output code page
    int elen;           // entry length in bytes, terminator not counted
    const char *key;    // sort key built from the key part of the entry
    int klen;           // key length in bytes, terminator not counted
};

struct word *words;  // growable array of entries
int curword;         // number of entries in use
int maxword;         // allocated capacity
- void addword(const char *entry,int mblen,int line) {
- int wclen;
- int ulen;
- wchar_t *wentry;
- char *sortkey;
- char *uentry;
- int sortkeylen;
- int i;
- if (mblen==0) // ignore empty words
- return;
- wclen=MultiByteToWideChar(src_cp,0,entry,mblen,NULL,0);
- if (wclen==0)
- syserror(_T("Can't convert string to unicode"));
- wentry=xmalloc((wclen+1)*sizeof(wchar_t));
- MultiByteToWideChar(src_cp,0,entry,mblen,wentry,wclen);
- wentry[wclen]=' ';
- for (i=1;i<wclen-1;++i)
- if (wentry[i]==' ' && wentry[i+1]==' ')
- goto found;
- _ftprintf(stderr,_T("%s: Invalid entry: '%s' at line %d (%d,%d)n"),progname,wentry,line,mblen,wclen);
- exit(1);
- found:
- sortkeylen=LCMapStringW(src_lcid,LCMAP_SORTKEY|NORM_IGNORECASE,
- wentry,i,NULL,0);
- if (sortkeylen==0)
- syserror(_T("Can't get sort key"));
- sortkey=xmalloc(sortkeylen);
- LCMapStringW(src_lcid,LCMAP_SORTKEY|NORM_IGNORECASE,
- wentry,i,(wchar_t *)sortkey,sortkeylen);
- --sortkeylen; // we don't want an extra NUL byte
- // replace tabs with 'n'
- for (i=0;i<wclen;++i)
- if (wentry[i]=='t')
- wentry[i]='n';
- // convert to utf8
- ulen=WideCharToMultiByte(dest_cp,0,wentry,wclen,NULL,0,NULL,NULL);
- if (ulen==0)
- syserror(_T("Can't convert string to multibyte"));
- uentry=xmalloc(ulen+1);
- WideCharToMultiByte(dest_cp,0,wentry,wclen,uentry,ulen,NULL,NULL);
- uentry[ulen]=' ';
- free(wentry);
- // add word
- CHECKADD(words,curword,maxword);
- words[curword].entry=uentry;
- words[curword].elen=ulen;
- words[curword].key=sortkey;
- words[curword].klen=sortkeylen;
- ++curword;
- }
- void readfile(const char *filename) {
- FILE *fp;
- char inbuf[65536];
- int line=1;
- _tprintf(_T("Loading... ")); fflush(stdout);
- if ((fp=fopen(filename,"r"))==NULL)
- liberr(_T("Can't open file"));
- setvbuf(fp,NULL,_IOFBF,65536);
- while (fgets(inbuf,sizeof(inbuf),fp)) {
- int len=strlen(inbuf);
- while (len>0 && (inbuf[len-1]=='r' || inbuf[len-1]=='n'))
- --len;
- inbuf[len]=' ';
- if (len)
- addword(inbuf,len,line);
- ++line;
- }
- fclose(fp);
- _tprintf(_T("done (%d entries).n"),curword);
- }
- int wordcmp(const void *v1,const void *v2) {
- return strcmp(((const struct word *)v1)->key,((const struct word *)v2)->key);
- }
- void sortwords(void) {
- _tprintf(_T("Sorting... ")); fflush(stdout);
- qsort(words,curword,sizeof(struct word),wordcmp);
- _tprintf(_T("done.n"));
- }
- void mergewords(void) {
- struct word *nwords;
- int nwp;
- int cur,end,totlen;
- char *ne,*cp;
- int merged;
- _tprintf(_T("Merging... ")); fflush(stdout);
- nwords=xmalloc(curword*sizeof(struct word));
- for (cur=nwp=merged=0;cur<curword;)
- if (cur<curword-1 && !strcmp(words[cur].key,words[cur+1].key)) {
- totlen=words[cur].elen+words[cur+1].elen+1;
- for (end=cur+2;end<curword&&!strcmp(words[cur].key,words[end].key);++end)
- {
- totlen+=words[end].elen;
- totlen++;
- }
- merged+=end-cur-1;
- ne=xmalloc(totlen+1);
- memcpy(ne,words[cur].entry,words[cur].elen);
- nwords[nwp].key=words[cur].key;
- nwords[nwp].klen=words[cur].klen;
- nwords[nwp].elen=totlen;
- nwords[nwp].entry=ne;
- ++nwp;
- cp=ne+words[cur].elen;
- free((void*)words[cur].entry);
- ++cur;
- while (cur<end) {
- *cp++='n';
- memcpy(cp,words[cur].entry,words[cur].elen);
- cp+=words[cur].elen;
- free((void*)words[cur].entry);
- free((void*)words[cur].key);
- ++cur;
- }
- *cp=' ';
- } else {
- nwords[nwp].entry=words[cur].entry;
- nwords[nwp].elen=words[cur].elen;
- nwords[nwp].key=words[cur].key;
- nwords[nwp].klen=words[cur].klen;
- ++nwp;
- ++cur;
- }
- free(words);
- words=nwords;
- maxword=curword=nwp;
- _tprintf(_T("done (%d merged).n"),merged);
- }
// blocks

// block table record for one block written to the output file
struct block {
    int size;             // uncompressed size in bytes
    int compressed_size;  // size as stored in the file
    char *key;            // copy of the first key in the block
    int keylen;           // length of that key
    int nent;             // number of entries in the block
    int npara;            // entries plus the newlines found in their texts
};

struct block *blocks;  // growable array of block records
int curblock;          // number of records in use
int maxblock;          // allocated capacity
- void putblock(const char *block,int blen,int nent,FILE *fp) {
- char compressed[BLOCK];
- int res;
- uLongf dsize=sizeof(compressed);
- int docomp=0;
- const char *bend;
- CHECKADD(blocks,curblock,maxblock);
- res=compress2((Bytef *)compressed,&dsize,(Bytef *)block,blen,9);
- if (res==Z_MEM_ERROR) // don't want to compress this
- fwrite(block,blen,1,fp);
- else if (res==Z_OK) {
- if (dsize>=(unsigned)blen) // write uncompressed
- fwrite(block,blen,1,fp);
- else {
- docomp=1;
- fwrite(compressed,dsize,1,fp);
- }
- } else {
- _ftprintf(stderr,_T("%s: zlib error: %dn"),progname,res);
- exit(1);
- }
- blocks[curblock].size=blen;
- blocks[curblock].compressed_size=docomp ? dsize : blen;
- blocks[curblock].keylen=strlen(block);
- blocks[curblock].key=xmalloc(blocks[curblock].keylen+1);
- memcpy(blocks[curblock].key,block,blocks[curblock].keylen+1);
- blocks[curblock].nent=nent;
- for (bend=block+blen;block<bend;) {
- while (block<bend && *block)
- ++block;
- if (block<bend)
- ++block;
- while (block<bend && *block) {
- if (*block=='n')
- ++nent;
- ++block;
- }
- if (block<bend)
- ++block;
- }
- blocks[curblock].npara=nent;
- ++curblock;
- }
- void putword(WORD w,FILE *fp) {
- putc(w&0xff,fp);
- putc(w>>8,fp);
- }
- void putdword(DWORD d,FILE *fp) {
- putc(d&0xff,fp);
- putc((d>>8)&0xff,fp);
- putc((d>>16)&0xff,fp);
- putc(d>>24,fp);
- }
- DWORD getdword(FILE *fp) {
- BYTE b1,b2,b3,b4;
- b1=getc(fp); b2=getc(fp); b3=getc(fp); b4=getc(fp);
- return (DWORD)b4<<24|(DWORD)b3<<16|(DWORD)b2<<8|b1;
- }
- WORD getword(FILE *fp) {
- BYTE b1,b2;
- b1=getc(fp); b2=getc(fp);
- return (WORD)b2<<8|b1;
- }
- void writewords(const char *filename) {
- FILE *fp;
- int i;
- char block[BLOCK];
- int blen,nent;
- long tboff,size;
- _tprintf(_T("Writing... ")); fflush(stdout);
- if ((fp=fopen(filename,"wb"))==NULL)
- liberr(_T("Can't open file"));
- setvbuf(fp,NULL,_IOFBF,65536);
- // write header
- fputs("DICq",fp);
- putdword(src_lcid,fp); // key locale
- putdword(curword,fp); // total number of entries
- putdword(dest_cp,fp); // encoding
- putdword(0,fp); // block table offset
- // write blocks
- for (i=blen=nent=0;i<curword;++i) {
- if (words[i].klen+words[i].elen+2>BLOCK) {
- _ftprintf(stderr,_T("%s: Entry too long.n"),progname);
- exit(1);
- }
- if (blen+words[i].klen+1+words[i].elen+1>BLOCK) { // flush block
- putblock(block,blen,nent,fp);
- blen=nent=0;
- if ((curblock&7)==0) {
- putc('.',stdout);
- fflush(stdout);
- }
- }
- memcpy(block+blen,words[i].key,words[i].klen+1);
- blen+=words[i].klen+1;
- memcpy(block+blen,words[i].entry,words[i].elen+1);
- blen+=words[i].elen+1;
- ++nent;
- }
- if (nent>0)
- putblock(block,blen,nent,fp);
- tboff=ftell(fp);
- // write block table
- putdword(curblock,fp); // number of blocks
- for (i=0;i<curblock;++i) {
- putdword(blocks[i].size,fp); // uncompressed size
- putdword(blocks[i].compressed_size,fp); // compressed size
- putdword(blocks[i].keylen,fp); // key length
- putdword(blocks[i].nent,fp); // number of words in this block
- putdword(blocks[i].npara,fp); // number of "paragraphs" in this block
- }
- for (i=0;i<curblock;++i) // keys
- fwrite(blocks[i].key,blocks[i].keylen,1,fp);
- fflush(fp);
- size=ftell(fp);
- // write block table offset
- fseek(fp,16,SEEK_SET);
- putdword(tboff,fp);
- fclose(fp);
- _tprintf(_T(" done (%d block(s), %ld byte(s)).n"),curblock,size);
- }
- // convert string and write
- void write_str(const char *s,int len,UINT srccp,UINT dstcp,FILE *fp) {
- wchar_t *wcs;
- int wclen;
- char *dcs;
- int dclen;
- int j;
- if (srccp==dstcp) {
- for (j=0;j<len;++j)
- if (s[j]=='n')
- putc('t',fp);
- else
- putc(s[j],fp);
- putc('n',fp);
- return;
- }
- if (len==0) {
- putc('n',fp);
- return;
- }
- wclen=MultiByteToWideChar(srccp,0,s,len,NULL,0);
- if (wclen==0)
- syserror(_T("Can't convert string"));
- wcs=xmalloc(wclen*sizeof(wchar_t));
- MultiByteToWideChar(srccp,0,s,len,wcs,wclen);
- dclen=WideCharToMultiByte(dstcp,0,wcs,wclen,NULL,0,NULL,NULL);
- if (dclen==0)
- syserror(_T("Can't convert string"));
- dcs=xmalloc(dclen);
- WideCharToMultiByte(dstcp,0,wcs,wclen,dcs,dclen,NULL,NULL);
- for (j=0;j<dclen;++j)
- if (dcs[j]=='n')
- dcs[j]='t';
- fwrite(dcs,dclen,1,fp);
- putc('n',fp);
- free(wcs);
- free(dcs);
- }
- // find code page names
- void get_cp_name(void) {
- int i;
- if (src_codepage_num<0) {
- CPINFOEX iex;
- if (GetCPInfoEx(src_cp,0,&iex)) {
- for (i=0;i<curcodepage;++i)
- if (codepages[i].cp==iex.CodePage) {
- src_codepage_num=i;
- break;
- }
- }
- }
- if (dest_codepage_num<0) {
- CPINFOEX iex;
- if (GetCPInfoEx(dest_cp,0,&iex)) {
- for (i=0;i<curcodepage;++i)
- if (codepages[i].cp==iex.CodePage) {
- dest_codepage_num=i;
- break;
- }
- }
- }
- }
- // find locale name
- void get_locale_name(void) {
- int i;
- if (src_locale_num<0) {
- for (i=0;i<curlang;++i)
- if (src_lcid==languages[i].lcid) {
- src_locale_num=i;
- break;
- }
- }
- }
- // print codepage info
- void print_cp_lang_info(void) {
- if (src_locale_num>=0)
- _tprintf(_T("Source language: %x %s.%sn"),
- languages[src_locale_num].lcid,languages[src_locale_num].lang,
- languages[src_locale_num].country);
- if (src_codepage_num>=0)
- _tprintf(_T("Source code page: %d %sn"),
- codepages[src_codepage_num].cp,codepages[src_codepage_num].name);
- if (dest_codepage_num>=0)
- _tprintf(_T("Output code page: %d %sn"),
- codepages[dest_codepage_num].cp,codepages[dest_codepage_num].name);
- }
- // decompile an existing dictionary
- void decode_words(const char *ifile,const char *ofile) {
- FILE *ifp,*ofp;
- char buf[5];
- int *csizes;
- int *sizes;
- long *offs;
- int nblk;
- int totwords,nwords;
- long blocktaboff,boff;
- int i;
- char *cblk,*blk;
- char *cp,*ep,*pp;
- int mode;
- if ((ifp=fopen(ifile,"rb"))==NULL)
- liberr(_T("Can't open file"));
- setvbuf(ifp,NULL,_IOFBF,65536);
- if ((ofp=fopen(ofile,"w"))==NULL)
- liberr(_T("Can't open file"));
- setvbuf(ofp,NULL,_IOFBF,65536);
- fread(buf,4,1,ifp);
- buf[4]=' ';
- if (strcmp(buf,"DICq")==0)
- mode=0;
- else if (strcmp(buf,"DICt")==0)
- mode=1;
- else
- goto invalid;
- src_lcid=getdword(ifp);
- totwords=getdword(ifp);
- src_cp=getdword(ifp);
- if (dest_codepage_num<0)
- dest_cp=src_cp;
- get_cp_name();
- get_locale_name();
- print_cp_lang_info();
- _tprintf(_T("Unpacking... ")); fflush(stdout);
- blocktaboff=getdword(ifp);
- fseek(ifp,blocktaboff,SEEK_SET);
- nblk=getdword(ifp);
- csizes=xmalloc(nblk*sizeof(int));
- sizes=xmalloc(nblk*sizeof(int));
- offs=xmalloc(nblk*sizeof(int));
- // read block table
- for (i=0,boff=20;i<nblk;++i) {
- sizes[i]=getdword(ifp);
- csizes[i]=getdword(ifp);
- offs[i]=boff;
- boff+=csizes[i];
- getdword(ifp); // skip key length
- getdword(ifp); // skip number of words in block
- if (mode==0)
- getdword(ifp); // skip number of paragraphs in block
- }
- // read blocks
- for (i=nwords=0;i<nblk;++i) {
- fseek(ifp,offs[i],SEEK_SET);
- cblk=xmalloc(csizes[i]);
- fread(cblk,csizes[i],1,ifp);
- if (csizes[i]<sizes[i]) { // decompress
- uLongf dlen=sizes[i];
- int res;
- blk=xmalloc(sizes[i]);
- res=uncompress((Bytef*)blk,&dlen,(Bytef*)cblk,csizes[i]);
- if (res!=Z_OK) {
- _ftprintf(stderr,_T("%s: zlib error: %dn"),progname,res);
- goto error;
- }
- if (dlen!=(unsigned)sizes[i])
- goto invalid;
- } else {
- blk=cblk;
- cblk=NULL;
- }
- // write entries
- for (cp=blk,ep=blk+sizes[i];cp<ep;) {
- // skip key
- while (cp<ep && *cp)
- ++cp;
- if (cp<ep)
- pp=++cp;
- // skip entry
- while (cp<ep && *cp)
- ++cp;
- if (pp!=cp)
- write_str(pp,cp-pp,src_cp,dest_cp,ofp);
- if (cp<ep)
- ++cp;
- ++nwords;
- }
- // cleanup
- free(blk);
- free(cblk);
- }
- if (nwords!=totwords)
- goto invalid;
- fclose(ifp);
- fclose(ofp);
- _tprintf(_T("done (%d entries).n"),nwords);
- return;
- invalid:
- _ftprintf(stderr,_T("%s: Invalid dictionary file.n"),progname);
- error:
- fclose(ofp);
- remove(ofile);
- exit(1);
- }
- int main(int argc,char **argv) {
- char *state=NULL,*arg;
- int opt;
- // setup default locale
- _tsetlocale(LC_ALL,_T(""));
- // fetch the list of installed locales and code pages
- EnumSystemLocales(EnumLocalesProc,LCID_INSTALLED|LCID_ALTERNATE_SORTS);
- EnumSystemCodePages(EnumCodePagesProc,CP_INSTALLED);
- qsort(languages,curlang,sizeof(struct lang),langcmp_lcid);
- // set program name
- if (argc>0)
- progname=tchar(argv[0]);
- if (progname==NULL)
- progname=_T("mkdict");
- // setup defaults
- src_locale_num=-1;
- src_lcid=GetUserDefaultLCID();
- src_codepage_num=-1;
- dest_codepage_num=-1;
- src_cp=CP_OEMCP;
- dest_cp=CP_UTF8;
- // process options
- if (argc<2)
- usage();
- --argc; ++argv; // skip program name
- while ((opt=xgetopt(&argc,&argv,"LCl:c:o:d",&state,&arg)))
- switch (opt) {
- case 'L':
- showlocales();
- break;
- case 'C':
- showcodepages();
- break;
- case 'c':
- src_codepage_num=find_codepage(arg);
- src_cp=codepages[src_codepage_num].cp;
- if (dest_codepage_num<0) { // also set output cp
- dest_codepage_num=src_codepage_num;
- dest_cp=src_cp;
- }
- break;
- case 'l':
- src_locale_num=find_locale(arg);
- src_lcid=languages[src_locale_num].lcid;
- break;
- case 'o':
- dest_codepage_num=find_codepage(arg);
- dest_cp=codepages[dest_codepage_num].cp;
- break;
- case 'd':
- decode=1;
- break;
- }
- if (argc<2) // at least source and destination files is required
- usage();
- // try to find locale and code page ids
- // print locale and code page info
- // do the work
- if (decode) {
- decode_words(argv[0],argv[1]);
- } else {
- get_locale_name();
- get_cp_name();
- print_cp_lang_info();
- readfile(argv[0]);
- sortwords();
- mergewords();
- writewords(argv[1]);
- }
- return 0;
- }