Dictionary.cpp
资源名称:基本词典程序.rar [点击查看]
上传用户:jxhy0791
上传日期:2007-05-24
资源大小:6173k
文件大小:4k
源码类别:
多国语言处理
开发平台:
Visual C++
- //#include "stdafx.h"
- #include "Dictionary.h"
- #include "Utility.h"
- #include <string.h>
- #include <stdlib.h>
- #include <malloc.h>
- #include <stdio.h>
- #define CC_NUM 6768
- //Number of Chinese character slots in the index table, including 5 empty positions between 3756-3761
- #define WORD_MAXLENGTH 100 // capacity, in chars, of a fixed-size word buffer
- void main()
- {
- struct tagWordResult{
- char sWord[WORD_MAXLENGTH];
- //The word
- int nHandle;
- //the POS of the word
- double dValue;
- //The -log(frequency/MAX)
- };
- typedef struct tagWordResult WORD_RESULT,*PWORD_RESULT;
- /*data structure for word item*/
- struct tagWordItem{
- int nWordLen;
- char *sWord;
- //The word
- int nHandle;
- //the process or information handle of the word
- int nFrequency;
- //The count which it appear
- };
- typedef struct tagWordItem WORD_ITEM,*PWORD_ITEM;
- /*data structure for dictionary index table item*/
- struct tagIndexTable{
- int nCount;
- //The count number of words which initial letter is sInit
- PWORD_ITEM pWordItemHead;
- //The head of word items
- };
- typedef struct tagIndexTable INDEX_TABLE;
- /*data structure for word item chain*/
- struct tagWordChain{
- WORD_ITEM data;
- struct tagWordChain *next;
- };
- typedef struct tagWordChain WORD_CHAIN,*PWORD_CHAIN;
- /*data structure for dictionary index table item*/
- struct tagModifyTable{
- int nCount;
- //The count number of words which initial letter is sInit
- int nDelete;
- //The number of deleted items in the index table
- PWORD_CHAIN pWordItemHead;
- //The head of word items
- };
- typedef struct tagModifyTable MODIFY_TABLE,*PMODIFY_TABLE;
- INDEX_TABLE m_IndexTable[CC_NUM];
- // PMODIFY_TABLE m_pModifyTable;
- FILE *fp;
- int i,j,nBuffer[3];
- if((fp=fopen("coreDict.dct","rb"))==NULL)
- printf("kkkkkkkkk"); //fail while opening the file
- memset(m_IndexTable,0,sizeof(m_IndexTable));
- printf("装入内存");
- for(i=0;i<CC_NUM;i++)
- {
- fread(&(m_IndexTable[i].nCount),sizeof(int),1,fp);
- if(m_IndexTable[i].nCount>0)
- m_IndexTable[i].pWordItemHead=new WORD_ITEM[m_IndexTable[i].nCount];
- else
- {
- m_IndexTable[i].pWordItemHead=0;
- continue;
- }
- j=0;
- while(j<m_IndexTable[i].nCount)
- {
- fread(nBuffer,sizeof(int),3,fp);
- m_IndexTable[i].pWordItemHead[j].sWord=new char[nBuffer[1]+1];
- if(nBuffer[1])//String length is more than 0
- {
- fread(m_IndexTable[i].pWordItemHead[j].sWord,sizeof(char),nBuffer[1],fp);
- }
- m_IndexTable[i].pWordItemHead[j].sWord[nBuffer[1]]=0;
- m_IndexTable[i].pWordItemHead[j].nFrequency=nBuffer[0];
- m_IndexTable[i].pWordItemHead[j].nWordLen=nBuffer[1];
- m_IndexTable[i].pWordItemHead[j].nHandle=nBuffer[2];
- j+=1;//Get next item in the original table.
- }
- }
- fclose(fp);
- printf("装入完毕");
- FILE *fp1;
- int a,b,bBuffer[3];
- // PWORD_CHAIN pCur;
- //strcat(sFilename,".sav");
- if((fp1=fopen("coreDict.txt","wt"))==NULL)
- printf("bbbb"); //fail while opening the file
- printf("准备输出....");
- for(a=0;a<CC_NUM;a++)
- {char c1,c2,c3,c4;
- c1=a/94+176;
- c2=a%94+161;
- // fwrite(&m_IndexTable[a].nCount,sizeof(int),1,fp1);
- fprintf(fp1,"%dn",m_IndexTable[a].nCount);
- //write to the file
- b=0;
- while(b<m_IndexTable[a].nCount)
- {
- bBuffer[0]=m_IndexTable[a].pWordItemHead[b].nFrequency;
- bBuffer[1]=m_IndexTable[a].pWordItemHead[b].nWordLen;
- bBuffer[2]=m_IndexTable[a].pWordItemHead[b].nHandle;
- c3=bBuffer[2]/256;
- c4=bBuffer[2]%256;
- // c3=bBuffer[2];
- // c4=bBuffer[2]%256;
- // fwrite(bBuffer,sizeof(int),3,fp1);
- fprintf(fp1,"%dt%dt%dt",bBuffer[0],bBuffer[1],bBuffer[2]/*,c3,c4*/);
- //fprintf(fp1,"词频为:%dt词长为:%dt词标注为:%c%ct",bBuffer[0],bBuffer[1],c3,c4);
- // if(bBuffer[1])//String length is more than 0
- // fwrite(m_IndexTable[a].pWordItemHead[b].sWord,sizeof(char),bBuffer[1],fp1);
- fprintf(fp1,"t%c%c%sn",c1,c2,m_IndexTable[a].pWordItemHead[b].sWord);
- b+=1;//Get next item in the original table.
- }
- }
- fclose(fp1);
- }