stringtable.cpp
上传用户:panfucai
上传日期:2022-05-28
资源大小:4678k
文件大小:6k
- //
- # define MAXSTRINGCOUNT 1000000
- # include "stringtable.h"
- CRITICAL_SECTION csModifyStrTable;
- void InitStringTable(PSTRTABLE &pTable)
- {
- pTable = new STRTABLE[MAXSTRINGCOUNT];
- pTable->iLen = 0;
- pTable->iCount = 0;
- pTable->iSequenceCount = 0;
- pTable->pWord = NULL;
- pTable->pFileList = NULL;
- pTable->iOccuredFiles = 0;
- pTable->iOccuredTimes = 0;
- }
- void AppendChar(PSTRTABLE pTable, TCHAR wchar)
- {
- EnterCriticalSection(&csModifyStrTable);
- long iPos = pTable->iCount + 1;
- (pTable + iPos)->pWord = new TCHAR[2];
- *((pTable + iPos)->pWord) = wchar;
- *((pTable + iPos)->pWord + 1) = 0;
- (pTable + iPos)->iLen = 1;
- (pTable + iPos)->pFileList = NULL;
- (pTable + iPos)->iOccuredFiles = 0;
- (pTable + iPos)->iOccuredTimes = 0;
- pTable->iCount ++;
- LeaveCriticalSection(&csModifyStrTable);
- }
- void AppendString(PSTRTABLE pTable, TCHAR wstr[])
- {
- EnterCriticalSection(&csModifyStrTable);
- long iPos = pTable->iCount + 1;
- (pTable + iPos)->pWord = new TCHAR[wcslen(wstr) + 2];
- wcscpy((pTable + iPos)->pWord, wstr);
- (pTable + iPos)->iLen = wcslen(wstr);
- (pTable + iPos)->pFileList = NULL;
- (pTable + iPos)->iOccuredFiles = 0;
- (pTable + iPos)->iOccuredTimes = 0;
- pTable->iCount ++;
- LeaveCriticalSection(&csModifyStrTable);
- }
- void ConstructStringTable(PSTRTABLE &pTable, PDICTABLE pDic)
- {
- InitStringTable(pTable);
- // Single characters and Words
- TCHAR charLastInitial = 0x4e00;
- for(long i = 1; i <= pDic->iCount; i++)
- {
- while(charLastInitial <= *((pDic + i)->pWord))
- {
- AppendChar(pTable, charLastInitial);
- charLastInitial++;
- }
- AppendString(pTable, (pDic + i)->pWord);
- }
- while(charLastInitial <= 0x9FA5)
- {
- AppendChar(pTable, charLastInitial);
- charLastInitial++;
- }
- pTable->iSequenceCount = pTable->iCount;
- }
- PSTRTABLE LookUpInSortedStringTable(PSTRTABLE pTable, int iLeft, int iRight, TCHAR wstr[])
- {
- if(iLeft > iRight)
- {
- return NULL;
- }
- int iMid = (iLeft + iRight) / 2;
- int iResult = wcsncmp(wstr, (pTable + iMid)->pWord, (pTable + iMid)->iCount);
- if(iResult > 0)
- {
- if(iLeft == iRight)
- {
- return NULL;
- }
- else
- {
- return LookUpInSortedStringTable(pTable, iMid + 1, iRight, wstr);
- }
- }
- else if(iResult < 0)
- {
- if(iLeft == iRight)
- {
- return NULL;
- }
- else
- {
- return LookUpInSortedStringTable(pTable, iLeft, iMid - 1, wstr);
- }
- }
- else
- {
- return pTable + iMid;
- }
- }
- PSTRTABLE LookUpInUnsortedStringTable(PSTRTABLE pTable, TCHAR wstr[])
- {
- long iPos = pTable->iSequenceCount + 1;
- int iResult = 0;
- while(iPos <= pTable->iCount)
- {
- iResult = wcsncmp(wstr, (pTable + iPos)->pWord, (pTable + iPos)->iCount);
- if(iResult == 0)
- {
- return pTable + iPos;
- }
- iPos++;
- }
- return NULL;
- }
- PSTRTABLE LookUpInStringTable(PSTRTABLE pTable, TCHAR wstr[])
- {
- PSTRTABLE pResult = NULL;
- pResult = LookUpInSortedStringTable(pTable, 1, pTable->iSequenceCount, wstr);
- if(pResult != NULL)
- {
- return pResult;
- }
- else
- {
- pResult = LookUpInUnsortedStringTable(pTable, wstr);
- }
- return pResult;
- }
- PFILELIST TraverseFileList(PSTRTABLE pTable, int iDocID)
- {
- PFILELIST pFileList = pTable->pFileList;
- while(pFileList != NULL)
- {
- if(pFileList->iDocID == iDocID)
- {
- return pFileList;
- }
- pFileList = pFileList->pNext;
- }
- return NULL;
- }
- void AppendFileList(PSTRTABLE pTable, int iDocID)
- {
- PFILELIST pFileList = NULL;
- pFileList = TraverseFileList(pTable, iDocID);
- if(pFileList != NULL)
- {
- // Add count
- pFileList->iTimes ++;
- pTable->iOccuredTimes ++;
- }
- else
- {
- // Append a new object
- pFileList = new FILELIST;
- pFileList->iDocID = iDocID;
- pFileList->iTimes = 1;
- pFileList->pNext = NULL;
- if(pTable->pFileList == NULL)
- {
- pTable->pFileList = pFileList;
- }
- else
- {
- PFILELIST pTail = pTable->pFileList;
- while(pTail->pNext != NULL)
- {
- pTail = pTail->pNext;
- }
- pTail->pNext = pFileList;
- }
- pTable->iOccuredTimes ++;
- pTable->iOccuredFiles ++;
- }
- }
- void SortFileListByTimes(PFILELIST pFileList)
- {
- PFILELIST pCurrMax = NULL, p = NULL;
- p = pFileList->pNext;
- if(p == NULL)
- {
- return;
- }
- pCurrMax = p;
- while(p != NULL)
- {
- if(p->iTimes > pCurrMax->iTimes)
- {
- pCurrMax = p;
- }
- p = p->pNext;
- }
- // Put the maximum to the front
- if(!(pFileList->pNext == pCurrMax))
- {
- PFILELIST pFront = pFileList;
- while(pFront->pNext != NULL)
- {
- if(pFront->pNext == pCurrMax)
- {
- break;
- }
- pFront = pFront->pNext;
- }
- pFront->pNext = pCurrMax->pNext;
- pCurrMax->pNext = pFileList->pNext;
- pFileList->pNext = pCurrMax;
- }
- SortFileListByTimes(pCurrMax);
- }
- void SortFileListByTimes(PSTRTABLE pTable)
- {
- PFILELIST pCurrMax = NULL, p = NULL;
- p = pTable->pFileList;
- pCurrMax = p;
- while(p != NULL)
- {
- if(p->iTimes > pCurrMax->iTimes)
- {
- pCurrMax = p;
- }
- p = p->pNext;
- }
- // Put the maximum to the front
- if(pTable->pFileList != pCurrMax)
- {
- PFILELIST pFront = pTable->pFileList;
- while(pFront->pNext != NULL)
- {
- if(pFront->pNext == pCurrMax)
- {
- break;
- }
- pFront = pFront->pNext;
- }
- pFront->pNext = pCurrMax->pNext;
- pCurrMax->pNext = pTable->pFileList;
- pTable->pFileList = pCurrMax;
- }
- SortFileListByTimes(pCurrMax);
- }
- void FillStringTable(PSTRTABLE pTable, int iDocID, PWORDLIST pWordList)
- {
- PSTRTABLE pTarget = NULL;
- PWORDLIST pWord = pWordList->pNext;
- while(pWord != NULL)
- {
- pTarget = LookUpInStringTable(pTable, pWord->pWord);
- if(pTarget != NULL)
- {
- AppendFileList(pTarget, iDocID);
- pWord = pWord->pNext;
- }
- else
- {
- AppendString(pTable, pWord->pWord);
- }
- }
- }