main.cpp
上传用户:panfucai
上传日期:2022-05-28
资源大小:4678k
文件大小:3k
源码类别:

多国语言处理

开发平台:

Visual C++

  #include "main.h"

  #include <cstddef>
  2. # define MAXPAGECOUNT 1000
  3. CRITICAL_SECTION cs;
  4. PDICTABLE pDic = NULL, pStopwordsDic = NULL, pSurnamesDic = NULL;
  5. char strCurrPath[1000];
  6. PSTRTABLE pTable = NULL, pInvDoc = NULL;
  7. int main()
  8. {
  9. //PWORDLIST pLst;
  10. char *pstrDicPath;
  11. char *pstrResultPath;
  12. DWORD dwStartTime = 0, dwEndTime = 0;
  13. dwStartTime = GetTickCount();
  14. //char *pstrPathPrefix;
  15. //char *pstrHTMLPath;
  16. HANDLE h[2];
  17. // Initialization of Critical Sections
  18. InitializeCriticalSection(&cs);
  19. InitializeCriticalSection(&csModifyStrTable);
  20. getcwd(strCurrPath, 1000);
  21. pstrDicPath = StrConcat(strCurrPath, "\Dict");
  22. cout<<"...Loading dictionary...";
  23. preMatch(pDic, pStopwordsDic, pSurnamesDic, pstrDicPath);
  24. cout<<"done."<<endl;
  25. cout<<"...Constructing the temporary string table...";
  26. ConstructStringTable(pTable, pDic);
  27. cout<<"done."<<endl;
  28. Sleep(0);
  29. delete pstrDicPath;
  30. h[0] = CreateThread(NULL, 0, HTMLThread, (LPVOID)1, 0, NULL);
  31. h[1] = CreateThread(NULL, 0, HTMLThread, (LPVOID)2, 0, NULL);
  32. WaitForMultipleObjects(1, h, TRUE, INFINITE);
  33. DeleteCriticalSection(&cs);
  34. DeleteCriticalSection(&csModifyStrTable);
  35. CloseHandle(h[0]);
  36. CloseHandle(h[1]);
  37. /*
  38. for(int i = 1; i <= MAXPAGECOUNT; i++)
  39. {
  40. cout<<"Processing page No."<<i<<"...";
  41. pstrPathPrefix = StrConcat(strCurrPath, "\Webpages\");
  42. pstrPathPrefix = StrConcat(pstrPathPrefix, i);
  43. pstrHTMLPath = StrConcat(pstrPathPrefix, ".html");
  44. pLst = htmlParse(pDic, pStopwordsDic, pSurnamesDic, pstrHTMLPath);
  45. FillStringTable(pTable, i, pLst);
  46. ClearWordList(pLst);
  47. delete pstrPathPrefix;
  48. delete pstrHTMLPath;
  49. cout<<"done."<<endl;
  50. }
  51. */
  52. dwEndTime = GetTickCount();
  53. cout<<"Time span: "<<dwEndTime - dwStartTime<<" ms."<<endl;
  54. cout<<"Converting...";
  55. pInvDoc = ConstructInvDoc(pTable);
  56. cout<<"done."<<endl;
  57. dwEndTime = GetTickCount();
  58. cout<<"Time span: "<<dwEndTime - dwStartTime<<" ms."<<endl;
  59. cout<<"Outputing into file...";
  60. pstrResultPath = StrConcat(strCurrPath, "\Result.txt");
  61. OutputInvDoc(pInvDoc, pstrResultPath);
  62. cout<<"done."<<endl;
  63. cout<<"Finished."<<endl;
  64. dwEndTime = GetTickCount();
  65. cout<<"Time span: "<<dwEndTime - dwStartTime<<" ms."<<endl;
  66. return 0;
  67. }
  68. void Writeln1(int i, bool bIsFinished)
  69. {
  70. EnterCriticalSection(&cs);
  71. if(bIsFinished)
  72. {
  73. cout<<"Page No."<<i<<" done."<<endl;
  74. }
  75. else
  76. {
  77. cout<<"Processing page No."<<i<<"..."<<endl;
  78. }
  79. LeaveCriticalSection(&cs);
  80. }
  81. DWORD WINAPI HTMLThread(LPVOID iStartOf)
  82. {
  83. PWORDLIST pLst = NULL;
  84. char *pstrPathPrefix = NULL;
  85. char *pstrHTMLPath = NULL;
  86. for(int i = (int)iStartOf; i <= MAXPAGECOUNT; i += 2)
  87. {
  88. Writeln1(i, false);
  89. pstrPathPrefix = StrConcat(strCurrPath, "\Webpages\");
  90. pstrPathPrefix = StrConcat(pstrPathPrefix, i);
  91. pstrHTMLPath = StrConcat(pstrPathPrefix, ".html");
  92. pLst = htmlParse(pDic, pStopwordsDic, pSurnamesDic, pstrHTMLPath);
  93. FillStringTable(pTable, i, pLst);
  94. ClearWordList(pLst);
  95. Writeln1(i, true);
  96. delete pstrPathPrefix;
  97. delete pstrHTMLPath;
  98. }
  99. return 0;
  100. }