Inputer.cpp
上传用户:bingyunhe
上传日期:2013-07-06
资源大小:723k
文件大小:35k
- #include "stdafx.h"
- //#include "..ChineseInputer.h"
- #include "Inputer.h"
- #include "StreamUtils.h"
- namespace Chinese
- {
- //===============================================================================
- //
- // 笔顺类
- //
- //===============================================================================
- /**
- * 横
- */
- Word StrokesOrder::s_Heng(_T("一"));
- /**
- * 竖
- */
- Word StrokesOrder::s_Shu(_T("丨"));
- /**
- * 撇
- */
- Word StrokesOrder::s_Pie(_T("丿"));
- /**
- * 捺
- */
- Word StrokesOrder::s_Na(_T("丶"));
- /**
- * 折
- */
- Word StrokesOrder::s_Zhe(_T("乙"));
- /**
- * 横
- */
- TCHAR StrokesOrder::s_cHeng = TCHAR('1');
-
-
- /**
- * 竖
- */
- TCHAR StrokesOrder::s_cShu = TCHAR('2');;
-
- /**
- * 撇
- */
- TCHAR StrokesOrder::s_cPie = TCHAR('3');
-
- /**
- * 捺
- */
- TCHAR StrokesOrder::s_cNa = TCHAR('4');
-
- /**
- * 折
- */
- TCHAR StrokesOrder::s_cZhe = TCHAR('5');
- /*
- * 构造函数
- */
- StrokesOrder::StrokesOrder( const TCHAR* szOrders )
- {
- VERIFY( Set( szOrders ) );
- }
- /*
- * 初始化
- */
- BOOL StrokesOrder::Set( const TCHAR* szOrders )
- {
- ASSERT( szOrders );
- m_DigitStroks = _T("");
-
- int len = _tcslen(szOrders);
- // 如果是空字符, 返回真
- if ( len <= 0 )
- return TRUE;
- if ( szOrders[0] < 0 ) // 笔顺方式
- {
- ASSERT( len % 2 == 0 );
- TCHAR *szDigitOrder = new TCHAR[len/2+1];
- for ( int i=0; i<len; i+=2 )
- {
- Word dwCur(szOrders[i], szOrders[i+1]);
- TCHAR cCur;
- if ( dwCur == s_Heng )
- cCur = TCHAR('1');
- else if ( dwCur == s_Shu )
- cCur = TCHAR('2');
- else if ( dwCur == s_Pie )
- cCur = TCHAR('3');
- else if ( dwCur == s_Na )
- cCur = TCHAR('4');
- else if ( dwCur == s_Zhe )
- cCur = TCHAR('5');
- else
- ASSERT( FALSE );
- szDigitOrder[i/2] = cCur;
- }
- szDigitOrder[len/2] = TCHAR(' ');
- m_DigitStroks = szDigitOrder;
- delete [] szDigitOrder;
- }
- else // 数字方式
- m_DigitStroks = szOrders;
- return TRUE;
- }
- /*
- * 以笔画得方式返回
- */
- CString StrokesOrder::GetAsStroks() const
- {
- int nLength = GetLength();
- if ( nLength <= 0 )
- return CString(_T(""));
- // 转换
- CString strStroks(TCHAR(' '), nLength*2+1);
-
- for ( int i=0; i<nLength; i++ )
- {
- TCHAR cLow, cHigh;
- switch ( m_DigitStroks[i] )
- {
- case TCHAR('1') :
- cLow = s_Heng.GetLow();
- cHigh = s_Heng.GetHigh();
- break;
- case TCHAR('2') :
- cLow = s_Shu.GetLow();
- cHigh = s_Shu.GetHigh();
- break;
- case TCHAR('3') :
- cLow = s_Pie.GetLow();
- cHigh = s_Pie.GetHigh();
- break;
- case TCHAR('4') :
- cLow = s_Na.GetLow();
- cHigh = s_Na.GetHigh();
- break;
- case TCHAR('5') :
- cLow = s_Zhe.GetLow();
- cHigh = s_Zhe.GetHigh();
- break;
- default :
- ASSERT( FALSE );
- break;
- }
- // set the current stroks
- strStroks.SetAt( i*2, cLow );
- strStroks.SetAt( i*2+1, cHigh );
- } // end of all stroks
- return strStroks;
- }
- //=============================================================================
- //
- // 拼音组
- //
- //=============================================================================
- const CString& SpellGroup::operator []( int i )
- {
- ASSERT( i>= 0 && i<GetCount() );
-
- int idx = 0;
- for ( StringList::iterator it=m_vSpell.begin();
- it!=m_vSpell.end() && idx<i;
- it++ ) ;
-
- return *it;
- }
- void SpellGroup::AddGroup( const TCHAR* szSpell )
- {
- CString strSpell = szSpell;
- if ( strSpell == _T("") ) return;
- int iCur = 0;
- int iNext = -1;
- while ( TRUE )
- {
- iNext=strSpell.Find(TCHAR(':'), iNext+1 );
- if ( iNext == -1 )
- {
- Add( strSpell.Mid( iCur, strSpell.GetLength()-iCur) );
- break;
- }
- Add( strSpell.Mid( iCur, iNext-iCur) );
- iCur = iNext+1;
- }
- }
- CString SpellGroup::GetAsString() const
- {
- CString str;
-
- BOOL bFirst = TRUE;
- const_iterator it;
- for ( it=begin(); it!=end(); it++ )
- {
- if ( bFirst )
- bFirst = FALSE;
- else
- str += _T(":");
- str += *it;
- }
-
- return str;
- }
- SpellGroup::operator CString() const
- {
- return GetAsString();
- }
- void SpellGroup::Add( const TCHAR* szSpell )
- {
- if ( !HaveSpell(szSpell) )
- m_vSpell.push_back( szSpell );
- }
- void SpellGroup::Remove( const TCHAR* szSpell )
- {
- StringList::iterator it = find( m_vSpell.begin(), m_vSpell.end(), szSpell );
- if ( it != m_vSpell.end() )
- m_vSpell.erase (it);
- }
- void SpellGroup::RemoveAll()
- {
- m_vSpell.clear();
- }
- //==========================================================================================
- //
- // 输入法
- //
- //==========================================================================================
- BOOL ChineseInputer::QueryWords ( WordEnumerator& enumer, Sorter* pSorter )
- {
- /**************************************************************************************
- * 过滤顺序
- * 1. 笔画数
- * 2. 部首
- * 3. 笔顺
- * 4. 拼音
- *
- **************************************************************************************/
- Inputer* pInputer = GetInputer();
- ASSERT( pInputer );
- QueryBroker qBroker;
- if ( pInputer->GetSpell() != _T("") )
- {
- qBroker.AddFilter( new SpellFilter(pInputer->GetSpell()), TRUE );
- // 如果是拼音则需要加入拼音前缀
- if ( pSorter )
- {
- if ( *pSorter == *GetSpellSort() )
- static_cast<SpellSort*>(pSorter)->SetPrefix( pInputer->GetSpell() );
- /*
- if ( *pSorter == *GetCompositeSort() )
- static_cast<CompositeSort*>(pSorter)->SetPrefix( pInputer->GetSpell() );
- */
- }// CompisitSort
- }
- if ( pInputer->GetStrokesOrder() != _T("") )
- qBroker.AddFilter(
- new StrokesOrderFilter(
- StrokesOrder(pInputer->GetStrokesOrder() ) ), TRUE );
- if ( pInputer->GetStrokes() != _T("") )
- qBroker.AddFilter( new StrokesFilter(pInputer->GetStrokes()), TRUE );
- if ( pInputer->GetBS() != _T("") )
- qBroker.AddFilter( new BSFilter(pInputer->GetBS()), TRUE );
- if ( !qBroker.QueryWords( enumer ) )
- return FALSE;
- // 排序
- if ( pSorter )
- pSorter->Sort( enumer );
- return TRUE;
- }
- /**
- * 构造函数
- */
- ChineseInputer::ChineseInputer()
- {
- }
- /**
- * 拼音输入码分隔符
- */
- const TCHAR ChineseInputer::Inputer::s_cSpellLeft = TCHAR('{');
- const TCHAR ChineseInputer::Inputer::s_cSpellRight = TCHAR('}');
- /**
- * 部首输入码分隔符
- */
- const TCHAR ChineseInputer::Inputer::s_cBSLeft = TCHAR('(');
- const TCHAR ChineseInputer::Inputer::s_cBSRight = TCHAR(')');
- /**
- * 笔画输入码分隔符
- */
- const TCHAR ChineseInputer::Inputer::s_cStrokesLeft = TCHAR('<');
- const TCHAR ChineseInputer::Inputer::s_cStrokesRight = TCHAR('>');
- /**
- * 笔顺输入码分隔符
- */
- const TCHAR ChineseInputer::Inputer::s_cStrokesOrderLeft = TCHAR('[');
- const TCHAR ChineseInputer::Inputer::s_cStrokesOrderRight = TCHAR(']');
- TCHAR ChineseInputer::Inputer::s_cDividTable[4][2] =
- {
- { s_cSpellLeft, s_cSpellRight },
- { s_cBSLeft, s_cBSRight },
- { s_cStrokesLeft, s_cStrokesRight },
- { s_cStrokesOrderLeft, s_cStrokesOrderRight }
- };
- /**
- * 空排序器
- */
- ChineseInputer::NullSort* ChineseInputer::GetNullSort()
- {
- static NullSort s_NullSort;
- return &s_NullSort;
- }
- /*
- * 拼音排序器
- */
- ChineseInputer::SpellSort* ChineseInputer::GetSpellSort()
- {
- static SpellSort s_SpellSort;
- return &s_SpellSort;
- }
- /*
- * 笔画排序器
- */
- ChineseInputer::StrokesSort* ChineseInputer::GetStrokesSort()
- {
- static StrokesSort s_StrokesSort;
- return &s_StrokesSort;
- }
- /*
- * 部首排序器
- */
- ChineseInputer::BSSort* ChineseInputer::GetBSSort()
- {
- static BSSort s_BSSort;
- return &s_BSSort;
- }
- /*
- * 笔顺排序器
- */
- ChineseInputer::StrokesOrderSort* ChineseInputer::GetStrokesOrderSort()
- {
- static StrokesOrderSort s_StrokesOrderSort;
- return &s_StrokesOrderSort;
- }
- ChineseInputer::CompositeSort* ChineseInputer::GetCompositeSort()
- {
- static CompositeSort sorter;
- return &sorter;
- }
- /*
- * 获得输入器
- */
- ChineseInputer::Inputer* ChineseInputer::GetInputer()
- {
- return &m_Inputer ;
- }
- /*
- * 输入器构造函数
- */
- ChineseInputer::Inputer::Inputer() : m_uMode( Input_Spell )
- {
- Reset();
- }
- /*
- * 设置当前模式
- */
- void ChineseInputer::Inputer::SetMode( int uMode )
- {
- ASSERT( uMode >= Input_Spell && uMode <= Input_StrokesOrder );
- m_strInputs += s_cDividTable[m_uMode][end];
- m_uMode = uMode;
- m_strInputs += s_cDividTable[m_uMode][begin];
- }
- int ChineseInputer::Inputer::GetMode() const
- {
- return m_uMode;
- }
- CString ChineseInputer::Inputer::GetInputs() const
- {
- return _T("");
- }
- void ChineseInputer::Inputer::Put( TCHAR c )
- {
- m_strInputs += c;
- }
- void ChineseInputer::Inputer::Put( const TCHAR* szChars )
- {
- m_strInputs += szChars;
- }
- void ChineseInputer::Inputer::Reset()
- {
- m_strInputs = s_cDividTable[m_uMode][begin];
- }
- CString ChineseInputer::Inputer::GetUnparse( ) const
- {
- CString strTemp = m_strInputs;
- strTemp += s_cDividTable[m_uMode][end];
-
- return strTemp;
- }
- CString ChineseInputer::Inputer::GetSpell() const
- {
- return InputParser(GetUnparse() ).GetSpell();
- }
- CString ChineseInputer::Inputer::GetBS() const
- {
- return InputParser( GetUnparse() ).GetBS();
- }
- CString ChineseInputer::Inputer::GetStrokesOrder() const
- {
- return InputParser( GetUnparse() ).GetStrokesOrder();
- }
- CString ChineseInputer::Inputer::GetStrokes() const
- {
- return InputParser( GetUnparse() ).GetStrokes();
- }
- ChineseInputer::Inputer::InputParser::InputParser( const CString& str ) :
- m_strForParse(str)
- {
- VERIFY( Parse( m_strForParse ) );
- }
- /**
- * 提取字符串
- */
- CString PickPhrase( const CString& str, TCHAR cEnd, int& index )
- {
- int nStart = index;
- while ( str[index] && str[index] != cEnd ) index++;
- ASSERT( str[index] );
- CString strCur = str.Mid( nStart+1, index- nStart -1 );
- index++;
- return strCur;
- }
- BOOL ChineseInputer::Inputer::InputParser::Parse( const CString& str )
- {
- int index = 0;
- int nLength = str.GetLength();
- while ( index < nLength )
- {
- switch ( str[index] )
- {
- case Inputer::s_cBSLeft :
- {
- m_strBS += PickPhrase( str, Inputer::s_cBSRight, index ); /*strCur*/;
- }
- break;
- case Inputer::s_cSpellLeft :
- {
- m_strSpell += PickPhrase( str, Inputer::s_cSpellRight, index );
- }
- break;
- case Inputer::s_cStrokesLeft :
- {
- m_strStrokes = PickPhrase( str, Inputer::s_cStrokesRight, index );
- }
- break;
- case Inputer::s_cStrokesOrderLeft :
- {
- m_strStrokesOrder += PickPhrase( str, Inputer::s_cStrokesOrderRight, index );
- }
- break;
- default :
- return FALSE;
- }
- }
- return TRUE;
- }
- //===================================================================================
- //
- // 查询过滤器
- //
- //===================================================================================
- /*
- * 构造函数
- */
- ChineseInputer::SpellFilter::SpellFilter( const TCHAR* szSpell ) : QueryFilter(szSpell)
- {
- }
- /*
- * 查询
- */
- BOOL ChineseInputer::SpellFilter::QueryWords( WordEnumerator& enumer )
- {
- return ChineseDictionary::GetSpellQuery()->QueryWords( GetFilter(), enumer );
- }
- /*
- * 验证字是否满足要求
- */
- BOOL ChineseInputer::SpellFilter::Pass( Word wd )
- {
- SpellGroup sg = GetComment(wd).GetSpellGroup( );
- SpellGroup::const_iterator it;
- for ( it=sg.begin(); it!=sg.end(); it++ )
- {
- if ( SpellContainTester::Contain( *it, GetFilter() ) )
- return TRUE;
- }
- return FALSE;
- }
- ChineseInputer::BSFilter::BSFilter( const TCHAR* szBS ) : QueryFilter(szBS)
- {
- }
- BOOL ChineseInputer::BSFilter::QueryWords( WordEnumerator& enumer )
- {
- return ChineseDictionary::GetBSQuery()->QueryWords( GetFilter(), enumer );
- }
- BOOL ChineseInputer::BSFilter::Pass( Word wd )
- {
- return GetComment(wd).GetBS() == GetFilter();
- }
- ChineseInputer::StrokesFilter::StrokesFilter( const TCHAR* szStrokes )
- : QueryFilter(szStrokes)
- {
- m_nStrokes = _ttoi(szStrokes);
- }
- BOOL ChineseInputer::StrokesFilter::QueryWords( WordEnumerator& enumer )
- {
- return ChineseDictionary::GetStrokesQuery()->QueryWords( GetFilter(), enumer );
- }
- BOOL ChineseInputer::StrokesFilter::Pass( Word wd )
- {
- return GetComment(wd).GetStrokes() == GetStrokes();
- }
- ChineseInputer::StrokesOrderFilter::StrokesOrderFilter( const StrokesOrder& sodr )
- : QueryFilter(sodr.GetAsDigits())
- {
- }
- BOOL ChineseInputer::StrokesOrderFilter::QueryWords( WordEnumerator& enumer )
- {
- return ChineseDictionary::GetStrokesOrderQuery()->QueryWords( GetFilter(), enumer );
- }
- BOOL ChineseInputer::StrokesOrderFilter::Pass( Word wd )
- {
- return StrokesOrderContainTester::Contain(
- GetComment(wd).GetStrokesOrder().GetAsDigits(),
- GetFilter() );
- }
- ChineseInputer::QueryBroker::~QueryBroker()
- {
- for ( unsigned int i=0; i<m_vFilters.size(); i++ )
- {
- if ( m_vDeletes[i] )
- delete m_vFilters[i];
- }
- }
- /*
- * QueryBroker
- */
- void ChineseInputer::QueryBroker::AddFilter( QueryFilter* pFilter, BOOL bAutoDel )
- {
- m_vFilters.push_back(pFilter);
- m_vDeletes.push_back( bAutoDel );
- }
- BOOL ChineseInputer::QueryBroker::QueryWords( WordEnumerator& enumer )
- {
- if ( m_vFilters.size() < 1 )
- return TRUE;
- // 只有一个过滤器
- if ( m_vFilters.size() == 1 )
- return m_vFilters[0]->QueryWords(enumer);
- // 多个过滤器
- WordEnumerator wdWhole;
- if ( !m_vFilters[0]->QueryWords(wdWhole) )
- return FALSE;
- WordEnumerator::iterator it;
- for ( it=wdWhole.begin(); it!=wdWhole.end(); it++ )
- {
- for ( unsigned int i=1; i<m_vFilters.size(); i++ )
- {
- if ( !m_vFilters[i]->Pass(*it) )
- break;
- }
- if ( i>=m_vFilters.size())
- enumer.push_back( *it );
- }
- return TRUE;
- }
- //==================================================================================
- // 排序器
- //==================================================================================
- struct LessBase
- {
- const ChineseDictionary::WordComment& GetComment( Word wd ) const
- {
- return ChineseDictionary::Get(wd).GetComment( );
- }
- };
- /**
- * 拼音比较器
- */
- struct SpellLess2 : public LessBase, public SpellLess
- {
- protected :
- /**
- * 拼音前缀
- */
- CString m_strPrefix;
- /**
- * 获得拼音
- */
- const CString& GetSpell( Word wd ) const
- {
- const SpellGroup& sg = GetComment(wd).GetSpellGroup();
- SpellGroup::const_iterator it;
- for ( it=sg.begin(); it!=sg.end(); it++ )
- {
- const CString& strSpell = *it;
- if ( SpellContainTester::Contain(strSpell, m_strPrefix) )
- return strSpell;
- }
- return m_strPrefix;
- }
- public :
- SpellLess2( const TCHAR* strPrefix=_T("") ) : m_strPrefix(strPrefix)
- {
- }
- BOOL operator()( const Word& wd1,
- const Word& wd2 ) const
- {
- // 缺省按照第一个拼音排序
- return SpellLess::operator()( GetSpell(wd1), GetSpell(wd2) );
- }
- };
- /**
- * 笔画比较器
- */
- struct StrokesLess2 : public LessBase, public StrokesLess
- {
- BOOL operator()( const Word& wd1,
- const Word& wd2 ) const
- {
- return StrokesLess::operator()( GetComment(wd1).GetStrokes(),
- GetComment(wd2).GetStrokes() );
- }
- };
- /**
- * 笔顺比较器
- */
- struct StrokesOrderLess2 : public LessBase, public StrokesOrderLess
- {
- BOOL operator()( const Word& wd1,
- const Word& wd2 ) const
- {
- // 缺省按照第一个拼音排序
- return StrokesOrderLess::operator()(
- GetComment(wd1).GetStrokesOrder().GetAsDigits(),
- GetComment(wd2).GetStrokesOrder().GetAsDigits() );
- }
- };
- /**
- * 部首比较器
- */
- struct BSLess2 : public LessBase, public BSLess
- {
- BOOL operator()( const Word& wd1,
- const Word& wd2 )
- {
- return BSLess::operator()( GetComment(wd1).GetBS(),
- GetComment(wd2).GetBS() );
- }
- };
- struct CompositeLess : //public LessBase,
- public BSLess,
- public StrokesOrderLess,
- public SpellLess2,
- public StrokesLess
- {
- typedef vector<int> Modes;
- CompositeLess(const Modes& vModes ) : m_vModes(vModes)
- {
- };
- BOOL operator()( const Word& wd1,
- const Word& wd2 )
- {
- Modes::iterator it;
- for ( it=m_vModes.begin(); it!=m_vModes.end(); it++ )
- {
- switch ( *it )
- {
- case ChineseInputer::Sorter::OrderBySpell :
- {
- if ( SpellLess::operator()( GetSpell(wd1),
- GetSpell(wd2) ) )
- return TRUE;
- return FALSE;
- }
- break;
- case ChineseInputer::Sorter::OrderByBS :
- {
- if ( BSLess::operator()( GetComment(wd1).GetBS(),
- GetComment(wd2).GetBS() ) )
- return TRUE;
- }
- break;
- case ChineseInputer::Sorter::OrderByStrokes :
- {
- if ( StrokesLess::operator()( GetComment(wd1).GetStrokes(),
- GetComment(wd2).GetStrokes() ) )
- return TRUE;
- }
- break;
- case ChineseInputer::Sorter::OrderByStrokesOrder :
- {
- if ( StrokesOrderLess::operator()( GetComment(wd1).GetStrokesOrder(),
- GetComment(wd2).GetStrokesOrder() ) )
- return TRUE;
- }
- break;
- default :
- break;
- }
- }
- return FALSE;
- }
- protected :
- Modes m_vModes;
- };
-
- /*
- * 按照拼音排序
- */
- void ChineseInputer::SpellSort::Sort( WordEnumerator& enumer )
- {
- enumer.sort( SpellLess2( GetPrefix( ) ) );
- }
- void ChineseInputer::BSSort::Sort( WordEnumerator& enumer )
- {
- enumer.sort( BSLess2() );
- }
- void ChineseInputer::StrokesSort::Sort( WordEnumerator& enumer )
- {
- enumer.sort( StrokesLess2() );
- }
- void ChineseInputer::StrokesOrderSort::Sort( WordEnumerator& enumer )
- {
- enumer.sort( StrokesOrderLess2() );
- }
- /**
- * 复合排序器
- */
- void ChineseInputer::CompositeSort::AddMode( int uMode )
- {
- if ( find( m_vModes.begin(), m_vModes.end(), uMode ) == m_vModes.end() )
- m_vModes.push_back( uMode );
- }
- void ChineseInputer::CompositeSort::ClearModes()
- {
- m_vModes.clear();
- }
- void ChineseInputer::CompositeSort::Sort(ChineseInputer::WordEnumerator& enumer )
- {
- enumer.sort( CompositeLess(m_vModes) );
- }
- //===================================================================================
- //
- // 汉字字典
- //
- //===================================================================================
- ChineseDictionary::WordTable ChineseDictionary::s_WordTable;
- /*
- * 非法词条
- */
- ChineseDictionary::WordItem ChineseDictionary::NullWordItemRef;
- /*
- * 查询汉字
- */
- ChineseDictionary::WordItemRef ChineseDictionary::Get( Word wdLookup )
- {
- WordTable::iterator it = s_WordTable.find( wdLookup );
- if ( it == s_WordTable.end() )
- return NullWordItemRef;
- return WordItem( it->first, &(it->second) );
- }
- /*
- * 清空所有词条
- */
- void ChineseDictionary::Clear( )
- {
- s_WordTable.clear( );
- }
- /*
- * 加入汉字
- */
- void ChineseDictionary::AddWord( Word wd, const WordComment& wc )
- {
- WordTable::iterator it = s_WordTable.find( wd );
- // 加入新字
- if ( it == s_WordTable.end() )
- {
- s_WordTable[wd] = wc;//WordItem( wd, wc );
- }
- else // 加入新得拼音
- {
- WordComment& wcHave = s_WordTable[wd];
- SpellGroup sg = wcHave.GetSpellGroup();
- sg.AddGroup ( wc.GetSpellGroup().GetAsString() );
-
- wcHave.SetSpellGroup( sg );
- }
- }
- /*
- * 从文件中装入字典
- */
- BOOL ChineseDictionary::Load( const TCHAR* szFile )
- {
- Clear();
- CFile file;
- if ( !file.Open( szFile, CFile::modeRead | CFile::typeBinary ) )
- return FALSE;
- TStreamAdapter<CFile> os(&file);
- // 读入大小
- int nSize;
- os >> nSize;
- // 读入所有条目
- Word wd;
- WordComment wc;
- for ( int i=0; i<nSize; i++ )
- {
- wd << os;
- wc << os;
- AddWord( wd, wc );
- } // end of all item
- file.Close();
- return TRUE;
- }
- ChineseDictionary::WordItem::operator BOOL() const
- {
- return *this != ChineseDictionary::NullWordItemRef;
- }
- /*
- * 将字典写入文件
- */
- BOOL ChineseDictionary::Save( const TCHAR* szFile )
- {
- CFile file;
- if ( !file.Open( szFile, CFile::modeCreate | CFile::modeWrite | CFile::typeBinary ) )
- return FALSE;
- TStreamAdapter<CFile> os(&file);
- // write the size
- int nSize = s_WordTable.size();
- os << nSize;
- WordTable::iterator it;
- for ( it=s_WordTable.begin(); it!=s_WordTable.end(); it++ )
- {
- // 保存字
- const Word& wd = it->first;
- wd >> os;
-
- // 保存注释
- const WordComment& wc = it->second;
- wc >> os;
- }
- // close the file
- file.Close();
- return TRUE;
- }
- /*
- * 部首查询器初始化
- */
- BOOL ChineseDictionary::QueryBase::Initialize( WordTable* pWordTbl )
- {
- m_pWordTbl = pWordTbl;
- return TRUE;
- }
- /*
- * 部首查询器销毁
- */
- void ChineseDictionary::QueryBase::Uninitialize( )
- {
- m_pWordTbl = NULL;
- }
- /*
- * 根据输入部首, 查询汉字
- */
- BOOL ChineseDictionary::BSQuery::QueryWords( const TCHAR* szBuShou,
- WordEnumerator& enumer )
- {
- if ( !m_pWordTbl )
- return FALSE;
- Word wdBS( szBuShou );
- // 查询满足条件的汉字
- WordTable::iterator it;
- for ( it=m_pWordTbl->begin(); it!=m_pWordTbl->end(); it++ )
- {
- const WordComment& wc = it->second;
- if ( wc.GetBS() == wdBS )
- enumer.push_back(it->first);
- }
- return TRUE;
- }
- /*
- * 根据输入部首, 查询汉字
- */
- BOOL ChineseDictionary::StrokesQuery::QueryWords( int nStrokes,
- WordEnumerator& enumer )
- {
- if ( !m_pWordTbl )
- return FALSE;
- // 查询满足条件的汉字
- WordTable::iterator it;
- for ( it=m_pWordTbl->begin(); it!=m_pWordTbl->end(); it++ )
- {
- const WordComment& wc = it->second;
- if ( wc.GetStrokes() == nStrokes )
- enumer.push_back(it->first);
- }
- return TRUE;
- }
- /*
- * 根据输入笔画, 查询汉字
- */
- BOOL ChineseDictionary::StrokesQuery::QueryWords( const TCHAR* szStrokes,
- WordEnumerator& enumer )
- {
- int nStrokes = _ttoi(szStrokes);
- return QueryWords( nStrokes, enumer );
- }
- /**
- * 字符串查询基类
- */
- void ChineseDictionary::StringQueryBase::AddMap( const TCHAR*szString, Word wd)
- {
- m_String2Words[szString].push_back( wd );
- }
- ChineseDictionary::StringQueryBase::String2Words&
- ChineseDictionary::StringQueryBase::GetMapTable( )
- {
- return m_String2Words;
- }
- BOOL ChineseDictionary::StringQueryBase::QueryWords( const TCHAR* szSpell,
- WordEnumerator& enumer )
- {
- String2Words::iterator it;
- for ( it=m_String2Words.begin(); it!=m_String2Words.end(); it++ )
- {
- const CString& strSpell = it->first;
-
- // 拷贝当前拼音包含得所有汉字
- if ( StringContainTester::Contain( strSpell, szSpell ) )
- {
- back_insert_iterator<WordEnumerator> bit(enumer);
-
- Words& words = it->second;
- copy( words.begin(), words.end(), bit );
- } // end of current words
- }
-
- return TRUE;
- }
- BOOL ChineseDictionary::StringQueryBase::Initialize( WordTable* pWordTbl )
- {
- if ( !QueryBase::Initialize(pWordTbl) )
- return FALSE;
- // 查询满足条件的汉字
- WordTable::iterator it;
- for ( it=m_pWordTbl->begin(); it!=m_pWordTbl->end(); it++ )
- InitWord( it->first, it->second );
- return TRUE;
- }
- void ChineseDictionary::StringQueryBase::Uninitialize( )
- {
- m_String2Words.clear();
- QueryBase::Uninitialize();
- }
- /*
- * 初始化查询器
- */
- BOOL ChineseDictionary::SpellQuery::Initialize( WordTable* pWordTable)
- {
- if ( !StringQueryBase::Initialize(pWordTable) )
- return FALSE;
- // 对所有得行排序
- for ( iterator it=begin(); it!=end(); it++ )
- {
- Words& words = *it;
- //sort( words.begin(), words.end(), StrokesLess2() );
- words.sort( StrokesLess2() );
- }
- return TRUE;
- }
- /**
- * 初始化字
- */
- void ChineseDictionary::SpellQuery::InitWord( Word wd,
- const WordComment& wc )
- {
- const SpellGroup& sg = wc.GetSpellGroup();
-
- // 处理所有的拼音
- SpellGroup::const_iterator itSg;
- for ( itSg=sg.begin( ); itSg!=sg.end(); itSg++ )
- AddMap( *itSg, wd ); // 多音字处理结束
- }
- /**
- * 初始化字
- */
- void ChineseDictionary::StrokesOrderQuery::InitWord( Word wd,
- const WordComment& wc )
- {
- AddMap( wc.GetStrokesOrder().GetAsDigits(), wd );
- }
- /*
- * 获得部首查询器
- */
- ChineseDictionary::BSQuery*
- ChineseDictionary::GetBSQuery()
- {
- return GetQuery( TypeTraits<BSQuery>() );
- }
- /*
- * 获得笔画查询器
- */
- ChineseDictionary::StrokesQuery*
- ChineseDictionary::GetStrokesQuery()
- {
- return GetQuery( TypeTraits<StrokesQuery>() );
- }
- /*
- * 获得拼音查询器
- */
- ChineseDictionary::SpellQuery*
- ChineseDictionary::GetSpellQuery()
- {
- return GetQuery( TypeTraits<SpellQuery>() );
- }
- ChineseDictionary::StrokesOrderQuery*
- ChineseDictionary::GetStrokesOrderQuery()
- {
- return GetQuery( TypeTraits<StrokesOrderQuery>() );
- }
- //==============================================================================
- //
- // 生成字典
- //
- //==============================================================================
- BOOL ChineseDictionaryMaker::Make( const TCHAR* szSpell,
- const TCHAR* szBS,
- const TCHAR* szStrokes )
- {
- WordTable wdTbl;
- if ( !ProcessSpell( szSpell, wdTbl ) ||
- !ProcessBS(szBS, wdTbl ) ||
- !ProcessStrokes(szStrokes, wdTbl ) )
- return FALSE;
-
- // 清空
- ChineseDictionary::Clear();
- // 逐条加入到字典
- WordTable::iterator it;
- for ( it=wdTbl.begin(); it!=wdTbl.end(); it++ )
- ChineseDictionary::AddWord( it->first, it->second );
- return TRUE;
- }
- /*
- * 处理拼音
- */
- BOOL ChineseDictionaryMaker::ProcessSpell( const TCHAR* szSpell, WordTable& wtbl )
- {
- CStdioFile file;
-
- if ( !file.Open( szSpell, CStdioFile::modeRead ) )
- return FALSE;
-
- CString strLine; //当前行
- CString strSpell; // 当前拼音
- TCHAR strWord[3]; // 当前字
-
- // 依次处理所有汉字
- while ( true )
- {
- // 读入字
- if ( !file.ReadString(strLine) )
- break;
-
- ASSERT( strLine[0] == TCHAR('<') );
- strSpell = strLine.Mid( 1, strLine.Find(TCHAR('>'), 1)-1 );
-
- // 该拼音得所有汉字
- file.ReadString(strLine);
-
- int nLineLen = strLine.GetLength();
- for ( int i=0; i<nLineLen; i+= 3 )
- {
- strWord[0] = strLine[i];
- strWord[1] = strLine[i+1];
- strWord[2] = TCHAR(' ');
- // 加入新的汉字拼音
- ChineseDictionary::WordComment& wc = wtbl[Word(strWord)];
- SpellGroup sg = wc.GetSpellGroup();
- sg.Add( strSpell );
- wc.SetSpellGroup(sg);
- }
- } // 所有汉字处理完成
-
- file.Close();
- return TRUE;
- };
- /**
- * 处理部首
- */
- BOOL ChineseDictionaryMaker::ProcessBS( const TCHAR* szBS, WordTable& wtbl )
- {
- CStdioFile file;
-
- if ( !file.Open(szBS, CStdioFile::modeRead ) )
- return FALSE;
- /*
- CStdioFile bsFile;
- bsFile.Open( _T("f:\bs.txt"),CStdioFile::modeCreate | CStdioFile::modeWrite );
- */
- CString strLine; //当前行
- CString strBS; // 当前部首
- TCHAR strWord[3]; // 当前字
-
- while ( true )
- {
- if ( !file.ReadString(strLine ) )
- break;
- ASSERT( strLine[0] == TCHAR('<') );
- // 部首
- strBS = strLine.Mid( 1, 2 );
- if ( strBS == _T("左") || strBS == _T("右") )
- strBS = strLine.Mid( 3, 2 );
- Word wdBS( strBS );
- /*
- bsFile.WriteString( _T("BSWord(_T("") );
- bsFile.WriteString( strBS );
- bsFile.WriteString( _T(""), 1, _T("" ) );
- bsFile.WriteString( strLine );
- bsFile.WriteString( _T("") ),n") );
- */
-
- // 该部首所有汉字
- file.ReadString(strLine);
-
- int nLineLen = strLine.GetLength();
- for ( int i=0; i<nLineLen; i+= 3 )
- {
- strWord[0] = strLine[i];
- strWord[1] = strLine[i+1];
- strWord[2] = TCHAR(' ');
-
- // 加入汉字部首
- ChineseDictionary::WordComment& wc = wtbl[Word(strWord)];
- wc.SetBS( wdBS );
- } // end of all word
- }
-
- file.Close();
- /*
- bsFile.Close();
- */
- return TRUE;
- };
- /**
- * 处理笔画
- */
- BOOL ChineseDictionaryMaker::ProcessStrokes( const TCHAR* szStrokes, WordTable& wtbl )
- {
- CStdioFile file;
-
- if ( !file.Open( szStrokes, CStdioFile::modeRead ) )
- return FALSE;
-
- CString strLine; //当前行
- TCHAR strWord[3]; // 当前字
- CString strOrders; // 笔顺
- CString strStrokes; // 笔画数
-
- // 依次处理所有汉字
- while ( true )
- {
- // 读入字
- if ( !file.ReadString(strLine) )
- break;
-
- ASSERT( strLine[0] == TCHAR('<') && strLine[3] == TCHAR('>') );
-
- strWord[0] = strLine[1];
- strWord[1] = strLine[2];
- strWord[2] = TCHAR(' ');
-
- file.ReadString(strLine);
-
- // 笔画数
- int idxSpace = strLine.Find(TCHAR(' '));
- strStrokes = strLine.Left( idxSpace );
- int nStroks = _ttoi(strStrokes);
-
-
- // 笔顺
- CString strTemp = strLine.Right( strLine.GetLength() - idxSpace );
- strTemp.TrimLeft();
- strOrders = strTemp.Left( strTemp.Find(TCHAR(' ')) );
-
- // 加入汉字
- ChineseDictionary::WordComment& wc = wtbl[Word(strWord)];
- wc.SetStrokes( (unsigned short)nStroks);
- wc.SetStrokesOrder( StrokesOrder(strOrders) );
- } // 所有汉字处理完成
-
- file.Close();
-
- return TRUE;
- };
- /**
- * 部首表
- */
- ChineseDictionary::BSWord ChineseDictionary::s_BSWordTable[] =
- {
- BSWord(_T("一"), 1, _T("一部") ),
- BSWord(_T("丨"), 1, _T("丨部") ),
- BSWord(_T("丿"), 1, _T("丿部") ),
- BSWord(_T("丶"), 1, _T("丶部") ),
- BSWord(_T("乙"), 1, _T("乙(乛乚)部") ),
- BSWord(_T("二"), 1, _T("二部") ),
- BSWord(_T("十"), 1, _T("十部") ),
- BSWord(_T("厂"), 1, _T("厂部") ),
- BSWord(_T("匚"), 1, _T("匚部") ),
- BSWord(_T("刂"), 1, _T("刂部") ),
- BSWord(_T("卜"), 1, _T("卜部") ),
- BSWord(_T("冂"), 1, _T("冂部") ),
- BSWord(_T("亻"), 1, _T("亻部") ),
- BSWord(_T("八"), 1, _T("八部") ),
- BSWord(_T("人"), 1, _T("人(入)部") ),
- BSWord(_T("勹"), 1, _T("勹部") ),
- BSWord(_T("几"), 1, _T("几部") ),
- BSWord(_T("儿"), 1, _T("儿部") ),
- BSWord(_T("亠"), 1, _T("亠部") ),
- BSWord(_T("冫"), 1, _T("冫部") ),
- BSWord(_T("冖"), 1, _T("冖部") ),
- BSWord(_T("讠"), 1, _T("讠(言)部") ),
- BSWord(_T("卩"), 1, _T("卩究") ),
- BSWord(_T("阝"), 1, _T("左阝部") ),
- BSWord(_T("阝"), 1, _T("右阝部") ),
- BSWord(_T("凵"), 1, _T("凵部") ),
- BSWord(_T("刀"), 1, _T("刀究") ),
- BSWord(_T("力"), 1, _T("力部") ),
- BSWord(_T("厶"), 1, _T("厶部") ),
- BSWord(_T("又"), 1, _T("又究") ),
- BSWord(_T("廴"), 1, _T("廴部") ),
- BSWord(_T("工"), 1, _T("工部") ),
- BSWord(_T("土"), 1, _T("土部") ),
- BSWord(_T("士"), 1, _T("士部") ),
- BSWord(_T("扌"), 1, _T("扌部") ),
- BSWord(_T("艹"), 1, _T("艹部") ),
- BSWord(_T("寸"), 1, _T("寸部") ),
- BSWord(_T("廾"), 1, _T("廾部") ),
- BSWord(_T("大"), 1, _T("大部") ),
- BSWord(_T("尢"), 1, _T("尢部") ),
- BSWord(_T("弋"), 1, _T("弋部") ),
- BSWord(_T("小"), 1, _T("小究") ),
- BSWord(_T("口"), 1, _T("口部") ),
- BSWord(_T("囗"), 1, _T("囗部") ),
- BSWord(_T("巾"), 1, _T("巾部") ),
- BSWord(_T("山"), 1, _T("山部") ),
- BSWord(_T("彳"), 1, _T("彳部") ),
- BSWord(_T("彡"), 1, _T("彡部") ),
- BSWord(_T("犭"), 1, _T("犭部") ),
- BSWord(_T("夕"), 1, _T("夕部") ),
- BSWord(_T("夂"), 1, _T("夂部") ),
- BSWord(_T("饣"), 1, _T("饣(食)部") ),
- BSWord(_T("广"), 1, _T("广部") ),
- BSWord(_T("忄"), 1, _T("忄究") ),
- BSWord(_T("门"), 1, _T("门(門)部") ),
- BSWord(_T("氵"), 1, _T("氵部") ),
- BSWord(_T("宀"), 1, _T("宀部") ),
- BSWord(_T("辶"), 1, _T("辶部") ),
- BSWord(_T("彐"), 1, _T("彐(彑)部") ),
- BSWord(_T("尸"), 1, _T("尸部") ),
- BSWord(_T("己"), 1, _T("己(巳)部") ),
- BSWord(_T("弓"), 1, _T("弓部") ),
- BSWord(_T("子"), 1, _T("子(孑)部") ),
- BSWord(_T("屮"), 1, _T("屮部") ),
- BSWord(_T("女"), 1, _T("女部") ),
- BSWord(_T("纟"), 1, _T("纟(糹)部") ),
- BSWord(_T("马"), 1, _T("马(馬)部") ),
- BSWord(_T("幺"), 1, _T("幺部") ),
- BSWord(_T("巛"), 1, _T("巛部") ),
- BSWord(_T("王"), 1, _T("王部") ),
- BSWord(_T("韦"), 1, _T("韦(韋)部") ),
- BSWord(_T("木"), 1, _T("木部") ),
- BSWord(_T("犬"), 1, _T("犬部") ),
- BSWord(_T("歹"), 1, _T("歹部") ),
- BSWord(_T("车"), 1, _T("车(車)部") ),
- BSWord(_T("戈"), 1, _T("戈部") ),
- BSWord(_T("比"), 1, _T("比部") ),
- BSWord(_T("瓦"), 1, _T("瓦部") ),
- BSWord(_T("止"), 1, _T("止部") ),
- BSWord(_T("攴"), 1, _T("攴部") ),
- BSWord(_T("日"), 1, _T("日部") ),
- BSWord(_T("曰"), 1, _T("曰部") ),
- BSWord(_T("水"), 1, _T("水(氺)部") ),
- BSWord(_T("贝"), 1, _T("贝(貝)部") ),
- BSWord(_T("见"), 1, _T("见(見)部") ),
- BSWord(_T("牛"), 1, _T("牛(牜)部") ),
- BSWord(_T("手"), 1, _T("手部") ),
- BSWord(_T("毛"), 1, _T("毛部") ),
- BSWord(_T("气"), 1, _T("气部") ),
- BSWord(_T("攵"), 1, _T("攵部") ),
- BSWord(_T("片"), 1, _T("片部") ),
- BSWord(_T("斤"), 1, _T("斤部") ),
- BSWord(_T("爪"), 1, _T("爪(爫)部") ),
- BSWord(_T("父"), 1, _T("父部") ),
- BSWord(_T("月"), 1, _T("月部") ),
- BSWord(_T("欠"), 1, _T("欠部") ),
- BSWord(_T("风"), 1, _T("风(風)部") ),
- BSWord(_T("殳"), 1, _T("殳部") ),
- BSWord(_T("文"), 1, _T("文部") ),
- BSWord(_T("方"), 1, _T("方部") ),
- BSWord(_T("火"), 1, _T("火部") ),
- BSWord(_T("斗"), 1, _T("斗部") ),
- BSWord(_T("灬"), 1, _T("灬部") ),
- BSWord(_T("户"), 1, _T("户部") ),
- BSWord(_T("礻"), 1, _T("礻(示)部") ),
- BSWord(_T("心"), 1, _T("心部") ),
- BSWord(_T("肀"), 1, _T("肀(聿)部") ),
- BSWord(_T("爿"), 1, _T("爿部") ),
- BSWord(_T("毋"), 1, _T("毋(母)部") ),
- BSWord(_T("示"), 1, _T("示(见礻)部") ),
- BSWord(_T("石"), 1, _T("石部") ),
- BSWord(_T("龙"), 1, _T("龙(龍)部") ),
- BSWord(_T("业"), 1, _T("业部") ),
- BSWord(_T("目"), 1, _T("目部") ),
- BSWord(_T("田"), 1, _T("田部") ),
- BSWord(_T("罒"), 1, _T("罒部") ),
- BSWord(_T("皿"), 1, _T("皿部") ),
- BSWord(_T("钅"), 1, _T("钅(金)部") ),
- BSWord(_T("矢"), 1, _T("矢部") ),
- BSWord(_T("禾"), 1, _T("禾部") ),
- BSWord(_T("白"), 1, _T("白部") ),
- BSWord(_T("瓜"), 1, _T("瓜部") ),
- BSWord(_T("用"), 1, _T("用部") ),
- BSWord(_T("鸟"), 1, _T("鸟(鳥)部") ),
- BSWord(_T("疒"), 1, _T("疒部") ),
- BSWord(_T("立"), 1, _T("立部") ),
- BSWord(_T("穴"), 1, _T("穴部") ),
- BSWord(_T("衤"), 1, _T("衤部") ),
- BSWord(_T("疋"), 1, _T("疋部") ),
- BSWord(_T("皮"), 1, _T("皮部") ),
- BSWord(_T("癶"), 1, _T("癶部") ),
- BSWord(_T("矛"), 1, _T("矛部") ),
- BSWord(_T("耒"), 1, _T("耒部") ),
- BSWord(_T("老"), 1, _T("老部") ),
- BSWord(_T("耳"), 1, _T("耳部") ),
- BSWord(_T("臣"), 1, _T("臣部") ),
- BSWord(_T("西"), 1, _T("西(覀)部") ),
- BSWord(_T("页"), 1, _T("页(頁)部") ),
- BSWord(_T("虍"), 1, _T("虍部") ),
- BSWord(_T("虫"), 1, _T("虫部") ),
- BSWord(_T("缶"), 1, _T("缶部") ),
- BSWord(_T("舌"), 1, _T("舌部") ),
- BSWord(_T("竹"), 1, _T("竹部") ),
- BSWord(_T("臼"), 1, _T("臼部") ),
- BSWord(_T("自"), 1, _T("自部") ),
- BSWord(_T("血"), 1, _T("血部") ),
- BSWord(_T("舟"), 1, _T("舟部") ),
- BSWord(_T("衣"), 1, _T("衣部") ),
- BSWord(_T("羊"), 1, _T("羊究") ),
- BSWord(_T("米"), 1, _T("米部") ),
- BSWord(_T("艮"), 1, _T("艮部") ),
- BSWord(_T("羽"), 1, _T("羽部") ),
- BSWord(_T("糸"), 1, _T("糸部") ),
- BSWord(_T("麦"), 1, _T("麦(麥)部") ),
- BSWord(_T("走"), 1, _T("走部") ),
- BSWord(_T("赤"), 1, _T("赤部") ),
- BSWord(_T("豆"), 1, _T("豆部") ),
- BSWord(_T("酉"), 1, _T("酉部") ),
- BSWord(_T("辰"), 1, _T("辰部") ),
- BSWord(_T("豕"), 1, _T("豕部") ),
- BSWord(_T("卤"), 1, _T("卤(鹵)部") ),
- BSWord(_T("里"), 1, _T("里部") ),
- BSWord(_T("足"), 1, _T("足部") ),
- BSWord(_T("身"), 1, _T("身部") ),
- BSWord(_T("采"), 1, _T("采部") ),
- BSWord(_T("谷"), 1, _T("谷部") ),
- BSWord(_T("豸"), 1, _T("豸部") ),
- BSWord(_T("角"), 1, _T("角部") ),
- BSWord(_T("言"), 1, _T("言部") ),
- BSWord(_T("辛"), 1, _T("辛部") ),
- BSWord(_T("青"), 1, _T("青部") ),
- BSWord(_T("其"), 1, _T("其部") ),
- BSWord(_T("雨"), 1, _T("雨部") ),
- BSWord(_T("齿"), 1, _T("齿(齒)部") ),
- BSWord(_T("黾"), 1, _T("黾(黽)部") ),
- BSWord(_T("隹"), 1, _T("隹部") ),
- BSWord(_T("金"), 1, _T("金部") ),
- BSWord(_T("鱼"), 1, _T("鱼(魚)部") ),
- BSWord(_T("革"), 1, _T("革部") ),
- BSWord(_T("骨"), 1, _T("骨部") ),
- BSWord(_T("鬼"), 1, _T("鬼部") ),
- BSWord(_T("食"), 1, _T("食部") ),
- BSWord(_T("音"), 1, _T("音部") ),
- BSWord(_T("鬥"), 1, _T("鬥部") ),
- BSWord(_T("髟"), 1, _T("髟部") ),
- BSWord(_T("麻"), 1, _T("麻部") ),
- BSWord(_T("鹿"), 1, _T("鹿部") ),
- BSWord(_T("黑"), 1, _T("黑部") ),
- BSWord(_T("鼠"), 1, _T("鼠部") ),
- BSWord(_T("鼻"), 1, _T("鼻部") )
- };
- ChineseDictionary::BSWords::iterator ChineseDictionary::BSWords::begin() const
- {
- return &ChineseDictionary::s_BSWordTable[0];
- }
- ChineseDictionary::BSWords::iterator ChineseDictionary::BSWords::end() const
- {
- return &ChineseDictionary::s_BSWordTable[size()];
- }
- int ChineseDictionary::BSWords::size() const
- {
- return sizeof(ChineseDictionary::s_BSWordTable) /
- sizeof(ChineseDictionary::s_BSWordTable[0]);
- }
- }