Span.h
上传用户:chen_dj
上传日期:2013-04-22
资源大小:111k
文件大小:3k
- /****************************************************************************
- *
- * Copyright (c) 2000, 2001
- * Machine Group
- * Software Research Lab.
- * Institute of Computing Tech.
- * Chinese Academy of Sciences
- * All rights reserved.
- *
- * This file is the confidential and proprietary property of
- * Institute of Computing Tech. and the posession or use of this file requires
- * a written license from the author.
- * Filename: Span.h:
- * Abstract:
- * interface for the CSpan class.
- * Author: Kevin Zhang
- * (zhanghp@software.ict.ac.cn)
- * Date: 2002-4-23
- *
- * Notes: Tagging with Hidden Markov Model
- *
- ****************************************************************************/
- #if !defined(AFX_SPAN_H__178113DA_8D45_4D47_B6DA_CB62C001BC35__INCLUDED_)
- #define AFX_SPAN_H__178113DA_8D45_4D47_B6DA_CB62C001BC35__INCLUDED_
- #if _MSC_VER > 1000
- #pragma once
- #endif // _MSC_VER > 1000
- #include "..\Utility\Dictionary.h"
- #include "..\Utility\ContextStat.h"
- #include "..\Segment\DynamicArray.h"
- #define MAX_WORDS_PER_SENTENCE 120
- #define MAX_UNKNOWN_PER_SENTENCE 200
- #define MAX_POS_PER_WORD 20
- #define LITTLE_FREQUENCY 6
- enum TAG_TYPE{
- TT_NORMAL,
- TT_PERSON,
- TT_PLACE,
- TT_TRANS
- };
- class CSpan
- {
- public:
- bool TransRecognize(CDictionary &dictCore,CDictionary &transDict);
- bool PlaceRecognize(CDictionary &dictCore,CDictionary &placeDict);
- bool PersonRecognize(CDictionary &personDict);
- bool POSTagging(PWORD_RESULT pWordItems,CDictionary &dictCore,CDictionary &dictUnknown);
- //POS tagging with Hidden Markov Model
- void SetTagType(enum TAG_TYPE nType=TT_NORMAL);
- //Set the tag type
- bool LoadContext(char *sFilename);
- CSpan();//CDictionary &dict
- virtual ~CSpan();
- int m_nUnknownIndex;
- //The number of unknown word
- int m_nUnknownWords[MAX_UNKNOWN_PER_SENTENCE][2];
- //The start and ending possition of unknown position
- ELEMENT_TYPE m_dWordsPossibility[MAX_UNKNOWN_PER_SENTENCE];
- //The possibility of unknown words
- CContextStat m_context;//context
- protected:
- ELEMENT_TYPE ComputePossibility(int nStartPos,int nLength,CDictionary &dict);
- int GetFrom(PWORD_RESULT pWordItems,int nIndex,CDictionary &dictCore,CDictionary &dictUnknown);
- //Get words from the word items, start from nIndex, Function for unknown words recognition
- bool GuessPOS(int nIndex,int *pSubIndex);
- bool GetBestPOS();
- bool Reset(bool bContinue=true);
- bool UnknownMatch();
- bool SplitPersonPOS(CDictionary &unlistDict);
- bool Disamb();
- private:
- enum TAG_TYPE m_tagType;//The type of tagging
- int m_nStartPos;
-
- int m_nBestTag[MAX_WORDS_PER_SENTENCE];
- //Record the Best Tag
- char m_sWords[MAX_WORDS_PER_SENTENCE][WORD_MAXLENGTH];
- int m_nWordPosition[MAX_WORDS_PER_SENTENCE];
- int m_nTags[MAX_WORDS_PER_SENTENCE][MAX_POS_PER_WORD];
- char m_nBestPrev[MAX_WORDS_PER_SENTENCE][MAX_POS_PER_WORD];
- char m_nCurLength;
- double m_dFrequency[MAX_WORDS_PER_SENTENCE][MAX_POS_PER_WORD];
- };
- #endif // !defined(AFX_SPAN_H__178113DA_8D45_4D47_B6DA_CB62C001BC35__INCLUDED_)