DefaultSplitter.cs
资源名称:2.rar [点击查看]
上传用户:hshongkong
上传日期:2021-11-20
资源大小:10241k
文件大小:2k
源码类别:
多国语言处理
开发平台:
C#
- using System;
- using System.Collections.Generic;
- using System.Text;
- using System.IO;
- ////该源码首发自win.51aspx.com(51aspx.com)
- namespace ChineseSplitter
- {
/// <summary>
/// Chinese word segmenter. Every instance delegates to one <c>NICTCLAS</c>
/// engine that is shared through a static field.
/// </summary>
public class DefaultSplitter : BaseSplitter
{
    // Shared engine: created by the first constructor call that finds the field
    // empty and released by Dispose(). No synchronization is performed around it.
    static NICTCLAS nictclas;

    /// <summary>
    /// Creates a splitter, building the shared engine when none exists yet.
    /// The engine is configured with <c>eOperateType.FirstTag</c> and
    /// <c>eOutputFormat.PKU</c>.
    /// </summary>
    /// <param name="wordFilePath">
    /// Directory of the Chinese word dictionaries. It is only read when the
    /// shared engine has to be created.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="wordFilePath"/> is null and the shared engine has to be created.
    /// </exception>
    public DefaultSplitter(DirectoryInfo wordFilePath)
    {
        if (nictclas != null)
        {
            // An engine already exists; the argument is not consulted.
            return;
        }

        if (wordFilePath == null)
        {
            throw new ArgumentNullException("wordFilePath");
        }

        // Build and configure in a local so that a failure while configuring
        // never leaves a half-initialized engine in the static field.
        // Exceptions propagate unchanged, with their original stack trace.
        NICTCLAS engine = new NICTCLAS(wordFilePath.FullName);
        try
        {
            engine.OperateType = (global::eOperateType)eOperateType.FirstTag;
            engine.OutputFormat = (global::eOutputFormat)eOutputFormat.PKU;
        }
        catch
        {
            engine.Dispose();
            throw;
        }

        nictclas = engine;
    }

    /// <summary>
    /// Runs the shared engine's <c>ParagraphProcessing</c> on the given text.
    /// </summary>
    /// <param name="inputStr">Text to process; passed to the engine as is.</param>
    /// <returns>The string the engine writes into its <c>ref</c> result argument.</returns>
    /// <exception cref="ObjectDisposedException">
    /// The shared engine has been released by <see cref="Dispose"/>.
    /// </exception>
    public override string Split(string inputStr)
    {
        NICTCLAS engine = nictclas;
        if (engine == null)
        {
            throw new ObjectDisposedException("DefaultSplitter");
        }

        string result = string.Empty;
        engine.ParagraphProcessing(inputStr, ref result);
        return result;
    }

    /// <summary>
    /// Releases the shared engine. Calling it again is a no-op, and the next
    /// constructor call creates a fresh engine.
    /// </summary>
    public override void Dispose()
    {
        NICTCLAS engine = nictclas;
        if (engine == null)
        {
            return;
        }

        // Clear the field first so a disposed engine is never reused,
        // even if the engine's own Dispose() fails.
        nictclas = null;
        engine.Dispose();
    }
}
/// <summary>
/// Tagging level requested from the segmentation engine.
/// DefaultSplitter casts these members to the engine's own
/// <c>global::eOperateType</c>, so the numeric values are spelled out.
/// </summary>
public enum eOperateType : int
{
    /// <summary>Word segmentation only.</summary>
    OnlySegment = 0,

    /// <summary>First-level tagging.</summary>
    FirstTag = 1,

    /// <summary>Second-level tagging.</summary>
    SecondTag = 2
}
/// <summary>
/// Output format requested from the segmentation engine.
/// DefaultSplitter casts these members to the engine's own
/// <c>global::eOutputFormat</c>, so the numeric values are spelled out.
/// </summary>
public enum eOutputFormat : int
{
    /// <summary>Peking University (PKU) tagging.</summary>
    PKU = 0,

    /// <summary>973 tagging.</summary>
    _973 = 1,

    /// <summary>XML format.</summary>
    XML = 2
}
- }