mail.cpp
资源名称:Knn.rar [点击查看]
上传用户:xmhs66
上传日期:2022-07-26
资源大小:989k
文件大小:2k
源码类别:
生物技术
开发平台:
Visual C++
- /*************************************************************************
- * * filename: mail.cpp
- * * description: Mail类实现:从文件中解析数据,以欧式距离或余弦计算相似度
- * * student: Liwanjun
- * * date: 2010-03-29
- * **********************************************************************/
- #include "mail.h"
- #include <cstdlib>
- #include <cmath>
- #include <iostream>
- #include <fstream>
- #include <algorithm>
- using std::string;
- using std::ofstream;
- using std::endl;
- const string delim(",");
- //初始化Mail对象,读取文件的每一行,解析并保存维度值和分类值
- void Mail::initialize(string rec)
- {
- recstr = rec;
- string::size_type begIdx = 0, endIdx = 0;
- int count = 0;
- int i = 1;
- string tempstr;
- while(true)
- {
- endIdx = rec.find_first_of(delim, begIdx);
- if(endIdx == string::npos)
- {
- endIdx = rec.length();
- break;
- }
- tempstr = rec.substr(begIdx, endIdx - begIdx);
- dimvec.push_back(atof(tempstr.c_str()));
- begIdx = endIdx + 1;
- }
- tempstr = rec.substr(begIdx, endIdx - begIdx);
- spam = atof(tempstr.c_str());
- }
- //构造Mail对象,由于测试集没有分类属性,因此用istest来区分不同的初始化行为
- //istest = 1表示测试集
- Mail::Mail(string rec, bool istest)
- {
- initialize(rec);
- if(istest)
- {
- dimvec.push_back(spam);
- spam = 0;
- }
- }
- //当前Mail对象与指定Mail对象间的欧氏距离
- double Mail::euclidDis(const Mail& mail)
- {
- double total = 0.;
- for(int i = 0; i < dimvec.size(); i++)
- {
- total += (dimvec[i] - mail.dimvec[i]) *
- (dimvec[i] - mail.dimvec[i]);
- }
- double dis = sqrt(total);
- return dis;
- }
- //当前Mail对象与指定Mail对象间的向量夹角余弦
- double Mail::cosin(const Mail& mail)
- {
- double cos = 0., inner = 0., norm = 0., tempx = 0., tempy = 0.;
- for(int i = 0; i < dimvec.size(); i++)
- {
- inner += dimvec[i] * mail.dimvec[i];
- tempx += dimvec[i] * dimvec[i];
- tempy += mail.dimvec[i] * mail.dimvec[i];
- }
- norm = sqrt(tempx) * sqrt(tempy);
- cos = inner / norm;
- return 1 - cos;
- }