XlsxDocument.cs
上传用户:huiyue
上传日期:2022-04-08
资源大小:1429k
文件大小:4k
源码类别:

搜索引擎

开发平台:

ASP/ASPX

  1. using System;
  2. using System.IO;
  3. using System.Xml;
  4. using ionic.utils.zip;
  5. namespace Searcharoo.Common
  6. {
  /// <summary>
  /// Loads a Microsoft Excel 2007 (.xlsx) file and collects the text of the
  /// "v" (cell value) elements of its worksheets into <see cref="WordsOnly"/>.
  /// </summary>
  /// <remarks>
  /// <see cref="DocxDocument" />
  ///
  /// Xlsx...
  /// http://www.gemboxsoftware.com/Excel2007/DemoApp.htm
  /// </remarks>
  public class XlsxDocument : DownloadDocument
  {
      /// <summary>Text gathered from the worksheets by <see cref="GetResponse"/>; null until then.</summary>
      private string _WordsOnly;

      /// <summary>
      /// Creates the document for the given location and sets the file extension to "xlsx".
      /// </summary>
      /// <param name="location">Address of the workbook; passed to the base constructor.</param>
      public XlsxDocument(Uri location)
          : base(location)
      {
          Extension = "xlsx";
      }

      /// <summary>
      /// Intentionally does nothing: no link extraction is performed for workbooks.
      /// </summary>
      public override void Parse()
      {
          // no parsing (for now). perhaps in future we can regex look for urls (www.xxx.com) and try to link to them...
      }

      /// <summary>
      /// The cell text collected by <see cref="GetResponse"/>, one blank-line-separated
      /// chunk per worksheet.
      /// </summary>
      public override string WordsOnly
      {
          get { return _WordsOnly; }
      }

      /// <summary>
      /// Saves the response to a temporary file, reads it as a zip archive and
      /// extracts the worksheet cell values into <see cref="WordsOnly"/> and All.
      /// </summary>
      /// <param name="webresponse">Response that is handed to SaveDownloadedFile.</param>
      /// <returns>
      /// true when some text was extracted (All is neither null nor empty) and the
      /// Description has been set; otherwise false.
      /// </returns>
      /// <remarks>
      /// .NET System.IO.Compression and zip files
      /// http://blogs.msdn.com/dotnetinterop/archive/2006/04/05/.NET-System.IO.Compression-and-zip-files.aspx
      /// </remarks>
      public override bool GetResponse(System.Net.HttpWebResponse webresponse)
      {
          string filename = System.IO.Path.Combine(
                        Preferences.DownloadedTempFilePath
                      , System.IO.Path.GetFileName(this.Uri.LocalPath));
          this.Title = System.IO.Path.GetFileNameWithoutExtension(filename);
          SaveDownloadedFile(webresponse, filename);

          try
          {
              ExtractWorkbookText(filename);
          }
          catch (Exception ex)
          {
              // Best effort: a corrupt or unexpected archive must not abort the caller.
              // Text gathered from sheets read before the failure is kept.
              Console.WriteLine(ex.Message);
          }
          finally
          {
              // Always remove the temporary copy, whether or not extraction succeeded.
              try
              {
                  System.IO.File.Delete(filename);    // clean up
              }
              catch (Exception deleteError)
              {
                  // A failed delete is not fatal for this document; report and continue.
                  Console.WriteLine(deleteError.Message);
              }
          }

          // All stays unset when nothing could be read, so test for null as well as
          // empty; otherwise a failed extraction would be reported as success.
          if (string.IsNullOrEmpty(this.All))
          {
              return false;
          }

          this.Description = base.GetDescriptionFromWordsOnly(WordsOnly);
          return true;
      }

      /// <summary>
      /// Reads the workbook archive and appends the cell values of each worksheet
      /// to the collected text.
      /// </summary>
      /// <param name="filename">Path of the downloaded .xlsx file.</param>
      private void ExtractWorkbookText(string filename)
      {
          // Will be accessing this data in the xlsx file
          //  xl/workbook.xml              sheet
          //  xl/worksheets/sheet{0}.xml   v
          using (ZipFile zip = ZipFile.Read(filename))
          {
              // The workbook 'root' part lists one "sheet" element per worksheet.
              XmlDocument workbook = LoadXmlEntry(zip, "xl/workbook.xml");
              int sheetCount = workbook.GetElementsByTagName("sheet").Count;

              // NOTE(review): assumes the worksheet parts are numbered 1..sheetCount
              // without gaps; a missing entry ends extraction at that sheet.
              for (int sheetId = 1; sheetId <= sheetCount; sheetId++)
              {
                  string entryName = String.Format(@"xl/worksheets/sheet{0}.xml", sheetId);
                  XmlDocument sheet = LoadXmlEntry(zip, entryName);

                  // NOTE(review): for shared-string cells the "v" element holds an index
                  // into xl/sharedStrings.xml rather than the text itself - confirm
                  // whether those strings should be resolved.
                  System.Text.StringBuilder sheetWords = new System.Text.StringBuilder();
                  foreach (XmlElement cellValue in sheet.GetElementsByTagName("v"))
                  {
                      sheetWords.Append(' ').Append(cellValue.InnerText);
                  }

                  // Publish after every sheet so partial results survive a later failure.
                  _WordsOnly = _WordsOnly + " " + sheetWords.ToString() + Environment.NewLine + Environment.NewLine;
                  this.All = _WordsOnly;
              }
          }
      }

      /// <summary>
      /// Extracts one entry of the archive into memory and parses it as XML.
      /// </summary>
      /// <param name="zip">Open archive to read from.</param>
      /// <param name="entryName">Name of the entry inside the archive.</param>
      /// <returns>The parsed XML document.</returns>
      private static XmlDocument LoadXmlEntry(ZipFile zip, string entryName)
      {
          using (MemoryStream buffer = new MemoryStream())
          {
              zip.Extract(entryName, buffer);
              buffer.Seek(0, SeekOrigin.Begin);

              XmlDocument document = new XmlDocument();
              // The file comes from the network: never resolve external DTDs/entities.
              document.XmlResolver = null;
              document.Load(buffer);
              return document;
          }
      }
  }
  104. }