PptxDocument.cs
上传用户:huiyue
上传日期:2022-04-08
资源大小:1429k
文件大小:4k
源码类别:

搜索引擎

开发平台:

ASP/ASPX

  1. using System;
  2. using System.IO;
  3. using System.Xml;
  4. using ionic.utils.zip;
  5. namespace Searcharoo.Common
  6. {
  7.     /// <summary>
  8.     /// Load a Microsoft PowerPoint 2007 Xml file format
  9.     /// </summary>
  10.     /// <remarks>
  11.     /// <see cref="DocxDocument" />
  12.     /// </remarks>
  13.     public class PptxDocument : DownloadDocument
  14.     {
  15.         private string _WordsOnly;
  16.         public PptxDocument(Uri location)
  17.             : base(location)
  18.         {
  19.             Extension = "pptx";
  20.         }
  21.         
  22.         public override void Parse()
  23.         {
  24.             // no parsing (for now). perhaps in future we can regex look for urls (www.xxx.com) and try to link to them...
  25.         }
  26.         public override string WordsOnly
  27.         {
  28.             get { return _WordsOnly; }
  29.         }
  30.         /// <remarks>
  31.         /// .NET System.IO.Compression and zip files
  32.         /// http://blogs.msdn.com/dotnetinterop/archive/2006/04/05/.NET-System.IO.Compression-and-zip-files.aspx
  33.         /// </remarks>
  34.         public override bool GetResponse(System.Net.HttpWebResponse webresponse)
  35.         {
  36.             string filename = System.IO.Path.Combine(
  37.                           Preferences.DownloadedTempFilePath
  38.                         , (System.IO.Path.GetFileName(this.Uri.LocalPath)) );
  39.             this.Title = System.IO.Path.GetFileNameWithoutExtension(filename);
  40.             SaveDownloadedFile(webresponse, filename);
  41.             try
  42.             {   // Will be accessing this data in the pptx file
  43.                 //  ppt/presentation.xml      p:presentation/sldIdLst
  44.                 //  ppt/slides/slide{0}.xml   a:t
  45.                 try
  46.                 {
  47.                     using (ZipFile zip = ZipFile.Read(filename))
  48.                     {
  49.                         int slideCount = 0;
  50.                         using (MemoryStream streamroot = new MemoryStream())
  51.                         {   // open the presentation 'root' file to see how many slides there are
  52.                             zip.Extract("ppt/presentation.xml", streamroot);
  53.                             streamroot.Seek(0, SeekOrigin.Begin);
  54.                             XmlDocument xmldocroot = new XmlDocument();
  55.                             xmldocroot.Load(streamroot);
  56.                             XmlNodeList objXML = xmldocroot.GetElementsByTagName("p:sldId");
  57.                             slideCount = objXML.Count;
  58.                         }
  59.                         XmlDocument xmlSlide;
  60.                         string entryToExtractPattern = @"ppt/slides/slide{0}.xml";
  61.                         for (int slideId = 1; slideId <= slideCount; slideId++)
  62.                         {   // now open each slide file to extract text
  63.                             using (MemoryStream stream = new MemoryStream())
  64.                             {
  65.                                 string entryToExtract = String.Format(entryToExtractPattern, slideId);
  66.                                 zip.Extract(entryToExtract, stream);
  67.                                 stream.Seek(0, SeekOrigin.Begin);
  68.                                 xmlSlide = new XmlDocument();
  69.                                 xmlSlide.Load(stream);
  70.                             }
  71.                             string slideWords = "";
  72.                             foreach (XmlElement x in xmlSlide.GetElementsByTagName("a:t"))
  73.                             {
  74.                                 slideWords = slideWords + " " + x.InnerText;
  75.                             }
  76.                             _WordsOnly = _WordsOnly + " " + slideWords + Environment.NewLine + Environment.NewLine;
  77.                             this.All = _WordsOnly;
  78.                         }
  79.                     }
  80.                 }
  81.                 catch (Exception ex)
  82.                 {
  83.                     Console.WriteLine(ex.Message);
  84.                 }
  85.                 System.IO.File.Delete(filename);    // clean up
  86.             }
  87.             catch (Exception)
  88.             {
  89.                 //                ProgressEvent(this, new ProgressEventArgs(2, "IFilter failed on " + this.Uri + " " + e.Message + ""));
  90.             }
  91.             if (this.All != string.Empty)
  92.             {
  93.                 this.Description = base.GetDescriptionFromWordsOnly(WordsOnly);
  94.                 return true;
  95.             }
  96.             else
  97.             {
  98.                 return false;
  99.             }
  100.         }
  101.        
  102.     }
  103. }