DocumentFactory.cs
上传用户:huiyue
上传日期:2022-04-08
资源大小:1429k
文件大小:8k
源码类别:

搜索引擎

开发平台:

ASP/ASPX

  1. using System;
  2. #region Microsoft Office 2007 MimeTypes (for reference)
  3. /*
  4.  * http://www.therightstuff.de/2006/12/16/Office+2007+File+Icons+For+Windows+SharePoint+Services+20+And+SharePoint+Portal+Server+2003.aspx
  5.  * MIME Types for Office 2007 documents
  6. .docm,application/vnd.ms-word.document.macroEnabled.12
  7. .docx,application/vnd.openxmlformats-officedocument.wordprocessingml.document
  8. .dotm,application/vnd.ms-word.template.macroEnabled.12
  9. .dotx,application/vnd.openxmlformats-officedocument.wordprocessingml.template
  10. .potm,application/vnd.ms-powerpoint.template.macroEnabled.12
  11. .potx,application/vnd.openxmlformats-officedocument.presentationml.template
  12. .ppam,application/vnd.ms-powerpoint.addin.macroEnabled.12
  13. .ppsm,application/vnd.ms-powerpoint.slideshow.macroEnabled.12
  14. .ppsx,application/vnd.openxmlformats-officedocument.presentationml.slideshow
  15. .pptm,application/vnd.ms-powerpoint.presentation.macroEnabled.12
  16. .pptx,application/vnd.openxmlformats-officedocument.presentationml.presentation
  17. .xlam,application/vnd.ms-excel.addin.macroEnabled.12
  18. .xlsb,application/vnd.ms-excel.sheet.binary.macroEnabled.12
  19. .xlsm,application/vnd.ms-excel.sheet.macroEnabled.12
  20. .xlsx,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  21. .xltm,application/vnd.ms-excel.template.macroEnabled.12
  22. .xltx,application/vnd.openxmlformats-officedocument.spreadsheetml.template
  23.  */
  24. #endregion
  25. namespace Searcharoo.Common
  26. {
  27.     /// <summary>
  28.     /// Return a Document subclass capable of downloading and parsing the
  29.     /// given Uri/ContentType header information
  30.     /// </summary>
  31.     /// <remarks>
  32.     /// [v6] Added reference to JpegDocument and XpsDocument
  33.     /// </remarks>
  34.     public static class DocumentFactory
  35.     {
  /// <summary>
  /// Construct the Document subclass that matches the response's MIME type
  /// (or, for generic "application/octet-stream" responses, the Uri's file extension).
  /// </summary>
  /// <remarks>
  /// In future, rather than being a hardcoded switch statement, this method could
  /// use a 'provider' model where MIME-types and/or extensions are defined
  /// in the .config file, along with the assembly/class to use to process
  /// that type...
  /// </remarks>
  /// <param name="uri">Location of the resource; handed to the constructed Document.</param>
  /// <param name="contentType">Response whose Content-Type header selects the Document subclass.</param>
  /// <returns>
  /// A Document with its MimeType property set to the lower-cased MIME type;
  /// an IgnoreDocument when no case matches.
  /// </returns>
  public static Document New (Uri uri, System.Net.HttpWebResponse contentType)
  {
      Document newDoc = new IgnoreDocument(uri);

      // Lower-case with the invariant culture: a culture-sensitive ToLower()
      // (e.g. under tr-TR, where 'I' becomes a dotless 'ı') would stop
      // "APPLICATION/PDF" from matching any of the case labels below.
      string mimeType = ParseMimeType(contentType.ContentType).ToLowerInvariant();
      string extension = ParseExtension(uri.AbsoluteUri).ToLowerInvariant();
      // NOTE: the charset is intentionally not parsed here; nothing in this
      // method consumes it.

      switch (mimeType)
      {
          case "text/css":
              break;
          case "application/x-msdownload":
              break;
          case "application/octet-stream":    // ZIP file or something unknown... decide by extension
              switch (extension)
              {
                  case ".docx":
                      newDoc = new DocxDocument(uri);
                      break;
                  case ".xlsx":
                      newDoc = new XlsxDocument(uri);
                      break;
                  case ".pptx":
                      newDoc = new PptxDocument(uri);
                      break;
                  case ".pdf":
                      newDoc = new PdfDocument(uri);
                      break;
#if NET35
                  case ".xps":
                      newDoc = new XpsDocument(uri);
                      break;
#endif
              }
              break;
                                                                              // docx
          case "application/vnd.ms-word.document.12":
          case "application/vnd.openxmlformats-officedocument.wordprocessingml":
          case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
              newDoc = new DocxDocument(uri);
              break;
                                                                              // pptx
          case "application/vnd.openxmlformats-officedocument.presentationml":
          case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
              newDoc = new PptxDocument(uri);
              break;
                                                                              // xlsx
          case "application/vnd.openxmlformats-officedocument.spreadsheetml":
          case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
              newDoc = new XlsxDocument(uri);
              break;
          case "application/pdf":                                             // pdf; changed from FilterDocument in v7
              newDoc = new PdfDocument(uri);
              break;
          case "application/vnd.ms-powerpoint":                               // ppt
          case "application/msword":                                          // doc
              newDoc = new FilterDocument(uri);
              break;
          case "text/plain":
              newDoc = new TextDocument(uri);
              break;
          case "text/xml":
          case "application/xml":
              newDoc = new HtmlDocument(uri); // TODO: XmlDocument parser
              break;
          case "application/rss+xml":
          case "application/rdf+xml":
          case "application/atom+xml":
              newDoc = new HtmlDocument(uri); // TODO: RssDocument parser
              break;
          case "application/xhtml+xml":
              newDoc = new HtmlDocument(uri); // TODO: XhtmlDocument parser
              break;
          case "text/html":
              newDoc = new HtmlDocument(uri); // [v6] clarify code, suggested by "MADCookie2"
              break;
          case "image/jpeg":
              newDoc = new JpegDocument(uri); // [v6] now parse image EXIF data
              break;
          default:
              // None of the above matched: fall back on a substring test.
              // "html" is checked first so that e.g. "text/x-html" is parsed as
              // HTML rather than as plain text.
              if (mimeType.IndexOf("html", StringComparison.Ordinal) >= 0)
              {   // Some HTML-flavoured type
                  newDoc = new HtmlDocument(uri);
              }
              else if (mimeType.IndexOf("text", StringComparison.Ordinal) >= 0)
              {   // Some other 'text' data (not images)
                  newDoc = new TextDocument(uri);
              }
              break;
      } // switch; if not set, defaults to IgnoreDocument

      newDoc.MimeType = mimeType;

      return newDoc;
  }
  139.         #region Private Methods: ParseExtension, ParseMimeType, ParseEncoding
  /// <summary>
  /// Extract the lower-cased file extension (including the leading '.') from a
  /// file name or absolute Uri string.
  /// </summary>
  /// <param name="filename">File name or Uri text; may carry a query string or fragment.</param>
  /// <returns>
  /// The extension, e.g. ".pdf"; an empty string when there is none or when
  /// <paramref name="filename"/> is null or empty.
  /// </returns>
  private static string ParseExtension(string filename)
  {
      if (string.IsNullOrEmpty(filename))
      {
          return string.Empty;
      }

      // Callers pass a full Uri string: drop "?query" and "#fragment" first,
      // otherwise "report.pdf?id=1" yields the extension ".pdf?id=1".
      int cut = filename.IndexOfAny(new char[] { '?', '#' });
      string pathOnly = cut >= 0 ? filename.Substring(0, cut) : filename;

      // Invariant lower-casing keeps ".XLSX" -> ".xlsx" regardless of the
      // current thread culture.
      return System.IO.Path.GetExtension(pathOnly).ToLowerInvariant();
  }
  /// <summary>
  /// Extract the media type (the part before the first ';') from a
  /// Content-Type header value.
  /// </summary>
  /// <param name="contentType">Header value such as "text/html; charset=utf-8".</param>
  /// <returns>
  /// The media type with surrounding whitespace removed and casing unchanged;
  /// an empty string when <paramref name="contentType"/> is null or empty.
  /// </returns>
  private static string ParseMimeType(string contentType)
  {
      if (string.IsNullOrEmpty(contentType))
      {
          return string.Empty;
      }

      int separator = contentType.IndexOf(';');
      string mediaType = separator >= 0 ? contentType.Substring(0, separator) : contentType;

      // Whitespace around the media type is legal in the header but would
      // stop the result from matching an exact MIME-type string.
      return mediaType.Trim();
  }
  /// <summary>
  /// Extract the value of the "charset" parameter from a Content-Type
  /// header value.
  /// </summary>
  /// <param name="contentType">Header value such as "text/html; charset=utf-8".</param>
  /// <returns>
  /// The charset value with surrounding whitespace and quotes removed and casing
  /// unchanged; an empty string when no charset parameter with a value is present.
  /// </returns>
  private static string ParseEncoding(string contentType)
  {
      if (string.IsNullOrEmpty(contentType))
      {
          return string.Empty;
      }

      string[] parts = contentType.Split(';');

      // parts[0] is the media type; parameters start at index 1. Every
      // parameter is examined because charset need not be the first one.
      for (int i = 1; i < parts.Length; i++)
      {
          string parameter = parts[i].Trim();
          int equalsPos = parameter.IndexOf('=');
          if (equalsPos < 0)
          {
              continue;   // malformed parameter without a value, e.g. a bare "charset"
          }

          string name = parameter.Substring(0, equalsPos).Trim();
          if (string.Equals(name, "charset", StringComparison.OrdinalIgnoreCase))
          {
              // The value may be a quoted-string: charset="utf-8"
              return parameter.Substring(equalsPos + 1).Trim().Trim('"', '\'');
          }
      }

      return string.Empty;
  }
  170.         #endregion
  171.     }
  172. }