refresh.c
上传用户:seven77cht
上传日期:2007-01-04
资源大小:486k
文件大小:15k
源码类别:

浏览器

开发平台:

Unix_Linux

  1. /***************************************
  2.   $Header: /home/amb/wwwoffle/RCS/refresh.c 2.50 1999/09/11 14:00:04 amb Exp $
  3.   WWWOFFLE - World Wide Web Offline Explorer - Version 2.5.
  4.   The HTML interactive page to refresh a URL.
  5.   ******************/ /******************
  6.   Written by Andrew M. Bishop
  7.   This file Copyright 1997,98,99 Andrew M. Bishop
  8.   It may be distributed under the GNU Public License, version 2, or
  9.   any higher version.  See section COPYING of the GNU Public license
  10.   for conditions under which this file may be redistributed.
  11.   ***************************************/
  12. #include <stdio.h>
  13. #include <stdlib.h>
  14. #include <string.h>
  15. #include <unistd.h>
  16. #include "wwwoffle.h"
  17. #include "document.h"
  18. #include "misc.h"
  19. #include "config.h"
  20. #include "sockets.h"
  21. #include "errors.h"
  22. /*+ The options for recursive or normal fetching. +*/
  23. static int recursive=0;
  24. static int recursive_depth=0,recursive_mode=0,force=0;
  25. static int stylesheets=0,images=0,frames=0,scripts=0,objects=0;
  26. static char *RefreshFormParse(int fd,Body *request_body);
  27. static int request_url(URL *Url,char *refresh,URL *refUrl);
  28. /*++++++++++++++++++++++++++++++++++++++
  29.   Send to the client a page to allow refreshes using HTML.
  30.   char *RefreshPage Returns a modified URLs for a simple refresh.
  31.   int fd The file descriptor of the client.
  32.   URL *Url The URL that was used to request this page.
  33.   Body *request_body The HTTP request body sent by the browser.
  34.   int *recurse Return value set to true if a recursive fetch was asked for.
  35.   ++++++++++++++++++++++++++++++++++++++*/
  36. char *RefreshPage(int fd,URL *Url,Body *request_body,int *recurse)
  37. {
  38.  char *newurl=NULL;
  39.  if(!strcmp("/refresh-options/",Url->path))
  40.     HTMLMessage(fd,200,"WWWOFFLE Refresh Form",NULL,"RefreshPage",
  41.                 "url",Url->args,
  42.                 "stylesheets",FetchStyleSheets?"yes":NULL,
  43.                 "images",FetchImages?"yes":NULL,
  44.                 "frames",FetchFrames?"yes":NULL,
  45.                 "scripts",FetchScripts?"yes":NULL,
  46.                 "objects",FetchObjects?"yes":NULL,
  47.                 NULL);
  48.  else if(!strcmp("/refresh-request/",Url->path))
  49.    {
  50.     if((newurl=RefreshFormParse(fd,request_body)))
  51.        *recurse=1;
  52.    }
  53.  else if(!strcmp("/refresh/",Url->path))
  54.    {
  55.     newurl=(char*)malloc(strlen(Url->args)+1);
  56.     strcpy(newurl,Url->args);
  57.    }
  58.  else
  59.    {
  60.     newurl=(char*)malloc(strlen(Url->args)+1);
  61.     strcpy(newurl,Url->args);
  62.     *recurse=1;
  63.    }
  64.  if(*recurse)
  65.     ParseRecurseOptions(Url->path+8);
  66.  return(newurl);
  67. }
  68. /*++++++++++++++++++++++++++++++++++++++
  69.   Parse the reply from the form.
  70.   char *RefreshFormParse Returns the first URL to get.
  71.   int fd The file descriptor of the client.
  72.   Body *request_body The body of the HTTP request sent by the browser.
  73.   ++++++++++++++++++++++++++++++++++++++*/
  74. static char *RefreshFormParse(int fd,Body *request_body)
  75. {
  76.  int i;
  77.  char *copy,*url=NULL,*method=NULL,*force="";
  78.  char *stylesheets="",*images="",*frames="",*scripts="",*objects="";
  79.  URL *Url;
  80.  char *new_url;
  81.  if(!request_body)
  82.    {
  83.     HTMLMessage(fd,404,"WWWOFFLE Refresh Form Error",NULL,"RefreshFormError",
  84.                 "body",NULL,
  85.                 NULL);
  86.     return(NULL);
  87.    }
  88.  copy=(char*)malloc(request_body->length+1);
  89.  strcpy(copy,request_body->content);
  90.  for(i=0;copy[i];i++)
  91.    {
  92.     if(i!=0 && copy[i-1]=='&')
  93.        copy[i-1]=0;
  94.     if(i==0 || copy[i-1]==0)
  95.       {
  96.        if(!strncmp("method=",&copy[i],7))
  97.           method=&copy[i+7];
  98.        if(!strncmp("force=",&copy[i],6))
  99.           force=&copy[i+6];
  100.        if(!strncmp("stylesheets=",&copy[i],12))
  101.           stylesheets=&copy[i+12];
  102.        if(!strncmp("images=",&copy[i],7))
  103.           images=&copy[i+7];
  104.        if(!strncmp("frames=",&copy[i],7))
  105.           frames=&copy[i+7];
  106.        if(!strncmp("scripts=",&copy[i],8))
  107.           scripts=&copy[i+8];
  108.        if(!strncmp("objects=",&copy[i],8))
  109.           objects=&copy[i+8];
  110.        if(!strncmp("url=",&copy[i],4))
  111.           url=&copy[i+4];
  112.       }
  113.    }
  114.  if(url==NULL || *url==0 || method==NULL || *method==0)
  115.    {
  116.     HTMLMessage(fd,404,"WWWOFFLE Refresh Form Error",NULL,"RefreshFormError",
  117.                 "body",request_body->content,
  118.                 NULL);
  119.     free(copy);
  120.     return(NULL);
  121.    }
  122.  url=URLDecode(url,1);
  123.  Url=SplitURL(url);
  124.  new_url=(char*)malloc(request_body->length+64);
  125.  strcpy(new_url,"/refresh");
  126.  strcat(new_url,method);
  127.  strcat(new_url,force);
  128.  strcat(new_url,stylesheets);
  129.  strcat(new_url,images);
  130.  strcat(new_url,frames);
  131.  strcat(new_url,scripts);
  132.  strcat(new_url,objects);
  133.  strcat(new_url,"/?");
  134.  strcat(new_url,Url->file);
  135.  FreeURL(Url);
  136.  free(copy);
  137.  return(new_url);
  138. }
  139. /*++++++++++++++++++++++++++++++++++++++
  140.   Parse the url method to decide what needs fetching recursively.
  141.   char *method The method to use, encoding the depth and other options.
  142.   ++++++++++++++++++++++++++++++++++++++*/
  143. void ParseRecurseOptions(char *method)
  144. {
  145.  if(method)
  146.    {
  147.     char *copy=(char*)malloc(strlen(method)+1),*dash,*slash;
  148.     strcpy(copy,method);
  149.     if((slash=strchr(copy,'/')))
  150.        *slash=0;
  151.     PrintMessage(Debug,"Refresh method='%s'.",copy);
  152.     if(*copy=='-')
  153.        copy++;
  154.     do
  155.       {
  156.        if((dash=strchr(copy,'-')))
  157.           *dash=0;
  158.        if(!strcmp(copy,"refresh"))
  159.           ;
  160.        else if(!strcmp(copy,"none"))
  161.           ;
  162.        else if(!strcmp(copy,"dir"))
  163.           recursive_mode=1;
  164.        else if(!strcmp(copy,"host"))
  165.           recursive_mode=2;
  166.        else if(!strcmp(copy,"any"))
  167.           recursive_mode=3;
  168.        else if(!strcmp(copy,"force"))
  169.           force=1;
  170.        else if(!strcmp(copy,"stylesheets"))
  171.           stylesheets=1;
  172.        else if(!strcmp(copy,"images"))
  173.           images=1;
  174.        else if(!strcmp(copy,"frames"))
  175.           frames=1;
  176.        else if(!strcmp(copy,"scripts"))
  177.           scripts=1;
  178.        else if(!strcmp(copy,"objects"))
  179.           objects=1;
  180.        else if(atoi(copy))
  181.           recursive_depth=atoi(copy);
  182.        copy=dash+1;
  183.       }
  184.     while(dash);
  185.     recursive=1;
  186.    }
  187.  else
  188.    {
  189.     stylesheets=FetchStyleSheets;
  190.     images=FetchImages;
  191.     frames=FetchFrames;
  192.     scripts=FetchScripts;
  193.     objects=FetchObjects;
  194.    }
  195. }
  196. /*++++++++++++++++++++++++++++++++++++++
  197.   Replies with whether the page is to be forced to refresh.
  198.   int RefreshForced Returns the force flag.
  199.   ++++++++++++++++++++++++++++++++++++++*/
  200. int RefreshForced(void)
  201. {
  202.  return(force);
  203. }
  204. /*++++++++++++++++++++++++++++++++++++++
  205.   Fetch the images, etc from the just parsed page.
  206.   int RecurseFetch Returns 1 if there are more to be fetched.
  207.   URL *Url The URL that was fetched.
  208.   int new Set to true if the page is new to the cache, else we may be in infinite recursion.
  209.   ++++++++++++++++++++++++++++++++++++++*/
  210. int RecurseFetch(URL *Url,int new)
  211. {
  212.  char **list,*metarefresh;
  213.  int more=0;
  214.  int j;
  215.  /* A Meta-Refesh header. */
  216.  if(new && (metarefresh=MetaRefresh()))
  217.    {
  218.     URL *metarefreshUrl=SplitURL(metarefresh);
  219.     if(!metarefreshUrl->local && metarefreshUrl->Protocol)
  220.       {
  221.        char *refresh=NULL;
  222.        if(recursive)
  223.           refresh=CreateRefreshPath(recursive_depth,recursive_mode,force,
  224.                                     stylesheets,images,frames,scripts,objects);
  225.        PrintMessage(Debug,"Meta-Refresh=%s",metarefreshUrl->name);
  226.        more+=request_url(metarefreshUrl,refresh,Url);
  227.       }
  228.     FreeURL(metarefreshUrl);
  229.    }
  230.  /* Any style sheets. */
  231.  if(stylesheets && (list=GetReferences(RefStyleSheet)))
  232.     for(j=0;list[j];j++)
  233.       {
  234.        URL *stylesheetUrl=SplitURL(list[j]);
  235.        if(!stylesheetUrl->local && stylesheetUrl->Protocol)
  236.          {
  237.           PrintMessage(Debug,"Style-Sheet=%s",stylesheetUrl->name);
  238.           more+=request_url(stylesheetUrl,NULL,Url);
  239.          }
  240.        FreeURL(stylesheetUrl);
  241.       }
  242.  /* Any images. */
  243.  if(images && (list=GetReferences(RefImage)))
  244.     for(j=0;list[j];j++)
  245.       {
  246.        URL *imageUrl=SplitURL(list[j]);
  247.        if(!imageUrl->local && imageUrl->Protocol)
  248.          {
  249.           PrintMessage(Debug,"Image=%s",imageUrl->name);
  250.           more+=request_url(imageUrl,NULL,Url);
  251.          }
  252.        FreeURL(imageUrl);
  253.       }
  254.  /* Any frames */
  255.  if(new && frames && (list=GetReferences(RefFrame)))
  256.     for(j=0;list[j];j++)
  257.       {
  258.        URL *frameUrl=SplitURL(list[j]);
  259.        if(!frameUrl->local && frameUrl->Protocol)
  260.          {
  261.           char *refresh=NULL;
  262.           if(recursive)
  263.              refresh=CreateRefreshPath(recursive_depth,recursive_mode,force,
  264.                                        stylesheets,images,frames,scripts,objects);
  265.           PrintMessage(Debug,"Frame=%s",frameUrl->name);
  266.           more+=request_url(frameUrl,refresh,Url);
  267.          }
  268.        FreeURL(frameUrl);
  269.       }
  270.  /* Any scripts. */
  271.  if(scripts && (list=GetReferences(RefScript)))
  272.     for(j=0;list[j];j++)
  273.       {
  274.        URL *scriptUrl=SplitURL(list[j]);
  275.        if(!scriptUrl->local && scriptUrl->Protocol)
  276.          {
  277.           PrintMessage(Debug,"Script=%s",scriptUrl->name);
  278.           more+=request_url(scriptUrl,NULL,Url);
  279.          }
  280.        FreeURL(scriptUrl);
  281.       }
  282.  /* Any Objects. */
  283.  if(objects && (list=GetReferences(RefObject)))
  284.     for(j=0;list[j];j++)
  285.       {
  286.        URL *objectUrl=SplitURL(list[j]);
  287.        if(!objectUrl->local && objectUrl->Protocol)
  288.          {
  289.           PrintMessage(Debug,"Object=%s",objectUrl->name);
  290.           more+=request_url(objectUrl,NULL,Url);
  291.          }
  292.        FreeURL(objectUrl);
  293.       }
  294.  if(new && objects && (list=GetReferences(RefInlineObject)))
  295.     for(j=0;list[j];j++)
  296.       {
  297.        URL *objectUrl=SplitURL(list[j]);
  298.        if(!objectUrl->local && objectUrl->Protocol)
  299.          {
  300.           char *refresh=CreateRefreshPath(recursive_depth,recursive_mode,
  301.                                           force,stylesheets,images,frames,scripts,objects);
  302.           PrintMessage(Debug,"InlineObject=%s",objectUrl->name);
  303.           more+=request_url(objectUrl,refresh,Url);
  304.          }
  305.        FreeURL(objectUrl);
  306.       }
  307.  /* Any links */
  308.  if(recursive_depth && (list=GetReferences(RefLink)))
  309.     for(j=0;list[j];j++)
  310.       {
  311.        URL *linkUrl=SplitURL(list[j]);
  312.        if(!linkUrl->local && linkUrl->Protocol)
  313.          {
  314.           int get=1;
  315.           if(recursive_mode!=3)
  316.             {
  317.              if(strcmp(Url->host,linkUrl->host))
  318.                 get=0;
  319.              else
  320.                 if(recursive_mode!=2)
  321.                   {
  322.                    char *end=Url->path+strlen(Url->path);
  323.                    while(end>Url->path)
  324.                       if(*end=='/')
  325.                          break;
  326.                       else
  327.                          end--;
  328.                    if(*end)
  329.                       *++end=0;
  330.                    if(end!=Url->path && strncmp(Url->path,linkUrl->path,end-Url->path))
  331.                       get=0;
  332.                   }
  333.             }
  334.           if(get)
  335.             {
  336.              char *refresh=CreateRefreshPath(recursive_depth-1,recursive_mode,force,
  337.                                              stylesheets,images,frames,scripts,objects);
  338.              PrintMessage(Debug,"Link=%s",linkUrl->name);
  339.              more+=request_url(linkUrl,refresh,Url);
  340.             }
  341.          }
  342.        FreeURL(linkUrl);
  343.       }
  344.  return(more);
  345. }
  346. /*++++++++++++++++++++++++++++++++++++++
  347.   Fetch the relocated page.
  348.   int RecurseFetchRelocation Returns 1 if there are more to be fetched.
  349.   URL *Url The URL that was fetched.
  350.   char *location The new location of the URL.
  351.   ++++++++++++++++++++++++++++++++++++++*/
  352. int RecurseFetchRelocation(URL *Url,char *location)
  353. {
  354.  int more=0;
  355.  URL *locationUrl=SplitURL(location);
  356.  if(!locationUrl->local && locationUrl->Protocol)
  357.    {
  358.     char *refresh=NULL;
  359.     int get=1;
  360.     if(recursive)
  361.       {
  362.        if(recursive_mode!=3)
  363.          {
  364.           if(strcmp(Url->host,locationUrl->host))
  365.              get=0;
  366.           else
  367.              if(recursive_mode!=2)
  368.                {
  369.                 char *end=Url->path+strlen(Url->path);
  370.                 while(end>Url->path)
  371.                    if(*end=='/')
  372.                       break;
  373.                    else
  374.                       end--;
  375.                 if(*end)
  376.                    *++end=0;
  377.                 if(end!=Url->path && strncmp(Url->path,locationUrl->path,end-Url->path))
  378.                    get=0;
  379.                }
  380.          }
  381.        if(get)
  382.           refresh=CreateRefreshPath(recursive_depth,recursive_mode,force,
  383.                                     stylesheets,images,frames,scripts,objects);
  384.       }
  385.     if(get)
  386.       {
  387.        PrintMessage(Debug,"Location=%s",locationUrl->name);
  388.        more+=request_url(locationUrl,refresh,Url);
  389.       }
  390.    }
  391.  return(more);
  392. }
  393. /*++++++++++++++++++++++++++++++++++++++
  394.   Make a request for a URL.
  395.   int request_url Returns 1 on success, else 0.
  396.   URL *Url The URL that was asked for.
  397.   char *refresh The URL path that is required for refresh information.
  398.   URL *refUrl The refering URL.
  399.   ++++++++++++++++++++++++++++++++++++++*/
  400. static int request_url(URL *Url,char *refresh,URL *refUrl)
  401. {
  402.  int retval=0;
  403.  if(recursive && IsNotGotRecursive(Url->proto,Url->host,Url->path,Url->args))
  404.     PrintMessage(Inform,"The server '%s://%s' and/or path '%s' is on the list not to get recursively.",Url->proto,Url->host,Url->path);
  405.  else if(IsNotGot(Url->proto,Url->host,Url->path,Url->args))
  406.     PrintMessage(Inform,"The server '%s://%s' and/or path '%s' is on the list not to get.",Url->proto,Url->host,Url->path);
  407.  else
  408.    {
  409.     int new_outgoing=OpenOutgoingSpoolFile(0);
  410.     if(new_outgoing==-1)
  411.        PrintMessage(Warning,"Cannot open the new outgoing request to write.");
  412.     else
  413.       {
  414.        URL *reqUrl;
  415.        Header *new_request_head;
  416.        char *head;
  417.        if(refUrl->pass && !strcmp(refUrl->host,Url->host))
  418.           AddURLPassword(Url,refUrl->user,refUrl->pass);
  419.        if(refresh)
  420.          {
  421.           char *url=(char*)malloc(strlen(Url->file)+strlen(refresh)+4);
  422.           strcpy(url,refresh);
  423.           strcat(url,"/?");
  424.           strcat(url,Url->file);
  425.           reqUrl=SplitURL(url);
  426.           free(url);
  427.          }
  428.        else
  429.           reqUrl=Url;
  430.        new_request_head=RequestURL(reqUrl,refUrl->name);
  431.        if(force)
  432.           AddToHeader(new_request_head,"Pragma","no-cache");
  433.        head=HeaderString(new_request_head);
  434.        write_string(new_outgoing,head);
  435.        CloseOutgoingSpoolFile(new_outgoing,reqUrl);
  436.        retval=1;
  437.        if(reqUrl!=Url)
  438.           free(reqUrl);
  439.        free(head);
  440.        FreeHeader(new_request_head);
  441.       }
  442.    }
  443.  return(retval);
  444. }
  445. /*++++++++++++++++++++++++++++++++++++++
  446.   Create a Path for doing a refresh with options.
  447.   char *CreateRefreshPath Returns a pointer to a static string.
  448.   ++++++++++++++++++++++++++++++++++++++*/
  449. char *CreateRefreshPath(int recursive_depth,int recursive_mode,int force,
  450.                         int stylesheets,int images,int frames,int scripts,int objects)
  451. {
  452.  static char refresh[64];
  453.  strcpy(refresh,"/refresh");
  454.  if(recursive_depth)
  455.    {
  456.     if(recursive_mode==1)
  457.        strcat(refresh,"-dir");
  458.     else if(recursive_mode==2)
  459.        strcat(refresh,"-host");
  460.     else /* recursive_mode==3 */
  461.        strcat(refresh,"-any");
  462.     sprintf(&refresh[strlen(refresh)],"-%d",recursive_depth);
  463.    }
  464.  if(force)
  465.     strcat(refresh,"-force");
  466.  if(stylesheets)
  467.     strcat(refresh,"-stylesheets");
  468.  if(images)
  469.     strcat(refresh,"-images");
  470.  if(frames)
  471.     strcat(refresh,"-frames");
  472.  if(scripts)
  473.     strcat(refresh,"-scripts");
  474.  if(objects)
  475.     strcat(refresh,"-objects");
  476.  if(!recursive_depth && !force && !stylesheets && !images && !frames && !scripts && !objects)
  477.     strcat(refresh,"-none");
  478.  return(refresh);
  479. }