file.c
上传用户:qin5330
上传日期:2007-01-05
资源大小:114k
文件大小:12k
- /*
- ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
- ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
- **
- ** This program and library is free software; you can redistribute it and/or
- ** modify it under the terms of the GNU (Library) General Public License
- ** as published by the Free Software Foundation; either version 2
- ** of the License, or any later version.
- **
- ** This program is distributed in the hope that it will be useful,
- ** but WITHOUT ANY WARRANTY; without even the implied warranty of
- ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ** GNU (Library) General Public License for more details.
- **
- ** You should have received a copy of the GNU (Library) General Public License
- ** along with this program; if not, write to the Free Software
- ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- **-------------------------------------------------------------
- ** Changed getdefaults to allow metaNames in the user
- ** configuration file
- ** G.Hill 4/16/97 ghill@library.berkeley.edu
- */
- #include "swish.h"
- #include "file.h"
- #include "mem.h"
- #include "string.h"
- #include "error.h"
- #include "list.h"
- #include "hash.h"
- #include "index.h"
/* Report whether `path` names a directory.
** Returns 1 when stat() succeeds and the file type is "directory",
** 0 otherwise (including when stat() itself fails).
*/
int isdirectory(path)
char *path;
{
	struct stat info;

	if (stat(path, &info) != 0)
		return 0;	/* cannot stat: treat as "not a directory" */

	if (S_ISDIR(info.st_mode))
		return 1;
	return 0;
}
/* Report whether `path` names a regular file.
** Returns 1 when stat() succeeds and the file type is "regular file",
** 0 otherwise (including when stat() itself fails).
*/
int isfile(path)
char *path;
{
	struct stat info;

	if (stat(path, &info) != 0)
		return 0;	/* cannot stat: treat as "not a regular file" */

	if (S_ISREG(info.st_mode))
		return 1;
	return 0;
}
/* Report whether `path` itself is a symbolic link.
** Uses lstat(), so the link is examined rather than its target.
** Returns 1 for a symbolic link, 0 otherwise (including when lstat()
** fails).  Always returns 0 when NO_SYMBOLIC_FILE_LINKS is defined.
*/
int islink(path)
char *path;
{
#ifndef NO_SYMBOLIC_FILE_LINKS
	struct stat stbuf;

	if (lstat(path, &stbuf))
		return 0;
	/* The file type is a multi-bit field: isolate it with S_IFMT and
	** compare for equality, as isdirectory() and isfile() do.  Masking
	** with S_IFLNK itself would also accept any other type whose bit
	** pattern happens to contain the S_IFLNK bits.
	*/
	return ((stbuf.st_mode & S_IFMT) == S_IFLNK) ? 1 : 0;
#else
	return 0;
#endif
}
/* Return the size of the file `path` in bytes, as reported by stat().
** Returns -1 when stat() fails.
** The st_size value is converted to int on return, so sizes that do not
** fit in an int are not reported faithfully.
*/
int getsize(path)
char *path;
{
	struct stat info;

	if (stat(path, &info) == 0)
		return info.st_size;

	return -1;
}
- /* Add an entry to the metaEntryList with the given value and the
- ** appropriate index
- */
- void addMetaEntry(metaList, metaWord, isDocProp)
- struct metaEntry** metaList;
- char* metaWord;
- int isDocProp;
- {
- static int counter;
- int i;
- struct metaEntry* newEntry;
- struct metaEntry* tmpEntry;
-
- if (counter == 0)
- counter = 2;
- else if (counter == 1 || (!counter % 128) )
- counter++;
- for( i=0; metaWord[i]; i++)
- metaWord[i] = tolower(metaWord[i]);
- tmpEntry = *metaList;
- while (tmpEntry)
- {
- if (strcmp(tmpEntry->metaName, metaWord) == 0)
- {
- #ifdef SUPPORT_DOC_PROPERTIES
- /*
- * found a duplicate entry already in the list.
- * Since there are two different config tags that can
- * be used to get here (MetaNames and PropertyNames)
- * and that might be using the same Meta tag name,
- * we cannot assume that either one of these was
- * called first.
- * The semantics we want for the metaEntry are:
- * isDocProperty = 1 if in PropertyNames, else 0
- * isOnlyDocProperty = 1 if not in MetaNames, else 0
- */
- if (isDocProp)
- {
- /* this is a DocumentProperty tag */
- if (!tmpEntry->isDocProperty)
- {
- tmpEntry->isDocProperty = 1;
- }
- }
- else
- {
- /* this is a MetaName tag */
- if (tmpEntry->isDocProperty)
- {
- tmpEntry->isOnlyDocProperty = 0;
- }
- }
- #endif
- return;
- }
- tmpEntry = tmpEntry->next;
- }
- newEntry = (struct metaEntry*) emalloc(sizeof(struct metaEntry));
- #ifdef SUPPORT_DOC_PROPERTIES
- /* isDocProp is true when we see the PropertyNames config tag */
- newEntry->isDocProperty = isDocProp;
- newEntry->isOnlyDocProperty = isDocProp;
- #endif
- newEntry->metaName = (char*)mystrdup(metaWord);
- newEntry->index = counter++;
- newEntry->next = NULL;
- if (*metaList)
- {
- for(tmpEntry=*metaList;tmpEntry->next!=NULL;tmpEntry=tmpEntry->next)
- ;
- tmpEntry->next = newEntry;
- }
- else
- *metaList = newEntry;
-
- return;
- }
- /*
- * Some handy routines for parsing the Configuration File
- */
- int grabYesNoField(line, commandTag, yesNoValue)
- char* line;
- char* commandTag;
- int* yesNoValue;
- {
- char value[MAXSTRLEN];
- if (getconfvalue(line, commandTag, value) != NULL)
- {
- *yesNoValue = (lstrstr(value, "yes")) ? 1 : 0;
- return 1; /* matched commandTag */
- }
- return 0;
- }
- int grabStringValueField(line, commandTag, singleValue)
- char* line;
- char* commandTag;
- char* singleValue;
- {
- /* line must be "<commandTag> <stringValue>" */
- char value[MAXSTRLEN];
- if (getconfvalue(line, commandTag, value) != NULL)
- {
- strcpy(singleValue, value);
- return 1; /* matched commandTag */
- }
- return 0;
- }
- int grabIntValueField(line, commandTag, singleValue, dontToIt)
- char* line;
- char* commandTag;
- int* singleValue;
- int dontToIt;
- {
- char value[MAXSTRLEN];
- if (!grabStringValueField(line, commandTag, value))
- return 0;
-
- if ((value[0]) && (value[0] != 'n') && !dontToIt)
- {
- *singleValue = atoi(value);
- }
- return 1; /* matched commandTag */
- }
- int grabCmdOptionsMega(line,
- commandTag,
- listOfWords,
- gotAny, dontToIt)
- char* line;
- char* commandTag;
- struct swline **listOfWords;
- int* gotAny;
- int dontToIt;
- {
- /*
- * parse the line if it contains commandTag
- * (commandTag is not required to be the first token in the line)
- * Grab all of the words after commandTag and place them in the listOfWords.
- * If "gotAny" is not NULL then set it to 1 if we grabbed any words.
- * If dontDoIt is "1" then do not grab the words.
- * Line may be "<commandTag> <stringValue> .." but it could also
- * be "<other commands> <commandTag> <stringValue> .."
- */
- line = lstrstr(line, commandTag); /* includes main command tag? */
- if (line == NULL)
- return 0;
- line += strlen(commandTag);
-
- /* grab all words after the command tag */
- if (!dontToIt)
- {
- char value[MAXSTRLEN];
- int skiplen;
- while (1)
- {
- strcpy(value, getword(line, &skiplen));
- if (!skiplen | value[0] == ' ' || value[0] == 'n')
- {
- break;
- }
- else
- {
- line += skiplen;
- *listOfWords = (struct swline *) addswline(*listOfWords, value);
- if (gotAny)
- *gotAny = 1;
- }
- }
- }
- return 1;
- }
/* Convenience form of grabCmdOptionsMega(): always collects the words
** (dontToIt = 0) and does not report whether any were found (gotAny = NULL).
** Returns whatever grabCmdOptionsMega() returns.
*/
int grabCmdOptions(line, commandTag, listOfWords)
char *line;
char *commandTag;
struct swline **listOfWords;
{
	int matched;

	matched = grabCmdOptionsMega(line, commandTag, listOfWords, NULL, 0);
	return matched;
}
- /* Reads the configuration file and puts all the right options
- ** in the right variables and structures.
- */
- void getdefaults(conffile, hasdir, hasindex, plimit, flimit, hasverbose)
- char *conffile;
- int *hasdir;
- int *hasindex;
- long *plimit;
- long *flimit;
- int hasverbose;
- {
- int skiplen, gotdir, gotindex;
- char *c, line[MAXSTRLEN], value[MAXSTRLEN];
- FILE *fp;
- int linenumber = 0;
- int baddirective = 0;
-
- gotdir = gotindex = 0;
-
- if ((fp = fopen(conffile, "r")) == NULL ||
- !isfile(conffile) )
- {
- sprintf(errorstr, "Couldn't open the configuration file "%s".", conffile);
- progerr(errorstr);
- }
- while (fgets(line, MAXSTRLEN, fp) != NULL)
- {
- linenumber++;
- if (line[0] == '#' || line[0] == 'n')
- continue;
- if (grabCmdOptionsMega(line, "IndexDir", &dirlist, &gotdir, *hasdir)) {}
- else if (grabCmdOptions(line, "NoContents", &nocontentslist)) {}
- else if (grabCmdOptionsMega(line, "IndexFile", &indexlist, &gotindex, *hasindex)) {}
- else if (grabIntValueField(line, "IndexReport", &verbose, hasverbose)) {}
- else if (grabIntValueField(line, "MinWordLimit", &minwordlimit, 0)) {}
- else if (grabIntValueField(line, "IndexComments", &indexComments, 0)) {}
- else if (grabIntValueField(line, "MaxWordLimit", &maxwordlimit, 0)) {}
- else if (grabStringValueField(line, "WordCharacters", wordchars)) {}
- else if (grabStringValueField(line, "BeginCharacters", beginchars)) {}
- else if (grabStringValueField(line, "EndCharacters", endchars)) {}
- else if (grabStringValueField(line, "IgnoreLastChar", ignorelastchar)) {}
- else if (grabStringValueField(line, "IgnoreFirstChar", ignorefirstchar)) {}
- else if (grabCmdOptions(line, "ReplaceRules", &replacelist)) { checkReplaceList(); }
- else if (grabYesNoField(line, "FollowSymLinks", &followsymlinks)) {}
- else if (grabStringValueField(line, "IndexName", indexn)) {}
- else if (grabStringValueField(line, "IndexDescription", indexd)) {}
- else if (grabStringValueField(line, "IndexPointer", indexp)) {}
- else if (grabStringValueField(line, "IndexAdmin", indexa)) {}
- else if (grabYesNoField(line, "UseStemming", &applyStemmingRules)) {} /* 11/24/98 MG */
- else if (grabYesNoField(line, "IgnoreTotalWordCountWhenRanking", &ignoreTotalWordCountWhenRanking)) {} /* 11/24/98 MG */
- else if (c = (char *) lstrstr(line, "MetaNames"))
- {
- c += strlen("MetaNames");
- while (1)
- {
- strcpy(value, (char *) getword(c, &skiplen));
- if (!skiplen | value[0] == ' ' || value[0] == 'n')
- {
- break;
- }
- else
- {
- c += skiplen;
- addMetaEntry(&metaEntryList, value, 0);
- }
- }
- }
- #ifdef SUPPORT_DOC_PROPERTIES
- else if (c = (char *) lstrstr(line, "PropertyNames")) /* 11/24/98 MG */
- {
- c += strlen("PropertyNames");
- while (1)
- {
- strcpy(value, (char *) getword(c, &skiplen));
- if (!skiplen | value[0] == ' ' || value[0] == 'n')
- {
- break;
- }
- else
- {
- c += skiplen;
- addMetaEntry(&metaEntryList, value, 1); /* isDocProp = 1 */
- }
- }
- }
- #endif
- else if (c = (char *) lstrstr(line, "IgnoreWords")) {
- c += strlen("IgnoreWords");
- while (1) {
- strcpy(value, (char *) getword(c, &skiplen));
- if (!skiplen || value[0] == ' ' || value[0] == 'n')
- break;
- else {
- c += skiplen;
- if (lstrstr(value, "SwishDefault"))
- readdefaultstopwords();
- else
- addstophash(value);
- }
- }
- }
- else if ((c = (char *) lstrstr(line, "IgnoreLimit"))) {
- c += strlen("IgnoreLimit");
- strcpy(value, (char *) getword(c, &skiplen));
- if (!skiplen || value[0] == ' ' || value[0] == 'n')
- continue;
- else {
- c += skiplen;
- *plimit = atoi(value);
- }
- strcpy(value, (char *) getword(c, &skiplen));
- if (!skiplen || value[0] == ' ' || value[0] == 'n')
- continue;
- else {
- c += skiplen;
- *flimit = atoi(value);
- }
- }
- /* IndexVerbose is supported for backwards compatibility */
- else if (c = (char *) lstrstr(line, "IndexVerbose")) {
- c += strlen("IndexVerbose");
- strcpy(value, (char *) getword(c, &skiplen));
- verbose = (lstrstr(value, "yes")) ? 3 : 0;
- }
- else if (!parseconfline(line)) {
- printf("Bad directive on line #%d: %s", linenumber, line );
- baddirective = 1;
- }
- }
- fclose(fp);
-
- if (baddirective)
- exit(1);
- if (gotdir && !(*hasdir))
- *hasdir = 1;
- if (gotindex && !(*hasindex))
- *hasindex = 1;
- }
- /* Checks that all the regex in the replace list are correct */
- void checkReplaceList()
- {
- struct swline *tmpReplace;
- char rule[MAXSTRLEN], patt[MAXSTRLEN];
- regex_t re;
- int status;
-
- tmpReplace = replacelist;
- while (tmpReplace) {
- strcpy(rule,tmpReplace->line);
-
- /* If it is not replace, just do nothing */
- if (lstrstr(rule,"append") || lstrstr(rule,"prepend") ) {
- if (tmpReplace->next){
- tmpReplace = tmpReplace->next;
- }
- else
- return;
- }
- if (lstrstr(rule,"replace")) {
- tmpReplace = tmpReplace->next;
- strcpy(patt,tmpReplace->line);
- if (patt == NULL)
- return;
- status = regcomp(&re,patt, REG_EXTENDED);
- if (status != 0) {
- printf ("Illegal regular expression %sn", patt);
- exit(0);
- }
-
- if (tmpReplace->next)
- tmpReplace = tmpReplace->next;
- else
- return;
- }
- tmpReplace = tmpReplace->next;
- }
- }
- /* This is similar to the previous one, just kept separated because */
- /* of the different structure of the list */
- void checkListRegex (list)
- struct swline *list;
- {
- struct swline *tmpReplace;
- char patt[MAXSTRLEN];
- regex_t re;
- int status;
-
- tmpReplace = replacelist;
- while (tmpReplace) {
- strcpy(patt,tmpReplace->line);
- if (patt == NULL)
- return;
- status = regcomp(&re,patt, REG_EXTENDED);
- if (status != 0) {
- printf ("Illegal regular expression %sn", patt);
- exit(0);
- }
- tmpReplace = tmpReplace->next;
- }
- }/* end of checkListRegex */