搜索引擎

开发平台：
Perl

search.c：源码内容
							/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
**-----------------------------------------------------------------
** Changes in expandstar and parseterm to fix the wildcard * problem.
** G. Hill, ghill@library.berkeley.edu  3/11/97
**
** Changes in notresultlist, parseterm, and fixnot to fix the NOT problem
** G. Hill, ghill@library.berkeley.edu 3/13/97
**
** Changes in search, parseterm, fixnot, operate, getfileinfo
** to support METADATA
** G. Hill 3/18/97 ghill@library.berkeley.edu
**
** Change in search to allow for search with a list including
** also some empty indexes.
** G. Hill after a suggestion by J. Winstead 12/18/97
**
** Created countResults for number of hits in search
** G. Hill 12/18/97
*/
#include "swish.h"
#include "search.h"
#include "file.h"
#include "list.h"
#include "string.h"
#include "merge.h"
#include "hash.h"
#include "mem.h"
#include "docprop.h"
#include "stemmer.h"
/* The main search function.
** Parentheses are stripped out, things made lowercase,
** extra blanks removed, etc.
*/
void search(words, indexlist, structure)
char *words;
struct swline *indexlist;
int structure;
{
	int i, j, metaName, indexYes, totalResults;
	float num;
	char word[MAXWORDLEN];
	struct result *resultlist;
	struct sortresult *sortresultlist;
	struct swline *tmplist;
	FILE *fp;
#ifdef DEBUG
	struct swline *newp2;
#endif
#if IGNORE_STOPWORDS_IN_QUERY
    struct swline *pointer1, *pointer2;
#endif
	
	searchwordlist = NULL;
	metaName = 1;
	indexYes = 0;
	
	for (i = j = 0; words[i] != '' && words[i] != 'n'; i++) 
	{
		if (isspace(words[i]) || words[i] == '(' || words[i] == ')' || words[i] == '=') 
		{
			if (words[i] == '=')
			{
				if (j != 0)
				{
					if (words[i-1] != '\')
					{ 
						word[j] = '';
						searchwordlist = (struct swline *) addswline(searchwordlist, 
															(char *) convertentities(word));
						j = 0;
						searchwordlist = (struct swline *) addswline(searchwordlist, "=");
					}
					else
					{
						/* Needs to erase the '' */
						j--;
						word[j] = tolower(words[i]);
						j++;
					}
				}
				else
				{
					searchwordlist = (struct swline *) addswline(searchwordlist, "=");
				}
			}
			else
			{
				if (j) 
				{
					word[j] = '';
					searchwordlist = (struct swline *) addswline(searchwordlist, 
														(char *) convertentities(word));
					j = 0;
				}
				if (words[i] == '(') 
				{
					searchwordlist = (struct swline *) addswline(searchwordlist, "(");
				}
				if (words[i] == ')') 
				{
					searchwordlist = (struct swline *)
						addswline(searchwordlist, ")");
				}
			}
		}
		else 
		{
			word[j] = tolower(words[i]);
			j++;
		}
	}
	if (j) 
	{
		word[j] = '';
		searchwordlist = (struct swline *) addswline(searchwordlist, 
											(char *) convertentities(word));
	}
	
	printf("%sn", INDEXHEADER);
	if (words[0] == '') 
	{
		printf("err: no search words specifiedn.n");
		exit(0);
	}
	while (indexlist != NULL) {
		
		commonerror = bigrank = 0;
		
		if ((fp = openIndexFileForRead(indexlist->line)) == NULL) {
			printf("# Name: unknown indexn");
			printf("err: could not open index filen.n");
			exit(0);
		}
		
		if (!isokindexheader(fp)) {
			printf("err: the index file format is unknownn.n");
			exit(0);
		}
		
		/* Was stemming applied to the index? If so, we want
		* to apply stemming to the search terms as well */
		applyStemmingRules = wasStemmingAppliedToIndex(fp);
		
		getheader(fp);
		
		if (!getindexfilenum(fp)) {
			indexlist = indexlist->next;
			continue;
		}
		else
		{ indexYes = 1; /*There is a non-empty index */ }
		
		readoffsets(fp);
		readstopwords(fp);
		readfileoffsets(fp);
		readMetaNames(fp);
		
#if IGNORE_STOPWORDS_IN_QUERY
		/* Added JM 1/10/98. */
		
		pointer1 = searchwordlist;
		pointer2 = searchwordlist->next;
		
		while (pointer1 != NULL &&
			isstopword(pointer1->line) && !isrule(pointer1->line)) {
			
			searchwordlist = pointer2;
			free(pointer1);
			pointer1 = pointer2;
			pointer2 = pointer2 ? pointer2->next : NULL;
		}
		if (pointer1 == NULL)
		{
			/* This query contained only stopwords! */
			printf("err: all search words too common to be usefuln.n");
			exit(0);
		}
		
		while (pointer2 != NULL) {
			if (isstopword(pointer2->line) &&  !isrule(pointer2->line)) {
				pointer1->next = pointer2->next;
				free(pointer2);
			}
			else {
				pointer1 = pointer1->next;
			}
			pointer2 = pointer2->next;
		}
#endif
		
		printf("# Search words:");
		tmplist = searchwordlist;
		while (tmplist != NULL) {
			printf(" %s", tmplist->line);
			tmplist = tmplist->next;
		}
		putchar('n');
		
		resultlist = NULL;
		tmplist = searchwordlist;
		tmplist = (struct swline *) fixnot(tmplist);
		searchwordlist = (struct swline *) expandstar(tmplist, fp);
#ifdef DEBUG
		newp2 = searchwordlist;
		while (newp2 != NULL) {
			printf("%s ", newp2->line);
			newp2 = newp2->next;
		}
		putchar('n');
#endif
		
		#ifdef SUPPORT_DOC_PROPERTIES
		initSearchResultProperties();
		#endif
		resultlist = (struct result *) parseterm(fp, 0, metaName);
		
		sortresultlist = NULL;
		while (resultlist != NULL) 
		{
			if (resultlist->structure & structure)
			{
				long propPos;
				char* fileInfo = lookupfile(resultlist->filenum, fp, &propPos);
				sortresultlist = (struct sortresult *)
								addsortresult(sortresultlist, resultlist->rank,
												fileInfo,
												propPos,
												resultlist->filenum);
			}
			resultlist = resultlist->next;
		}
		
		if (sortresultlist == NULL) {
			if (commonerror)
				printf("err: a word is too commonn");
			else
				printf("err: no resultsn");
		}
		else {
			if (bigrank)
				num = 1000.0f / (float) bigrank;
			else
				num = 1000.0f;
			totalResults = countResults(sortresultlist);
			printf("# Number of hits: %dn",totalResults);
			printsortedresults(sortresultlist, num, fp);
			
		}
		
		/* keep file open during printsortedresults() so that 
		* doc properties can be retrieved */
		fclose(fp);
		searchwordlist = tmplist;
		indexlist = indexlist->next;
		
	}
	if (!indexYes)
	{
		printf("err: the index file(s) is emptyn.n");
		exit(0);
	}
	printf(".n");
}
/* This puts parentheses in the right places around not structures
** so the parser can do its thing correctly.
** It does it both for 'not' and '='; the '=' is used for the METADATA (GH)
**
** sp - the tokenized query.
**
** Returns a new list built with addswline(); the nodes of sp are only
** read. "not X" becomes "( not X )" and "name = X" becomes
** "( name = X )", where X is a single word or a parenthesized group.
*/
struct swline *fixnot(sp)
struct swline *sp;
{
	int openparen, hasnot;
	int openMeta, hasMeta;
	struct swline *tmpp, *newp;
#ifdef DEBUG
	struct swline *newp2;
#endif
	
	tmpp = sp;
	newp = NULL;
	
	openparen = 0;	/* '(' nesting depth inside the current "not" operand */
	openMeta = 0;	/* '(' nesting depth inside the current meta operand */
	hasMeta = 0;	/* a meta operand is still open */
	hasnot = 0;	/* a "not" operand is still open */
	while (tmpp != NULL) {
		if ( ((tmpp->line)[0] == '(') && hasnot)
			openparen++;
		else if ( ((tmpp->line)[0] == '(') && hasMeta)
			openMeta++;
		else if ( ((tmpp->line)[0] == ')') && hasnot)
			openparen--;
		else if ( ((tmpp->line)[0] == ')') && hasMeta)
			openMeta--;
		if (isMetaName(tmpp->next)) {
			/* If it is a metaName add the name and = and skip to next */
			hasMeta = 1;
			newp = (struct swline *) addswline(newp, "(");
			newp = (struct swline *) addswline(newp, tmpp->line);
			newp = (struct swline *) addswline(newp, "=");
			/* skip the name and the '=' (isMetaName guarantees next exists) */
			tmpp = tmpp->next;
			tmpp = tmpp->next;
			continue;
		}
		if (!strcmp(tmpp->line, "not") ) {
			hasnot = 1;
			newp = (struct swline *) addswline(newp, "(");
		}
		else if (hasnot && !openparen) {
			/* the "not" operand is complete: close the group */
			hasnot = 0;
			newp = (struct swline *) addswline(newp, tmpp->line);
			newp = (struct swline *) addswline(newp, ")");
			tmpp = tmpp->next;
			continue;
		}
		else if (hasMeta && !openMeta) {
			/* the meta operand is complete: close the group */
			hasMeta = 0;
			newp = (struct swline *) addswline(newp, tmpp->line);
			newp = (struct swline *) addswline(newp, ")");
			tmpp = tmpp->next;
			continue;
		}
		newp = (struct swline *) addswline(newp, tmpp->line);
		if (!strcmp(tmpp->line, "=") ) {
			/* a bare '=' with no preceding name */
			hasMeta = 1;
			newp = (struct swline *) addswline(newp, "(");
		}
		tmpp = tmpp->next;
	}
	
#ifdef DEBUG
	newp2 = newp;
	while (newp2 != NULL) {
		printf("%s ", newp2->line);
		newp2 = newp2->next;
	}
	putchar('\n');
#endif
	
	return newp;
}
/* Expands words with asterisks as wildcards into a series of
** "or" searches. Terms like "quick*" are expanded into
** "quicktime or quickly", etc.
*/
struct swline *expandstar(sp, fp)
struct swline *sp;
FILE *fp;
{
	int i, firsttime, gotstar;
	char foundword[MAXWORDLEN], searchword[MAXWORDLEN];
	struct swline *newp;
	
	newp = NULL;
	while (sp != NULL) {
		strcpy(searchword, sp->line);
		if (searchword[0] != '*' && strchr(searchword, '*')) {
			for (i = gotstar = 0; searchword[i]; i++)
				if (gotstar)
				searchword[i] = '';
			else if (searchword[i] == '*') {
				searchword[i] = '';
				gotstar = 1;
			}
			firsttime = 0;
			do {
				strcpy(foundword, getmatchword(searchword,
					fp, firsttime));
				if (strcmp(foundword, NOWORD)) {
					/* Add "(" if it is the first time */
					if (firsttime == 0) 
						newp = (struct swline *)
						addswline(newp, "(");
					if (firsttime)
						newp = (struct swline *)
						addswline(newp, "or");
					newp = (struct swline *)
						addswline(newp, foundword);
				}
				else {
					if (!firsttime)
						newp = (struct swline *)
						addswline(newp, NOWORD);
					else  /*Add ")" if last of many */
						newp = (struct swline *)
						addswline(newp, ")");
					break;
				}
				firsttime++;
			} while (strcmp(foundword, NOWORD));
		}
		else {
			newp = (struct swline *) addswline(newp,
				searchword);
		}
		sp = sp->next;
	}
	return newp;
}
/* Returns the next index word that begins with the given prefix.
**
** word      - the prefix to match (non-empty).
** fp        - the open index file.
** firsttime - 0 on the first call for a prefix: the stream is first
**             positioned at the offset recorded for word[0]. Non-zero on
**             later calls: scanning continues from the current position.
**
** Returns a pointer to a static buffer holding the matching word (it is
** overwritten by the next call), or NOWORD when there are no more
** matches, the stopword section is reached, or the file ends.
*/
char *getmatchword(word, fp, firsttime)
char *word;
FILE *fp;
int firsttime;
{
	int i, c, found;
	static char fileword[MAXWORDLEN];
	
	if (!firsttime) {
		for (i = found = 0; indexchars[i] != '\0'; i++)
			if (word[0] == indexchars[i]) {
				fseek(fp, offsets[i], 0);
				found = 1;
			}
		if (!found)
			return NOWORD;
	}
	
	if (offsets[STOPWORDPOS] == ftell(fp))
		return NOWORD;
	/* each entry is "<word>:<data>" terminated by a zero byte */
	for (i = 0; (c = fgetc(fp)) != 0 && c != EOF; ) {
		if (c == ':') {
			fileword[i] = '\0';
			i = 0;
			/* skip the entry's data up to its terminator */
			while ((c = fgetc(fp)) != 0 && c != EOF)
				;
			/* a different first letter means the run is over */
			if (fileword[0] != word[0])
				return NOWORD;
			if (strncmp(fileword, word, strlen(word)) == 0)
				return fileword;
			if (offsets[STOPWORDPOS] == ftell(fp))
				return NOWORD;
		}
		else if (i < MAXWORDLEN - 1)
			fileword[i++] = c;	/* excess characters are dropped */
	}
	return NOWORD;
}
/* Echoes the comment header of an index file to stdout.
**
** The first line is read and discarded; every following line whose first
** character is '#' is printed as-is. The stream is rewound to offset 0
** before returning.
*/
void getheader(fp)
FILE *fp;
{
	int peek;
	char buf[MAXSTRLEN];
	
	/* the first line is not echoed */
	fgets(buf, MAXSTRLEN, fp);
	for (;;) {
		/* look at the next character without consuming it */
		peek = fgetc(fp);
		ungetc(peek, fp);
		if (peek != '#')
			break;
		fgets(buf, MAXSTRLEN, fp);
		fputs(buf, stdout);
	}
	fseek(fp, 0, 0);
}
/* Reads the offsets in the index file so word lookup is faster.
**
** Skips the leading '#' lines, then parses the next line as a run of
** decimal numbers of at most MAXLONGLEN digits each and stores them in
** the global offsets[] table (cleared to zero first). Parsing stops at
** end of line, end of file, the first non-digit, or once MAXCHARS
** entries have been stored.
*/
void readoffsets(fp)
FILE *fp;
{
	int c, i, k, digits;
	long j, num;
	
	for (i = 0; i < MAXCHARS; i++)
		offsets[i] = 0;
	
	fseek(fp, 0, 0);
	/* skip the '#' header lines; c ends up holding the first character
	 * of the offset line */
	while ((c = fgetc(fp)) == '#') {
		do {
			c = fgetc(fp);
		} while (c != 0 && c != EOF && c != '\n');
	}
	
	j = 0;
	while (c != EOF && c != '\n' && j < MAXCHARS) {
		k = MAXLONGLEN;
		digits = 0;
		for (num = 0; isdigit(c) && k--; ) {
			num = (num * 10) + (c - '0');
			c = fgetc(fp);
			digits++;
		}
		/* no digit consumed: malformed line, stop instead of spinning */
		if (digits == 0)
			break;
		offsets[j++] = num;
	}
}
/* Reads the stopwords in the index file.
**
** Seeks to offsets[STOPWORDPOS] and reads up to the end of that line.
** Each time a whitespace character is met, the characters collected so
** far are handed to addstophash(). Characters beyond MAXWORDLEN - 1 in
** a single word are dropped.
*/
void readstopwords(fp)
FILE *fp;
{
	int i, c;
	char word[MAXWORDLEN];
	
	fseek(fp, offsets[STOPWORDPOS], 0);
	for (i = 0; (c = fgetc(fp)) != '\n' && c != EOF; ) {
		if (!isspace(c)) {
			if (i < MAXWORDLEN - 1)
				word[i++] = c;
		}
		else {
			word[i] = '\0';
			addstophash(word);
			i = 0;
		}
	}
}
/* Reads the metaNames from the index
*/
void readMetaNames(fp)
FILE *fp;
{
	int i, c;
	char word[MAXWORDLEN];
	
	fseek(fp, offsets[METANAMEPOS], 0);
	for (i = 0; (c = fgetc(fp)) != 'n' && c != EOF; )
	{
		if (!isspace(c))
		{
			word[i++] = c;
		}
		else 
		{
			int docPropStyle = 0;
			char* docPropStyleTmp;
			word[i] = '';
			/* parse the meta name style:
			 * <name>"0   -> normal meta name [default]
			 * <name>"1   -> doc property name
			 * <name>"2   -> both
			 */
			docPropStyleTmp = strrchr(word, '"');	
			if (docPropStyleTmp != NULL)
			{
				*docPropStyleTmp++ = '';	/* remove (and move past) quote */
				docPropStyle = atoi(docPropStyleTmp);
			}
			/* add the meta tag, possible twice */
			if ((docPropStyle == 0) || (docPropStyle == 2))
				addMetaEntry(&metaEntryList, word, 0);	/* as metaName */
			if ((docPropStyle == 1) || (docPropStyle == 2))
				addMetaEntry(&metaEntryList, word, 1);	/* as docProp */
			i = 0;
		}
	}
}
/* Reads the file offset table in the index file.
**
** Seeks to offsets[FILEOFFSETPOS] and parses that line as a run of
** decimal numbers of at most MAXLONGLEN digits each. The n-th number
** (counting from 0) is passed to addtofilehashlist(n, number). Parsing
** stops at end of line, end of file, or the first non-digit.
*/
void readfileoffsets(fp)
FILE *fp;
{
	int j, k, c, digits;
	long num;
	
	j = 0;
	fseek(fp, offsets[FILEOFFSETPOS], 0);
	c = fgetc(fp);
	while (c != EOF && c != '\n') {
		k = MAXLONGLEN;
		digits = 0;
		for (num = 0; c != EOF && isdigit(c) && k--; ) {
			num = (num * 10) + (c - '0');
			c = fgetc(fp);
			digits++;
		}
		/* no digit consumed: malformed line, stop instead of spinning */
		if (digits == 0)
			break;
		addtofilehashlist(j++, num);
	}
}
/* The recursive parsing function.
** This was a headache to make but ended up being surprisingly easy. :)
** parseone tells the function to only operate on one word or term.
*/
struct result *parseterm(fp, parseone, metaName)
FILE *fp;
int parseone;
int metaName;
{
	int rulenum;
	char word[MAXWORDLEN];
	struct result *rp, *newrp;
	/*
	 * The andLevel is used to help keep the ranking function honest
	 * when it ANDs the results of the latest search term with
	 * the results so far (rp).  The idea is that if you AND three
	 * words together you ultimately want to resulting rank to
	 * be the average of all three individual work ranks. By keeping
	 * a running total of the number of terms already ANDed, the
	 * next AND operation can properly scale the average-rank-so-far
	 * and recompute the new average properly (see andresultlists()).
	 * This implementation is a little weak in that it will not average
	 * across terms that are in parenthesis. (It treats an () expression
	 * as one term, and weights it as "one".)
	 */
	int andLevel = 0;	/* number of terms ANDed so far */
	
	
	rp = NULL;
	
	rulenum = OR_RULE;
	while (searchwordlist != NULL) {
		strcpy(word, searchwordlist->line);
		
		if (rulenum == NO_RULE)
			rulenum = DEFAULT_RULE;
		if (isunaryrule(word)) {
			searchwordlist = searchwordlist->next;
			rp = (struct result *) parseterm(fp, 1, metaName);
			rp = (struct result *) notresultlist(rp, fp);
			/* Wild goose chase */
			rulenum = NO_RULE;
			continue;
		}
		else if (isbooleanrule(word)) {
			rulenum = getrulenum(word);
			searchwordlist = searchwordlist->next;
			continue;
		}
		
		if (rulenum != AND_RULE)
			andLevel = 0;	/* reset */
		else if (rulenum == AND_RULE)
			andLevel++;
		
		if (word[0] == '(') {
			
			searchwordlist = searchwordlist->next;
			newrp = (struct result *) parseterm(fp, 0, metaName);
			
			if (rulenum == AND_RULE)
				rp = (struct result *)
				andresultlists(rp, newrp, andLevel);
			else if (rulenum == OR_RULE)
				rp = (struct result *)
				orresultlists(rp, newrp);
			if (searchwordlist == NULL)
				break;
			
			rulenum = NO_RULE;
			continue;
			
		}
		else if (word[0] == ')') {
			searchwordlist = searchwordlist->next;
			break;
		}
		
		/* Check if the next word is '=' */
		if ( isMetaName(searchwordlist->next) ) {
			metaName = getMetaName(word);
			if (metaName == 1){
				printf ("err: The metaName %s doesn't exist in  user configfilen", word);
				exit(0);
			}
			/* Skip both the metaName end the '=' */
			searchwordlist = searchwordlist->next->next;
			newrp = (struct result *) parseterm(fp, 1, metaName);
			if (rulenum == AND_RULE)
				rp = (struct result *) andresultlists(rp, newrp, andLevel);
			else if (rulenum == OR_RULE)
				rp = (struct result *) orresultlists(rp, newrp);
			if (searchwordlist == NULL)
				break;
			
			rulenum = NO_RULE;
			metaName = 1;
			continue;
		}
		
		rp = (struct result *) operate(rp, rulenum, word, 
					       fp, metaName,
						   andLevel);
		
		if (parseone) {
			searchwordlist = searchwordlist->next;
			break;
		}
		rulenum = NO_RULE;
		
		searchwordlist = searchwordlist->next;
	}
	
	return rp;
}
/* Looks up a word in the index file -
** it calls getfileinfo(), which does the real searching.
*/
struct result *operate(rp, rulenum, word, fp, metaName, andLevel)
struct result *rp;
int rulenum;
char *word;
FILE *fp;
int metaName;
int andLevel;
{
	int i, found;
	struct result *newrp, *returnrp;
	
	if (applyStemmingRules)
	{
		/* apply stemming algorithm to the search term */
		Stem(word);
	}
	if (isstopword(word) && !isrule(word)) 
	{
		if (rulenum == OR_RULE && rp != NULL)
			return rp;
		else
			commonerror = 1;
	}
	
	for (i = found = 0; indexchars[i] != ''; i++)
	{
		if (word[0] == indexchars[i]) 
		{
			fseek(fp, offsets[i], 0);
			found = 1;
		}
	}
	if (!found) 
	{
		if (rulenum == AND_RULE)
			return NULL;
		else if (rulenum == OR_RULE)
			return rp;
	}
	
	newrp = (struct result *) getfileinfo(word, fp, metaName);
	if (rulenum == AND_RULE)
		returnrp = (struct result *) andresultlists(rp, newrp, andLevel);
	else if (rulenum == OR_RULE)
		returnrp = (struct result *) orresultlists(rp, newrp);
	else if (rulenum == NOT_RULE)
		returnrp = (struct result *) notresultlist(newrp, fp);
	return returnrp;
}
/* Looks up a file name in the index file.
**
** filenum - file number as stored in a result; it is passed through
**           decodefilenum() and getfilenum() to obtain the file offset.
** fp      - the open index file; its position is changed.
** propPos - when not NULL and SUPPORT_DOC_PROPERTIES is defined, receives
**           the stream position just after the line that was read.
**
** Returns a pointer to a static buffer holding the line read at that
** offset (overwritten by the next call). The buffer holds an empty
** string when the line cannot be read.
*/
char *lookupfile(filenum, fp, propPos)
     int filenum;
     FILE *fp;
     long *propPos;
{
	static char line[MAXSTRLEN];
	
	fseek(fp, getfilenum(decodefilenum(filenum) - 1), 0);
	/* never hand back the previous call's record on a read failure */
	if (fgets(line, MAXSTRLEN, fp) == NULL)
		line[0] = '\0';
	
	#ifdef SUPPORT_DOC_PROPERTIES
	if (propPos != NULL)
		*propPos = ftell(fp);
	#endif
	
	return line;
}
/* Finds a word and returns its corresponding file and rank information list.
** If not found, NULL is returned.
*/
struct result *getfileinfo(word, fp, metaName)
char *word;
FILE *fp;
int metaName;
{
	int i, c, x, countnum, rank, filenum, structure;
	char fileword[MAXWORDLEN];
	struct result *rp;
	int res;
	
	rp = NULL;
	
	for (i = 0; (c = fgetc(fp)) != 0; ) {
		if (c == ':') {
			fileword[i] = '';
			i = 0;
			res = strcmp(word,fileword);
			if (!res)
				break;
			else if (res > 0){
				while ((c = fgetc(fp)) != 0)
					;
				if (offsets[STOPWORDPOS] == ftell(fp))
					return NULL;
				continue;
			}
			else if (res < 0)
				return NULL;
		}
		else
			fileword[i++] = c;
	}
	if (c == 0)
		return NULL;
	
	countnum = 1;
	
	ungetc(c, fp);
	while ((c = fgetc(fp)) != 0) 
	{
		x = 0;
		do {
			c = fgetc(fp);
			if (c == 0)
				return rp;
			x *= 128;
			x += c & 127;
		} while (c & 128);
		if (x) 
		{
			if (countnum == 1) {
				filenum = x;
				countnum++;
			}
			else if (countnum == 2) {
				rank = x;
				countnum++;
			}
			else if (countnum == 3) {
				structure = x;
				countnum++;
			}
			else if (countnum == 4) {
				if ( x == metaName )
				{
					rp = (struct result *) addtoresultlist(rp, filenum, rank, structure);
					if (verbose == 4)
					{
						/* dump diagnostic info */
						char* pos;
						char* fileinfo;
						long curFilePos;
						curFilePos = ftell(fp);	/* save */
						fileinfo = lookupfile(filenum, fp, NULL);
						pos = strchr(fileinfo, '"');	/* after file name */
						if (pos)
							*(pos-1) = '';	/* truncate */
						
						printf("# diagt%st%st%dn",
							fileinfo, 
							word, 
							rank);
						
						if (pos)
							*(pos-1) = ' ';	/* restore */
						fseek(fp, curFilePos, 0); /* restore */
					}
					
				}
				countnum = 1;
			}
		}
	}
	
	return rp;
}
/* Tells whether a word is one of the query operators
** "and", "or" or "not" (exact, case-sensitive match).
** Returns 1 if it is, 0 otherwise.
*/
int isrule(word)
char *word;
{
	static const char *const operators[] = { "and", "or", "not" };
	int k;
	
	for (k = 0; k < (int) (sizeof operators / sizeof operators[0]); k++) {
		if (strcmp(word, operators[k]) == 0)
			return 1;
	}
	return 0;
}
/* Tells whether a word is a binary operator, i.e. exactly
** "and" or "or". Returns 1 if it is, 0 otherwise.
*/
int isbooleanrule(word)
char *word;
{
	int isAnd = (strcmp(word, "and") == 0);
	int isOr = (strcmp(word, "or") == 0);
	
	return (isAnd || isOr) ? 1 : 0;
}
/* Tells whether a word is the unary operator, i.e. exactly "not".
** Returns 1 if it is, 0 otherwise.
*/
int isunaryrule(word)
char *word;
{
	return strcmp(word, "not") == 0;
}
/* Maps an operator word to its rule constant:
** "and" -> AND_RULE, "or" -> OR_RULE, "not" -> NOT_RULE.
** Any other word yields NO_RULE.
*/
int getrulenum(word)
char *word;
{
	int rule = NO_RULE;
	
	if (strcmp(word, "and") == 0)
		rule = AND_RULE;
	else if (strcmp(word, "or") == 0)
		rule = OR_RULE;
	else if (strcmp(word, "not") == 0)
		rule = NOT_RULE;
	
	return rule;
}
/* Intersects two result lists.
**
** r1, r2   - the lists to AND; if either is NULL the result is NULL.
** andLevel - number of terms already averaged into r1's ranks
**            (values below 1 are treated as 1).
**
** For every pair of entries with the same filenum a new entry is
** appended whose structure is the bitwise AND of both and whose rank is
** the running average (r1 rank * andLevel + r2 rank) / (andLevel + 1).
** Returns the newly built list; the inputs are only read.
*/
struct result *andresultlists(r1, r2, andLevel)
     struct result *r1;
     struct result *r2;
     int andLevel;
{
	struct result *left, *right, *merged;
	int weight, blended;
	
	if (r1 == NULL || r2 == NULL)
		return NULL;
	
	weight = (andLevel < 1) ? 1 : andLevel;
	merged = NULL;
	
	for (left = r1; left != NULL; left = left->next) {
		for (right = r2; right != NULL; right = right->next) {
			if (left->filenum != right->filenum)
				continue;
			/* scale the old average back up by the number of terms
			 * it represents, add the new term, and re-average */
			blended = ((left->rank * weight) + right->rank) / (weight + 1);
			merged = (struct result *) addtoresultlist(merged,
				left->filenum,
				blended,
				left->structure & right->structure);
		}
	}
	
	return merged;
}
/* Takes two lists of results from searches and ORs them together.
*/
struct result *orresultlists(r1, r2)
struct result *r1;
struct result *r2;
{
	int i;
	struct result *rp;
	static struct result *newnode;
	
	newnode = NULL;
	
	if (r1 == NULL)
		return r2;
	else if (r2 == NULL)
		return r1;
	
	initresulthashlist();
	while (r1 != NULL) {
		mergeresulthashlist(r1->filenum, r1->rank, r1->structure);
		r1 = r1->next;
	}
	while (r2 != NULL) {
		mergeresulthashlist(r2->filenum, r2->rank, r2->structure);
		r2 = r2->next;
	}
	for (i = 0; i < HASHSIZE; i++) {
		rp = resulthashlist[i];
		while (rp != NULL) {
			newnode = (struct result *) addtoresultlist(newnode,
				rp->filenum, rp->rank, rp->structure);
			rp = rp->next;
		}
	}
	
	return newnode;
}
/* Complements a result list (the NOT unary operation).
**
** Every file number in rp is marked; then each file number from 1 up to
** getindexfilenum(fp) that is not marked is added to a new list with
** rank 1000 and structure IN_ALL. (GH)
*/
struct result *notresultlist(rp, fp)
struct result *rp;
FILE *fp;
{
	int num, total;
	struct result *cursor, *complement;
	
	initmarkentrylist();
	for (cursor = rp; cursor != NULL; cursor = cursor->next)
		marknum(cursor->filenum);
	
	total = getindexfilenum(fp);
	
	complement = NULL;
	for (num = 1; num <= total; num++) {
		if (ismarked(num))
			continue;
		complement = (struct result *) addtoresultlist(complement, num, 1000, IN_ALL);
	}
	
	return complement;
}
/* Appends a new entry (file number, rank, structure) to a result list.
**
** rp - first node of the list, or NULL to start a new list.
**
** Returns the first node of the list. The node created by the previous
** call is remembered in a static variable and the new node is linked
** after it, so appends to a non-empty list must directly follow the
** previous append to that same list.
*/
struct result *addtoresultlist(rp, filenum, rank, structure)
struct result *rp;
int filenum;
int rank;
int structure;
{
	static struct result *tail;	/* node added by the previous call */
	struct result *node;
	
	node = (struct result *) emalloc(sizeof(struct result));
	node->next = NULL;
	node->structure = structure;
	node->rank = rank;
	node->filenum = filenum;
	
	if (rp != NULL)
		tail->next = node;
	else
		rp = node;
	
	tail = node;
	
	return rp;
}
/* Adds the results of a search, sorts them by rank.
**
** sp       - root of the binary tree of results (NULL for an empty tree).
** rank     - rank of the new hit; also raises the global bigrank.
** fileinfo - the index line for the file; it is duplicated with
**            mystrdup() and the copy is cut at its first newline.
** propPos  - stored in the node when SUPPORT_DOC_PROPERTIES is defined.
** filenum  - not used by this function.
**
** Higher ranks go to the left subtree, so an in-order walk (see
** printsortedresults()) visits the best hits first. Returns the root.
*/
struct sortresult *addsortresult(sp, rank, fileinfo, propPos, filenum)
     struct sortresult *sp;
     int rank;
     char *fileinfo;
     long propPos;
     int filenum;
{
	if (rank > bigrank)
		bigrank = rank;
	
	if (sp == NULL) {
		char* endOfLinePos;
		sp = (struct sortresult *) emalloc(sizeof(struct sortresult));
		sp->rank = rank;
		sp->fileinfo = (char *) mystrdup(fileinfo);
		sp->left = sp->right = NULL;
		/* formatting search results is easier without the newline */
		endOfLinePos = strchr(sp->fileinfo, '\n');
		if (endOfLinePos)
			*endOfLinePos = '\0';
		#ifdef SUPPORT_DOC_PROPERTIES
		sp->propPos = propPos; /* allows later lookup of doc properties */
		#endif
	}
	else {
		if (sp->rank < rank)
			sp->left = (struct sortresult *) addsortresult(sp->left, rank, fileinfo, propPos, filenum);
		else
			sp->right = (struct sortresult *) addsortresult(sp->right, rank, fileinfo, propPos, filenum);
	}
	
	return sp;
}
/* Returns the number of nodes in a sorted-result tree,
** i.e. the number of files a search produced (0 for NULL).
*/
int countResults(sp)
struct sortresult *sp;
{
	if (sp == NULL)
		return 0;
	
	/* this node plus everything below it */
	return 1 + countResults(sp->left) + countResults(sp->right);
}
/* Prints the final results of a search.
*/
void printsortedresults(sp, num, fp)
     struct sortresult *sp;
     double num;
     FILE* fp;
{
	int rank;
	
	if (sp != NULL) 
	{
		printsortedresults(sp->left, num, fp);
		rank = (int) ((float) sp->rank * num);
		if (rank >= 999)
			rank = 1000;
		if (maxhits) 
		{
			if (useCustomOutputDelimiter)
			{
				/* parse fileinfo into filename and title */
				char* filename;
				char* title;
				char* endOfTitle = NULL;
				char* fileSize;
				filename = sp->fileinfo;
				title = strchr(filename, '"');
				if (title == NULL)
				{
					title = "";
					fileSize = "0";
				}
				else
				{
					*(title-1) = '';	/* remove space between filename and title */
					title++;	/* past double quote */
					endOfTitle = strchr(title, '"');	/* end of title */
					if (endOfTitle)
					{
						*endOfTitle = '';
						fileSize = endOfTitle+1;
						while (*fileSize == ' ')
							fileSize++;
					}
					else
					{
						fileSize = "0";
					}
				}
				printf("%d%s%s%s%s%s%s", 
						(rank <= 0) ? 1 : rank, 
						customOutputDelimiter,
						filename,
						customOutputDelimiter,
						title,
						customOutputDelimiter,
						fileSize);
				if (*title)
				{
					/* restore fileinfo... */
					*(--title) = ' ';	/* restore space */
					if (endOfTitle)
						*endOfTitle = '"';
				}
			}
			else
			{
				printf("%d %s", (rank <= 0) ? 1 : rank, sp->fileinfo);
			}
			#ifdef SUPPORT_DOC_PROPERTIES
			printSearchResultProperties(sp->propPos, fp);
			#endif
			printf("n");
			
			if (maxhits > 0) 
			{
				maxhits--;
			}
			
		}
		printsortedresults(sp->right, num, fp);
	}
}
/* Reads a compressed line. This is just here for testing, etc.
**
** Bytes are skipped up to and including the first ':'. After that, one
** number is read per pass: bytes with bit 128 set continue the number
** and the first byte without it ends it. One further byte is consumed
** by the loop condition between numbers. The function returns when a
** zero byte is read where a number byte is expected, or when the file
** ends.
*/
void getrawindexline(fp)
FILE *fp;
{
	int c, inword;
	
	inword = 1;
	while ((c = fgetc(fp)) != EOF) {
		if (c == ':' && inword)
			inword = 0;
		if (!inword) {
			do {
				c = fgetc(fp);
				/* EOF is negative, so (c & 128) would stay true
				 * forever; stop on it explicitly */
				if (c == 0 || c == EOF)
					return;
			} while (c & 128);
		}
	}
}
/* Does an index file have a readable format?
**
** Compares the first line of the file, without its trailing newline,
** with INDEXHEADER. Returns 1 when they are equal, 0 otherwise (also
** when the first line cannot be read). The stream is rewound to offset
** 0 before returning.
*/
int isokindexheader(fp)
FILE *fp;
{
	char line[MAXSTRLEN];
	size_t len;
	int ok;
	
	fseek(fp, 0, 0);
	if (fgets(line, MAXSTRLEN, fp) == NULL) {
		/* empty or unreadable file */
		fseek(fp, 0, 0);
		return 0;
	}
	len = strlen(line);
	if (len > 0 && line[len - 1] == '\n')
		line[len - 1] = '\0';
	ok = (strcmp(line, INDEXHEADER) == 0);
	fseek(fp, 0, 0);
	return ok;
}
/* Check the header for the magic line "# Stemming Applied:"
** and see if stemming was applied.
**
** Reads lines from the start of the file for as long as they begin with
** '#'. If one of them starts with STEMMINGHEADER, the number following
** that prefix (parsed with atoi()) is returned. Otherwise, or when the
** file ends first, 0 is returned. The stream is rewound to offset 0
** before returning.
*/
int wasStemmingAppliedToIndex(fp)
     FILE *fp;
{
	char line[MAXSTRLEN];
	int stemmingDone = 0;	/* assume no stemming */
	size_t hdrLen;
	hdrLen = strlen(STEMMINGHEADER);
	fseek(fp, 0, 0);
	/* stop at end of file too: a failed fgets() leaves line unchanged */
	while (fgets(line, MAXSTRLEN, fp) != NULL && line[0] == '#')
	{
		if (strncmp(line, STEMMINGHEADER, hdrLen) == 0)
		{
			/* found the line. what does it say? */
			stemmingDone = atoi(line+hdrLen);
			break;
		}
	}
	fseek(fp, 0, 0);
	return stemmingDone;
}
/* Looks a meta name up in metaEntryList.
** Returns the index stored with the first entry whose metaName equals
** word, or 1 when no entry matches.
*/
int getMetaName(word)
char * word;
{
	struct metaEntry* entry = metaEntryList;
	
	while (entry != NULL) {
		if (strcmp(entry->metaName, word) == 0)
			return entry->index;
		entry = entry->next;
	}
	
	return 1;
}
/* Tells whether the given list node holds the token "=".
** Callers pass the node FOLLOWING the current word, so a result of 1
** means the current word is used as a meta name.
** Returns 0 for a NULL node.
*/
int isMetaName (searchWord)
struct swline* searchWord;
{
	return (searchWord != NULL && strcmp(searchWord->line, "=") == 0) ? 1 : 0;
}