搜索引擎

开发平台：
Perl

merge.c：源码内容
							/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
**-----------------------------------------------------------------
** Fixed the merge option -M
** G. Hill 3/7/97
**
** Changed readindexline, mergeindexentries, printindexentry and
** added marknumMerge, addtoresultlistMerge, markentrylistMerge,
** ismarkedMerge to add support for METADATA
** G. Hill 3/26/97 ghill@library.berkeley.edu
*/
#include "swish.h"
#include "merge.h"
#include "error.h"
#include "search.h"
#include "index.h"
#include "string.h"	
#include "hash.h"
#include "mem.h"
#include "docprop.h"
/* The main merge functions - it accepts three file names.
** This is a bit hairy. It basically acts as a zipper,
** zipping up both index files into one.
*/
void readmerge(file1, file2, outfile)
char *file1;
char *file2;
char *outfile;
{
	int i, j, indexfilenum1, indexfilenum2, result, totalfiles,
		skipwords, skipfiles;
	long limit1, limit2, fileinfo1, fileinfo2, offsetstart;
	char line[MAXSTRLEN];
	struct indexentry *ip1, *ip2, *ip3;
	struct indexentry *buffer1, *buffer2;
	FILE *fp1, *fp2, *fp3;
	struct metaMergeEntry *metaFile1, *metaFile2;
	int firstTime = 1;
	
	initindexfilehashlist();
	
	/* remapVar is used into addindexfilelist and need to be
	** initialized each time two indexes are merged.
	*/
	remapVar = 0;
	metaFile1 = metaFile2 = NULL;
	
	initmapentrylist();
	
	if ((fp1 = openIndexFileForRead(file1)) == NULL) {
		sprintf(errorstr, "Couldn't read the index file "%s".",
			file1);
		progerr(errorstr);
	}
	if (!isokindexheader(fp1)) {
		sprintf(errorstr, ""%s" has an unknown format.",
			file1);
		progerr(errorstr);
	}
	if ((fp2 = openIndexFileForRead(file2)) == NULL) {
		sprintf(errorstr, "Couldn't read the index file "%s".",
			file2);
		progerr(errorstr);
	}
	if (!isokindexheader(fp2)) {
		sprintf(errorstr, ""%s" has an unknown format.",
			file2);
		progerr(errorstr);
	}
	
	/* Was stemming applied to both indexes? If so, we want
	 * the new index to be marked as "stemming", otherwise
	 * it will be marked as "non-stemming" */
	applyStemmingRules = wasStemmingAppliedToIndex(fp1) && wasStemmingAppliedToIndex(fp2);
	ip1 = ip2 = ip3 = NULL;
	buffer1 = buffer2 = NULL;
	if (verbose)
		printf("Counting files... ");
	indexfilenum1 = getindexfilenum(fp1);
	indexfilenum2 = getindexfilenum(fp2);
	totalfiles = indexfilenum1 + indexfilenum2;
	if (verbose) {
		printf("%d files.n", indexfilenum1 + indexfilenum2);
		printf("Reading stopwords...");
	}
	readoffsets(fp1);
	readstopwords(fp1);
	limit1 = offsets[STOPWORDPOS];
	fileinfo1 = offsets[FILELISTPOS];
	metaFile1 = readMergeMeta(metaFile1,fp1);
	
	readoffsets(fp2);
	readstopwords(fp2);
	limit2 = offsets[STOPWORDPOS];
	fileinfo2 = offsets[FILELISTPOS];
	metaFile2 = readMergeMeta(metaFile2,fp2);
	
	/* Create the merged list and modify the
	   individual ones with the new meta index
	*/	   
	metaEntryList = createMetaMerge(metaFile1, metaFile2);
	
	if (verbose)
		printf("nReading file info...");
	fseek(fp1, fileinfo1, 0);
	for (i = 1; i <= indexfilenum1; i++) {
		/* Keep into account that 128th files are skipped */
		/*ii = i + (i/128); */
		/* addindexfilelist(ii, line, &totalfiles); */
		struct docPropertyEntry *docProperties;
		readFileEntry(fp1, line, &docProperties);
		addindexfilelist(i, line, docProperties, &totalfiles);
		#ifdef SUPPORT_DOC_PROPERTIES
		/* swap metaName values for properties */
		swapDocPropertyMetaNames(docProperties, metaFile1);
		#endif
	}
	fseek(fp2, fileinfo2, 0);
	for (i = 1; i <= indexfilenum2; i++) {
		/* keep into account that 128th files are skipped */
		/*ii = i + (i / 128) + indexfilenum1 + (indexfilenum1 / 128);*/
		/* This one is to keep in account the ones skipped earlier */
		/*if (!(ii%128))*/
		/*  ii++;*/
		/*addindexfilelist(ii, line, &totalfiles); */
		struct docPropertyEntry *docProperties;
		readFileEntry(fp2, line, &docProperties);
		addindexfilelist(i + indexfilenum1, line, docProperties, &totalfiles);
		#ifdef SUPPORT_DOC_PROPERTIES
		/* swap metaName values for properties */
		swapDocPropertyMetaNames(docProperties, metaFile2);
		#endif
	}
	
	if ((fp3 = openIndexFileForWrite(outfile)) == NULL) {
		sprintf(errorstr,
			"Couldn't write the merged index file "%s".",
			outfile);
		progerr(errorstr);
	}
	
	if (verbose)
		printf("nMerging words... ");
	
	printheader(fp3, outfile, 0, totalfiles,1);
	
	offsetstart = ftell(fp3);
	for (i = 0; i < MAXCHARS; i++)
		fprintf(fp3, "%016li", offsets[i]);
	fputc('n', fp3);
	
	readoffsets(fp1);
	readoffsets(fp2);
	
	for (i = 0; i < MAXCHARS; i++)
		offsets[i] = 0;
	
	skipwords = 0;
	while (1) {
		if (buffer1 == NULL) {
			ip1 = (struct indexentry *) 
				readindexline(fp1, limit1,metaFile1);
			if (ip1 == NULL) {
				if (ip2 == NULL && !firstTime) {
					break;
				}
			}
			buffer1 = ip1;
		}
		firstTime =0;
		if (buffer2 == NULL) {
			ip2 = (struct indexentry *) 
				readindexline(fp2, limit2,metaFile2);
			if (ip2 == NULL){
				if (ip1 == NULL) {
					break;
				}
			}
			else 
				addfilenums(ip2, indexfilenum1);
			buffer2 = ip2;
		}
		if (ip1 == NULL)
			result = 1;
		else if (ip2 == NULL)
			result = -1;
		else
			result = wordcompare(ip1->word, ip2->word);
		if (!result) {
			ip3 = (struct indexentry *) mergeindexentries(ip1, ip2);
			printindexentry(ip3, fp3);
			freeindexentry(ip1);
			freeindexentry(ip2);
			freeindexentry(ip3);
			buffer1 = buffer2 = NULL;
			skipwords++;
		}
		else if (result < 0) {
			printindexentry(ip1, fp3);
			freeindexentry(ip1);
			buffer1 = NULL;
		}
		else {
			printindexentry(ip2, fp3);
			freeindexentry(ip2);
			buffer2 = NULL;
		}
	}
	
	if (verbose) {
		if (skipwords)
			printf("%d redundant word%s.", skipwords,
			(skipwords == 1) ? "" : "s");
		else
			printf("no redundant words.");
	}
	
	printstopwords(fp3);
	
	if (verbose)
		printf("nMerging file info... ");
	
	offsets[FILELISTPOS] = ftell(fp3);
	for (i = j = 1; i <= indexfilenum1 + indexfilenum2; i++)
	{
		if (getmap(i) == j) {
			struct docPropertyEntry* docProperties;
			char* fileInfo;
			addtofilehashlist(j++ - 1, ftell(fp3));
			fileInfo = lookupindexfilenum(i, &docProperties);
			fprintf(fp3, "%s", fileInfo);
			#ifdef SUPPORT_DOC_PROPERTIES
			storeDocProperties(docProperties, fp3);
			#endif
		}
	}
	
	skipfiles = (indexfilenum1 + indexfilenum2) - totalfiles;
	if (verbose) {
		if (skipfiles)
			printf("%d redundant file%s.", skipfiles,
			(skipfiles == 1) ? "" : "s");
		else
			printf("no redundant files.");
	}
	printfileoffsets(fp3);
	printMetaNames(fp3);
	
	fseek(fp3, offsetstart, 0);
	for (i = 0; i < MAXCHARS; i++)
		fprintf(fp3, "%016li", offsets[i]);
	fclose(fp3);
	
	fclose(fp1);
	fclose(fp2);
	
	if (verbose)
		printf("nDone.n");
}
/* Counts the file entries stored in an index file.
**
** fp - an open index file. readoffsets(fp) is called first, then the
**      stream is positioned at offsets[FILELISTPOS] and entries are
**      read with readFileEntry() until offsets[FILEOFFSETPOS] is reached.
**
** Returns the number of entries read.
*/
int getindexfilenum(fp)
FILE *fp;
{
	int count;
	long pos;
	char line[MAXSTRLEN];
	
	readoffsets(fp);
	fseek(fp, offsets[FILELISTPOS], 0);
	
	count = 0;
	/* Besides the normal stop at FILEOFFSETPOS, also stop when ftell()
	** fails, when the position has run past the target, or at end of
	** file; a plain "!=" test would spin forever on a damaged index.
	*/
	while ((pos = ftell(fp)) >= 0 && pos < offsets[FILEOFFSETPOS]
		&& !feof(fp))
	{
		readFileEntry(fp, line, NULL);
		count++;
	}
	
	return count;
}
/* Renumbers every result of an index entry for the merged index.
**
** ip  - entry whose result list is updated in place
** num - offset added to each decoded file number (readmerge passes the
**       number of files of the first index, so that the second index's
**       files are numbered after them)
**
** Each stored file number is decoded, shifted by num, passed through
** getmap() to pick up any remapping, and encoded again.
*/
void addfilenums(ip, num)
struct indexentry *ip;
int num;
{
	struct resultMerge *node;
	int shifted, mapped;
	
	for (node = ip->result; node != NULL; node = node->next) {
		shifted = decodefilenum(node->filenum) + num;
		mapped = getmap(shifted);
		node->filenum = encodefilenum(mapped);
	}
}
/* This reads the next line in the index file and puts the results
** in a result structure.
*/
struct indexentry *readindexline(fp, limit, metaFile)
FILE *fp;
long limit;
struct metaMergeEntry *metaFile;
{
	int i, c, x, countnum, rank, filenum, structure,metaName;
	char fileword[MAXWORDLEN];
	struct resultMerge *rp;
	struct indexentry *ip;
	struct metaMergeEntry* tmp;
	
	rp = NULL;
	
	if (limit == ftell(fp))
		return NULL;
	for (i = 0; (c = fgetc(fp)) != 0; ) {
		if (c == ':') {
			fileword[i] = '';
			break;
		}
		else
			fileword[i++] = c;
	}
	
	countnum = 1;
	
	ungetc(c, fp);
	while ((c = fgetc(fp)) != 0) {
		x = 0;
		do {
			c = fgetc(fp);
			if (c == 0)
				break;
			x *= 128;
			x += c & 127;
		} while (c & 128);
		if (c == 0)
			break;
		if (x) {
			if (countnum == 1) {
				filenum = x;
				countnum++;
			}
			else if (countnum == 2) {
				rank = x;
				countnum++;
			}
			else if (countnum == 3) {
				structure = x;
				countnum++;
			}
			else if (countnum ==4) {
				metaName = x;
				/*Need to modify metaName with new list*/
				for(tmp=metaFile;tmp;tmp=tmp->next) {
					if (tmp->oldIndex == metaName) {
						metaName = tmp->newIndex;
						break;
					}
				}
				rp = (struct resultMerge *)
					addtoresultlistMerge(rp, filenum,
					rank, structure,metaName);
				countnum = 1;
			}
		}
	}
	
	ip = (struct indexentry *) emalloc(sizeof(struct indexentry));
	ip->word = (char *) mystrdup(fileword);
	ip->result = rp;
	
	return ip;
}
/* This puts all the file info into a hash table so that it can
** be looked up by its pathname and filenumber. This is how
** we find redundant file information.
*/
void addindexfilelist(num, info, docProperties, totalfiles)
int num;
char *info;
struct docPropertyEntry *docProperties;
int *totalfiles;
{
	int i;
	unsigned hashval;
	char tmpstr[MAXSTRLEN], path[MAXSTRLEN];
	struct indexfileinfo *ip1, *ip2;
	
	strcpy(path, extractpath(info));
	
	i = lookupindexfilepath(path);
	if (i != -1) {
		*totalfiles = *totalfiles - 1;
		remap(num, i);
		return;
	}
	
	remap(num, remapVar + 1);
	remapVar++;
	
	ip1 = (struct indexfileinfo *) emalloc(sizeof(struct indexfileinfo));
	ip1->filenum = num;
	ip1->fileinfo = (char *) mystrdup(info);
	ip1->path = (char *) mystrdup(path);
	#ifdef SUPPORT_DOC_PROPERTIES
	ip1->docProperties = docProperties;
	#endif
	
	sprintf(tmpstr, "%d", num);
	hashval = bighash(tmpstr);
	ip1->next = indexfilehashlist[hashval];
	indexfilehashlist[hashval] = ip1;
	
	ip2 = (struct indexfileinfo *) emalloc(sizeof(struct indexfileinfo));
	ip2->filenum = num;
	ip2->fileinfo = (char *) mystrdup(info);
	ip2->path = (char *) mystrdup(path);
	#ifdef SUPPORT_DOC_PROPERTIES
	ip2->docProperties = docProperties; /* two pointers to the same list! - be careful */
	#endif
	
	hashval = bighash(path);
	ip2->next = indexfilehashlist[hashval];
	indexfilehashlist[hashval] = ip2;
}
/* Extracts the pathname from a file information line as stored in the
** index file.
**
** s - the file information line
**
** Copies s up to (not including) the first '"', or up to the end of s
** when it contains no quote, and then drops the last copied character
** (presumably the separator in front of the quoted part - confirm
** against the code that writes file entries).
**
** Returns a pointer to a static buffer that is overwritten by the next
** call. An input that is empty or starts with '"' yields "".
*/
char *extractpath(s)
char *s;
{
	int i;
	static char path[MAXSTRLEN];
	
	/* Never copy more than the buffer can hold. */
	for (i = 0; i < MAXSTRLEN - 1 && s[i] && s[i] != '"'; i++)
		path[i] = s[i];
	
	/* Drop the trailing character, but do not step in front of the
	** buffer when nothing was copied.
	*/
	if (i > 0)
		i--;
	path[i] = '\0';
	
	return path;
}
/* Looks up the file information registered for a file number.
**
** num           - file number to look for
** docProperties - if not NULL, receives the entry's property list when
**                 SUPPORT_DOC_PROPERTIES is defined; it is set to NULL
**                 first, so it stays NULL when nothing is found
**
** Returns the stored file information string, or NULL if num is unknown.
*/
char *lookupindexfilenum(num, docProperties)
int num;
struct docPropertyEntry** docProperties;
{
	unsigned bucket;
	char key[MAXSTRLEN];
	struct indexfileinfo *entry;
	
	if (docProperties != NULL)
		*docProperties = NULL;
	
	/* Entries are hashed on the decimal text of the number. */
	sprintf(key, "%d", num);
	bucket = bighash(key);
	
	for (entry = indexfilehashlist[bucket]; entry != NULL; entry = entry->next) {
		if (entry->filenum != num)
			continue;
		#ifdef SUPPORT_DOC_PROPERTIES
		if (docProperties != NULL)
			*docProperties = entry->docProperties;
		#endif
		return entry->fileinfo;
	}
	return NULL;
}
/* Looks up the file number registered for a pathname.
**
** path - pathname to look for (compared with strcmp)
**
** Returns the file number of the first matching entry in the path's
** hash bucket, or -1 if the path is not registered.
*/
int lookupindexfilepath(path)
char *path;
{
	unsigned bucket;
	struct indexfileinfo *entry;
	
	bucket = bighash(path);
	
	for (entry = indexfilehashlist[bucket]; entry != NULL; entry = entry->next) {
		if (strcmp(entry->path, path) == 0)
			return entry->filenum;
	}
	return -1;
}
/* This simply concatenates two information lists that correspond
** to a word found in both index files.
*/
struct indexentry *mergeindexentries(ip1, ip2)
struct indexentry *ip1;
struct indexentry *ip2;
{
	struct resultMerge *newrp, *rp1, *rp2;
	struct indexentry *ep;
	
	rp1 = ip1->result;
	rp2 = ip2->result;
	newrp = NULL;
	
	while (rp1 != NULL) {
		newrp = (struct resultMerge *) addtoresultlistMerge(newrp,
			rp1->filenum, rp1->rank, rp1->structure,rp1->metaName);
		rp1 = rp1->next;
	}
	while (rp2 != NULL) {
		newrp = (struct resultMerge *) addtoresultlistMerge(newrp,
			rp2->filenum, rp2->rank, rp2->structure,rp2->metaName);
		rp2 = rp2->next;
	}
	
	ep = (struct indexentry *) emalloc(sizeof(struct indexentry));
	ep->word = (char *) mystrdup(ip1->word);
	ep->result = newrp;
	
	return ep;
}
/* Writes one word entry to the merged index file, leaving out
** redundant results as it goes along.
**
** ip - the entry to write
** fp - the output index file
**
** For every character of indexchars that equals the first character of
** the word and whose offsets[] slot is still 0, the current output
** position is stored in that slot. The entry is written as the word,
** a ':', the compressed (file number, rank, structure, meta name) of
** each result, and a terminating 0 byte. A result is skipped when a
** result with the same file number and meta name was already written
** for this word.
*/
void printindexentry(ip, fp)
struct indexentry *ip;
FILE *fp;
{
	int i, num, metaName;
	struct resultMerge *rp;
	
	for (i = 0; indexchars[i] != '\0'; i++)
	{
		if ((ip->word)[0] == indexchars[i] && !offsets[i])
			offsets[i] = ftell(fp);
	}
	fprintf(fp, "%s:", ip->word);
	
	/* Start with an empty "already written" set for this word. */
	initmarkentrylistMerge();
	for (rp = ip->result; rp != NULL; rp = rp->next) {
		num = rp->filenum;
		metaName = rp->metaName;
		if (ismarkedMerge(num,metaName))
			continue;
		marknumMerge(num,metaName);
		compress(num, fp);
		compress(rp->rank, fp);
		compress(rp->structure, fp);
		compress(rp->metaName,fp);
	}
	fputc(0, fp);
}
/* Records that oldnum maps to newnum.
** Used to translate file numbers of the input index files into the
** numbering of the merged index file.
**
** The mapping is pushed onto the front of its bucket in mapentrylist
** (hashed on the decimal text of oldnum), so when the same oldnum is
** recorded twice, getmap() finds the most recent mapping first.
*/
void remap(oldnum, newnum)
int oldnum;
int newnum;
{
	unsigned bucket;
	char key[MAXSTRLEN];
	struct mapentry *entry;
	
	sprintf(key, "%d", oldnum);
	bucket = bighash(key);
	
	entry = (struct mapentry *) emalloc(sizeof(struct mapentry));
	entry->oldnum = oldnum;
	entry->newnum = newnum;
	entry->next = mapentrylist[bucket];
	mapentrylist[bucket] = entry;
}
/* Returns the number that num was remapped to by remap().
** A number without a recorded mapping is returned unchanged.
*/
int getmap(num)
int num;
{
	unsigned bucket;
	char key[MAXSTRLEN];
	struct mapentry *entry;
	
	sprintf(key, "%d", num);
	bucket = bighash(key);
	
	for (entry = mapentrylist[bucket]; entry != NULL; entry = entry->next) {
		if (entry->oldnum == num)
			return entry->newnum;
	}
	return num;
}
/* Marks a number as having been printed by adding it to markentrylist
** (hashed on the decimal text of the number).
*/
void marknum(num)
int num;
{
	unsigned bucket;
	char key[MAXSTRLEN];
	struct markentry *entry;
	
	sprintf(key, "%d", num);
	bucket = bighash(key);
	
	entry = (struct markentry *) emalloc(sizeof(struct markentry));
	entry->num = num;
	entry->next = markentrylist[bucket];
	markentrylist[bucket] = entry;
}
/* Merge variant of marknum: marks the pair (num, metaName) as printed.
** The pair is stored in markentrylistMerge, hashed on the decimal text
** of num only.
*/
void marknumMerge(num, metaName)
int num;
int metaName;
{
	unsigned bucket;
	char key[MAXSTRLEN];
	struct markentryMerge *entry;
	
	sprintf(key, "%d", num);
	bucket = bighash(key);
	
	entry = (struct markentryMerge *) emalloc(sizeof(struct markentryMerge));
	entry->num = num;
	entry->metaName = metaName;
	entry->next = markentrylistMerge[bucket];
	markentrylistMerge[bucket] = entry;
}      
/* Tells whether a number was marked with marknum().
** Returns 1 if it was, 0 otherwise.
*/
int ismarked(num)
int num;
{
	unsigned bucket;
	char key[MAXSTRLEN];
	struct markentry *entry;
	
	sprintf(key, "%d", num);
	bucket = bighash(key);
	
	for (entry = markentrylist[bucket]; entry != NULL; entry = entry->next) {
		if (entry->num == num)
			return 1;
	}
	return 0;
}
/* Tells whether the pair (num, metaName) was marked with marknumMerge().
** Returns 1 if it was, 0 otherwise.
*/
int ismarkedMerge(num,metaName)
int num;
int metaName;
{
	unsigned bucket;
	char key[MAXSTRLEN];
	struct markentryMerge *entry;
	
	sprintf(key, "%d", num);
	bucket = bighash(key);
	
	for (entry = markentrylistMerge[bucket]; entry != NULL; entry = entry->next) {
		if (entry->num != num)
			continue;
		if (entry->metaName == metaName)
			return 1;
	}
	return 0;
}
/* Initialize the marking list.
*/
void initmarkentrylist()
{
	int i;
	struct markentry *mp;
	
	for (i = 0; i < BIGHASHSIZE; i++) {
		mp = markentrylist[i];
		if (mp != NULL)
			free(mp);
		markentrylist[i] = NULL;
	}
}
void initmarkentrylistMerge()
{
	int i;
	struct markentryMerge *mp;
	
	for (i = 0; i < BIGHASHSIZE; i++) {
		mp = markentrylistMerge[i];
		if (mp != NULL)
			free(mp);
		markentrylistMerge[i] = NULL;
	}
}      
/* Initialize the main file list.
*/
void initindexfilehashlist()
{
	int i;
	struct indexfileinfo *ip;
	
	for (i = 0; i < BIGHASHSIZE; i++) {
		ip = indexfilehashlist[i];
		if (ip != NULL)
			free(ip);
		indexfilehashlist[i] = NULL;
	}
}
/* Initialize the mapentrylist 
*/
void initmapentrylist()
{
	int i;
	struct mapentry *ip;
	
	for (i = 0; i < BIGHASHSIZE; i++) {
		ip = mapentrylist[i];
		if (ip != NULL)
			free(ip);
		mapentrylist[i] = NULL;
	}
}
/* Releases an index entry: its word, every node of its result list,
** and the entry itself.
**
** ip - the entry to release; must not be NULL
*/
void freeindexentry(ip)
struct indexentry *ip;
{
	struct resultMerge *node, *following;
	
	free(ip->word);
	for (node = ip->result; node != NULL; node = following) {
		following = node->next;	/* fetch before the node is freed */
		free(node);
	}
	free(ip);
}
/* Translates a file number into something that can be compressed.
**
** num - plain file number (1, 2, 3, ...)
**
** Returns the num-th positive integer that is not a multiple of 128,
** i.e. 1..127 map to themselves, 128 maps to 129, 255 maps to 257, and
** so on. Multiples of 128 are avoided presumably because their lowest
** 7-bit group is 0, and a 0 byte ends an entry when the index is read
** back - confirm against compress().
** Zero and negative numbers yield 0 (counting up to a negative value
** one step at a time would never terminate properly).
*/
int encodefilenum(num)
int num;
{
	if (num <= 0)
		return 0;
	
	/* Each run of 127 usable values is followed by one skipped value,
	** so one extra step is needed per complete run before num.
	*/
	return num + (num - 1) / 127;
}
/* Translates a compressed (encoded) file number back into a plain one.
**
** num - encoded file number
**
** Returns num reduced by the count of multiples of 128 that are
** strictly smaller than num; values of 1 or less come back unchanged.
*/
int decodefilenum(num)
int num;
{
	if (num <= 1)
		return num;
	
	/* (num - 1) / 128 is how many multiples of 128 lie below num. */
	return num - (num - 1) / 128;
}
/* Appends a result, including its meta name, to a result list.
**
** rp        - current list, or NULL to start a new one
** filenum, rank, structure, metaName - values stored in the new node
**
** Returns the list head: the new node when rp was NULL, otherwise rp.
**
** Caution: the list tail is remembered in a static variable between
** calls. When rp is not NULL the new node is linked after the node
** created by the previous call, whatever rp is - so a list has to be
** built by consecutive calls without adding to another list in between.
*/
struct resultMerge *addtoresultlistMerge(rp, filenum, rank, 
										 structure, metaName)
										 struct resultMerge *rp;
int filenum;
int rank;
int structure;
int metaName;
{
	static struct resultMerge *tail;
	struct resultMerge *node;
	
	node = (struct resultMerge *) emalloc(sizeof(struct resultMerge));
	node->next = NULL;
	node->filenum = filenum;
	node->rank = rank;
	node->structure = structure;
	node->metaName = metaName;
	
	if (rp != NULL)
		tail->next = node;
	else
		rp = node;
	
	tail = node;
	
	return rp;
}
/* Reads the meta names of one index file into a metaMergeEntry list.
** Differs from readMetaNames in that the index counter starts at zero
** for every file.
**
** metaFile - list to append to (NULL for an empty list)
** fp       - index file; reading starts at offsets[METANAMEPOS] and
**            stops at the end of that line or at end of file
**
** Returns the (possibly new) head of the list.
**
** Names are separated by white space; a name is only added once the
** white space following it has been read. Each name may carry a style
** suffix after its last '"':
**	<name>"0   -> normal meta name [default]
**	<name>"1   -> doc property name
**	<name>"2   -> both
*/
struct metaMergeEntry* readMergeMeta(metaFile,fp)
struct metaMergeEntry* metaFile;
FILE* fp;
{     
	int i, c, counter;
	char word[MAXWORDLEN];
	
	counter = 0;
	fseek(fp, offsets[METANAMEPOS], 0);
	for (i = 0; (c = fgetc(fp)) != '\n' && c != EOF; ){
		if (!isspace(c)) {
			/* Drop characters that would not fit into word. */
			if (i < MAXWORDLEN - 1)
				word[i++] = c;
		}
		else {
			int docPropStyle = 0;
			char* docPropStyleTmp;
			word[i] = '\0';
			
			/* Split off the style suffix, if there is one. */
			docPropStyleTmp = strrchr(word, '"');	
			if (docPropStyleTmp != NULL)
			{
				*docPropStyleTmp++ = '\0';	/* remove (and move past) quote */
				docPropStyle = atoi(docPropStyleTmp);
			}
			metaFile = addMetaMerge(metaFile, 
									word, 
									(docPropStyle > 0) ? 1 : 0,
									(docPropStyle == 1) ? 1 : 0,
									&counter);
			i = 0;
		}
	}
	return metaFile;
}
/* Appends a meta name to the meta name list of one index file.
**
** metaFile      - list to append to (NULL for an empty list)
** metaWord      - the meta name; it is converted to lower case in place
** isDocProp     - stored as isDocProperty (SUPPORT_DOC_PROPERTIES only)
** isOnlyDocProp - stored as isOnlyDocProperty (SUPPORT_DOC_PROPERTIES only)
** counter       - running index for this file; the value used becomes
**                 the entry's oldIndex and the counter is then advanced.
**                 Indexes start at 2 and multiples of 128 are skipped.
**
** The entry's newIndex is set to 0; createMetaMerge fills it in later.
** Returns the (possibly new) head of the list.
*/
struct metaMergeEntry* addMetaMerge(metaFile, metaWord, isDocProp, isOnlyDocProp, counter)
struct metaMergeEntry* metaFile;
char* metaWord;
int isDocProp, isOnlyDocProp;
int* counter;
{
	int i;
	struct metaMergeEntry* newEntry;
	struct metaMergeEntry* tmpEntry;
	
	if (*counter == 0)
		*counter = 2;
	else if ((*counter) == 1 ||  (!((*counter) % 128)) )
		(*counter)++;
	
	/* tolower() needs a value in unsigned char range; a plain char may
	** be negative for non-ASCII bytes.
	*/
	for( i=0; metaWord[i]; i++)
		metaWord[i] =  tolower((unsigned char) metaWord[i]);
	
	newEntry = (struct metaMergeEntry*) emalloc(sizeof(struct metaMergeEntry));
	newEntry->metaName = (char*)mystrdup(metaWord);
	#ifdef SUPPORT_DOC_PROPERTIES
	newEntry->isDocProperty = isDocProp;
	newEntry->isOnlyDocProperty = isOnlyDocProp;
	#endif
	newEntry->oldIndex = (*counter)++;
	newEntry->newIndex = 0;
	newEntry->next = NULL;
	
	if (metaFile)
    {
		/* Walk to the last entry and hook the new one behind it. */
		for(tmpEntry=metaFile;tmpEntry->next!=NULL;tmpEntry=tmpEntry->next)
			;
		tmpEntry->next = newEntry;
    }
	else
		metaFile = newEntry;
	
	return metaFile;
}
/* Builds the list of all meta names found in the two indexes.
**
** metaFile1, metaFile2 - per-file meta name lists; each entry is handed
**                        to addMetaMergeList together with a shared
**                        index counter
**
** The global metaEntryList is reset to NULL and then rebuilt, first
** from metaFile1 and then from metaFile2. Returns metaEntryList.
*/
struct metaEntry* createMetaMerge(metaFile1, metaFile2)
struct metaMergeEntry* metaFile1;
struct metaMergeEntry* metaFile2;
{
	struct metaMergeEntry* entry;
	int nextIndex = 0;
	
	metaEntryList = NULL;
	
	entry = metaFile1;
	while (entry != NULL) {
		metaEntryList = addMetaMergeList(metaEntryList, entry, &nextIndex);
		entry = entry->next;
	}
	
	entry = metaFile2;
	while (entry != NULL) {
		metaEntryList = addMetaMergeList(metaEntryList, entry, &nextIndex);
		entry = entry->next;
	}
	
	return metaEntryList;
}
/* Adds an entry to the merged meta names list and changes the
 ** new index in the idividual file entry
 */
struct metaEntry* addMetaMergeList(metaEntryList,metaFileEntry,count)
struct metaEntry* metaEntryList;
struct metaMergeEntry* metaFileEntry;
int* count;
{
	int i, wordExists, newIndex;
	struct metaEntry* newEntry;
	struct metaEntry* tmpEntry;
	struct metaEntry* last;
	char *metaWord, *compWord;
	int isOnlyDocProperty = 0;
	int isDocProperty = 0;
	
	wordExists = 0;
	if ((*count) == 0)
		*count = 2;
	else if ((*count) == 1 ||  (!((*count) % 128)) )
		(*count)++;
	metaWord = metaFileEntry->metaName;
	#ifdef SUPPORT_DOC_PROPERTIES
	isDocProperty = metaFileEntry->isDocProperty;
	isOnlyDocProperty = metaFileEntry->isOnlyDocProperty;
	#endif
	
	for( i=0; metaWord[i]; i++)
		metaWord[i] =  tolower(metaWord[i]);
	if (metaEntryList)
    {
		for(tmpEntry=metaEntryList;tmpEntry;tmpEntry=tmpEntry->next)
		{
			if (tmpEntry->next == NULL)
				last = tmpEntry;
			compWord = tmpEntry->metaName;
			if (!strcmp(compWord,metaWord) ) 
			{
				wordExists = 1;
				newIndex = tmpEntry->index;
				/*
				 * Keep the docProperties fields in synch.
				 * The semantics we want for the metaEntry are:
				 *	isDocProperty = 1 if either index is using as PropertyName
				 *	isOnlyDocProperty = 1 if neither index is using as MetaName
				 */
				if (isDocProperty)	/* new entry is docProp, so assert it */
				{
					tmpEntry->isDocProperty = 1;
				}
				if (!isOnlyDocProperty)	/* new entry is not *only* docProp, so unassert that */
				{
					tmpEntry->isOnlyDocProperty = 0;
				}
				break;
			}
		}
		if (wordExists)
		{
			metaFileEntry->newIndex = newIndex;
		}
		else 
		{
			newEntry = (struct metaEntry*) emalloc(sizeof(struct metaEntry));
			newEntry->metaName = (char*)mystrdup(metaWord);
			newEntry->index = *count;
			newEntry->next = NULL;
			#ifdef SUPPORT_DOC_PROPERTIES
			newEntry->isDocProperty = isDocProperty;
			newEntry->isOnlyDocProperty = isOnlyDocProperty;
			#endif
			metaFileEntry->newIndex = (*count)++;
			last->next = newEntry;
		}
    }
	else {
		newEntry = (struct metaEntry*) emalloc(sizeof(struct metaEntry));
		newEntry->metaName = (char*)mystrdup(metaWord);
		newEntry->index = *count;
		newEntry->next = NULL;
		#ifdef SUPPORT_DOC_PROPERTIES
		newEntry->isDocProperty = isDocProperty;	/* init */
		newEntry->isOnlyDocProperty = isOnlyDocProperty;	/* init */
		#endif
		metaFileEntry->newIndex = (*count)++;
		metaEntryList = newEntry;
	}
	return metaEntryList;
}