merge.c
上传用户:qin5330
上传日期:2007-01-05
资源大小:114k
文件大小:23k
- /*
- ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
- ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
- **
- ** This program and library is free software; you can redistribute it and/or
- ** modify it under the terms of the GNU (Library) General Public License
- ** as published by the Free Software Foundation; either version 2
- ** of the License, or any later version.
- **
- ** This program is distributed in the hope that it will be useful,
- ** but WITHOUT ANY WARRANTY; without even the implied warranty of
- ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ** GNU (Library) General Public License for more details.
- **
- ** You should have received a copy of the GNU (Library) General Public License
- ** along with this program; if not, write to the Free Software
- ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- **-----------------------------------------------------------------
- ** Fixed the merge option -M
- ** G. Hill 3/7/97
- **
- ** Changed readindexline, mergeindexentries, printindexentry and
- ** added marknumMerge, addtoresultlistMerge, markentrylistMerge,
- ** ismarkedMerge to add support for METADATA
- ** G. Hill 3/26/97 ghill@library.berkeley.edu
- */
- #include "swish.h"
- #include "merge.h"
- #include "error.h"
- #include "search.h"
- #include "index.h"
- #include "string.h"
- #include "hash.h"
- #include "mem.h"
- #include "docprop.h"
- /* The main merge functions - it accepts three file names.
- ** This is a bit hairy. It basically acts as a zipper,
- ** zipping up both index files into one.
- */
- void readmerge(file1, file2, outfile)
- char *file1;
- char *file2;
- char *outfile;
- {
- int i, j, indexfilenum1, indexfilenum2, result, totalfiles,
- skipwords, skipfiles;
- long limit1, limit2, fileinfo1, fileinfo2, offsetstart;
- char line[MAXSTRLEN];
- struct indexentry *ip1, *ip2, *ip3;
- struct indexentry *buffer1, *buffer2;
- FILE *fp1, *fp2, *fp3;
- struct metaMergeEntry *metaFile1, *metaFile2;
- int firstTime = 1;
-
- initindexfilehashlist();
-
- /* remapVar is used into addindexfilelist and need to be
- ** initialized each time two indexes are merged.
- */
- remapVar = 0;
- metaFile1 = metaFile2 = NULL;
-
- initmapentrylist();
-
- if ((fp1 = openIndexFileForRead(file1)) == NULL) {
- sprintf(errorstr, "Couldn't read the index file "%s".",
- file1);
- progerr(errorstr);
- }
- if (!isokindexheader(fp1)) {
- sprintf(errorstr, ""%s" has an unknown format.",
- file1);
- progerr(errorstr);
- }
- if ((fp2 = openIndexFileForRead(file2)) == NULL) {
- sprintf(errorstr, "Couldn't read the index file "%s".",
- file2);
- progerr(errorstr);
- }
- if (!isokindexheader(fp2)) {
- sprintf(errorstr, ""%s" has an unknown format.",
- file2);
- progerr(errorstr);
- }
-
- /* Was stemming applied to both indexes? If so, we want
- * the new index to be marked as "stemming", otherwise
- * it will be marked as "non-stemming" */
- applyStemmingRules = wasStemmingAppliedToIndex(fp1) && wasStemmingAppliedToIndex(fp2);
- ip1 = ip2 = ip3 = NULL;
- buffer1 = buffer2 = NULL;
- if (verbose)
- printf("Counting files... ");
- indexfilenum1 = getindexfilenum(fp1);
- indexfilenum2 = getindexfilenum(fp2);
- totalfiles = indexfilenum1 + indexfilenum2;
- if (verbose) {
- printf("%d files.n", indexfilenum1 + indexfilenum2);
- printf("Reading stopwords...");
- }
- readoffsets(fp1);
- readstopwords(fp1);
- limit1 = offsets[STOPWORDPOS];
- fileinfo1 = offsets[FILELISTPOS];
- metaFile1 = readMergeMeta(metaFile1,fp1);
-
- readoffsets(fp2);
- readstopwords(fp2);
- limit2 = offsets[STOPWORDPOS];
- fileinfo2 = offsets[FILELISTPOS];
- metaFile2 = readMergeMeta(metaFile2,fp2);
-
- /* Create the merged list and modify the
- individual ones with the new meta index
- */
- metaEntryList = createMetaMerge(metaFile1, metaFile2);
-
- if (verbose)
- printf("nReading file info...");
- fseek(fp1, fileinfo1, 0);
- for (i = 1; i <= indexfilenum1; i++) {
- /* Keep into account that 128th files are skipped */
- /*ii = i + (i/128); */
- /* addindexfilelist(ii, line, &totalfiles); */
- struct docPropertyEntry *docProperties;
- readFileEntry(fp1, line, &docProperties);
- addindexfilelist(i, line, docProperties, &totalfiles);
- #ifdef SUPPORT_DOC_PROPERTIES
- /* swap metaName values for properties */
- swapDocPropertyMetaNames(docProperties, metaFile1);
- #endif
- }
- fseek(fp2, fileinfo2, 0);
- for (i = 1; i <= indexfilenum2; i++) {
- /* keep into account that 128th files are skipped */
- /*ii = i + (i / 128) + indexfilenum1 + (indexfilenum1 / 128);*/
- /* This one is to keep in account the ones skipped earlier */
- /*if (!(ii%128))*/
- /* ii++;*/
- /*addindexfilelist(ii, line, &totalfiles); */
- struct docPropertyEntry *docProperties;
- readFileEntry(fp2, line, &docProperties);
- addindexfilelist(i + indexfilenum1, line, docProperties, &totalfiles);
- #ifdef SUPPORT_DOC_PROPERTIES
- /* swap metaName values for properties */
- swapDocPropertyMetaNames(docProperties, metaFile2);
- #endif
- }
-
- if ((fp3 = openIndexFileForWrite(outfile)) == NULL) {
- sprintf(errorstr,
- "Couldn't write the merged index file "%s".",
- outfile);
- progerr(errorstr);
- }
-
- if (verbose)
- printf("nMerging words... ");
-
- printheader(fp3, outfile, 0, totalfiles,1);
-
- offsetstart = ftell(fp3);
- for (i = 0; i < MAXCHARS; i++)
- fprintf(fp3, "%016li", offsets[i]);
- fputc('n', fp3);
-
- readoffsets(fp1);
- readoffsets(fp2);
-
- for (i = 0; i < MAXCHARS; i++)
- offsets[i] = 0;
-
- skipwords = 0;
- while (1) {
- if (buffer1 == NULL) {
- ip1 = (struct indexentry *)
- readindexline(fp1, limit1,metaFile1);
- if (ip1 == NULL) {
- if (ip2 == NULL && !firstTime) {
- break;
- }
- }
- buffer1 = ip1;
- }
- firstTime =0;
- if (buffer2 == NULL) {
- ip2 = (struct indexentry *)
- readindexline(fp2, limit2,metaFile2);
- if (ip2 == NULL){
- if (ip1 == NULL) {
- break;
- }
- }
- else
- addfilenums(ip2, indexfilenum1);
- buffer2 = ip2;
- }
- if (ip1 == NULL)
- result = 1;
- else if (ip2 == NULL)
- result = -1;
- else
- result = wordcompare(ip1->word, ip2->word);
- if (!result) {
- ip3 = (struct indexentry *) mergeindexentries(ip1, ip2);
- printindexentry(ip3, fp3);
- freeindexentry(ip1);
- freeindexentry(ip2);
- freeindexentry(ip3);
- buffer1 = buffer2 = NULL;
- skipwords++;
- }
- else if (result < 0) {
- printindexentry(ip1, fp3);
- freeindexentry(ip1);
- buffer1 = NULL;
- }
- else {
- printindexentry(ip2, fp3);
- freeindexentry(ip2);
- buffer2 = NULL;
- }
- }
-
- if (verbose) {
- if (skipwords)
- printf("%d redundant word%s.", skipwords,
- (skipwords == 1) ? "" : "s");
- else
- printf("no redundant words.");
- }
-
- printstopwords(fp3);
-
- if (verbose)
- printf("nMerging file info... ");
-
- offsets[FILELISTPOS] = ftell(fp3);
- for (i = j = 1; i <= indexfilenum1 + indexfilenum2; i++)
- {
- if (getmap(i) == j) {
- struct docPropertyEntry* docProperties;
- char* fileInfo;
- addtofilehashlist(j++ - 1, ftell(fp3));
- fileInfo = lookupindexfilenum(i, &docProperties);
- fprintf(fp3, "%s", fileInfo);
- #ifdef SUPPORT_DOC_PROPERTIES
- storeDocProperties(docProperties, fp3);
- #endif
- }
- }
-
- skipfiles = (indexfilenum1 + indexfilenum2) - totalfiles;
- if (verbose) {
- if (skipfiles)
- printf("%d redundant file%s.", skipfiles,
- (skipfiles == 1) ? "" : "s");
- else
- printf("no redundant files.");
- }
- printfileoffsets(fp3);
- printMetaNames(fp3);
-
- fseek(fp3, offsetstart, 0);
- for (i = 0; i < MAXCHARS; i++)
- fprintf(fp3, "%016li", offsets[i]);
- fclose(fp3);
-
- fclose(fp1);
- fclose(fp2);
-
- if (verbose)
- printf("nDone.n");
- }
- /* Gets the number of files in an index file.
- */
- int getindexfilenum(fp)
- FILE *fp;
- {
- int i;
- char line[MAXSTRLEN];
-
- readoffsets(fp);
- fseek(fp, offsets[FILELISTPOS], 0);
-
- i = 0;
- while(ftell(fp) != offsets[FILEOFFSETPOS])
- {
- readFileEntry(fp, line, NULL);
- i++;
- }
-
- return i;
- }
- /* This adds an offset to the file numbers in a particular
- ** result list. For instance, file 1 has file numbers going from
- ** 1 to 10, but so does file 2, so I have to add 10 to all the
- ** file numbers in file 2 before merging.
- */
- void addfilenums(ip, num)
- struct indexentry *ip;
- int num;
- {
- struct resultMerge *rp;
-
- rp = ip->result;
- while (rp != NULL) {
- rp->filenum =
- encodefilenum(getmap(decodefilenum(rp->filenum) + num));
- rp = rp->next;
- }
- }
- /* This reads the next line in the index file and puts the results
- ** in a result structure.
- */
- struct indexentry *readindexline(fp, limit, metaFile)
- FILE *fp;
- long limit;
- struct metaMergeEntry *metaFile;
- {
- int i, c, x, countnum, rank, filenum, structure,metaName;
- char fileword[MAXWORDLEN];
- struct resultMerge *rp;
- struct indexentry *ip;
- struct metaMergeEntry* tmp;
-
- rp = NULL;
-
- if (limit == ftell(fp))
- return NULL;
- for (i = 0; (c = fgetc(fp)) != 0; ) {
- if (c == ':') {
- fileword[i] = ' ';
- break;
- }
- else
- fileword[i++] = c;
- }
-
- countnum = 1;
-
- ungetc(c, fp);
- while ((c = fgetc(fp)) != 0) {
- x = 0;
- do {
- c = fgetc(fp);
- if (c == 0)
- break;
- x *= 128;
- x += c & 127;
- } while (c & 128);
- if (c == 0)
- break;
- if (x) {
- if (countnum == 1) {
- filenum = x;
- countnum++;
- }
- else if (countnum == 2) {
- rank = x;
- countnum++;
- }
- else if (countnum == 3) {
- structure = x;
- countnum++;
- }
- else if (countnum ==4) {
- metaName = x;
- /*Need to modify metaName with new list*/
- for(tmp=metaFile;tmp;tmp=tmp->next) {
- if (tmp->oldIndex == metaName) {
- metaName = tmp->newIndex;
- break;
- }
- }
- rp = (struct resultMerge *)
- addtoresultlistMerge(rp, filenum,
- rank, structure,metaName);
- countnum = 1;
- }
- }
- }
-
- ip = (struct indexentry *) emalloc(sizeof(struct indexentry));
- ip->word = (char *) mystrdup(fileword);
- ip->result = rp;
-
- return ip;
- }
- /* This puts all the file info into a hash table so that it can
- ** be looked up by its pathname and filenumber. This is how
- ** we find redundant file information.
- */
- void addindexfilelist(num, info, docProperties, totalfiles)
- int num;
- char *info;
- struct docPropertyEntry *docProperties;
- int *totalfiles;
- {
- int i;
- unsigned hashval;
- char tmpstr[MAXSTRLEN], path[MAXSTRLEN];
- struct indexfileinfo *ip1, *ip2;
-
- strcpy(path, extractpath(info));
-
- i = lookupindexfilepath(path);
- if (i != -1) {
- *totalfiles = *totalfiles - 1;
- remap(num, i);
- return;
- }
-
- remap(num, remapVar + 1);
- remapVar++;
-
- ip1 = (struct indexfileinfo *) emalloc(sizeof(struct indexfileinfo));
- ip1->filenum = num;
- ip1->fileinfo = (char *) mystrdup(info);
- ip1->path = (char *) mystrdup(path);
- #ifdef SUPPORT_DOC_PROPERTIES
- ip1->docProperties = docProperties;
- #endif
-
- sprintf(tmpstr, "%d", num);
- hashval = bighash(tmpstr);
- ip1->next = indexfilehashlist[hashval];
- indexfilehashlist[hashval] = ip1;
-
- ip2 = (struct indexfileinfo *) emalloc(sizeof(struct indexfileinfo));
- ip2->filenum = num;
- ip2->fileinfo = (char *) mystrdup(info);
- ip2->path = (char *) mystrdup(path);
- #ifdef SUPPORT_DOC_PROPERTIES
- ip2->docProperties = docProperties; /* two pointers to the same list! - be careful */
- #endif
-
- hashval = bighash(path);
- ip2->next = indexfilehashlist[hashval];
- indexfilehashlist[hashval] = ip2;
- }
- /* This extracts the pathname information from the file information
- ** line as stored in the index file.
- */
- char *extractpath(s)
- char *s;
- {
- int i;
- static char path[MAXSTRLEN];
-
- for (i = 0; s[i] && s[i] != '"'; i++)
- path[i] = s[i];
- path[i - 1] = ' ';
- path[i] = ' ';
-
- return path;
- }
- /* This returns the file information corresponding to a file number.
- */
- char *lookupindexfilenum(num, docProperties)
- int num;
- struct docPropertyEntry** docProperties;
- {
- unsigned hashval;
- char tmpstr[MAXSTRLEN];
- struct indexfileinfo *ip;
-
- if (docProperties != NULL)
- *docProperties = NULL;
- sprintf(tmpstr, "%d", num);
- hashval = bighash(tmpstr);
- ip = indexfilehashlist[hashval];
-
- while (ip != NULL) {
- if (ip->filenum == num)
- {
- #ifdef SUPPORT_DOC_PROPERTIES
- if (docProperties != NULL)
- *docProperties = ip->docProperties;
- #endif
- return ip->fileinfo;
- }
- ip = ip->next;
- }
- return NULL;
- }
- /* This returns the file number corresponding to a pathname.
- */
- int lookupindexfilepath(path)
- char *path;
- {
- unsigned hashval;
- struct indexfileinfo *ip;
-
- hashval = bighash(path);
- ip = indexfilehashlist[hashval];
-
- while (ip != NULL) {
- if (!strcmp(ip->path, path))
- return ip->filenum;
- ip = ip->next;
- }
- return -1;
- }
- /* This simply concatenates two information lists that correspond
- ** to a word found in both index files.
- */
- struct indexentry *mergeindexentries(ip1, ip2)
- struct indexentry *ip1;
- struct indexentry *ip2;
- {
- struct resultMerge *newrp, *rp1, *rp2;
- struct indexentry *ep;
-
- rp1 = ip1->result;
- rp2 = ip2->result;
- newrp = NULL;
-
- while (rp1 != NULL) {
- newrp = (struct resultMerge *) addtoresultlistMerge(newrp,
- rp1->filenum, rp1->rank, rp1->structure,rp1->metaName);
- rp1 = rp1->next;
- }
- while (rp2 != NULL) {
- newrp = (struct resultMerge *) addtoresultlistMerge(newrp,
- rp2->filenum, rp2->rank, rp2->structure,rp2->metaName);
- rp2 = rp2->next;
- }
-
- ep = (struct indexentry *) emalloc(sizeof(struct indexentry));
- ep->word = (char *) mystrdup(ip1->word);
- ep->result = newrp;
-
- return ep;
- }
- /* This prints a new word entry into the merged index file,
- ** removing redundant file information as it goes along.
- */
- void printindexentry(ip, fp)
- struct indexentry *ip;
- FILE *fp;
- {
- int i, num, metaName;
- struct resultMerge *rp;
-
- for (i = 0; indexchars[i] != ' '; i++)
- {
- if ((ip->word)[0] == indexchars[i] && !offsets[i])
- offsets[i] = ftell(fp);
- }
- fprintf(fp, "%s:", ip->word);
- initmarkentrylistMerge();
- rp = ip->result;
- while (rp != NULL) {
- num = rp->filenum;
- metaName = rp->metaName;
- if (!ismarkedMerge(num,metaName)) {
- marknumMerge(num,metaName);
- compress(num, fp);
- compress(rp->rank, fp);
- compress(rp->structure, fp);
- compress(rp->metaName,fp);
- }
- rp = rp->next;
- }
- fputc(0, fp);
- }
- /* This associates a number with a new number.
- ** This function is used to remap file numbers from index
- ** files to a new merged index file.
- */
- void remap(oldnum, newnum)
- int oldnum;
- int newnum;
- {
- unsigned hashval;
- char tmpstr[MAXSTRLEN];
- struct mapentry *mp;
-
- mp = (struct mapentry *) emalloc(sizeof(struct mapentry));
- mp->oldnum = oldnum;
- mp->newnum = newnum;
-
- sprintf(tmpstr, "%d", oldnum);
- hashval = bighash(tmpstr);
- mp->next = mapentrylist[hashval];
- mapentrylist[hashval] = mp;
- }
- /* This retrieves the number associated with another.
- */
- int getmap(num)
- int num;
- {
- unsigned hashval;
- char tmpstr[MAXSTRLEN];
- struct mapentry *mp;
-
- sprintf(tmpstr, "%d", num);
- hashval = bighash(tmpstr);
- mp = mapentrylist[hashval];
-
- while (mp != NULL) {
- if (mp->oldnum == num)
- return mp->newnum;
- mp = mp->next;
- }
- return num;
- }
- /* This marks a number as having been printed.
- */
- void marknum(num)
- int num;
- {
- unsigned hashval;
- char tmpstr[MAXSTRLEN];
- struct markentry *mp;
-
- mp = (struct markentry *) emalloc(sizeof(struct markentry));
- mp->num = num;
-
- sprintf(tmpstr, "%d", num);
- hashval = bighash(tmpstr);
- mp->next = markentrylist[hashval];
- markentrylist[hashval] = mp;
- }
- /* Same thing but for merge only */
- void marknumMerge(num, metaName)
- int num;
- int metaName;
- {
- unsigned hashval;
- char tmpstr[MAXSTRLEN];
- struct markentryMerge *mp;
-
- mp = (struct markentryMerge *) emalloc(sizeof(struct markentryMerge));
- mp->num = num;
- mp->metaName = metaName;
-
- sprintf(tmpstr, "%d", num);
- hashval = bighash(tmpstr);
- mp->next = markentrylistMerge[hashval];
- markentrylistMerge[hashval] = mp;
- }
- /* Has a number been printed?
- */
- int ismarked(num)
- int num;
- {
- unsigned hashval;
- char tmpstr[MAXSTRLEN];
- struct markentry *mp;
-
- sprintf(tmpstr, "%d", num);
- hashval = bighash(tmpstr);
- mp = markentrylist[hashval];
-
- while (mp != NULL) {
- if (mp->num == num)
- return 1;
- mp = mp->next;
- }
- return 0;
- }
- int ismarkedMerge(num,metaName)
- int num;
- int metaName;
- {
- unsigned hashval;
- char tmpstr[MAXSTRLEN];
- struct markentryMerge *mp;
-
- sprintf(tmpstr, "%d", num);
- hashval = bighash(tmpstr);
- mp = markentrylistMerge[hashval];
-
- while (mp != NULL) {
- if ( (mp->num == num) && (mp->metaName == metaName) )
- return 1;
- mp = mp->next;
- }
- return 0;
- }
- /* Initialize the marking list.
- */
- void initmarkentrylist()
- {
- int i;
- struct markentry *mp;
-
- for (i = 0; i < BIGHASHSIZE; i++) {
- mp = markentrylist[i];
- if (mp != NULL)
- free(mp);
- markentrylist[i] = NULL;
- }
- }
- void initmarkentrylistMerge()
- {
- int i;
- struct markentryMerge *mp;
-
- for (i = 0; i < BIGHASHSIZE; i++) {
- mp = markentrylistMerge[i];
- if (mp != NULL)
- free(mp);
- markentrylistMerge[i] = NULL;
- }
- }
- /* Initialize the main file list.
- */
- void initindexfilehashlist()
- {
- int i;
- struct indexfileinfo *ip;
-
- for (i = 0; i < BIGHASHSIZE; i++) {
- ip = indexfilehashlist[i];
- if (ip != NULL)
- free(ip);
- indexfilehashlist[i] = NULL;
- }
- }
- /* Initialize the mapentrylist
- */
- void initmapentrylist()
- {
- int i;
- struct mapentry *ip;
-
- for (i = 0; i < BIGHASHSIZE; i++) {
- ip = mapentrylist[i];
- if (ip != NULL)
- free(ip);
- mapentrylist[i] = NULL;
- }
- }
- /* Frees up used index entries, my best attempt at memory management...
- ** I still have bytes leaking elsewhere...
- */
- void freeindexentry(ip)
- struct indexentry *ip;
- {
- struct resultMerge *rp, *oldp;
-
- free(ip->word);
- rp = ip->result;
- while (rp != NULL) {
- oldp = rp;
- rp = rp->next;
- free(oldp);
- }
- free(ip);
- }
/* Translates a file number into something that can be compressed.
** The result is the num-th positive integer that is not a multiple
** of 128 (1..127 map to themselves, 128 maps to 129, 255 to 257, ...).
** Every block of 127 file numbers skips one multiple of 128, hence
** the closed form below.  Returns 0 for num <= 0.
*/
int encodefilenum(num)
int num;
{
	if (num <= 0)
		return 0;
	return num + (num - 1) / 127;
}
/* Translates a compressed file number into a correct file number.
** The number of multiples of 128 that are strictly smaller than num
** is subtracted from num, which undoes encodefilenum().
** Values <= 1 are returned unchanged.
*/
int decodefilenum(num)
int num;
{
	if (num <= 1)
		return num;
	return num - (num - 1) / 128;
}
- /* Similar to addtoresultlist, but also adding the meta name
- */
- struct resultMerge *addtoresultlistMerge(rp, filenum, rank,
- structure, metaName)
- struct resultMerge *rp;
- int filenum;
- int rank;
- int structure;
- int metaName;
- {
- struct resultMerge *newnode;
- static struct resultMerge *head;
-
- newnode = (struct resultMerge *) emalloc(sizeof(struct resultMerge));
- newnode->filenum = filenum;
- newnode->rank = rank;
- newnode->structure = structure;
- newnode->metaName = metaName;
- newnode->next = NULL;
-
- if (rp == NULL)
- rp = newnode;
- else
- head->next = newnode;
-
- head = newnode;
-
- return rp;
- }
- /* Reads the meta names from the index. Needs to be different from
- ** readMetaNames because needs to zero out the counter.
- */
- struct metaMergeEntry* readMergeMeta(metaFile,fp)
- struct metaMergeEntry* metaFile;
- FILE* fp;
- {
- int i, c, counter;
- char word[MAXWORDLEN];
-
- counter = 0;
- fseek(fp, offsets[METANAMEPOS], 0);
- for (i = 0; (c = fgetc(fp)) != 'n' && c != EOF; ){
- if (!isspace(c))
- word[i++] = c;
- else {
- int docPropStyle = 0;
- char* docPropStyleTmp;
- word[i] = ' ';
- /* parse the meta name style:
- * <name>"0 -> normal meta name [default]
- * <name>"1 -> doc property name
- * <name>"2 -> both
- */
- docPropStyleTmp = strrchr(word, '"');
- if (docPropStyleTmp != NULL)
- {
- *docPropStyleTmp++ = ' '; /* remove (and move past) quote */
- docPropStyle = atoi(docPropStyleTmp);
-
- }
- metaFile = addMetaMerge(metaFile,
- word,
- (docPropStyle > 0) ? 1 : 0,
- (docPropStyle == 1) ? 1 : 0,
- &counter);
- i = 0;
- }
- }
- return metaFile;
- }
- /* Adds an entry to the list of meta names for one index,
- ** setting the new index to 0 - it will then be set by
- ** createMetaMerge.
- */
- struct metaMergeEntry* addMetaMerge(metaFile, metaWord, isDocProp, isOnlyDocProp, counter)
- struct metaMergeEntry* metaFile;
- char* metaWord;
- int isDocProp, isOnlyDocProp;
- int* counter;
- {
- int i;
- struct metaMergeEntry* newEntry;
- struct metaMergeEntry* tmpEntry;
-
- if (*counter == 0)
- *counter = 2;
- else if ((*counter) == 1 || (!((*counter) % 128)) )
- (*counter)++;
- for( i=0; metaWord[i]; i++)
- metaWord[i] = tolower(metaWord[i]);
- newEntry = (struct metaMergeEntry*) emalloc(sizeof(struct metaMergeEntry));
- newEntry->metaName = (char*)mystrdup(metaWord);
- #ifdef SUPPORT_DOC_PROPERTIES
- newEntry->isDocProperty = isDocProp;
- newEntry->isOnlyDocProperty = isOnlyDocProp;
- #endif
- newEntry->oldIndex = (*counter)++;
- newEntry->newIndex = 0;
- newEntry->next = NULL;
- if (metaFile)
- {
- for(tmpEntry=metaFile;tmpEntry->next!=NULL;tmpEntry=tmpEntry->next)
- ;
- tmpEntry->next = newEntry;
- }
- else
- metaFile = newEntry;
-
- return metaFile;
- }
- /* Creates a list of all the meta names in the indexes
- */
- struct metaEntry* createMetaMerge(metaFile1, metaFile2)
- struct metaMergeEntry* metaFile1;
- struct metaMergeEntry* metaFile2;
- {
- struct metaMergeEntry* tmpEntry;
- int counter;
-
- metaEntryList = NULL;
- counter = 0;
- for (tmpEntry=metaFile1;tmpEntry;tmpEntry=tmpEntry->next)
- metaEntryList = addMetaMergeList(metaEntryList,tmpEntry,&counter);
-
- for (tmpEntry=metaFile2;tmpEntry;tmpEntry=tmpEntry->next)
- metaEntryList = addMetaMergeList(metaEntryList,tmpEntry,&counter);
-
- return metaEntryList;
- }
- /* Adds an entry to the merged meta names list and changes the
- ** new index in the idividual file entry
- */
- struct metaEntry* addMetaMergeList(metaEntryList,metaFileEntry,count)
- struct metaEntry* metaEntryList;
- struct metaMergeEntry* metaFileEntry;
- int* count;
- {
- int i, wordExists, newIndex;
- struct metaEntry* newEntry;
- struct metaEntry* tmpEntry;
- struct metaEntry* last;
- char *metaWord, *compWord;
- int isOnlyDocProperty = 0;
- int isDocProperty = 0;
-
- wordExists = 0;
- if ((*count) == 0)
- *count = 2;
- else if ((*count) == 1 || (!((*count) % 128)) )
- (*count)++;
- metaWord = metaFileEntry->metaName;
- #ifdef SUPPORT_DOC_PROPERTIES
- isDocProperty = metaFileEntry->isDocProperty;
- isOnlyDocProperty = metaFileEntry->isOnlyDocProperty;
- #endif
-
- for( i=0; metaWord[i]; i++)
- metaWord[i] = tolower(metaWord[i]);
- if (metaEntryList)
- {
- for(tmpEntry=metaEntryList;tmpEntry;tmpEntry=tmpEntry->next)
- {
- if (tmpEntry->next == NULL)
- last = tmpEntry;
- compWord = tmpEntry->metaName;
- if (!strcmp(compWord,metaWord) )
- {
- wordExists = 1;
- newIndex = tmpEntry->index;
- /*
- * Keep the docProperties fields in synch.
- * The semantics we want for the metaEntry are:
- * isDocProperty = 1 if either index is using as PropertyName
- * isOnlyDocProperty = 1 if neither index is using as MetaName
- */
- if (isDocProperty) /* new entry is docProp, so assert it */
- {
- tmpEntry->isDocProperty = 1;
- }
- if (!isOnlyDocProperty) /* new entry is not *only* docProp, so unassert that */
- {
- tmpEntry->isOnlyDocProperty = 0;
- }
- break;
- }
- }
- if (wordExists)
- {
- metaFileEntry->newIndex = newIndex;
- }
- else
- {
- newEntry = (struct metaEntry*) emalloc(sizeof(struct metaEntry));
- newEntry->metaName = (char*)mystrdup(metaWord);
- newEntry->index = *count;
- newEntry->next = NULL;
- #ifdef SUPPORT_DOC_PROPERTIES
- newEntry->isDocProperty = isDocProperty;
- newEntry->isOnlyDocProperty = isOnlyDocProperty;
- #endif
- metaFileEntry->newIndex = (*count)++;
- last->next = newEntry;
- }
- }
- else {
- newEntry = (struct metaEntry*) emalloc(sizeof(struct metaEntry));
- newEntry->metaName = (char*)mystrdup(metaWord);
- newEntry->index = *count;
- newEntry->next = NULL;
- #ifdef SUPPORT_DOC_PROPERTIES
- newEntry->isDocProperty = isDocProperty; /* init */
- newEntry->isOnlyDocProperty = isOnlyDocProperty; /* init */
- #endif
- metaFileEntry->newIndex = (*count)++;
- metaEntryList = newEntry;
- }
- return metaEntryList;
- }
-