/*
** check.c
** Uploaded by: qin5330
** Upload date: 2007-01-05
** Resource size: 114k
** File size: 3k
*/
/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <ctype.h>
#include <string.h>

#include "swish.h"
#include "check.h"
#include "hash.h"
/* Check if a file with a particular suffix should be indexed
** according to the settings in the configuration file.
*/
- /* Should a word be indexed? Consults the stopword hash list
- ** and checks if the word is of a reasonable length...
- ** If you have any good rules that can work with most languages,
- ** please let me know...
- */
- int isokword(word)
- char *word;
- {
- int i, same, hasnumber, hasvowel, hascons,
- numberrow, vowelrow, consrow, wordlen;
- char lastchar;
-
- if (word[0] == ' ')
- return 0;
-
- if (isstopword(word))
- return 0;
- wordlen = strlen(word);
- if ((wordlen < minwordlimit) || (wordlen > maxwordlimit))
- return 0;
-
- lastchar = ':';
- same = 0;
- hasnumber = hasvowel = hascons = 0;
- numberrow = vowelrow = consrow = 0;
- for (i = 0; word[i] != ' '; i++) {
- if (word[i] == lastchar) {
- same++;
- if (same > IGNORESAME)
- return 0;
- }
- else
- same = 0;
- if (isdigit(word[i])) {
- hasnumber = 1;
- numberrow++;
- if (numberrow > IGNOREROWN)
- return 0;
- vowelrow = 0;
- consrow = 0;
- }
- else if (isvowel(word[i])) {
- hasvowel = 1;
- vowelrow++;
- if (vowelrow > IGNOREROWV)
- return 0;
- numberrow = 0;
- consrow = 0;
- }
- else if (!ispunct(word[i])) {
- hascons = 1;
- consrow++;
- if (consrow > IGNOREROWC)
- return 0;
- numberrow = 0;
- vowelrow = 0;
- }
- lastchar = word[i];
- }
-
- if (IGNOREALLV)
- if (hasvowel && !hascons)
- return 0;
- if (IGNOREALLC)
- if (hascons && !hasvowel)
- return 0;
- if (IGNOREALLN)
- if (hasnumber && !hasvowel && !hascons)
- return 0;
-
- return 1;
- }
- /* Does a word have valid characters?
- */
- int hasokchars(word)
- char *word;
- {
- int i, j;
- char c;
-
- c = word[strlen(word) - 1];
- for (i = j = 0; beginchars[i] != ' '; i++)
- if (word[0] == beginchars[i])
- j++;
- if (!j)
- return 0;
- for (i = j = 0; endchars[i] != ' '; i++)
- if (c == endchars[i])
- j++;
- if (!j)
- return 0;
- for (i = 0; word[i] != ' '; i++)
- for (j = 0; wordchars[j] != ' '; j++)
- if (word[i] == wordchars[j])
- return 1;
- return 0;
- }
/* Is a letter a vowel?
**
** Returns 1 when c is one of a, e, i, o, u in either case, 0 for any
** other character. Only these ten ASCII letters are recognised.
*/
int isvowel(char c)
{
    switch (c) {
    case 'a': case 'e': case 'i': case 'o': case 'u':
    case 'A': case 'E': case 'I': case 'O': case 'U':
        return 1;
    default:
        return 0;
    }
}