/* ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94 ** ** This program and library is free software; you can redistribute it and/or ** modify it under the terms of the GNU (Library) General Public License ** as published by the Free Software Foundation; either version 2 ** of the License, or any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU (Library) General Public License for more details. ** ** You should have received a copy of the GNU (Library) General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. **--------------------------------------------------------- ** Added addStopList to support printing of common words ** G. Hill 4/7/97 ghill@library.berkeley.edu ** ** change sprintf to snprintf to avoid corruption ** SRE 11/17/99 ** ** 04/00 - Jose Ruiz ** change hash for bighash in mergeresultlists for better performance ** when big searchs (a* or b* or c*) ** */ #include "swish.h" #include "string.h" #include "hash.h" #include "mem.h" #include "search.h" /* Hashes a string. */ unsigned hash(s) char *s; { unsigned hashval; for (hashval = 0; *s != '\0'; s++) hashval = (int) ((unsigned char) *s) + 31 * hashval; return hashval % HASHSIZE; } /* Hashes a string for a larger hash table. */ unsigned bighash(s) char *s; { unsigned hashval; for (hashval = 0; *s != '\0'; s++) hashval = (int) ((unsigned char) *s) + 31 * hashval; return hashval % BIGHASHSIZE; } /* Hashes a int. */ unsigned numhash(i) int i; { return i % HASHSIZE; } /* Hashes a int for a larger hash table. */ unsigned bignumhash(i) int i; { return i % BIGHASHSIZE; } /* Hashes a string for a larger hash table (for search). */ unsigned verybighash(s) char *s; { unsigned hashval; for (hashval = 0; *s != '\0'; s++) hashval = (int) ((unsigned char) *s) + 31 * hashval; return hashval % VERYBIGHASHSIZE; } /* Adds a stop word to the list of removed common words */ void addStopList(INDEXDATAHEADER *header, char *word) { char *arrayWord; if (isstopword(header, word)) return; /* Another BUG!! Jose Ruiz 04/00 The dimension of the array was not checked Fixed */ if (header->stopPos == header->stopMaxSize) { header->stopMaxSize += 100; if (!header->stopList) header->stopList = (char **) emalloc(header->stopMaxSize * sizeof(char *)); else header->stopList = (char **) erealloc(header->stopList, header->stopMaxSize * sizeof(char *)); } arrayWord = (char *) estrdup(word); header->stopList[header->stopPos++] = arrayWord; } /* Adds a stop word to a hash table. */ void addstophash(INDEXDATAHEADER *header, char *word) { unsigned hashval; struct swline *sp; if (isstopword(header, word)) return; sp = (struct swline *) emalloc(sizeof(struct swline)); sp->line = (char *) estrdup(word); hashval = hash(word); sp->next = header->hashstoplist[hashval]; header->hashstoplist[hashval] = sp; } /* Sees if a word is a stop word by looking it up in the hash table. */ int isstopword(INDEXDATAHEADER *header, char *word) { unsigned hashval; struct swline *sp; hashval = hash(word); sp = header->hashstoplist[hashval]; while (sp != NULL) { if (!strcmp(sp->line, word)) return 1; sp = sp->next; } return 0; } /* Adds a buzzword to a hash table.*/ void addbuzzwordhash(INDEXDATAHEADER *header, char *word) { unsigned hashval; struct swline *sp; if (isbuzzword(header, word)) return; header->buzzwords_used_flag++; sp = (struct swline *) emalloc(sizeof(struct swline)); sp->line = (char *) estrdup(word); /* should buzzwords be case sensitive? */ strtolower( sp->line ); hashval = hash( sp->line ); sp->next = header->hashbuzzwordlist[hashval]; header->hashbuzzwordlist[hashval] = sp; } void freebuzzwordhash(INDEXDATAHEADER *header) { int i; struct swline *sp, *tmp; for (i = 0; i < HASHSIZE; i++) if (header->hashbuzzwordlist[i]) { sp = (struct swline *) header->hashbuzzwordlist[i]; while (sp) { tmp = sp->next; efree(sp->line); efree(sp); sp = tmp; } header->hashbuzzwordlist[i] = NULL; } } /* Sees if a word is a buzzword by looking it up in the hash table. */ int isbuzzword(INDEXDATAHEADER *header, char *word) { unsigned hashval; struct swline *sp; hashval = hash(word); sp = header->hashbuzzwordlist[hashval]; while (sp != NULL) { if (!strcmp(sp->line, word)) return 1; sp = sp->next; } return 0; } void freestophash(INDEXDATAHEADER *header) { int i; struct swline *sp, *tmp; for (i = 0; i < HASHSIZE; i++) if (header->hashstoplist[i]) { sp = (struct swline *) header->hashstoplist[i]; while (sp) { tmp = sp->next; efree(sp->line); efree(sp); sp = tmp; } header->hashstoplist[i] = NULL; } } void freeStopList(INDEXDATAHEADER *header) { int i; for (i = 0; i < header->stopPos; i++) efree(header->stopList[i]); if (header->stopList) efree(header->stopList); header->stopList = NULL; header->stopPos = header->stopMaxSize = 0; } /* Adds a "use" word to a hash table. */ void addusehash(INDEXDATAHEADER *header, char *word) { unsigned hashval; struct swline *sp; if (isuseword(header, word)) return; sp = (struct swline *) emalloc(sizeof(struct swline)); sp->line = (char *) estrdup(word); hashval = hash(word); sp->next = header->hashuselist[hashval]; header->hashuselist[hashval] = sp; } /* Sees if a word is a "use" word by looking it up in the hash table. */ int isuseword(INDEXDATAHEADER *header, char *word) { unsigned hashval; struct swline *sp; hashval = hash(word); sp = header->hashuselist[hashval]; while (sp != NULL) { if (!strcmp(sp->line, word)) return 1; sp = sp->next; } return 0; }