swish-e/src/hash.c

/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
**---------------------------------------------------------
** Added addStopList to support printing of common words
** G. Hill 4/7/97  ghill@library.berkeley.edu
**
** change sprintf to snprintf to avoid corruption
** SRE 11/17/99
**
** 04/00 - Jose Ruiz
** use bighash instead of hash in mergeresultlists for better performance
** on big searches (a* or b* or c*)
**
*/

#include "swish.h"
#include "string.h"
#include "hash.h"
#include "mem.h"
#include "search.h"

/* Hashes a NUL-terminated string to a bucket index for a HASHSIZE table.
**
** s: string to hash.  It is dereferenced unconditionally, so it must
**    not be NULL.
**
** Returns the accumulated hash reduced modulo HASHSIZE.  The empty
** string hashes to 0.
*/

unsigned hash(char *s)
{
    unsigned hashval = 0;

    /* Multiply-by-31 string hash.  Each byte is read as unsigned char so
       that bytes >= 0x80 contribute the same value whether plain char is
       signed or unsigned; the unsigned accumulator wraps on overflow. */
    while (*s != '\0')
    {
        hashval = (unsigned) (unsigned char) *s + 31 * hashval;
        s++;
    }
    return hashval % HASHSIZE;
}

/* Hashes a NUL-terminated string to a bucket index for a BIGHASHSIZE table.
**
** s: string to hash.  It is dereferenced unconditionally, so it must
**    not be NULL.
**
** Returns the accumulated hash reduced modulo BIGHASHSIZE.  The empty
** string hashes to 0.
*/

unsigned bighash(char *s)
{
    unsigned hashval = 0;

    /* Same multiply-by-31 accumulation as hash(); only the final modulus
       differs.  Bytes are read as unsigned char so the result does not
       depend on the signedness of plain char. */
    while (*s != '\0')
    {
        hashval = (unsigned) (unsigned char) *s + 31 * hashval;
        s++;
    }
    return hashval % BIGHASHSIZE;
}

/* Hashes an int to a bucket index for a HASHSIZE table.
**
** i: number to hash.
**
** Returns i reduced modulo HASHSIZE.  The value is converted to unsigned
** before the modulo: a signed '%' yields a negative remainder for a
** negative i, which would then wrap to a huge unsigned value instead of
** a usable bucket index.  Non-negative inputs hash exactly as before.
*/

unsigned numhash(int i)
{
    return (unsigned) i % HASHSIZE;
}

/* Hashes an int to a bucket index for a BIGHASHSIZE table.
**
** i: number to hash.
**
** Returns i reduced modulo BIGHASHSIZE.  The value is converted to
** unsigned before the modulo: a signed '%' yields a negative remainder
** for a negative i, which would then wrap to a huge unsigned value
** instead of a usable bucket index.  Non-negative inputs hash exactly as
** before.
*/

unsigned bignumhash(int i)
{
    return (unsigned) i % BIGHASHSIZE;
}

/* Hashes a NUL-terminated string to a bucket index for a VERYBIGHASHSIZE
** table (for search).
**
** s: string to hash.  It is dereferenced unconditionally, so it must
**    not be NULL.
**
** Returns the accumulated hash reduced modulo VERYBIGHASHSIZE.  The
** empty string hashes to 0.
*/

unsigned verybighash(char *s)
{
    unsigned hashval = 0;

    /* Same multiply-by-31 accumulation as hash(); only the final modulus
       differs.  Bytes are read as unsigned char so the result does not
       depend on the signedness of plain char. */
    while (*s != '\0')
    {
        hashval = (unsigned) (unsigned char) *s + 31 * hashval;
        s++;
    }
    return hashval % VERYBIGHASHSIZE;
}


/* Appends a private copy of a stop word to header->stopList, the list of
   removed common words.  Nothing is added when isstopword() already
   reports the word as present in the stop-word hash table. */
void    addStopList(INDEXDATAHEADER *header, char *word)
{
    if (!isstopword(header, word))
    {
        /* The array is full (or was never allocated): make room for
           another 100 pointers before storing the new entry. */
        if (header->stopMaxSize == header->stopPos)
        {
            header->stopMaxSize += 100;
            header->stopList = header->stopList
                ? (char **) erealloc(header->stopList, header->stopMaxSize * sizeof(char *))
                : (char **) emalloc(header->stopMaxSize * sizeof(char *));
        }

        /* The list owns its own duplicate of the word. */
        header->stopList[header->stopPos] = (char *) estrdup(word);
        header->stopPos++;
    }
}


/* Inserts a stop word into header->hashstoplist.
   The word is duplicated, so the caller keeps ownership of `word`.
   Words already present (per isstopword) are ignored.
*/

void    addstophash(INDEXDATAHEADER *header, char *word)
{
    struct swline *entry;
    unsigned bucket;

    if (isstopword(header, word))
        return;

    bucket = hash(word);

    entry = (struct swline *) emalloc(sizeof *entry);
    entry->line = (char *) estrdup(word);

    /* Push the new entry on the front of the bucket's chain. */
    entry->next = header->hashstoplist[bucket];
    header->hashstoplist[bucket] = entry;
}

/* Looks a word up in the stop-word hash table (header->hashstoplist).
   The comparison is an exact strcmp match.
   Returns 1 if the word is present, 0 otherwise.
*/

int     isstopword(INDEXDATAHEADER *header, char *word)
{
    struct swline *node;

    /* Walk the chain of the bucket the word hashes to. */
    for (node = header->hashstoplist[hash(word)]; node != NULL; node = node->next)
    {
        if (strcmp(node->line, word) == 0)
            return 1;
    }
    return 0;
}


/* Adds a buzzword to the hash table header->hashbuzzwordlist.
**
** header: index header owning the buzzword table and the
**         buzzwords_used_flag counter.
** word:   buzzword to add; it is not modified, a private copy is stored.
**
** Buzzwords are stored lower-cased.  The copy is lower-cased BEFORE the
** duplicate check so that the check looks in the same bucket, and for
** the same spelling, that the entry will be stored under.  Checking the
** caller's original spelling would miss an existing entry whenever
** `word` contains upper-case letters, inserting a duplicate and bumping
** buzzwords_used_flag again.
*/

void    addbuzzwordhash(INDEXDATAHEADER *header, char *word)
{
    unsigned hashval;
    struct swline *sp;
    char   *lowered;

    /* should buzzwords be case sensitive? */
    lowered = (char *) estrdup(word);
    strtolower(lowered);        /* relied on to convert the copy in place */

    if (isbuzzword(header, lowered))
    {
        /* Already present: release the unused copy. */
        efree(lowered);
        return;
    }

    header->buzzwords_used_flag++;

    sp = (struct swline *) emalloc(sizeof(struct swline));
    sp->line = lowered;         /* the table entry takes ownership */

    /* Push the new entry on the front of its bucket's chain. */
    hashval = hash(lowered);
    sp->next = header->hashbuzzwordlist[hashval];
    header->hashbuzzwordlist[hashval] = sp;
}

/* Releases every entry of the buzzword hash table: for each non-empty
   bucket the stored strings and their list nodes are freed and the
   bucket is reset to NULL. */
void    freebuzzwordhash(INDEXDATAHEADER *header)
{
    int     bucket;

    for (bucket = 0; bucket < HASHSIZE; bucket++)
    {
        struct swline *node = (struct swline *) header->hashbuzzwordlist[bucket];

        if (node == NULL)
            continue;

        do
        {
            /* Remember the successor before the node is released. */
            struct swline *following = node->next;

            efree(node->line);
            efree(node);
            node = following;
        }
        while (node != NULL);

        header->hashbuzzwordlist[bucket] = NULL;
    }
}


/* Looks a word up in the buzzword hash table (header->hashbuzzwordlist).
   The comparison is an exact strcmp match and the word is not case-folded
   here, whereas addbuzzwordhash() stores its entries lower-cased.
   Returns 1 if the word is present, 0 otherwise. */

int     isbuzzword(INDEXDATAHEADER *header, char *word)
{
    struct swline *node;

    /* Walk the chain of the bucket the word hashes to. */
    for (node = header->hashbuzzwordlist[hash(word)]; node != NULL; node = node->next)
    {
        if (strcmp(node->line, word) == 0)
            return 1;
    }
    return 0;
}


/* Releases every entry of the stop-word hash table: for each non-empty
   bucket the stored strings and their list nodes are freed and the
   bucket is reset to NULL. */
void    freestophash(INDEXDATAHEADER *header)
{
    int     bucket;

    for (bucket = 0; bucket < HASHSIZE; bucket++)
    {
        struct swline *node = (struct swline *) header->hashstoplist[bucket];

        if (node == NULL)
            continue;

        do
        {
            /* Remember the successor before the node is released. */
            struct swline *following = node->next;

            efree(node->line);
            efree(node);
            node = following;
        }
        while (node != NULL);

        header->hashstoplist[bucket] = NULL;
    }
}

/* Frees the stop-word list: the first stopPos strings held in
   header->stopList, then the array itself when it is non-NULL.
   Afterwards stopList is NULL and stopPos / stopMaxSize are both 0. */
void    freeStopList(INDEXDATAHEADER *header)
{
    int     idx = 0;

    while (idx < header->stopPos)
    {
        efree(header->stopList[idx]);
        idx++;
    }

    if (header->stopList != NULL)
        efree(header->stopList);

    header->stopList = NULL;
    header->stopMaxSize = 0;
    header->stopPos = 0;
}

/* Inserts a "use" word into header->hashuselist.
   The word is duplicated, so the caller keeps ownership of `word`.
   Words already present (per isuseword) are ignored.
*/

void    addusehash(INDEXDATAHEADER *header, char *word)
{
    struct swline *entry;
    unsigned bucket;

    if (isuseword(header, word))
        return;

    bucket = hash(word);

    entry = (struct swline *) emalloc(sizeof *entry);
    entry->line = (char *) estrdup(word);

    /* Push the new entry on the front of the bucket's chain. */
    entry->next = header->hashuselist[bucket];
    header->hashuselist[bucket] = entry;
}

/* Looks a word up in the "use" word hash table (header->hashuselist).
   The comparison is an exact strcmp match.
   Returns 1 if the word is present, 0 otherwise.
*/

int     isuseword(INDEXDATAHEADER *header, char *word)
{
    struct swline *node;

    /* Walk the chain of the bucket the word hashes to. */
    for (node = header->hashuselist[hash(word)]; node != NULL; node = node->next)
    {
        if (strcmp(node->line, word) == 0)
            return 1;
    }
    return 0;
}
1	adcroft	1.1	/*
2			** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
3			** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
4			**
5			** This program and library is free software; you can redistribute it and/or
6			** modify it under the terms of the GNU (Library) General Public License
7			** as published by the Free Software Foundation; either version 2
8			** of the License, or any later version.
9			**
10			** This program is distributed in the hope that it will be useful,
11			** but WITHOUT ANY WARRANTY; without even the implied warranty of
12			** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13			** GNU (Library) General Public License for more details.
14			**
15			** You should have received a copy of the GNU (Library) General Public License
16			** along with this program; if not, write to the Free Software
17			** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18			**---------------------------------------------------------
19			** Added addStopList to support printing of common words
20			** G. Hill 4/7/97 ghill@library.berkeley.edu
21			**
22			** change sprintf to snprintf to avoid corruption
23			** SRE 11/17/99
24			**
25			** 04/00 - Jose Ruiz
26			** change hash for bighash in mergeresultlists for better performance
27			** when big searchs (a* or b* or c*)
28			**
29			*/
30
31			#include "swish.h"
32			#include "string.h"
33			#include "hash.h"
34			#include "mem.h"
35			#include "search.h"
36
37			/* Hashes a string.
38			*/
39
40			unsigned hash(s)
41			char *s;
42			{
43			unsigned hashval;
44
45			for (hashval = 0; *s != '\0'; s++)
46			hashval = (int) ((unsigned char) *s) + 31 * hashval;
47			return hashval % HASHSIZE;
48			}
49
50			/* Hashes a string for a larger hash table.
51			*/
52
53			unsigned bighash(s)
54			char *s;
55			{
56			unsigned hashval;
57
58			for (hashval = 0; *s != '\0'; s++)
59			hashval = (int) ((unsigned char) *s) + 31 * hashval;
60			return hashval % BIGHASHSIZE;
61			}
62
63			/* Hashes a int.
64			*/
65
66			unsigned numhash(i)
67			int i;
68			{
69			return i % HASHSIZE;
70			}
71
72			/* Hashes a int for a larger hash table.
73			*/
74
75			unsigned bignumhash(i)
76			int i;
77			{
78			return i % BIGHASHSIZE;
79			}
80
81			/* Hashes a string for a larger hash table (for search).
82			*/
83
84			unsigned verybighash(s)
85			char *s;
86			{
87			unsigned hashval;
88
89			for (hashval = 0; *s != '\0'; s++)
90			hashval = (int) ((unsigned char) *s) + 31 * hashval;
91			return hashval % VERYBIGHASHSIZE;
92			}
93
94
95			/* Adds a stop word to the list of removed common words */
96			void addStopList(INDEXDATAHEADER *header, char *word)
97			{
98			char *arrayWord;
99
100			if (isstopword(header, word))
101			return;
102
103			/* Another BUG!! Jose Ruiz 04/00
104			The dimension of the array was not checked
105			Fixed */
106			if (header->stopPos == header->stopMaxSize)
107			{
108			header->stopMaxSize += 100;
109			if (!header->stopList)
110			header->stopList = (char **) emalloc(header->stopMaxSize * sizeof(char *));
111
112			else
113			header->stopList = (char **) erealloc(header->stopList, header->stopMaxSize * sizeof(char *));
114			}
115			arrayWord = (char *) estrdup(word);
116			header->stopList[header->stopPos++] = arrayWord;
117			}
118
119
120			/* Adds a stop word to a hash table.
121			*/
122
123			void addstophash(INDEXDATAHEADER *header, char *word)
124			{
125			unsigned hashval;
126			struct swline *sp;
127
128			if (isstopword(header, word))
129			return;
130
131			sp = (struct swline *) emalloc(sizeof(struct swline));
132
133			sp->line = (char *) estrdup(word);
134
135			hashval = hash(word);
136			sp->next = header->hashstoplist[hashval];
137			header->hashstoplist[hashval] = sp;
138			}
139
140			/* Sees if a word is a stop word by looking it up in the hash table.
141			*/
142
143			int isstopword(INDEXDATAHEADER *header, char *word)
144			{
145			unsigned hashval;
146			struct swline *sp;
147
148			hashval = hash(word);
149			sp = header->hashstoplist[hashval];
150
151			while (sp != NULL)
152			{
153			if (!strcmp(sp->line, word))
154			return 1;
155			sp = sp->next;
156			}
157			return 0;
158			}
159
160
161
162			/* Adds a buzzword to a hash table.*/
163
164			void addbuzzwordhash(INDEXDATAHEADER *header, char *word)
165			{
166			unsigned hashval;
167			struct swline *sp;
168
169			if (isbuzzword(header, word))
170			return;
171
172			header->buzzwords_used_flag++;
173
174			sp = (struct swline *) emalloc(sizeof(struct swline));
175
176			sp->line = (char *) estrdup(word);
177
178
179			/* should buzzwords be case sensitive? */
180			strtolower( sp->line );
181
182			hashval = hash( sp->line );
183
184
185			sp->next = header->hashbuzzwordlist[hashval];
186			header->hashbuzzwordlist[hashval] = sp;
187			}
188
189			void freebuzzwordhash(INDEXDATAHEADER *header)
190			{
191			int i;
192			struct swline *sp,
193			*tmp;
194
195			for (i = 0; i < HASHSIZE; i++)
196			if (header->hashbuzzwordlist[i])
197			{
198			sp = (struct swline *) header->hashbuzzwordlist[i];
199			while (sp)
200			{
201			tmp = sp->next;
202			efree(sp->line);
203			efree(sp);
204			sp = tmp;
205			}
206			header->hashbuzzwordlist[i] = NULL;
207			}
208			}
209
210
211			/* Sees if a word is a buzzword by looking it up in the hash table. */
212
213			int isbuzzword(INDEXDATAHEADER *header, char *word)
214			{
215			unsigned hashval;
216			struct swline *sp;
217
218			hashval = hash(word);
219			sp = header->hashbuzzwordlist[hashval];
220
221			while (sp != NULL)
222			{
223			if (!strcmp(sp->line, word))
224			return 1;
225			sp = sp->next;
226			}
227			return 0;
228			}
229
230
231
232			void freestophash(INDEXDATAHEADER *header)
233			{
234			int i;
235			struct swline *sp,
236			*tmp;
237
238			for (i = 0; i < HASHSIZE; i++)
239			if (header->hashstoplist[i])
240			{
241			sp = (struct swline *) header->hashstoplist[i];
242			while (sp)
243			{
244			tmp = sp->next;
245			efree(sp->line);
246			efree(sp);
247			sp = tmp;
248			}
249			header->hashstoplist[i] = NULL;
250			}
251			}
252
253			void freeStopList(INDEXDATAHEADER *header)
254			{
255			int i;
256
257			for (i = 0; i < header->stopPos; i++)
258			efree(header->stopList[i]);
259			if (header->stopList)
260			efree(header->stopList);
261			header->stopList = NULL;
262			header->stopPos = header->stopMaxSize = 0;
263			}
264
265			/* Adds a "use" word to a hash table.
266			*/
267
268			void addusehash(INDEXDATAHEADER *header, char *word)
269			{
270			unsigned hashval;
271			struct swline *sp;
272
273			if (isuseword(header, word))
274			return;
275
276			sp = (struct swline *) emalloc(sizeof(struct swline));
277
278			sp->line = (char *) estrdup(word);
279
280			hashval = hash(word);
281			sp->next = header->hashuselist[hashval];
282			header->hashuselist[hashval] = sp;
283			}
284
285			/* Sees if a word is a "use" word by looking it up in the hash table.
286			*/
287
288			int isuseword(INDEXDATAHEADER *header, char *word)
289			{
290			unsigned hashval;
291			struct swline *sp;
292
293			hashval = hash(word);
294			sp = header->hashuselist[hashval];
295
296			while (sp != NULL)
297			{
298			if (!strcmp(sp->line, word))
299			return 1;
300			sp = sp->next;
301			}
302			return 0;
303			}