/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/hash.c
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/src/hash.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (hide annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Importing web-site building process.

1 adcroft 1.1 /*
2     ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
3     ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
4     **
5     ** This program and library is free software; you can redistribute it and/or
6     ** modify it under the terms of the GNU (Library) General Public License
7     ** as published by the Free Software Foundation; either version 2
8     ** of the License, or any later version.
9     **
10     ** This program is distributed in the hope that it will be useful,
11     ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12     ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     ** GNU (Library) General Public License for more details.
14     **
15     ** You should have received a copy of the GNU (Library) General Public License
16     ** along with this program; if not, write to the Free Software
17     ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18     **---------------------------------------------------------
19     ** Added addStopList to support printing of common words
20     ** G. Hill 4/7/97 ghill@library.berkeley.edu
21     **
22     ** change sprintf to snprintf to avoid corruption
23     ** SRE 11/17/99
24     **
25     ** 04/00 - Jose Ruiz
26     ** use bighash instead of hash in mergeresultlists for better performance
27     ** on big searches (a* or b* or c*)
28     **
29     */
30    
31     #include "swish.h"
32     #include "string.h"
33     #include "hash.h"
34     #include "mem.h"
35     #include "search.h"
36    
37     /* Hashes a string.
38     */
39    
40     unsigned hash(s)
41     char *s;
42     {
43     unsigned hashval;
44    
45     for (hashval = 0; *s != '\0'; s++)
46     hashval = (int) ((unsigned char) *s) + 31 * hashval;
47     return hashval % HASHSIZE;
48     }
49    
50     /* Hashes a string for a larger hash table.
51     */
52    
53     unsigned bighash(s)
54     char *s;
55     {
56     unsigned hashval;
57    
58     for (hashval = 0; *s != '\0'; s++)
59     hashval = (int) ((unsigned char) *s) + 31 * hashval;
60     return hashval % BIGHASHSIZE;
61     }
62    
63     /* Hashes a int.
64     */
65    
66     unsigned numhash(i)
67     int i;
68     {
69     return i % HASHSIZE;
70     }
71    
72     /* Hashes a int for a larger hash table.
73     */
74    
75     unsigned bignumhash(i)
76     int i;
77     {
78     return i % BIGHASHSIZE;
79     }
80    
81     /* Hashes a string for a larger hash table (for search).
82     */
83    
84     unsigned verybighash(s)
85     char *s;
86     {
87     unsigned hashval;
88    
89     for (hashval = 0; *s != '\0'; s++)
90     hashval = (int) ((unsigned char) *s) + 31 * hashval;
91     return hashval % VERYBIGHASHSIZE;
92     }
93    
94    
95     /* Adds a stop word to the list of removed common words */
96     void addStopList(INDEXDATAHEADER *header, char *word)
97     {
98     char *arrayWord;
99    
100     if (isstopword(header, word))
101     return;
102    
103     /* Another BUG!! Jose Ruiz 04/00
104     The dimension of the array was not checked
105     Fixed */
106     if (header->stopPos == header->stopMaxSize)
107     {
108     header->stopMaxSize += 100;
109     if (!header->stopList)
110     header->stopList = (char **) emalloc(header->stopMaxSize * sizeof(char *));
111    
112     else
113     header->stopList = (char **) erealloc(header->stopList, header->stopMaxSize * sizeof(char *));
114     }
115     arrayWord = (char *) estrdup(word);
116     header->stopList[header->stopPos++] = arrayWord;
117     }
118    
119    
120     /* Adds a stop word to a hash table.
121     */
122    
123     void addstophash(INDEXDATAHEADER *header, char *word)
124     {
125     unsigned hashval;
126     struct swline *sp;
127    
128     if (isstopword(header, word))
129     return;
130    
131     sp = (struct swline *) emalloc(sizeof(struct swline));
132    
133     sp->line = (char *) estrdup(word);
134    
135     hashval = hash(word);
136     sp->next = header->hashstoplist[hashval];
137     header->hashstoplist[hashval] = sp;
138     }
139    
140     /* Sees if a word is a stop word by looking it up in the hash table.
141     */
142    
143     int isstopword(INDEXDATAHEADER *header, char *word)
144     {
145     unsigned hashval;
146     struct swline *sp;
147    
148     hashval = hash(word);
149     sp = header->hashstoplist[hashval];
150    
151     while (sp != NULL)
152     {
153     if (!strcmp(sp->line, word))
154     return 1;
155     sp = sp->next;
156     }
157     return 0;
158     }
159    
160    
161    
162     /* Adds a buzzword to a hash table.*/
163    
164     void addbuzzwordhash(INDEXDATAHEADER *header, char *word)
165     {
166     unsigned hashval;
167     struct swline *sp;
168    
169     if (isbuzzword(header, word))
170     return;
171    
172     header->buzzwords_used_flag++;
173    
174     sp = (struct swline *) emalloc(sizeof(struct swline));
175    
176     sp->line = (char *) estrdup(word);
177    
178    
179     /* should buzzwords be case sensitive? */
180     strtolower( sp->line );
181    
182     hashval = hash( sp->line );
183    
184    
185     sp->next = header->hashbuzzwordlist[hashval];
186     header->hashbuzzwordlist[hashval] = sp;
187     }
188    
189     void freebuzzwordhash(INDEXDATAHEADER *header)
190     {
191     int i;
192     struct swline *sp,
193     *tmp;
194    
195     for (i = 0; i < HASHSIZE; i++)
196     if (header->hashbuzzwordlist[i])
197     {
198     sp = (struct swline *) header->hashbuzzwordlist[i];
199     while (sp)
200     {
201     tmp = sp->next;
202     efree(sp->line);
203     efree(sp);
204     sp = tmp;
205     }
206     header->hashbuzzwordlist[i] = NULL;
207     }
208     }
209    
210    
211     /* Sees if a word is a buzzword by looking it up in the hash table. */
212    
213     int isbuzzword(INDEXDATAHEADER *header, char *word)
214     {
215     unsigned hashval;
216     struct swline *sp;
217    
218     hashval = hash(word);
219     sp = header->hashbuzzwordlist[hashval];
220    
221     while (sp != NULL)
222     {
223     if (!strcmp(sp->line, word))
224     return 1;
225     sp = sp->next;
226     }
227     return 0;
228     }
229    
230    
231    
232     void freestophash(INDEXDATAHEADER *header)
233     {
234     int i;
235     struct swline *sp,
236     *tmp;
237    
238     for (i = 0; i < HASHSIZE; i++)
239     if (header->hashstoplist[i])
240     {
241     sp = (struct swline *) header->hashstoplist[i];
242     while (sp)
243     {
244     tmp = sp->next;
245     efree(sp->line);
246     efree(sp);
247     sp = tmp;
248     }
249     header->hashstoplist[i] = NULL;
250     }
251     }
252    
253     void freeStopList(INDEXDATAHEADER *header)
254     {
255     int i;
256    
257     for (i = 0; i < header->stopPos; i++)
258     efree(header->stopList[i]);
259     if (header->stopList)
260     efree(header->stopList);
261     header->stopList = NULL;
262     header->stopPos = header->stopMaxSize = 0;
263     }
264    
265     /* Adds a "use" word to a hash table.
266     */
267    
268     void addusehash(INDEXDATAHEADER *header, char *word)
269     {
270     unsigned hashval;
271     struct swline *sp;
272    
273     if (isuseword(header, word))
274     return;
275    
276     sp = (struct swline *) emalloc(sizeof(struct swline));
277    
278     sp->line = (char *) estrdup(word);
279    
280     hashval = hash(word);
281     sp->next = header->hashuselist[hashval];
282     header->hashuselist[hashval] = sp;
283     }
284    
285     /* Sees if a word is a "use" word by looking it up in the hash table.
286     */
287    
288     int isuseword(INDEXDATAHEADER *header, char *word)
289     {
290     unsigned hashval;
291     struct swline *sp;
292    
293     hashval = hash(word);
294     sp = header->hashuselist[hashval];
295    
296     while (sp != NULL)
297     {
298     if (!strcmp(sp->line, word))
299     return 1;
300     sp = sp->next;
301     }
302     return 0;
303     }

  ViewVC Help
Powered by ViewVC 1.1.22