/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/hash.c
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/src/hash.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Error occurred while calculating annotation data.
Importing web-site building process.

1 /*
2 ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
3 ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
4 **
5 ** This program and library is free software; you can redistribute it and/or
6 ** modify it under the terms of the GNU (Library) General Public License
7 ** as published by the Free Software Foundation; either version 2
8 ** of the License, or any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU (Library) General Public License for more details.
14 **
15 ** You should have received a copy of the GNU (Library) General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 **---------------------------------------------------------
19 ** Added addStopList to support printing of common words
20 ** G. Hill 4/7/97 ghill@library.berkeley.edu
21 **
22 ** change sprintf to snprintf to avoid corruption
23 ** SRE 11/17/99
24 **
25 ** 04/00 - Jose Ruiz
26 ** use bighash instead of hash in mergeresultlists for better performance
27 ** on big searches (a* or b* or c*)
28 **
29 */
30
31 #include "swish.h"
32 #include "string.h"
33 #include "hash.h"
34 #include "mem.h"
35 #include "search.h"
36
37 /* Hashes a string.
38 */
39
40 unsigned hash(s)
41 char *s;
42 {
43 unsigned hashval;
44
45 for (hashval = 0; *s != '\0'; s++)
46 hashval = (int) ((unsigned char) *s) + 31 * hashval;
47 return hashval % HASHSIZE;
48 }
49
50 /* Hashes a string for a larger hash table.
51 */
52
53 unsigned bighash(s)
54 char *s;
55 {
56 unsigned hashval;
57
58 for (hashval = 0; *s != '\0'; s++)
59 hashval = (int) ((unsigned char) *s) + 31 * hashval;
60 return hashval % BIGHASHSIZE;
61 }
62
63 /* Hashes a int.
64 */
65
66 unsigned numhash(i)
67 int i;
68 {
69 return i % HASHSIZE;
70 }
71
72 /* Hashes a int for a larger hash table.
73 */
74
75 unsigned bignumhash(i)
76 int i;
77 {
78 return i % BIGHASHSIZE;
79 }
80
81 /* Hashes a string for a larger hash table (for search).
82 */
83
84 unsigned verybighash(s)
85 char *s;
86 {
87 unsigned hashval;
88
89 for (hashval = 0; *s != '\0'; s++)
90 hashval = (int) ((unsigned char) *s) + 31 * hashval;
91 return hashval % VERYBIGHASHSIZE;
92 }
93
94
95 /* Adds a stop word to the list of removed common words */
96 void addStopList(INDEXDATAHEADER *header, char *word)
97 {
98 char *arrayWord;
99
100 if (isstopword(header, word))
101 return;
102
103 /* Another BUG!! Jose Ruiz 04/00
104 The dimension of the array was not checked
105 Fixed */
106 if (header->stopPos == header->stopMaxSize)
107 {
108 header->stopMaxSize += 100;
109 if (!header->stopList)
110 header->stopList = (char **) emalloc(header->stopMaxSize * sizeof(char *));
111
112 else
113 header->stopList = (char **) erealloc(header->stopList, header->stopMaxSize * sizeof(char *));
114 }
115 arrayWord = (char *) estrdup(word);
116 header->stopList[header->stopPos++] = arrayWord;
117 }
118
119
120 /* Adds a stop word to a hash table.
121 */
122
123 void addstophash(INDEXDATAHEADER *header, char *word)
124 {
125 unsigned hashval;
126 struct swline *sp;
127
128 if (isstopword(header, word))
129 return;
130
131 sp = (struct swline *) emalloc(sizeof(struct swline));
132
133 sp->line = (char *) estrdup(word);
134
135 hashval = hash(word);
136 sp->next = header->hashstoplist[hashval];
137 header->hashstoplist[hashval] = sp;
138 }
139
140 /* Sees if a word is a stop word by looking it up in the hash table.
141 */
142
143 int isstopword(INDEXDATAHEADER *header, char *word)
144 {
145 unsigned hashval;
146 struct swline *sp;
147
148 hashval = hash(word);
149 sp = header->hashstoplist[hashval];
150
151 while (sp != NULL)
152 {
153 if (!strcmp(sp->line, word))
154 return 1;
155 sp = sp->next;
156 }
157 return 0;
158 }
159
160
161
162 /* Adds a buzzword to a hash table.*/
163
164 void addbuzzwordhash(INDEXDATAHEADER *header, char *word)
165 {
166 unsigned hashval;
167 struct swline *sp;
168
169 if (isbuzzword(header, word))
170 return;
171
172 header->buzzwords_used_flag++;
173
174 sp = (struct swline *) emalloc(sizeof(struct swline));
175
176 sp->line = (char *) estrdup(word);
177
178
179 /* should buzzwords be case sensitive? */
180 strtolower( sp->line );
181
182 hashval = hash( sp->line );
183
184
185 sp->next = header->hashbuzzwordlist[hashval];
186 header->hashbuzzwordlist[hashval] = sp;
187 }
188
189 void freebuzzwordhash(INDEXDATAHEADER *header)
190 {
191 int i;
192 struct swline *sp,
193 *tmp;
194
195 for (i = 0; i < HASHSIZE; i++)
196 if (header->hashbuzzwordlist[i])
197 {
198 sp = (struct swline *) header->hashbuzzwordlist[i];
199 while (sp)
200 {
201 tmp = sp->next;
202 efree(sp->line);
203 efree(sp);
204 sp = tmp;
205 }
206 header->hashbuzzwordlist[i] = NULL;
207 }
208 }
209
210
211 /* Sees if a word is a buzzword by looking it up in the hash table. */
212
213 int isbuzzword(INDEXDATAHEADER *header, char *word)
214 {
215 unsigned hashval;
216 struct swline *sp;
217
218 hashval = hash(word);
219 sp = header->hashbuzzwordlist[hashval];
220
221 while (sp != NULL)
222 {
223 if (!strcmp(sp->line, word))
224 return 1;
225 sp = sp->next;
226 }
227 return 0;
228 }
229
230
231
232 void freestophash(INDEXDATAHEADER *header)
233 {
234 int i;
235 struct swline *sp,
236 *tmp;
237
238 for (i = 0; i < HASHSIZE; i++)
239 if (header->hashstoplist[i])
240 {
241 sp = (struct swline *) header->hashstoplist[i];
242 while (sp)
243 {
244 tmp = sp->next;
245 efree(sp->line);
246 efree(sp);
247 sp = tmp;
248 }
249 header->hashstoplist[i] = NULL;
250 }
251 }
252
253 void freeStopList(INDEXDATAHEADER *header)
254 {
255 int i;
256
257 for (i = 0; i < header->stopPos; i++)
258 efree(header->stopList[i]);
259 if (header->stopList)
260 efree(header->stopList);
261 header->stopList = NULL;
262 header->stopPos = header->stopMaxSize = 0;
263 }
264
265 /* Adds a "use" word to a hash table.
266 */
267
268 void addusehash(INDEXDATAHEADER *header, char *word)
269 {
270 unsigned hashval;
271 struct swline *sp;
272
273 if (isuseword(header, word))
274 return;
275
276 sp = (struct swline *) emalloc(sizeof(struct swline));
277
278 sp->line = (char *) estrdup(word);
279
280 hashval = hash(word);
281 sp->next = header->hashuselist[hashval];
282 header->hashuselist[hashval] = sp;
283 }
284
285 /* Sees if a word is a "use" word by looking it up in the hash table.
286 */
287
288 int isuseword(INDEXDATAHEADER *header, char *word)
289 {
290 unsigned hashval;
291 struct swline *sp;
292
293 hashval = hash(word);
294 sp = header->hashuselist[hashval];
295
296 while (sp != NULL)
297 {
298 if (!strcmp(sp->line, word))
299 return 1;
300 sp = sp->next;
301 }
302 return 0;
303 }

  ViewVC Help
Powered by ViewVC 1.1.22