/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/swish2.c
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/src/swish2.c

Parent Directory | Revision Log | View Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Error occurred while calculating annotation data.
Importing web-site building process.

1 /*
2 **
3 ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
4 ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
5 **
6 ** This program and library is free software; you can redistribute it and/or
7 ** modify it under the terms of the GNU (Library) General Public License
8 ** as published by the Free Software Foundation; either version 2
9 ** of the License, or any later version.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ** GNU (Library) General Public License for more details.
15 **
16 ** You should have received a copy of the GNU (Library) General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 */
20
21 #define GLOBAL_VARS
22
23 #include "swish.h"
24
25 #include "string.h"
26 #include "mem.h"
27 #include "error.h"
28 #include "list.h"
29 #include "search.h"
30 #include "index.h"
31 #include "file.h"
32 #include "http.h"
33 #include "merge.h"
34 #include "docprop.h"
35 #include "hash.h"
36 #include "entities.h"
37 #include "filter.h"
38 #include "result_output.h"
39 #include "search_alt.h"
40 #include "result_output.h"
41 #include "result_sort.h"
42 #include "db.h"
43 #include "fs.h"
44 #include "swish_words.h"
45 #include "extprog.h"
46 #include "metanames.h"
47 #include "proplimit.h"
48 #include "parse_conffile.h"
49 #ifdef HAVE_ZLIB
50 #include <zlib.h>
51 #endif
52
53
/*
 * Global debug mask, initialized to 0.
 * Defined in this file so that the symbol is part of the library.
 * NOTE(review): presumably a set of debug flag bits -- confirm against its users.
 */
unsigned int DEBUG_MASK = 0;
56
57
58
59 /*
60 -- init swish structure
61 */
62
63 SWISH *SwishNew()
64 {
65 SWISH *sw;
66
67 /* Default is to write errors to stdout */
68 set_error_handle(stdout);
69
70 sw = emalloc(sizeof(SWISH));
71 memset(sw, 0, sizeof(SWISH));
72
73 initModule_Filter(sw);
74 initModule_ResultOutput(sw);
75 initModule_SearchAlt(sw);
76 initModule_ResultSort(sw);
77 initModule_Entities(sw);
78 initModule_DB(sw);
79 initModule_Search(sw);
80 initModule_Index(sw);
81 initModule_FS(sw);
82 initModule_HTTP(sw);
83 initModule_Swish_Words(sw);
84 initModule_Prog(sw);
85 initModule_PropLimit(sw);
86
87 sw->TotalWords = 0;
88 sw->TotalFiles = 0;
89 sw->dirlist = NULL;
90 sw->indexlist = NULL;
91 sw->replaceRegexps = NULL;
92 sw->pathExtractList = NULL;
93 sw->lasterror = RC_OK;
94 sw->lasterrorstr[0] = '\0';
95 sw->verbose = VERBOSE;
96 sw->parser_warn_level = 0;
97 sw->indexComments = 0; /* change default 5/01 wsm */
98 sw->nocontentslist = NULL;
99 sw->DefaultDocType = NODOCTYPE;
100 sw->indexcontents = NULL;
101 sw->storedescription = NULL;
102 sw->suffixlist = NULL;
103 sw->ignoremetalist = NULL;
104 sw->dontbumpstarttagslist = NULL;
105 sw->dontbumpendtagslist = NULL;
106 sw->mtime_limit = 0;
107
108 #ifdef HAVE_ZLIB
109 sw->PropCompressionLevel = Z_DEFAULT_COMPRESSION;
110 #endif
111
112 sw->truncateDocSize = 0; /* default: no truncation of docs */
113
114
115 /* Make rest of lookup tables */
116 makeallstringlookuptables(sw);
117 return (sw);
118 }
119
120
121
122
123 /* Free memory for search results and parameters (properties ...) */
124 void SwishResetSearch(SWISH * sw)
125 {
126
127 /* Free sort stuff */
128 resetModule_Search(sw);
129 resetModule_ResultSort(sw);
130
131 sw->lasterror = RC_OK;
132 sw->lasterrorstr[0] = '\0';
133 }
134
135 void SwishClose(SWISH * sw)
136 {
137 IndexFILE *tmpindexlist;
138 int i;
139
140 if (sw) {
141 /* Free search results and imput parameters */
142 SwishResetSearch(sw);
143
144 /* Close any pending DB */
145 tmpindexlist = sw->indexlist;
146 while (tmpindexlist) {
147 if (tmpindexlist->DB)
148 DB_Close(sw, tmpindexlist->DB);
149 tmpindexlist = tmpindexlist->next;
150 }
151
152 freeModule_Filter(sw);
153 freeModule_ResultOutput(sw);
154 freeModule_SearchAlt(sw);
155 freeModule_Entities(sw);
156 freeModule_DB(sw);
157 freeModule_Index(sw);
158 freeModule_ResultSort(sw);
159 freeModule_FS(sw);
160 freeModule_HTTP(sw);
161 freeModule_Search(sw);
162 freeModule_Swish_Words(sw);
163 freeModule_Prog(sw);
164
165 freeModule_PropLimit(sw);
166
167
168 /* Free MetaNames and close files */
169 tmpindexlist = sw->indexlist;
170
171 /* Free ReplaceRules regular expressions */
172 free_regex_list(&sw->replaceRegexps);
173
174 /* Free ExtractPath list */
175 free_Extracted_Path(sw);
176
177 /* FileRules?? */
178
179 /* meta name for ALT tags */
180 if ( sw->IndexAltTagMeta )
181 {
182 efree( sw->IndexAltTagMeta );
183 sw->IndexAltTagMeta = NULL;
184 }
185
186
187
188 while (tmpindexlist) {
189
190 /* free the property string cache, if used */
191 if ( tmpindexlist->prop_string_cache )
192 {
193 int i;
194 for ( i=0; i<tmpindexlist->header.metaCounter; i++ )
195 if ( tmpindexlist->prop_string_cache[i] )
196 efree( tmpindexlist->prop_string_cache[i] );
197
198 efree( tmpindexlist->prop_string_cache );
199 tmpindexlist->prop_string_cache = NULL;
200 }
201
202
203 /* free the meteEntry array */
204 if (tmpindexlist->header.metaCounter)
205 freeMetaEntries(&tmpindexlist->header);
206
207 /* Free stopwords structures */
208 freestophash(&tmpindexlist->header);
209 freeStopList(&tmpindexlist->header);
210
211 freebuzzwordhash(&tmpindexlist->header);
212
213 free_header(&tmpindexlist->header);
214
215 /* Removed due to patents
216 if(tmpindexlist->header.applyFileInfoCompression && tmpindexlist->n_dict_entries)
217 {
218 for(i=0;i<tmpindexlist->n_dict_entries;i++)
219 efree(tmpindexlist->dict[i]);
220 }
221 */
222 for (i = 0; i < 256; i++)
223 if (tmpindexlist->keywords[i])
224 efree(tmpindexlist->keywords[i]);
225
226
227 tmpindexlist = tmpindexlist->next;
228 }
229
230 freeindexfile(sw->indexlist);
231
232 if (sw->Prop_IO_Buf) {
233 efree(sw->Prop_IO_Buf);
234 sw->Prop_IO_Buf = NULL;
235 }
236
237 /* Free SWISH struct */
238
239
240 freeSwishConfigOptions( sw ); // should be freeConfigOptions( sw->config )
241 efree(sw);
242 }
243 }
244
245 /**************************************************
246 * SwishOpen - Create a swish handle
247 * Returns a swish handle
248 * Caller much check sw->lasterror for errors
249 * and call SwishClose() to free memory
250 **************************************************/
251
252
253 SWISH *SwishInit(char *indexfiles)
254 {
255 StringList *sl = NULL;
256 SWISH *sw;
257 int i;
258
259 sw = SwishNew();
260 if (!indexfiles || !*indexfiles)
261 {
262 set_progerr(INDEX_FILE_ERROR, sw, "No index file supplied" );
263 return sw;
264 }
265
266
267 /* Parse out index files, and append to indexlist */
268 sl = parse_line(indexfiles);
269
270 if ( 0 == sl->n )
271 {
272 set_progerr(INDEX_FILE_ERROR, sw, "No index file supplied" );
273 return sw;
274 }
275
276
277
278 for (i = 0; i < sl->n; i++)
279 sw->indexlist = (IndexFILE *)addindexfile(sw->indexlist, sl->word[i]);
280
281 if (sl)
282 freeStringList(sl);
283
284 if ( !sw->lasterror )
285 SwishAttach(sw);
286
287 return sw;
288 }
289
290
291 /**************************************************
292 * SwishOpen - Create a swish handle
293 * Returns NULL on error -- no error message available
294 * Frees memory on error
295 * This is depreciated form
296 **************************************************/
297
298
299 SWISH *SwishOpen(char *indexfiles)
300 {
301 SWISH *sw = SwishInit( indexfiles );
302
303 if ( sw->lasterror )
304 {
305 SwishClose(sw);
306 sw = NULL;
307 }
308
309 return sw;
310 }
311
312
313
314 /**************************************************
315 * SwishAttach - Connect to the database
316 * Returns false on Failure
317 **************************************************/
318
319 int SwishAttach(SWISH * sw)
320 {
321 struct MOD_Search *srch = sw->Search;
322 IndexFILE *indexlist;
323
324 IndexFILE *tmplist;
325
326 indexlist = sw->indexlist;
327 sw->TotalWords = 0;
328 sw->TotalFiles = 0;
329
330
331 /* First of all . Read header default values from all index fileis */
332 /* With this, we read wordchars, stripchars, ... */
333 for (tmplist = indexlist; tmplist;)
334 {
335 sw->commonerror = RC_OK;
336 srch->bigrank = 0;
337
338 tmplist->DB = (void *)DB_Open(sw, tmplist->line, DB_READ);
339 if ( sw->lasterror )
340 return 0;
341
342 read_header(sw, &tmplist->header, tmplist->DB);
343
344
345 sw->TotalWords += tmplist->header.totalwords;
346 sw->TotalFiles += tmplist->header.totalfiles;
347 tmplist = tmplist->next;
348 }
349
350 return ( sw->lasterror == 0 );
351 }
352
353
354
355
356 int SwishSearch(SWISH * sw, char *words, int structure, char *props, char *sort)
357 {
358 StringList *slprops = NULL;
359 StringList *slsort = NULL;
360 int i,
361 sortmode;
362 int header_level;
363 char *field;
364
365 if (!sw)
366 {
367 sw->lasterror = INVALID_SWISH_HANDLE;
368 return INVALID_SWISH_HANDLE;
369 }
370
371
372 /* If previous search - reset its values (results, props ) */
373 SwishResetSearch(sw);
374
375 if (props && props[0]) {
376 slprops = parse_line(props);
377 for (i = 0; i < slprops->n; i++)
378 addSearchResultDisplayProperty(sw, slprops->word[i]);
379 }
380
381 if (sort && sort[0]) {
382 slsort = parse_line(sort);
383 for (i = 0; i < slsort->n;) {
384 sortmode = 1; /* Default mode is ascending */
385 field = slsort->word[i++];
386 if (i < slsort->n) {
387 if (!strcasecmp(slsort->word[i], "asc")) {
388 sortmode = -1; /* Ascending */
389 i++;
390 } else {
391 if (!strcasecmp(slsort->word[i], "desc")) {
392 sortmode = 1; /* Ascending */
393 i++;
394 }
395 }
396 }
397 addSearchResultSortProperty(sw, field, sortmode);
398 }
399 }
400 i = 0;
401
402 header_level = sw->ResultOutput->headerOutVerbose;
403 sw->ResultOutput->headerOutVerbose = 0;
404
405 i = search(sw, words, structure); /* search with no eco */
406
407 sw->ResultOutput->headerOutVerbose = header_level;
408 if (slsort)
409 freeStringList(slsort);
410 if (slprops)
411 freeStringList(slprops);
412 return i;
413 }
414
415
416 int SwishSeek(SWISH * sw, int pos)
417 {
418 int i;
419 RESULT *sp = NULL;
420
421 if (!sw)
422 return INVALID_SWISH_HANDLE;
423
424 if ( !sw->Search->db_results )
425 {
426 set_progerr(SWISH_LISTRESULTS_EOF, sw, "Attempted to SwishSeek before searching");
427 return SWISH_LISTRESULTS_EOF;
428 }
429
430 /* Check if only one index file -> Faster SwishSeek */
431
432 if (!sw->Search->db_results->next) {
433 for (i = 0, sp = sw->Search->db_results->sortresultlist; sp && i < pos; i++)
434 sp = sp->next;
435
436 sw->Search->db_results->currentresult = sp;
437 } else {
438 /* Well, we finally have more than one file */
439 /* In this case we have no choice - We need to read the data from disk */
440 /* The easy way: Let SwishNext do the job */
441
442 for (i = 0; i < pos; i++)
443 if (!(sp = SwishNext(sw)))
444 break;
445 }
446
447 if (!sp)
448 return ((sw->lasterror = SWISH_LISTRESULTS_EOF));
449
450 return pos;
451 }
452
453
454 char tmp_header_buffer[50]; /* Not thread safe $$$ */
455
456 /** Argh! This is as ugly as the config parsing code **/
457
458 char *SwishHeaderParameter(IndexFILE * indexf, char *parameter_name)
459 {
460 if (!strcasecmp(parameter_name, WORDCHARSPARAMNAME))
461 return indexf->header.wordchars;
462
463 else if (!strcasecmp(parameter_name, BEGINCHARSPARAMNAME))
464 return indexf->header.beginchars;
465
466 else if (!strcasecmp(parameter_name, ENDCHARSPARAMNAME))
467 return indexf->header.endchars;
468
469 else if (!strcasecmp(parameter_name, IGNOREFIRSTCHARPARAMNAME))
470 return indexf->header.ignorefirstchar;
471
472 else if (!strcasecmp(parameter_name, IGNORELASTCHARPARAMNAME))
473 return indexf->header.ignorelastchar;
474
475
476
477 else if (!strcasecmp(parameter_name, NAMEHEADERPARAMNAME))
478 return indexf->header.indexn;
479
480 else if (!strcasecmp(parameter_name, DESCRIPTIONPARAMNAME))
481 return indexf->header.indexd;
482
483 else if (!strcasecmp(parameter_name, POINTERPARAMNAME))
484 return indexf->header.indexp;
485
486 else if (!strcasecmp(parameter_name, MAINTAINEDBYPARAMNAME))
487 return indexf->header.indexa;
488
489 else if (!strcasecmp(parameter_name, INDEXEDONPARAMNAME))
490 return indexf->header.indexedon;
491
492
493
494 else if (!strcasecmp(parameter_name, STEMMINGPARAMNAME)) {
495 if (indexf->header.fuzzy_mode == FUZZY_STEMMING )
496 return "1";
497 else
498 return "0";
499
500 } else if (!strcasecmp(parameter_name, SOUNDEXPARAMNAME)) {
501 if (indexf->header.fuzzy_mode == FUZZY_SOUNDEX )
502 return "1";
503 else
504 return "0";
505
506 } else if (!strcasecmp(parameter_name, FUZZYMODEPARAMNAME)) {
507 return fuzzy_mode_to_string( indexf->header.fuzzy_mode );
508
509
510 } else if (!strcasecmp(parameter_name, FILECOUNTPARAMNAME)) {
511 sprintf(tmp_header_buffer, "%d", indexf->header.totalfiles);
512 return tmp_header_buffer;
513
514 } else
515 return "";
516 }
517
518 char **SwishStopWords(SWISH * sw, char *filename, int *numstops)
519 {
520 IndexFILE *indexf;
521
522 indexf = sw->indexlist;
523 while (indexf) {
524 if (!strcasecmp(indexf->line, filename)) {
525 *numstops = indexf->header.stopPos;
526 return indexf->header.stopList;
527 }
528 }
529 *numstops = 0;
530 return NULL;
531 }
532
533 char *SwishWords(SWISH * sw, char *filename, char c)
534 {
535 IndexFILE *indexf;
536
537 indexf = sw->indexlist;
538 while (indexf) {
539 if (!strcasecmp(indexf->line, filename)) {
540 return getfilewords(sw, c, indexf);
541 }
542 }
543 return "";
544 }

  ViewVC Help
Powered by ViewVC 1.1.22