/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/swish2.c
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/src/swish2.c

Parent Directory | Revision Log | View Revision Graph


Revision 1.1.1.1 - (hide annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Importing web-site building process.

1 adcroft 1.1 /*
2     **
3     ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
4     ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
5     **
6     ** This program and library is free software; you can redistribute it and/or
7     ** modify it under the terms of the GNU (Library) General Public License
8     ** as published by the Free Software Foundation; either version 2
9     ** of the License, or any later version.
10     **
11     ** This program is distributed in the hope that it will be useful,
12     ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13     ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     ** GNU (Library) General Public License for more details.
15     **
16     ** You should have received a copy of the GNU (Library) General Public License
17     ** along with this program; if not, write to the Free Software
18     ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19     */
20    
21     #define GLOBAL_VARS
22    
23     #include "swish.h"
24    
25     #include "string.h"
26     #include "mem.h"
27     #include "error.h"
28     #include "list.h"
29     #include "search.h"
30     #include "index.h"
31     #include "file.h"
32     #include "http.h"
33     #include "merge.h"
34     #include "docprop.h"
35     #include "hash.h"
36     #include "entities.h"
37     #include "filter.h"
38     #include "result_output.h"
39     #include "search_alt.h"
40     #include "result_output.h"
41     #include "result_sort.h"
42     #include "db.h"
43     #include "fs.h"
44     #include "swish_words.h"
45     #include "extprog.h"
46     #include "metanames.h"
47     #include "proplimit.h"
48     #include "parse_conffile.h"
49     #ifdef HAVE_ZLIB
50     #include <zlib.h>
51     #endif
52    
53    
/*
** Global debug bit mask, initialised to 0 (no bits set).
** Moved here so that it is part of the library.
*/
unsigned int DEBUG_MASK = 0;
56    
57    
58    
59     /*
60     -- init swish structure
61     */
62    
63     SWISH *SwishNew()
64     {
65     SWISH *sw;
66    
67     /* Default is to write errors to stdout */
68     set_error_handle(stdout);
69    
70     sw = emalloc(sizeof(SWISH));
71     memset(sw, 0, sizeof(SWISH));
72    
73     initModule_Filter(sw);
74     initModule_ResultOutput(sw);
75     initModule_SearchAlt(sw);
76     initModule_ResultSort(sw);
77     initModule_Entities(sw);
78     initModule_DB(sw);
79     initModule_Search(sw);
80     initModule_Index(sw);
81     initModule_FS(sw);
82     initModule_HTTP(sw);
83     initModule_Swish_Words(sw);
84     initModule_Prog(sw);
85     initModule_PropLimit(sw);
86    
87     sw->TotalWords = 0;
88     sw->TotalFiles = 0;
89     sw->dirlist = NULL;
90     sw->indexlist = NULL;
91     sw->replaceRegexps = NULL;
92     sw->pathExtractList = NULL;
93     sw->lasterror = RC_OK;
94     sw->lasterrorstr[0] = '\0';
95     sw->verbose = VERBOSE;
96     sw->parser_warn_level = 0;
97     sw->indexComments = 0; /* change default 5/01 wsm */
98     sw->nocontentslist = NULL;
99     sw->DefaultDocType = NODOCTYPE;
100     sw->indexcontents = NULL;
101     sw->storedescription = NULL;
102     sw->suffixlist = NULL;
103     sw->ignoremetalist = NULL;
104     sw->dontbumpstarttagslist = NULL;
105     sw->dontbumpendtagslist = NULL;
106     sw->mtime_limit = 0;
107    
108     #ifdef HAVE_ZLIB
109     sw->PropCompressionLevel = Z_DEFAULT_COMPRESSION;
110     #endif
111    
112     sw->truncateDocSize = 0; /* default: no truncation of docs */
113    
114    
115     /* Make rest of lookup tables */
116     makeallstringlookuptables(sw);
117     return (sw);
118     }
119    
120    
121    
122    
123     /* Free memory for search results and parameters (properties ...) */
124     void SwishResetSearch(SWISH * sw)
125     {
126    
127     /* Free sort stuff */
128     resetModule_Search(sw);
129     resetModule_ResultSort(sw);
130    
131     sw->lasterror = RC_OK;
132     sw->lasterrorstr[0] = '\0';
133     }
134    
135     void SwishClose(SWISH * sw)
136     {
137     IndexFILE *tmpindexlist;
138     int i;
139    
140     if (sw) {
141     /* Free search results and imput parameters */
142     SwishResetSearch(sw);
143    
144     /* Close any pending DB */
145     tmpindexlist = sw->indexlist;
146     while (tmpindexlist) {
147     if (tmpindexlist->DB)
148     DB_Close(sw, tmpindexlist->DB);
149     tmpindexlist = tmpindexlist->next;
150     }
151    
152     freeModule_Filter(sw);
153     freeModule_ResultOutput(sw);
154     freeModule_SearchAlt(sw);
155     freeModule_Entities(sw);
156     freeModule_DB(sw);
157     freeModule_Index(sw);
158     freeModule_ResultSort(sw);
159     freeModule_FS(sw);
160     freeModule_HTTP(sw);
161     freeModule_Search(sw);
162     freeModule_Swish_Words(sw);
163     freeModule_Prog(sw);
164    
165     freeModule_PropLimit(sw);
166    
167    
168     /* Free MetaNames and close files */
169     tmpindexlist = sw->indexlist;
170    
171     /* Free ReplaceRules regular expressions */
172     free_regex_list(&sw->replaceRegexps);
173    
174     /* Free ExtractPath list */
175     free_Extracted_Path(sw);
176    
177     /* FileRules?? */
178    
179     /* meta name for ALT tags */
180     if ( sw->IndexAltTagMeta )
181     {
182     efree( sw->IndexAltTagMeta );
183     sw->IndexAltTagMeta = NULL;
184     }
185    
186    
187    
188     while (tmpindexlist) {
189    
190     /* free the property string cache, if used */
191     if ( tmpindexlist->prop_string_cache )
192     {
193     int i;
194     for ( i=0; i<tmpindexlist->header.metaCounter; i++ )
195     if ( tmpindexlist->prop_string_cache[i] )
196     efree( tmpindexlist->prop_string_cache[i] );
197    
198     efree( tmpindexlist->prop_string_cache );
199     tmpindexlist->prop_string_cache = NULL;
200     }
201    
202    
203     /* free the meteEntry array */
204     if (tmpindexlist->header.metaCounter)
205     freeMetaEntries(&tmpindexlist->header);
206    
207     /* Free stopwords structures */
208     freestophash(&tmpindexlist->header);
209     freeStopList(&tmpindexlist->header);
210    
211     freebuzzwordhash(&tmpindexlist->header);
212    
213     free_header(&tmpindexlist->header);
214    
215     /* Removed due to patents
216     if(tmpindexlist->header.applyFileInfoCompression && tmpindexlist->n_dict_entries)
217     {
218     for(i=0;i<tmpindexlist->n_dict_entries;i++)
219     efree(tmpindexlist->dict[i]);
220     }
221     */
222     for (i = 0; i < 256; i++)
223     if (tmpindexlist->keywords[i])
224     efree(tmpindexlist->keywords[i]);
225    
226    
227     tmpindexlist = tmpindexlist->next;
228     }
229    
230     freeindexfile(sw->indexlist);
231    
232     if (sw->Prop_IO_Buf) {
233     efree(sw->Prop_IO_Buf);
234     sw->Prop_IO_Buf = NULL;
235     }
236    
237     /* Free SWISH struct */
238    
239    
240     freeSwishConfigOptions( sw ); // should be freeConfigOptions( sw->config )
241     efree(sw);
242     }
243     }
244    
245     /**************************************************
246     * SwishOpen - Create a swish handle
247     * Returns a swish handle
248     * Caller much check sw->lasterror for errors
249     * and call SwishClose() to free memory
250     **************************************************/
251    
252    
253     SWISH *SwishInit(char *indexfiles)
254     {
255     StringList *sl = NULL;
256     SWISH *sw;
257     int i;
258    
259     sw = SwishNew();
260     if (!indexfiles || !*indexfiles)
261     {
262     set_progerr(INDEX_FILE_ERROR, sw, "No index file supplied" );
263     return sw;
264     }
265    
266    
267     /* Parse out index files, and append to indexlist */
268     sl = parse_line(indexfiles);
269    
270     if ( 0 == sl->n )
271     {
272     set_progerr(INDEX_FILE_ERROR, sw, "No index file supplied" );
273     return sw;
274     }
275    
276    
277    
278     for (i = 0; i < sl->n; i++)
279     sw->indexlist = (IndexFILE *)addindexfile(sw->indexlist, sl->word[i]);
280    
281     if (sl)
282     freeStringList(sl);
283    
284     if ( !sw->lasterror )
285     SwishAttach(sw);
286    
287     return sw;
288     }
289    
290    
291     /**************************************************
292     * SwishOpen - Create a swish handle
293     * Returns NULL on error -- no error message available
294     * Frees memory on error
295     * This is depreciated form
296     **************************************************/
297    
298    
299     SWISH *SwishOpen(char *indexfiles)
300     {
301     SWISH *sw = SwishInit( indexfiles );
302    
303     if ( sw->lasterror )
304     {
305     SwishClose(sw);
306     sw = NULL;
307     }
308    
309     return sw;
310     }
311    
312    
313    
314     /**************************************************
315     * SwishAttach - Connect to the database
316     * Returns false on Failure
317     **************************************************/
318    
319     int SwishAttach(SWISH * sw)
320     {
321     struct MOD_Search *srch = sw->Search;
322     IndexFILE *indexlist;
323    
324     IndexFILE *tmplist;
325    
326     indexlist = sw->indexlist;
327     sw->TotalWords = 0;
328     sw->TotalFiles = 0;
329    
330    
331     /* First of all . Read header default values from all index fileis */
332     /* With this, we read wordchars, stripchars, ... */
333     for (tmplist = indexlist; tmplist;)
334     {
335     sw->commonerror = RC_OK;
336     srch->bigrank = 0;
337    
338     tmplist->DB = (void *)DB_Open(sw, tmplist->line, DB_READ);
339     if ( sw->lasterror )
340     return 0;
341    
342     read_header(sw, &tmplist->header, tmplist->DB);
343    
344    
345     sw->TotalWords += tmplist->header.totalwords;
346     sw->TotalFiles += tmplist->header.totalfiles;
347     tmplist = tmplist->next;
348     }
349    
350     return ( sw->lasterror == 0 );
351     }
352    
353    
354    
355    
356     int SwishSearch(SWISH * sw, char *words, int structure, char *props, char *sort)
357     {
358     StringList *slprops = NULL;
359     StringList *slsort = NULL;
360     int i,
361     sortmode;
362     int header_level;
363     char *field;
364    
365     if (!sw)
366     {
367     sw->lasterror = INVALID_SWISH_HANDLE;
368     return INVALID_SWISH_HANDLE;
369     }
370    
371    
372     /* If previous search - reset its values (results, props ) */
373     SwishResetSearch(sw);
374    
375     if (props && props[0]) {
376     slprops = parse_line(props);
377     for (i = 0; i < slprops->n; i++)
378     addSearchResultDisplayProperty(sw, slprops->word[i]);
379     }
380    
381     if (sort && sort[0]) {
382     slsort = parse_line(sort);
383     for (i = 0; i < slsort->n;) {
384     sortmode = 1; /* Default mode is ascending */
385     field = slsort->word[i++];
386     if (i < slsort->n) {
387     if (!strcasecmp(slsort->word[i], "asc")) {
388     sortmode = -1; /* Ascending */
389     i++;
390     } else {
391     if (!strcasecmp(slsort->word[i], "desc")) {
392     sortmode = 1; /* Ascending */
393     i++;
394     }
395     }
396     }
397     addSearchResultSortProperty(sw, field, sortmode);
398     }
399     }
400     i = 0;
401    
402     header_level = sw->ResultOutput->headerOutVerbose;
403     sw->ResultOutput->headerOutVerbose = 0;
404    
405     i = search(sw, words, structure); /* search with no eco */
406    
407     sw->ResultOutput->headerOutVerbose = header_level;
408     if (slsort)
409     freeStringList(slsort);
410     if (slprops)
411     freeStringList(slprops);
412     return i;
413     }
414    
415    
416     int SwishSeek(SWISH * sw, int pos)
417     {
418     int i;
419     RESULT *sp = NULL;
420    
421     if (!sw)
422     return INVALID_SWISH_HANDLE;
423    
424     if ( !sw->Search->db_results )
425     {
426     set_progerr(SWISH_LISTRESULTS_EOF, sw, "Attempted to SwishSeek before searching");
427     return SWISH_LISTRESULTS_EOF;
428     }
429    
430     /* Check if only one index file -> Faster SwishSeek */
431    
432     if (!sw->Search->db_results->next) {
433     for (i = 0, sp = sw->Search->db_results->sortresultlist; sp && i < pos; i++)
434     sp = sp->next;
435    
436     sw->Search->db_results->currentresult = sp;
437     } else {
438     /* Well, we finally have more than one file */
439     /* In this case we have no choice - We need to read the data from disk */
440     /* The easy way: Let SwishNext do the job */
441    
442     for (i = 0; i < pos; i++)
443     if (!(sp = SwishNext(sw)))
444     break;
445     }
446    
447     if (!sp)
448     return ((sw->lasterror = SWISH_LISTRESULTS_EOF));
449    
450     return pos;
451     }
452    
453    
454     char tmp_header_buffer[50]; /* Not thread safe $$$ */
455    
456     /** Argh! This is as ugly as the config parsing code **/
457    
458     char *SwishHeaderParameter(IndexFILE * indexf, char *parameter_name)
459     {
460     if (!strcasecmp(parameter_name, WORDCHARSPARAMNAME))
461     return indexf->header.wordchars;
462    
463     else if (!strcasecmp(parameter_name, BEGINCHARSPARAMNAME))
464     return indexf->header.beginchars;
465    
466     else if (!strcasecmp(parameter_name, ENDCHARSPARAMNAME))
467     return indexf->header.endchars;
468    
469     else if (!strcasecmp(parameter_name, IGNOREFIRSTCHARPARAMNAME))
470     return indexf->header.ignorefirstchar;
471    
472     else if (!strcasecmp(parameter_name, IGNORELASTCHARPARAMNAME))
473     return indexf->header.ignorelastchar;
474    
475    
476    
477     else if (!strcasecmp(parameter_name, NAMEHEADERPARAMNAME))
478     return indexf->header.indexn;
479    
480     else if (!strcasecmp(parameter_name, DESCRIPTIONPARAMNAME))
481     return indexf->header.indexd;
482    
483     else if (!strcasecmp(parameter_name, POINTERPARAMNAME))
484     return indexf->header.indexp;
485    
486     else if (!strcasecmp(parameter_name, MAINTAINEDBYPARAMNAME))
487     return indexf->header.indexa;
488    
489     else if (!strcasecmp(parameter_name, INDEXEDONPARAMNAME))
490     return indexf->header.indexedon;
491    
492    
493    
494     else if (!strcasecmp(parameter_name, STEMMINGPARAMNAME)) {
495     if (indexf->header.fuzzy_mode == FUZZY_STEMMING )
496     return "1";
497     else
498     return "0";
499    
500     } else if (!strcasecmp(parameter_name, SOUNDEXPARAMNAME)) {
501     if (indexf->header.fuzzy_mode == FUZZY_SOUNDEX )
502     return "1";
503     else
504     return "0";
505    
506     } else if (!strcasecmp(parameter_name, FUZZYMODEPARAMNAME)) {
507     return fuzzy_mode_to_string( indexf->header.fuzzy_mode );
508    
509    
510     } else if (!strcasecmp(parameter_name, FILECOUNTPARAMNAME)) {
511     sprintf(tmp_header_buffer, "%d", indexf->header.totalfiles);
512     return tmp_header_buffer;
513    
514     } else
515     return "";
516     }
517    
518     char **SwishStopWords(SWISH * sw, char *filename, int *numstops)
519     {
520     IndexFILE *indexf;
521    
522     indexf = sw->indexlist;
523     while (indexf) {
524     if (!strcasecmp(indexf->line, filename)) {
525     *numstops = indexf->header.stopPos;
526     return indexf->header.stopList;
527     }
528     }
529     *numstops = 0;
530     return NULL;
531     }
532    
533     char *SwishWords(SWISH * sw, char *filename, char c)
534     {
535     IndexFILE *indexf;
536    
537     indexf = sw->indexlist;
538     while (indexf) {
539     if (!strcasecmp(indexf->line, filename)) {
540     return getfilewords(sw, c, indexf);
541     }
542     }
543     return "";
544     }

  ViewVC Help
Powered by ViewVC 1.1.22