1 |
/* |
2 |
$Id: search.c,v 1.104 2002/08/22 22:58:39 whmoseley Exp $ |
3 |
** |
4 |
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company |
5 |
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94 |
6 |
** |
7 |
** This program and library is free software; you can redistribute it and/or |
8 |
** modify it under the terms of the GNU (Library) General Public License |
9 |
** as published by the Free Software Foundation; either version 2 |
10 |
** of the License, or any later version. |
11 |
** |
12 |
** This program is distributed in the hope that it will be useful, |
13 |
** but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 |
** GNU (Library) General Public License for more details. |
16 |
** |
17 |
** You should have received a copy of the GNU (Library) General Public License |
18 |
** along with this program; if not, write to the Free Software |
19 |
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
20 |
**----------------------------------------------------------------- |
21 |
** Changes in expandstar and parseterm to fix the wildcard * problem. |
22 |
** G. Hill, ghill@library.berkeley.edu 3/11/97 |
23 |
** |
24 |
** Changes in notresultlist, parseterm, and fixnot to fix the NOT problem |
25 |
** G. Hill, ghill@library.berkeley.edu 3/13/97 |
26 |
** |
27 |
** Changes in search, parseterm, fixnot, operate, getfileinfo |
28 |
** to support METADATA |
29 |
** G. Hill 3/18/97 ghill@library.berkeley.edu |
30 |
** |
31 |
** Change in search to allow for search with a list including |
32 |
** also some empty indexes. |
33 |
** G. Hill after a suggestion by J. Winstead 12/18/97 |
34 |
** |
35 |
** Created countResults for number of hits in search |
36 |
** G. Hill 12/18/97 |
37 |
** |
38 |
** |
39 |
** Change in search to allow maxhits to return N number |
40 |
** of results for each index specified |
41 |
** D. Norris after suggestion by D. Chrisment 08/29/99 |
42 |
** |
43 |
** Created resultmaxhits as a global, renewable maxhits |
44 |
** D. Norris 08/29/99 |
45 |
** |
46 |
** added word length arg to Stem() call for strcat overflow checking in stemmer.c |
47 |
** added safestrcpy() macro to avoid corruption from strcpy overflow |
48 |
** SRE 11/17/99 |
49 |
** |
50 |
** 10/10/99 & 11/23/99 - Bill Moseley (merged by SRE) |
51 |
** - Changed to stem words *before* expanding with expandstar |
52 |
** so can find words in the index |
53 |
** - Moved META tag check before expandstar so META names don't get |
54 |
** expanded! |
55 |
** |
56 |
** fixed cast to int problems pointed out by "gcc -Wall" |
57 |
** SRE 2/22/00 |
58 |
** |
59 |
** fixed search() for case where stopword is followed by rule: |
60 |
** stopword was removed, rule was left, no matches ever found |
61 |
** added "# Stopwords removed:" to output header so caller can |
62 |
** trap actions of IGNORE_STOPWORDS_IN_QUERY |
63 |
** SRE 2/25/00 |
64 |
** |
65 |
** 04/00 - Jose Ruiz |
66 |
** Added code for phrase search |
67 |
** - New function phraseresultlists |
68 |
** - New function expandphrase |
69 |
** |
70 |
** 04/00 - Jose Ruiz |
71 |
** Added freeresult function for freing results memory |
72 |
** Also added changes to orresultlists andresultlists notresultlist |
73 |
** for freing memory |
74 |
** |
75 |
** 04/00 - Jose Ruiz |
76 |
** Now use bighash instead of hash for better performance in |
77 |
** orresultlist (a* or b*). Also changed hash.c |
78 |
** |
79 |
** 04/00 - Jose Ruiz |
80 |
** Function getfileinfo rewrite |
81 |
** - Now use a hash approach for faster searching |
82 |
** - Solves the long timed searches (a* or b* or c*) |
83 |
** |
84 |
** 04/00 - Jose Ruiz |
85 |
** Ordering of result rewrite |
86 |
** Now builtin C function qsort is used for faster ordering of results |
87 |
** This is useful when lots of results are found |
88 |
** For example: not (axf) -> This gives you all the documents!! |
89 |
** |
90 |
** 06/00 - Jose Ruiz |
91 |
** Rewrite of andresultlits and phraseresultlists for better permonace |
92 |
** New function notresultlits for better performance |
93 |
** |
94 |
** 07/00 and 08/00 - Jose Ruiz |
95 |
** Many modifications to make all search functions thread safe |
96 |
** |
97 |
** 08/00 - Added ascending and descending capabilities in results sorting |
98 |
** |
99 |
** 2001-02-xx rasc search call changed, tolower changed... |
100 |
** 2001-03-03 rasc altavista search, translatechar in headers |
101 |
** 2001-03-13 rasc definable logical operators via sw->SearchAlt->srch_op.[and|or|nor] |
102 |
** bugfix in parse_search_string handling... |
103 |
** 2001-03-14 rasc resultHeaderOutput -H <n> |
104 |
** |
105 |
** 2001-05-23 moseley - replace parse_search_string with new parser |
106 |
** |
107 |
*/ |
108 |
|
109 |
#include "swish.h" |
110 |
#include "mem.h" |
111 |
#include "string.h" |
112 |
#include "search.h" |
113 |
#include "index.h" |
114 |
#include "file.h" |
115 |
#include "list.h" |
116 |
#include "merge.h" |
117 |
#include "hash.h" |
118 |
#include "docprop.h" |
119 |
#include "error.h" |
120 |
#include "compress.h" |
121 |
/* removed stuff |
122 |
#include "deflate.h" |
123 |
*/ |
124 |
#include "metanames.h" |
125 |
#include "result_sort.h" |
126 |
#include "result_output.h" |
127 |
#include "search_alt.h" |
128 |
#include "db.h" |
129 |
#include "swish_words.h" |
130 |
#include "swish_qsort.h" |
131 |
|
132 |
#include "proplimit.h" |
133 |
|
134 |
#include "rank.h" |
135 |
|
136 |
|
137 |
/*** ??? $$$ fix this ****/ |
138 |
#define TOTAL_WORDS_FIX 1; |
139 |
/* |
140 |
-- init structures for this module |
141 |
*/ |
142 |
|
143 |
void initModule_Search (SWISH *sw) |
144 |
|
145 |
{ |
146 |
struct MOD_Search *srch; |
147 |
|
148 |
srch = (struct MOD_Search *) emalloc(sizeof(struct MOD_Search)); |
149 |
sw->Search = srch; |
150 |
|
151 |
|
152 |
/* Default variables for search */ |
153 |
srch->maxhits = -1; |
154 |
srch->beginhits = 0; |
155 |
|
156 |
srch->currentMaxPropertiesToDisplay = 0; |
157 |
srch->numPropertiesToDisplay = 0; |
158 |
|
159 |
srch->db_results = NULL; |
160 |
|
161 |
srch->propNameToDisplay =NULL; |
162 |
|
163 |
srch->PhraseDelimiter=PHRASE_DELIMITER_CHAR; |
164 |
|
165 |
srch->bigrank = 0; |
166 |
|
167 |
srch->resultSearchZone = Mem_ZoneCreate("resultSearch Zone", 0, 0); |
168 |
|
169 |
return; |
170 |
} |
171 |
|
172 |
/* Resets memory of vars used by ResultSortt properties configuration */ |
173 |
void resetModule_Search (SWISH *sw) |
174 |
|
175 |
{ |
176 |
struct DB_RESULTS *tmp; |
177 |
struct MOD_Search *srch = sw->Search; |
178 |
int i; |
179 |
IndexFILE *tmpindexlist; |
180 |
|
181 |
|
182 |
|
183 |
/* Default variables for search */ |
184 |
srch->maxhits = -1; |
185 |
srch->beginhits = 0; |
186 |
|
187 |
/* Free results from search if they exists */ |
188 |
|
189 |
while ( srch->db_results ) |
190 |
{ |
191 |
tmp = srch->db_results->next; |
192 |
freeresultlist(sw,srch->db_results); |
193 |
efree(srch->db_results); |
194 |
srch->db_results = tmp; |
195 |
} |
196 |
|
197 |
|
198 |
|
199 |
|
200 |
|
201 |
Mem_ZoneReset(srch->resultSearchZone); |
202 |
|
203 |
|
204 |
/* Free display props arrays */ |
205 |
/* First the common part to all the index files */ |
206 |
if (srch->propNameToDisplay) |
207 |
{ |
208 |
for(i=0;i<srch->numPropertiesToDisplay;i++) |
209 |
efree(srch->propNameToDisplay[i]); |
210 |
|
211 |
efree(srch->propNameToDisplay); |
212 |
} |
213 |
|
214 |
srch->propNameToDisplay=NULL; |
215 |
srch->numPropertiesToDisplay=0; |
216 |
srch->currentMaxPropertiesToDisplay=0; |
217 |
|
218 |
/* Now the IDs of each index file */ |
219 |
for(tmpindexlist=sw->indexlist;tmpindexlist;tmpindexlist=tmpindexlist->next) |
220 |
{ |
221 |
if (tmpindexlist->propIDToDisplay) |
222 |
efree(tmpindexlist->propIDToDisplay); |
223 |
|
224 |
tmpindexlist->propIDToDisplay=NULL; |
225 |
} |
226 |
} |
227 |
|
228 |
/* |
229 |
-- release all wired memory for this module |
230 |
*/ |
231 |
|
232 |
void freeModule_Search (SWISH *sw) |
233 |
|
234 |
{ |
235 |
struct MOD_Search *srch = sw->Search; |
236 |
|
237 |
/* free module data */ |
238 |
Mem_ZoneFree(&srch->resultSearchZone); |
239 |
|
240 |
efree (srch); |
241 |
sw->Search = NULL; |
242 |
|
243 |
return; |
244 |
} |
245 |
|
246 |
// #define DUMP_RESULTS 1 |
247 |
|
248 |
|
249 |
#ifdef DUMP_RESULTS |
250 |
|
251 |
void dump_result_lists( SWISH *sw, char *message ) |
252 |
|
253 |
{ |
254 |
struct DB_RESULTS *db_results = sw->Search->db_results; |
255 |
int cnt = 0; |
256 |
|
257 |
printf("\nDump Results: (%s)\n", message ); |
258 |
|
259 |
while ( db_results ) |
260 |
{ |
261 |
RESULT *result; |
262 |
printf("Looking at index\n"); |
263 |
if ( !db_results->resultlist ) |
264 |
{ |
265 |
printf("no resultlist\n"); |
266 |
continue; |
267 |
} |
268 |
result = db_results->resultlist->head; |
269 |
|
270 |
if ( !result ) |
271 |
{ |
272 |
printf("resultlist, but head is null\n"); |
273 |
continue; |
274 |
} |
275 |
while ( result ) |
276 |
{ |
277 |
printf("Result (%2d): filenum '%d' from index file '%s'\n", ++cnt, result->filenum, result->indexf->line ); |
278 |
result = result->next; |
279 |
} |
280 |
|
281 |
printf("end of results for index\n"); |
282 |
|
283 |
db_results = db_results->next; |
284 |
} |
285 |
printf("end of results\n\n"); |
286 |
} |
287 |
#endif |
288 |
|
289 |
|
290 |
|
291 |
|
292 |
/* |
293 |
** ---------------------------------------------- |
294 |
** |
295 |
** Module config code starts here |
296 |
** |
297 |
** ---------------------------------------------- |
298 |
*/ |
299 |
|
300 |
|
301 |
/* |
302 |
-- Config Directives |
303 |
-- Configuration directives for this Module |
304 |
-- return: 0/1 = none/config applied |
305 |
*/ |
306 |
|
307 |
int configModule_Search (SWISH *sw, StringList *sl) |
308 |
|
309 |
{ |
310 |
//struct MOD_Search *srch = sw->Search; |
311 |
//char *w0 = sl->word[0]; |
312 |
int retval = 0; |
313 |
|
314 |
return retval; |
315 |
} |
316 |
|
317 |
|
318 |
/* Compare two integers */ |
319 |
/* This routine is used by qsort */ |
320 |
int icomp(const void *s1, const void *s2) |
321 |
{ |
322 |
return (*(int *) s1 - *(int *) s2); |
323 |
} |
324 |
|
325 |
/* Compare two positions as stored in posdata */ |
326 |
/* This routine is used by qsort */ |
327 |
int icomp_posdata(const void *s1, const void *s2) |
328 |
{ |
329 |
return (GET_POSITION(*(int *) s1) - GET_POSITION(*(int *) s2)); |
330 |
} |
331 |
|
332 |
/* 01/2001 Jose Ruiz */ |
333 |
/* Compare RESULTS using RANK */ |
334 |
/* This routine is used by qsort */ |
335 |
int compResultsByFileNum(const void *s1, const void *s2) |
336 |
{ |
337 |
return ((*(RESULT * const *) s1)->filenum - (*(RESULT * const *) s2)->filenum); |
338 |
} |
339 |
|
340 |
/* 04/00 Jose Ruiz */ |
341 |
/* Simple routing for comparing pointers to integers in order to |
342 |
get an ascending sort with qsort */ |
343 |
/* Identical to previous one but use two integers per array */ |
344 |
int icomp2(const void *s1, const void *s2) |
345 |
{ |
346 |
int rc, |
347 |
*p1, |
348 |
*p2; |
349 |
|
350 |
rc = (*(int *) s1 - *(int *) s2); |
351 |
if (rc) |
352 |
return (rc); |
353 |
else |
354 |
{ |
355 |
p1 = (int *) s1; |
356 |
p2 = (int *) s2; |
357 |
return (*(++p1) - *(++p2)); |
358 |
} |
359 |
} |
360 |
|
361 |
|
362 |
|
363 |
/* |
364 |
-- Search Swish |
365 |
-- Check if AltaVista/Lycos/Web.de like search string has to be converted |
366 |
-- and call swish search... |
367 |
-- 2001-03-02 rasc |
368 |
*/ |
369 |
|
370 |
int search(SWISH * sw, char *words, int structure) |
371 |
{ |
372 |
char *sw_srch_str; |
373 |
int ret; |
374 |
|
375 |
|
376 |
if (sw->SearchAlt->enableAltSearchSyntax) |
377 |
{ /* AltaVista like search enabled? */ |
378 |
sw_srch_str = convAltSearch2SwishStr(words); |
379 |
ret = search_2(sw, sw_srch_str, structure); |
380 |
efree(sw_srch_str); |
381 |
} |
382 |
else |
383 |
{ |
384 |
ret = search_2(sw, words, structure); |
385 |
} |
386 |
return ret; |
387 |
} |
388 |
|
389 |
|
390 |
static void limit_result_list( SWISH *sw ) |
391 |
{ |
392 |
RESULT *rtmp; |
393 |
RESULT *rp; |
394 |
RESULT *last; |
395 |
struct DB_RESULTS *db_results = sw->Search->db_results; |
396 |
|
397 |
|
398 |
/* Process each index */ |
399 |
while ( db_results ) |
400 |
{ |
401 |
if(db_results->resultlist) |
402 |
rp = db_results->resultlist->head; |
403 |
else |
404 |
rp = NULL; |
405 |
|
406 |
last = NULL; |
407 |
|
408 |
while (rp) |
409 |
{ |
410 |
rtmp = rp->next; |
411 |
|
412 |
if ( LimitByProperty( sw, rp->indexf, rp->filenum ) ) |
413 |
{ |
414 |
freeresult( sw, rp ); |
415 |
|
416 |
if ( !last ) /* if first in list */ |
417 |
db_results->resultlist->head = rtmp; |
418 |
else |
419 |
last->next = rtmp; |
420 |
} |
421 |
else |
422 |
last = rp; /* move the last pointer to current one */ |
423 |
|
424 |
rp = rtmp; |
425 |
} |
426 |
|
427 |
db_results = db_results->next; |
428 |
|
429 |
} |
430 |
} |
431 |
|
432 |
|
433 |
|
434 |
|
435 |
/******************************************************************************** |
436 |
* Returns a negative number on error, zero or positive = number of results |
437 |
* |
438 |
********************************************************************************/ |
439 |
|
440 |
int search_2(SWISH * sw, char *words, int structure) |
441 |
{ |
442 |
int j, |
443 |
k, |
444 |
hassearch, |
445 |
indexYes, |
446 |
totalResults; |
447 |
struct swline *searchwordlist, |
448 |
*tmplist2; |
449 |
IndexFILE *indexlist; |
450 |
int rc = 0; |
451 |
unsigned char PhraseDelimiter; |
452 |
struct DB_RESULTS *db_results, |
453 |
*db_tmp; |
454 |
struct MOD_Search *srch = sw->Search; |
455 |
|
456 |
|
457 |
/* If not words - do nothing */ |
458 |
if (!words || !*words) |
459 |
return (sw->lasterror = NO_WORDS_IN_SEARCH); |
460 |
|
461 |
PhraseDelimiter = (unsigned char) srch->PhraseDelimiter; |
462 |
|
463 |
indexlist = sw->indexlist; |
464 |
sw->Search->db_results = NULL; |
465 |
|
466 |
|
467 |
j = 0; |
468 |
searchwordlist = NULL; |
469 |
indexYes = 0; |
470 |
totalResults = 0; |
471 |
hassearch = 0; |
472 |
|
473 |
sw->lasterror = RC_OK; |
474 |
sw->commonerror = RC_OK; |
475 |
|
476 |
if ((rc = initSearchResultProperties(sw))) |
477 |
return rc; |
478 |
if ((rc = initSortResultProperties(sw))) |
479 |
return rc; |
480 |
|
481 |
|
482 |
/* This returns false when no files found within the limit */ |
483 |
if ( !Prepare_PropLookup( sw ) ) |
484 |
return sw->lasterror; /* normally RC_OK (no results), but could be an error */ |
485 |
|
486 |
while (indexlist != NULL) |
487 |
{ |
488 |
/* moved these inside the loop Nov 24,2001. Need to verify */ |
489 |
sw->lasterror = RC_OK; |
490 |
sw->commonerror = RC_OK; |
491 |
|
492 |
|
493 |
/* Clean up from previous loop */ |
494 |
if (searchwordlist) |
495 |
{ |
496 |
freeswline(searchwordlist); |
497 |
searchwordlist = NULL; |
498 |
} |
499 |
|
500 |
|
501 |
|
502 |
/* tokenize the query into swish words based on this current index */ |
503 |
|
504 |
if (!(searchwordlist = tokenize_query_string(sw, words, &indexlist->header))) |
505 |
{ |
506 |
indexlist = indexlist->next; |
507 |
if ( sw->lasterror ) |
508 |
return sw->lasterror; |
509 |
continue; |
510 |
} |
511 |
|
512 |
|
513 |
hassearch = 1; // Flag that we found some words to search form |
514 |
|
515 |
srch->bigrank = 0; |
516 |
|
517 |
|
518 |
/* Is there an open index? */ |
519 |
if (!indexlist->DB) |
520 |
{ |
521 |
if (searchwordlist) |
522 |
freeswline(searchwordlist); /* 2001-03-13 rasc */ |
523 |
return sw->lasterror; |
524 |
} |
525 |
|
526 |
|
527 |
/* Any files in the index? */ |
528 |
|
529 |
if (!indexlist->header.totalfiles) |
530 |
{ |
531 |
indexlist = indexlist->next; |
532 |
continue; |
533 |
} |
534 |
else |
535 |
{ |
536 |
indexYes = 1; /*There is a non-empty index */ |
537 |
} |
538 |
|
539 |
|
540 |
resultHeaderOut(sw, 2, "#\n# Index File: %s\n", indexlist->line); |
541 |
|
542 |
searchwordlist = (struct swline *) ignore_words_in_query(sw, indexlist, searchwordlist, PhraseDelimiter); |
543 |
if ( sw->lasterror ) |
544 |
{ |
545 |
if (searchwordlist) |
546 |
freeswline(searchwordlist); |
547 |
return sw->lasterror; |
548 |
} |
549 |
|
550 |
|
551 |
/* Echo index file, fixed search, stopwords */ |
552 |
|
553 |
/* Result Header Output (2001-03-14 rasc, rewritten) */ |
554 |
|
555 |
resultPrintHeader(sw, 2, &indexlist->header, indexlist->header.savedasheader, 0); |
556 |
|
557 |
|
558 |
resultHeaderOut(sw, 3, "# StopWords:"); |
559 |
for (k = 0; k < indexlist->header.stopPos; k++) |
560 |
resultHeaderOut(sw, 3, " %s", indexlist->header.stopList[k]); |
561 |
|
562 |
resultHeaderOut(sw, 3, "\n"); |
563 |
|
564 |
|
565 |
|
566 |
printheaderbuzzwords(sw,indexlist); |
567 |
|
568 |
|
569 |
resultHeaderOut(sw, 2, "# Search Words: %s\n", words); |
570 |
|
571 |
tmplist2 = searchwordlist; |
572 |
|
573 |
if ( !tmplist2 ) |
574 |
sw->commonerror++; // Flag at the query (with this index) did no have any search words |
575 |
else |
576 |
{ |
577 |
resultHeaderOut(sw, 2, "# Parsed Words: "); |
578 |
|
579 |
while (tmplist2) |
580 |
{ |
581 |
resultHeaderOut(sw, 2, "%s ", tmplist2->line); |
582 |
tmplist2 = tmplist2->next; |
583 |
} |
584 |
|
585 |
resultHeaderOut(sw, 2, "\n"); |
586 |
} |
587 |
|
588 |
|
589 |
/* Expand phrase search: "kim harlow" becomes (kim PHRASE_WORD harlow) */ |
590 |
searchwordlist = (struct swline *) expandphrase(searchwordlist, PhraseDelimiter); |
591 |
searchwordlist = (struct swline *) fixmetanames(searchwordlist); |
592 |
searchwordlist = (struct swline *) fixnot1(searchwordlist); |
593 |
searchwordlist = (struct swline *) fixnot2(searchwordlist); |
594 |
|
595 |
/* Allocate memory for the result list structure */ |
596 |
db_results = (struct DB_RESULTS *) emalloc(sizeof(struct DB_RESULTS)); |
597 |
memset( db_results, 0, sizeof(struct DB_RESULTS)); |
598 |
|
599 |
|
600 |
/* Now do the search */ |
601 |
|
602 |
if (searchwordlist) |
603 |
{ |
604 |
int metaID = 1; /* Default meta ID to search */ |
605 |
|
606 |
tmplist2 = searchwordlist; |
607 |
db_results->resultlist = (RESULT_LIST *) parseterm(sw, 0, metaID, structure, indexlist, &tmplist2); |
608 |
} |
609 |
|
610 |
|
611 |
/* add db_results to the list of results */ |
612 |
|
613 |
if ( !sw->Search->db_results) |
614 |
sw->Search->db_results = db_results; |
615 |
else |
616 |
{ |
617 |
db_tmp = sw->Search->db_results; |
618 |
while (db_tmp) |
619 |
{ |
620 |
if (!db_tmp->next) |
621 |
{ |
622 |
db_tmp->next = db_results; |
623 |
break; |
624 |
} |
625 |
db_tmp = db_tmp->next; |
626 |
} |
627 |
|
628 |
} |
629 |
|
630 |
|
631 |
indexlist = indexlist->next; |
632 |
|
633 |
} /* end process each index file */ |
634 |
|
635 |
|
636 |
/* Did any of the indexes have a query? */ |
637 |
|
638 |
if (!hassearch) |
639 |
return (sw->lasterror = NO_WORDS_IN_SEARCH); |
640 |
|
641 |
|
642 |
|
643 |
if (searchwordlist) |
644 |
{ |
645 |
freeswline(searchwordlist); |
646 |
searchwordlist = NULL; |
647 |
} |
648 |
|
649 |
|
650 |
/* Limit result list by -L parameter */ |
651 |
// note -- this used to be in addtoresultlist, but that was checking every word |
652 |
// placing here means that it only checks for each file, instead of each word. |
653 |
|
654 |
if ( is_prop_limit_used( sw ) ) |
655 |
limit_result_list( sw ); |
656 |
|
657 |
#ifdef DUMP_RESULTS |
658 |
dump_result_lists( sw, "After search" ); |
659 |
#endif |
660 |
|
661 |
|
662 |
/* 04/00 Jose Ruiz - Sort results by rank or by properties */ |
663 |
|
664 |
totalResults = sortresults(sw, structure); |
665 |
|
666 |
|
667 |
if (!totalResults && sw->commonerror) |
668 |
return (sw->lasterror = WORDS_TOO_COMMON); |
669 |
|
670 |
if (!totalResults && !indexYes) |
671 |
return (sw->lasterror = INDEX_FILE_IS_EMPTY); |
672 |
|
673 |
return totalResults; |
674 |
} |
675 |
|
676 |
/* 2001-09 jmruiz - Rewriting |
677 |
** This puts parentheses in the right places around meta searches |
678 |
** to avoid problems whith them. Basically "metaname = bla" |
679 |
** becomes "(metanames = bla)" */ |
680 |
|
681 |
struct swline *fixmetanames(struct swline *sp) |
682 |
{ |
683 |
int metapar; |
684 |
struct swline *tmpp, |
685 |
*newp; |
686 |
|
687 |
tmpp = sp; |
688 |
newp = NULL; |
689 |
|
690 |
/* Fix metanames with parenthesys eg: metaname = bla => (metanames = bla) */ |
691 |
while (tmpp != NULL) |
692 |
{ |
693 |
if (isMetaNameOpNext(tmpp->next)) |
694 |
{ |
695 |
/* If it is a metaName add the name and = and skip to next */ |
696 |
newp = (struct swline *) addswline(newp, "("); |
697 |
newp = (struct swline *) addswline(newp, tmpp->line); |
698 |
newp = (struct swline *) addswline(newp, "="); |
699 |
tmpp = tmpp->next; |
700 |
tmpp = tmpp->next; |
701 |
if ( !tmpp ) |
702 |
return NULL; |
703 |
|
704 |
/* 06/00 Jose Ruiz |
705 |
** Fix to consider parenthesys in the |
706 |
** content of a MetaName */ |
707 |
if (tmpp->line[0] == '(') |
708 |
{ |
709 |
metapar = 1; |
710 |
newp = (struct swline *) addswline(newp, tmpp->line); |
711 |
tmpp = tmpp->next; |
712 |
while (metapar && tmpp) |
713 |
{ |
714 |
if (tmpp->line[0] == '(') |
715 |
metapar++; |
716 |
else if (tmpp->line[0] == ')') |
717 |
metapar--; |
718 |
newp = (struct swline *) addswline(newp, tmpp->line); |
719 |
if (metapar) |
720 |
tmpp = tmpp->next; |
721 |
} |
722 |
if (!tmpp) |
723 |
return (newp); |
724 |
} |
725 |
else |
726 |
newp = (struct swline *) addswline(newp, tmpp->line); |
727 |
newp = (struct swline *) addswline(newp, ")"); |
728 |
} |
729 |
else |
730 |
newp = (struct swline *) addswline(newp, tmpp->line); |
731 |
/* next one */ |
732 |
tmpp = tmpp->next; |
733 |
} |
734 |
|
735 |
freeswline(sp); |
736 |
return newp; |
737 |
} |
738 |
|
739 |
/* 2001 -09 jmruiz Rewritten |
740 |
** This optimizes some NOT operator to be faster. |
741 |
** |
742 |
** "word1 not word" is changed by "word1 and_not word2" |
743 |
** |
744 |
** In the old way the previous query was... |
745 |
** get results if word1 |
746 |
** get results of word2 |
747 |
** not results of word2 (If we have 100000 docs and word2 is in |
748 |
** just 3 docs, this means read 99997 |
749 |
** results) |
750 |
** intersect both list of results |
751 |
** |
752 |
** The "new way" |
753 |
** get results if word1 |
754 |
** get results of word2 |
755 |
** intersect (and_not_rule) both lists of results |
756 |
** |
757 |
*/ |
758 |
|
759 |
struct swline *fixnot1(struct swline *sp) |
760 |
{ |
761 |
struct swline *tmpp, |
762 |
*prev; |
763 |
|
764 |
if (!sp) |
765 |
return NULL; |
766 |
/* 06/00 Jose Ruiz - Check if first word is NOT_RULE */ |
767 |
/* Change remaining NOT by AND_NOT_RULE */ |
768 |
for (tmpp = sp, prev = NULL; tmpp; prev = tmpp, tmpp = tmpp->next) |
769 |
{ |
770 |
if (tmpp->line[0] == '(') |
771 |
continue; |
772 |
else if (isnotrule(tmpp->line)) |
773 |
{ |
774 |
if(prev && prev->line[0]!='=' && prev->line[0]!='(') |
775 |
{ |
776 |
efree(tmpp->line); |
777 |
tmpp->line = estrdup(AND_NOT_WORD); |
778 |
} |
779 |
} |
780 |
} |
781 |
return sp; |
782 |
} |
783 |
|
784 |
/* 2001 -09 jmruiz - Totally new - Fix the meta=(not ahsg) bug |
785 |
** Add parentheses to avoid the way operator NOT confuse complex queries */ |
786 |
struct swline *fixnot2(struct swline *sp) |
787 |
{ |
788 |
int openparen, found; |
789 |
struct swline *tmpp, *newp; |
790 |
char *magic = "<__not__>"; /* magic avoids parsing the |
791 |
** "not" operator twice |
792 |
** and put the code in an |
793 |
** endless loop */ |
794 |
|
795 |
found = 1; |
796 |
while(found) |
797 |
{ |
798 |
openparen = 0; |
799 |
found = 0; |
800 |
|
801 |
for (tmpp = sp , newp = NULL; tmpp ; tmpp = tmpp->next) |
802 |
{ |
803 |
if (isnotrule(tmpp->line)) |
804 |
{ |
805 |
found = 1; |
806 |
/* Add parentheses */ |
807 |
newp = (struct swline *) addswline(newp, "("); |
808 |
/* Change "NOT" by magic to avoid find it in next iteration */ |
809 |
newp = (struct swline *) addswline(newp, magic); |
810 |
for(tmpp = tmpp->next; tmpp; tmpp = tmpp->next) |
811 |
{ |
812 |
if ((tmpp->line)[0] == '(') |
813 |
openparen++; |
814 |
else if(!openparen) |
815 |
{ |
816 |
newp = (struct swline *) addswline(newp, tmpp->line); |
817 |
/* Add parentheses */ |
818 |
newp = (struct swline *) addswline(newp, ")"); |
819 |
break; |
820 |
} |
821 |
else if ((tmpp->line)[0] == ')') |
822 |
openparen--; |
823 |
newp = (struct swline *) addswline(newp, tmpp->line); |
824 |
} |
825 |
if(!tmpp) |
826 |
break; |
827 |
} |
828 |
else |
829 |
newp = (struct swline *) addswline(newp, tmpp->line); |
830 |
} |
831 |
freeswline(sp); |
832 |
sp = newp; |
833 |
} |
834 |
|
835 |
/* remove magic and put the "real" NOT in place */ |
836 |
for(tmpp = newp; tmpp ; tmpp = tmpp->next) |
837 |
{ |
838 |
if(!strcmp(tmpp->line,magic)) |
839 |
{ |
840 |
efree(tmpp->line); |
841 |
tmpp->line = estrdup(NOT_WORD); |
842 |
} |
843 |
} |
844 |
|
845 |
return newp; |
846 |
} |
847 |
|
848 |
/* expandstar removed - Jose Ruiz 04/00 */ |
849 |
|
850 |
/* Expands phrase search. Berkeley University becomes Berkeley PHRASE_WORD University */ |
851 |
/* It also fixes the and, not or problem when they appeared inside a phrase */ |
852 |
struct swline *expandphrase(struct swline *sp, char delimiter) |
853 |
{ |
854 |
struct swline *tmp, |
855 |
*newp; |
856 |
int inphrase; |
857 |
|
858 |
if (!sp) |
859 |
return NULL; |
860 |
inphrase = 0; |
861 |
newp = NULL; |
862 |
tmp = sp; |
863 |
while (tmp != NULL) |
864 |
{ |
865 |
if ((tmp->line)[0] == delimiter) |
866 |
{ |
867 |
if (inphrase) |
868 |
{ |
869 |
inphrase = 0; |
870 |
newp = (struct swline *) addswline(newp, ")"); |
871 |
} |
872 |
else |
873 |
{ |
874 |
inphrase++; |
875 |
newp = (struct swline *) addswline(newp, "("); |
876 |
} |
877 |
} |
878 |
else |
879 |
{ |
880 |
if (inphrase) |
881 |
{ |
882 |
if (inphrase > 1) |
883 |
newp = (struct swline *) addswline(newp, PHRASE_WORD); |
884 |
inphrase++; |
885 |
newp = (struct swline *) addswline(newp, tmp->line); |
886 |
} |
887 |
else |
888 |
{ |
889 |
char *operator; |
890 |
|
891 |
if ( ( operator = isBooleanOperatorWord( tmp->line )) ) |
892 |
newp = (struct swline *) addswline(newp, operator); |
893 |
else |
894 |
newp = (struct swline *) addswline(newp, tmp->line); |
895 |
} |
896 |
} |
897 |
tmp = tmp->next; |
898 |
} |
899 |
freeswline(sp); |
900 |
return newp; |
901 |
} |
902 |
|
903 |
|
904 |
|
905 |
/* Print the buzzwords */ |
906 |
void printheaderbuzzwords(SWISH *sw, IndexFILE * indexf) |
907 |
{ |
908 |
int hashval; |
909 |
struct swline *sp = NULL; |
910 |
|
911 |
resultHeaderOut(sw, 3, "# BuzzWords:"); |
912 |
|
913 |
for (hashval = 0; hashval < HASHSIZE; hashval++) |
914 |
{ |
915 |
sp = indexf->header.hashbuzzwordlist[hashval]; |
916 |
while (sp != NULL) |
917 |
{ |
918 |
resultHeaderOut(sw, 3, " %s", sp->line); |
919 |
sp = sp->next; |
920 |
} |
921 |
} |
922 |
resultHeaderOut(sw, 3, "\n"); |
923 |
} |
924 |
|
925 |
|
926 |
|
927 |
|
928 |
|
929 |
|
930 |
/* The recursive parsing function. |
931 |
** This was a headache to make but ended up being surprisingly easy. :) |
932 |
** parseone tells the function to only operate on one word or term. |
933 |
** parseone is needed so that with metaA=foo bar "bar" is searched |
934 |
** with the default metaname. |
935 |
*/ |
936 |
|
937 |
RESULT_LIST *parseterm(SWISH * sw, int parseone, int metaID, int structure, IndexFILE * indexf, struct swline **searchwordlist) |
938 |
{ |
939 |
int rulenum; |
940 |
char *word; |
941 |
int lenword; |
942 |
RESULT_LIST *l_rp, |
943 |
*new_l_rp; |
944 |
|
945 |
/* |
946 |
* The andLevel is used to help keep the ranking function honest |
947 |
* when it ANDs the results of the latest search term with |
948 |
* the results so far (rp). The idea is that if you AND three |
949 |
* words together you ultimately want to resulting rank to |
950 |
* be the average of all three individual work ranks. By keeping |
951 |
* a running total of the number of terms already ANDed, the |
952 |
* next AND operation can properly scale the average-rank-so-far |
953 |
* and recompute the new average properly (see andresultlists()). |
954 |
* This implementation is a little weak in that it will not average |
955 |
* across terms that are in parenthesis. (It treats an () expression |
956 |
* as one term, and weights it as "one".) |
957 |
*/ |
958 |
int andLevel = 0; /* number of terms ANDed so far */ |
959 |
|
960 |
word = NULL; |
961 |
lenword = 0; |
962 |
|
963 |
l_rp = NULL; |
964 |
|
965 |
rulenum = OR_RULE; |
966 |
while (*searchwordlist) |
967 |
{ |
968 |
|
969 |
word = SafeStrCopy(word, (*searchwordlist)->line, &lenword); |
970 |
|
971 |
|
972 |
if (rulenum == NO_RULE) |
973 |
rulenum = sw->SearchAlt->srch_op.defaultrule; |
974 |
|
975 |
|
976 |
if (isunaryrule(word)) /* is it a NOT? */ |
977 |
{ |
978 |
*searchwordlist = (*searchwordlist)->next; |
979 |
l_rp = (RESULT_LIST *) parseterm(sw, 1, metaID, structure, indexf, searchwordlist); |
980 |
l_rp = (RESULT_LIST *) notresultlist(sw, l_rp, indexf); |
981 |
|
982 |
/* Wild goose chase */ |
983 |
rulenum = NO_RULE; |
984 |
continue; |
985 |
} |
986 |
|
987 |
|
988 |
/* If it's an operator, set the current rulenum, and continue */ |
989 |
else if (isbooleanrule(word)) |
990 |
{ |
991 |
rulenum = getrulenum(word); |
992 |
*searchwordlist = (*searchwordlist)->next; |
993 |
continue; |
994 |
} |
995 |
|
996 |
|
997 |
/* Bump up the count of AND terms for this level */ |
998 |
|
999 |
if (rulenum != AND_RULE) |
1000 |
andLevel = 0; /* reset */ |
1001 |
else if (rulenum == AND_RULE) |
1002 |
andLevel++; |
1003 |
|
1004 |
|
1005 |
|
1006 |
/* Is this the start of a sub-query? */ |
1007 |
|
1008 |
if (word[0] == '(') |
1009 |
{ |
1010 |
|
1011 |
|
1012 |
/* Recurse */ |
1013 |
*searchwordlist = (*searchwordlist)->next; |
1014 |
new_l_rp = (RESULT_LIST *) parseterm(sw, 0, metaID, structure, indexf, searchwordlist); |
1015 |
|
1016 |
|
1017 |
if (rulenum == AND_RULE) |
1018 |
l_rp = (RESULT_LIST *) andresultlists(sw, l_rp, new_l_rp, andLevel); |
1019 |
|
1020 |
else if (rulenum == OR_RULE) |
1021 |
l_rp = (RESULT_LIST *) orresultlists(sw, l_rp, new_l_rp); |
1022 |
|
1023 |
else if (rulenum == PHRASE_RULE) |
1024 |
l_rp = (RESULT_LIST *) phraseresultlists(sw, l_rp, new_l_rp, 1); |
1025 |
|
1026 |
else if (rulenum == AND_NOT_RULE) |
1027 |
l_rp = (RESULT_LIST *) notresultlists(sw, l_rp, new_l_rp); |
1028 |
|
1029 |
if (!*searchwordlist) |
1030 |
break; |
1031 |
|
1032 |
rulenum = NO_RULE; |
1033 |
continue; |
1034 |
|
1035 |
} |
1036 |
else if (word[0] == ')') |
1037 |
{ |
1038 |
*searchwordlist = (*searchwordlist)->next; |
1039 |
break; |
1040 |
} |
1041 |
|
1042 |
|
1043 |
/* Now down to checking for metanames and actual search words */ |
1044 |
|
1045 |
|
1046 |
/* Check if the next word is '=' */ |
1047 |
if (isMetaNameOpNext((*searchwordlist)->next)) |
1048 |
{ |
1049 |
struct metaEntry *m = getMetaNameByName(&indexf->header, word); |
1050 |
|
1051 |
/* shouldn't happen since already checked */ |
1052 |
if ( !m ) |
1053 |
progerr("Unknown metaname '%s' -- swish_words failed to find.", word ); |
1054 |
|
1055 |
metaID = m->metaID; |
1056 |
|
1057 |
|
1058 |
/* Skip both the metaName end the '=' */ |
1059 |
*searchwordlist = (*searchwordlist)->next->next; |
1060 |
|
1061 |
|
1062 |
if ((*searchwordlist) && ((*searchwordlist)->line[0] == '(')) |
1063 |
{ |
1064 |
*searchwordlist = (*searchwordlist)->next; |
1065 |
parseone = 0; |
1066 |
} |
1067 |
else |
1068 |
parseone = 1; |
1069 |
|
1070 |
/* Now recursively process the next terms */ |
1071 |
|
1072 |
new_l_rp = (RESULT_LIST *) parseterm(sw, parseone, metaID, structure, indexf, searchwordlist); |
1073 |
if (rulenum == AND_RULE) |
1074 |
l_rp = (RESULT_LIST *) andresultlists(sw, l_rp, new_l_rp, andLevel); |
1075 |
|
1076 |
else if (rulenum == OR_RULE) |
1077 |
l_rp = (RESULT_LIST *) orresultlists(sw, l_rp, new_l_rp); |
1078 |
|
1079 |
else if (rulenum == PHRASE_RULE) |
1080 |
l_rp = (RESULT_LIST *) phraseresultlists(sw, l_rp, new_l_rp, 1); |
1081 |
|
1082 |
else if (rulenum == AND_NOT_RULE) |
1083 |
l_rp = (RESULT_LIST *) notresultlists(sw, l_rp, new_l_rp); |
1084 |
|
1085 |
if (!*searchwordlist) |
1086 |
break; |
1087 |
|
1088 |
rulenum = NO_RULE; |
1089 |
metaID = 1; |
1090 |
continue; |
1091 |
} |
1092 |
|
1093 |
|
1094 |
/* Finally, look up a word, and merge with previous results. */ |
1095 |
|
1096 |
l_rp = (RESULT_LIST *) operate(sw, l_rp, rulenum, word, indexf->DB, metaID, structure, andLevel, indexf); |
1097 |
|
1098 |
if (parseone) |
1099 |
{ |
1100 |
*searchwordlist = (*searchwordlist)->next; |
1101 |
break; |
1102 |
} |
1103 |
rulenum = NO_RULE; |
1104 |
|
1105 |
*searchwordlist = (*searchwordlist)->next; |
1106 |
} |
1107 |
|
1108 |
if (lenword) |
1109 |
efree(word); |
1110 |
|
1111 |
return l_rp; |
1112 |
} |
1113 |
|
1114 |
/* Looks up a word in the index file - |
1115 |
** it calls getfileinfo(), which does the real searching. |
1116 |
*/ |
1117 |
|
1118 |
RESULT_LIST *operate(SWISH * sw, RESULT_LIST * l_rp, int rulenum, char *wordin, void *DB, int metaID, int structure, int andLevel, IndexFILE * indexf) |
1119 |
{ |
1120 |
RESULT_LIST *new_l_rp, |
1121 |
*return_l_rp; |
1122 |
char *word; |
1123 |
int lenword; |
1124 |
|
1125 |
word = estrdup(wordin); |
1126 |
lenword = strlen(word); |
1127 |
|
1128 |
new_l_rp = return_l_rp = NULL; |
1129 |
|
1130 |
|
1131 |
/* ??? Stopwords have already been removed at this point. */ |
1132 |
/***** |
1133 |
if (isstopword(&indexf->header, word) && !isrule(word)) |
1134 |
{ |
1135 |
if (rulenum == OR_RULE && l_rp != NULL) |
1136 |
return l_rp; |
1137 |
else |
1138 |
sw->commonerror = 1; |
1139 |
} |
1140 |
******/ |
1141 |
|
1142 |
|
1143 |
/* ??? Ever a time when rulenum is not one of these? */ |
1144 |
|
1145 |
if (rulenum == AND_RULE) |
1146 |
{ |
1147 |
new_l_rp = (RESULT_LIST *) getfileinfo(sw, word, indexf, metaID, structure); |
1148 |
return_l_rp = (RESULT_LIST *) andresultlists(sw, l_rp, new_l_rp, andLevel); |
1149 |
} |
1150 |
|
1151 |
|
1152 |
else if (rulenum == OR_RULE) |
1153 |
{ |
1154 |
new_l_rp = (RESULT_LIST *) getfileinfo(sw, word, indexf, metaID, structure); |
1155 |
return_l_rp = (RESULT_LIST *) orresultlists(sw, l_rp, new_l_rp); |
1156 |
} |
1157 |
|
1158 |
|
1159 |
else if (rulenum == NOT_RULE) |
1160 |
{ |
1161 |
new_l_rp = (RESULT_LIST *) getfileinfo(sw, word, indexf, metaID, structure); |
1162 |
return_l_rp = (RESULT_LIST *) notresultlist(sw, new_l_rp, indexf); |
1163 |
} |
1164 |
|
1165 |
|
1166 |
else if (rulenum == PHRASE_RULE) |
1167 |
{ |
1168 |
new_l_rp = (RESULT_LIST *) getfileinfo(sw, word, indexf, metaID, structure); |
1169 |
return_l_rp = (RESULT_LIST *) phraseresultlists(sw, l_rp, new_l_rp, 1); |
1170 |
} |
1171 |
|
1172 |
|
1173 |
else if (rulenum == AND_NOT_RULE) |
1174 |
{ |
1175 |
new_l_rp = (RESULT_LIST *) getfileinfo(sw, word, indexf, metaID, structure); |
1176 |
return_l_rp = (RESULT_LIST *) notresultlists(sw, l_rp, new_l_rp); |
1177 |
} |
1178 |
|
1179 |
|
1180 |
efree(word); |
1181 |
return return_l_rp; |
1182 |
} |
1183 |
|
1184 |
|
1185 |
static RESULT_LIST *newResultsList(SWISH *sw) |
1186 |
{ |
1187 |
RESULT_LIST *l = (RESULT_LIST *)Mem_ZoneAlloc(sw->Search->resultSearchZone, sizeof(RESULT_LIST)); |
1188 |
l->head = l->tail = NULL; |
1189 |
l->sw = (struct SWISH *)sw; |
1190 |
|
1191 |
return l; |
1192 |
} |
1193 |
|
1194 |
void addResultToList(RESULT_LIST *l_r, RESULT *r) |
1195 |
{ |
1196 |
r->next = NULL; |
1197 |
|
1198 |
r->reslist = l_r; |
1199 |
|
1200 |
if(!l_r->head) |
1201 |
l_r->head = r; |
1202 |
if(l_r->tail) |
1203 |
l_r->tail->next = r; |
1204 |
l_r->tail = r; |
1205 |
|
1206 |
} |
1207 |
|
1208 |
|
1209 |
/* Routine to test structure in a result */ |
1210 |
/* Also removes posdata that do not fit with structure field */ |
1211 |
int test_structure(int structure, int frequency, int *posdata) |
1212 |
{ |
1213 |
int i, j; /* i -> counter upto frequency, j -> new frequency */ |
1214 |
int *p,*q; /* Use pointers to ints instead of arrays for |
1215 |
** faster proccess */ |
1216 |
|
1217 |
for(i = j = 0, p = q = posdata; i < frequency; i++, p++) |
1218 |
{ |
1219 |
if(GET_STRUCTURE(*p) & structure) |
1220 |
{ |
1221 |
if(p - q) |
1222 |
{ |
1223 |
*q = *p; |
1224 |
} |
1225 |
j++; |
1226 |
q++; |
1227 |
} |
1228 |
} |
1229 |
return j; /* return new frequency */ |
1230 |
} |
1231 |
|
1232 |
|
1233 |
|
1234 |
/* Finds a word and returns its corresponding file and rank information list. |
1235 |
** If not found, NULL is returned. |
1236 |
*/ |
1237 |
/* Jose Ruiz |
1238 |
** New implmentation based on Hashing for direct access. Faster!! |
1239 |
** Also solves stars. Faster!! It can even found "and", "or" |
1240 |
** when looking for "an*" or "o*" if they are not stop words |
1241 |
*/ |
1242 |
|
1243 |
#define MAX_POSDATA_STACK 256 |
1244 |
|
1245 |
RESULT_LIST *getfileinfo(SWISH * sw, char *word, IndexFILE * indexf, int metaID, int structure) |
1246 |
{ |
1247 |
int j, |
1248 |
x, |
1249 |
filenum, |
1250 |
frequency, |
1251 |
len, |
1252 |
curmetaID, |
1253 |
index_structure, |
1254 |
index_structfreq, |
1255 |
tmpval; |
1256 |
RESULT_LIST *l_rp, *l_rp2; |
1257 |
long wordID; |
1258 |
unsigned long nextposmetaname; |
1259 |
char *p; |
1260 |
int tfrequency = 0; |
1261 |
unsigned char *s, *buffer; |
1262 |
int sz_buffer; |
1263 |
unsigned char flag; |
1264 |
int stack_posdata[MAX_POSDATA_STACK]; /* stack buffer for posdata */ |
1265 |
int *posdata; |
1266 |
|
1267 |
x = j = filenum = frequency = len = curmetaID = index_structure = index_structfreq = 0; |
1268 |
nextposmetaname = 0L; |
1269 |
|
1270 |
|
1271 |
l_rp = l_rp2 = NULL; |
1272 |
|
1273 |
|
1274 |
/* First: Look for star at the end of the word */ |
1275 |
if ((p = strrchr(word, '*'))) |
1276 |
{ |
1277 |
if (p != word && *(p - 1) == '\\') /* Check for an escaped * */ |
1278 |
{ |
1279 |
p = NULL; /* If escaped it is not a wildcard */ |
1280 |
} |
1281 |
else |
1282 |
{ |
1283 |
/* Check if it is at the end of the word */ |
1284 |
if (p == (word + strlen(word) - 1)) |
1285 |
{ |
1286 |
word[strlen(word) - 1] = '\0'; |
1287 |
/* Remove the wildcard - p remains not NULL */ |
1288 |
} |
1289 |
else |
1290 |
{ |
1291 |
p = NULL; /* Not at the end - Ignore */ |
1292 |
} |
1293 |
} |
1294 |
} |
1295 |
|
1296 |
|
1297 |
|
1298 |
|
1299 |
DB_InitReadWords(sw, indexf->DB); |
1300 |
if (!p) /* No wildcard -> Direct hash search */ |
1301 |
{ |
1302 |
DB_ReadWordHash(sw, word, &wordID, indexf->DB); |
1303 |
|
1304 |
if(!wordID) |
1305 |
{ |
1306 |
DB_EndReadWords(sw, indexf->DB); |
1307 |
sw->lasterror = WORD_NOT_FOUND; |
1308 |
return NULL; |
1309 |
} |
1310 |
} |
1311 |
|
1312 |
else /* There is a star. So use the sequential approach */ |
1313 |
{ |
1314 |
char *resultword; |
1315 |
|
1316 |
if (*word == '*') |
1317 |
{ |
1318 |
sw->lasterror = UNIQUE_WILDCARD_NOT_ALLOWED_IN_WORD; |
1319 |
return NULL; |
1320 |
} |
1321 |
|
1322 |
|
1323 |
DB_ReadFirstWordInvertedIndex(sw, word, &resultword, &wordID, indexf->DB); |
1324 |
|
1325 |
if (!wordID) |
1326 |
{ |
1327 |
DB_EndReadWords(sw, indexf->DB); |
1328 |
sw->lasterror = WORD_NOT_FOUND; |
1329 |
return NULL; |
1330 |
} |
1331 |
efree(resultword); /* Do not need it */ |
1332 |
} |
1333 |
|
1334 |
|
1335 |
/* If code is here we have found the word !! */ |
1336 |
|
1337 |
do |
1338 |
{ |
1339 |
DB_ReadWordData(sw, wordID, &buffer, &sz_buffer, indexf->DB); |
1340 |
s = buffer; |
1341 |
|
1342 |
// buffer structure = <tfreq><metaID><delta to next meta> |
1343 |
|
1344 |
/* Get the data of the word */ |
1345 |
tfrequency = uncompress2(&s); /* tfrequency - number of files with this word */ |
1346 |
|
1347 |
|
1348 |
/* Now look for a correct Metaname */ |
1349 |
curmetaID = uncompress2(&s); |
1350 |
|
1351 |
while (curmetaID) |
1352 |
{ |
1353 |
nextposmetaname = UNPACKLONG2(s); |
1354 |
s += sizeof(long); |
1355 |
|
1356 |
|
1357 |
if (curmetaID >= metaID) |
1358 |
break; |
1359 |
|
1360 |
s = buffer + nextposmetaname; |
1361 |
if(nextposmetaname == (unsigned long)sz_buffer) |
1362 |
break; // if no more meta data |
1363 |
|
1364 |
curmetaID = uncompress2(&s); |
1365 |
} |
1366 |
|
1367 |
if (curmetaID == metaID) /* found a matching meta value */ |
1368 |
{ |
1369 |
filenum = 0; |
1370 |
do |
1371 |
{ |
1372 |
/* Read on all items */ |
1373 |
uncompress_location_values(&s,&flag,&tmpval,&frequency); |
1374 |
filenum += tmpval; |
1375 |
|
1376 |
/* stack_posdata is just to avoid calling emalloc */ |
1377 |
/* it should be enough for most cases */ |
1378 |
if(frequency > MAX_POSDATA_STACK) |
1379 |
posdata = (int *)emalloc(frequency * sizeof(int)); |
1380 |
else |
1381 |
posdata = stack_posdata; |
1382 |
|
1383 |
/* read positions */ |
1384 |
uncompress_location_positions(&s,flag,frequency,posdata); |
1385 |
|
1386 |
/* test structure and adjust frequency */ |
1387 |
frequency = test_structure(structure, frequency, posdata); |
1388 |
|
1389 |
/* Store -1 in rank - In this way, we can delay its computation */ |
1390 |
/* This stuff has been removed */ |
1391 |
|
1392 |
/* Store result */ |
1393 |
if(frequency) |
1394 |
{ |
1395 |
/* This is very useful if we sorted by other property */ |
1396 |
if(!l_rp) |
1397 |
l_rp = newResultsList(sw); |
1398 |
|
1399 |
// tfrequency = number of files with this word |
1400 |
// frequency = number of times this words is in this document for this metaID |
1401 |
|
1402 |
addtoresultlist(l_rp, filenum, -1, tfrequency, frequency, indexf, sw); |
1403 |
|
1404 |
/* Copy positions */ |
1405 |
memcpy((unsigned char *)l_rp->tail->posdata,(unsigned char *)posdata,frequency * sizeof(int)); |
1406 |
|
1407 |
// Temp fix |
1408 |
{ |
1409 |
struct RESULT *r1 = l_rp->tail; |
1410 |
r1->rank = r1->rank = getrank( sw, r1->frequency, r1->tfrequency, r1->posdata, r1->indexf, r1->filenum ); |
1411 |
} |
1412 |
} |
1413 |
if(posdata != stack_posdata) |
1414 |
efree(posdata); |
1415 |
|
1416 |
|
1417 |
} while ((unsigned long)(s - buffer) != nextposmetaname); |
1418 |
|
1419 |
|
1420 |
} |
1421 |
|
1422 |
efree(buffer); |
1423 |
|
1424 |
|
1425 |
if (!p) |
1426 |
break; /* direct access (no wild card) -> break */ |
1427 |
|
1428 |
else |
1429 |
{ |
1430 |
char *resultword; |
1431 |
|
1432 |
/* Jump to next word */ |
1433 |
/* No more data for this word but we |
1434 |
are in sequential search because of |
1435 |
the star (p is not null) */ |
1436 |
/* So, go for next word */ |
1437 |
DB_ReadNextWordInvertedIndex(sw, word, &resultword, &wordID, indexf->DB); |
1438 |
if (! wordID) |
1439 |
break; /* no more data */ |
1440 |
|
1441 |
efree(resultword); /* Do not need it (although might be useful for highlighting some day) */ |
1442 |
} |
1443 |
|
1444 |
} while(1); /* continue on in loop for wildcard search */ |
1445 |
|
1446 |
|
1447 |
|
1448 |
if (p) |
1449 |
{ |
1450 |
/* Finally, if we are in an sequential search merge all results */ |
1451 |
l_rp = mergeresulthashlist(sw, l_rp); |
1452 |
} |
1453 |
|
1454 |
DB_EndReadWords(sw, indexf->DB); |
1455 |
return l_rp; |
1456 |
} |
1457 |
|
1458 |
|
1459 |
/* |
1460 |
-- Rules checking |
1461 |
-- u_is... = user rules (and, or, ...) |
1462 |
-- is... = internal rules checking |
1463 |
*/ |
1464 |
|
1465 |
|
1466 |
/* Is a word a rule? |
1467 |
*/ |
1468 |
|
1469 |
int isrule(char *word) |
1470 |
{ |
1471 |
int i; |
1472 |
static char *ops[] = { /* internal ops... */ |
1473 |
AND_WORD, OR_WORD, NOT_WORD, PHRASE_WORD, AND_NOT_WORD, |
1474 |
NULL |
1475 |
}; |
1476 |
|
1477 |
for (i = 0; ops[i]; i++) |
1478 |
{ |
1479 |
if (!strcmp(word, ops[i])) |
1480 |
return 1; |
1481 |
} |
1482 |
return 0; |
1483 |
} |
1484 |
|
1485 |
int u_isrule(SWISH * sw, char *word) |
1486 |
{ |
1487 |
LOGICAL_OP *op; |
1488 |
|
1489 |
op = &(sw->SearchAlt->srch_op); |
1490 |
if (!strcmp(word, op->and) || !strcmp(word, op->or) || !strcmp(word, op->not)) |
1491 |
return 1; |
1492 |
else |
1493 |
return 0; |
1494 |
} |
1495 |
|
1496 |
int isnotrule(char *word) |
1497 |
{ |
1498 |
if (!strcmp(word, NOT_WORD)) |
1499 |
return 1; |
1500 |
else |
1501 |
return 0; |
1502 |
} |
1503 |
|
1504 |
int u_isnotrule(SWISH * sw, char *word) |
1505 |
{ |
1506 |
if (!strcmp(word, sw->SearchAlt->srch_op.not)) |
1507 |
return 1; |
1508 |
else |
1509 |
return 0; |
1510 |
} |
1511 |
|
1512 |
|
1513 |
/* Is a word a boolean rule? |
1514 |
*/ |
1515 |
|
1516 |
int isbooleanrule(char *word) |
1517 |
{ |
1518 |
if (!strcmp(word, AND_WORD) || !strcmp(word, OR_WORD) || !strcmp(word, PHRASE_WORD) || !strcmp(word, AND_NOT_WORD)) |
1519 |
return 1; |
1520 |
else |
1521 |
return 0; |
1522 |
} |
1523 |
|
1524 |
/* Is a word a unary rule? |
1525 |
*/ |
1526 |
|
1527 |
int isunaryrule(char *word) |
1528 |
{ |
1529 |
if (!strcmp(word, NOT_WORD)) |
1530 |
return 1; |
1531 |
else |
1532 |
return 0; |
1533 |
} |
1534 |
|
1535 |
/* Return the number for a rule. |
1536 |
*/ |
1537 |
|
1538 |
int getrulenum(char *word) |
1539 |
{ |
1540 |
if (!strcmp(word, AND_WORD)) |
1541 |
return AND_RULE; |
1542 |
else if (!strcmp(word, OR_WORD)) |
1543 |
return OR_RULE; |
1544 |
else if (!strcmp(word, NOT_WORD)) |
1545 |
return NOT_RULE; |
1546 |
else if (!strcmp(word, PHRASE_WORD)) |
1547 |
return PHRASE_RULE; |
1548 |
else if (!strcmp(word, AND_NOT_WORD)) |
1549 |
return AND_NOT_RULE; |
1550 |
return NO_RULE; |
1551 |
} |
1552 |
|
1553 |
|
1554 |
/* |
1555 |
-- Return selected RuleNumber for default rule. |
1556 |
-- defined via current Swish Search Boolean OP Settings for user. |
1557 |
*/ |
1558 |
|
1559 |
int u_SelectDefaultRulenum(SWISH * sw, char *word) |
1560 |
{ |
1561 |
if (!strcasecmp(word, sw->SearchAlt->srch_op.and)) |
1562 |
return AND_RULE; |
1563 |
else if (!strcasecmp(word, sw->SearchAlt->srch_op.or)) |
1564 |
return OR_RULE; |
1565 |
return NO_RULE; |
1566 |
} |
1567 |
|
1568 |
|
1569 |
|
1570 |
static void make_db_res_and_free(SWISH *sw,RESULT_LIST *l_res) { |
1571 |
struct DB_RESULTS tmp; |
1572 |
memset (&tmp,0,sizeof(struct DB_RESULTS)); |
1573 |
tmp.resultlist = l_res; |
1574 |
freeresultlist(sw,&tmp); |
1575 |
} |
1576 |
|
1577 |
|
1578 |
|
1579 |
/* Takes two lists of results from searches and ANDs them together. |
1580 |
** On input, both result lists r1 and r2 must be sorted by filenum |
1581 |
** On output, the new result list remains sorted |
1582 |
*/ |
1583 |
|
1584 |
RESULT_LIST *andresultlists(SWISH * sw, RESULT_LIST * l_r1, RESULT_LIST * l_r2, int andLevel) |
1585 |
{ |
1586 |
RESULT_LIST *new_results_list = NULL; |
1587 |
RESULT *r1; |
1588 |
RESULT *r2; |
1589 |
int res = 0; |
1590 |
|
1591 |
/* patch provided by Mukund Srinivasan */ |
1592 |
if (l_r1 == NULL || l_r2 == NULL) |
1593 |
{ |
1594 |
make_db_res_and_free(sw,l_r1); |
1595 |
make_db_res_and_free(sw,l_r2); |
1596 |
return NULL; |
1597 |
} |
1598 |
|
1599 |
if (andLevel < 1) |
1600 |
andLevel = 1; |
1601 |
|
1602 |
for (r1 = l_r1->head, r2 = l_r2->head; r1 && r2;) |
1603 |
{ |
1604 |
res = r1->filenum - r2->filenum; |
1605 |
if (!res) |
1606 |
{ |
1607 |
/* |
1608 |
* Computing the new rank is interesting because |
1609 |
* we want to weight each of the words that was |
1610 |
* previously ANDed equally along with the new word. |
1611 |
* We compute a running average using andLevel and |
1612 |
* simply scale up the old average (in r1->rank) |
1613 |
* and recompute a new, equally weighted average. |
1614 |
*/ |
1615 |
int newRank = 0; |
1616 |
|
1617 |
if(r1->rank == -1) |
1618 |
r1->rank = getrank( sw, r1->frequency, r1->tfrequency, r1->posdata, r1->indexf, r1->filenum ); |
1619 |
|
1620 |
if(r2->rank == -1) |
1621 |
r2->rank = getrank( sw, r2->frequency, r1->tfrequency, r2->posdata, r2->indexf, r2->filenum ); |
1622 |
|
1623 |
newRank = ((r1->rank * andLevel) + r2->rank) / (andLevel + 1); |
1624 |
|
1625 |
|
1626 |
if(!new_results_list) |
1627 |
new_results_list = newResultsList(sw); |
1628 |
|
1629 |
addtoresultlist(new_results_list, r1->filenum, newRank, 0, r1->frequency + r2->frequency, r1->indexf, sw); |
1630 |
|
1631 |
|
1632 |
/* Storing all positions could be useful in the future */ |
1633 |
|
1634 |
CopyPositions(new_results_list->tail->posdata, 0, r1->posdata, 0, r1->frequency); |
1635 |
CopyPositions(new_results_list->tail->posdata, r1->frequency, r2->posdata, 0, r2->frequency); |
1636 |
|
1637 |
|
1638 |
r1 = r1->next; |
1639 |
r2 = r2->next; |
1640 |
} |
1641 |
|
1642 |
else if (res > 0) |
1643 |
{ |
1644 |
r2 = r2->next; |
1645 |
} |
1646 |
else |
1647 |
{ |
1648 |
r1 = r1->next; |
1649 |
} |
1650 |
} |
1651 |
|
1652 |
return new_results_list; |
1653 |
} |
1654 |
|
1655 |
/* Takes two lists of results from searches and ORs them together. |
1656 |
2001-11 jmruiz Completely rewritten. Older one was really |
1657 |
slow when the lists are very long |
1658 |
On input, both result lists r1 and r2 must be sorted by filenum |
1659 |
On output, the new result list remains sorted |
1660 |
|
1661 |
rank is combined for matching files. That is, |
1662 |
"foo OR bar" will rank files with both higher. |
1663 |
|
1664 |
*/ |
1665 |
|
1666 |
|
1667 |
RESULT_LIST *orresultlists(SWISH * sw, RESULT_LIST * l_r1, RESULT_LIST * l_r2) |
1668 |
{ |
1669 |
int rc; |
1670 |
RESULT *r1; |
1671 |
RESULT *r2; |
1672 |
RESULT *rp, |
1673 |
*tmp; |
1674 |
RESULT_LIST *new_results_list = NULL; |
1675 |
|
1676 |
|
1677 |
/* If either list is empty, just return the other */ |
1678 |
if (l_r1 == NULL) |
1679 |
return l_r2; |
1680 |
|
1681 |
else if (l_r2 == NULL) |
1682 |
return l_r1; |
1683 |
|
1684 |
/* Look for files that have both words, and add up the ranks */ |
1685 |
|
1686 |
r1 = l_r1->head; |
1687 |
r2 = l_r2->head; |
1688 |
|
1689 |
while(r1 && r2) |
1690 |
{ |
1691 |
rc = r1->filenum - r2->filenum; |
1692 |
if(rc < 0) |
1693 |
{ |
1694 |
rp = r1; |
1695 |
r1 = r1->next; |
1696 |
} |
1697 |
else if(rc > 0) |
1698 |
{ |
1699 |
rp = r2; |
1700 |
r2 = r2->next; |
1701 |
} |
1702 |
|
1703 |
else /* Matching file number */ |
1704 |
{ |
1705 |
int result_size; |
1706 |
|
1707 |
/* Compute rank if not yet computed */ |
1708 |
if(r1->rank == -1) |
1709 |
r1->rank = getrank( sw, r1->frequency, r1->tfrequency, r1->posdata, r1->indexf, r1->filenum ); |
1710 |
|
1711 |
if(r2->rank == -1) |
1712 |
r2->rank = getrank( sw, r2->frequency, r2->tfrequency, r2->posdata, r2->indexf, r2->filenum ); |
1713 |
|
1714 |
|
1715 |
/* Create a new RESULT - Should be a function to creeate this, I'd think */ |
1716 |
|
1717 |
result_size = sizeof(RESULT) + ( (r1->frequency + r2->frequency - 1) * sizeof(int) ); |
1718 |
rp = (RESULT *) Mem_ZoneAlloc(sw->Search->resultSearchZone, result_size ); |
1719 |
memset( rp, 0, result_size ); |
1720 |
|
1721 |
|
1722 |
rp->fi.filenum = rp->filenum = r1->filenum; |
1723 |
|
1724 |
rp->rank = ( r1->rank + r2->rank) * 2; // bump up the or terms |
1725 |
rp->tfrequency = 0; |
1726 |
rp->frequency = r1->frequency + r2->frequency; |
1727 |
rp->indexf = r1->indexf; |
1728 |
|
1729 |
|
1730 |
/* save the combined position data in the new result. (Would freq ever be zero?) */ |
1731 |
if (r1->frequency) |
1732 |
CopyPositions(rp->posdata, 0, r1->posdata, 0, r1->frequency); |
1733 |
|
1734 |
if (r2->frequency) |
1735 |
CopyPositions(rp->posdata, r1->frequency, r2->posdata, 0, r2->frequency); |
1736 |
|
1737 |
r1 = r1->next; |
1738 |
r2 = r2->next; |
1739 |
} |
1740 |
|
1741 |
|
1742 |
/* Now add the result to the output list */ |
1743 |
|
1744 |
if(!new_results_list) |
1745 |
new_results_list = newResultsList(sw); |
1746 |
|
1747 |
addResultToList(new_results_list,rp); |
1748 |
} |
1749 |
|
1750 |
|
1751 |
/* Add the remaining results */ |
1752 |
|
1753 |
tmp = r1 ? r1 : r2; |
1754 |
|
1755 |
while(tmp) |
1756 |
{ |
1757 |
rp = tmp; |
1758 |
tmp = tmp->next; |
1759 |
if(!new_results_list) |
1760 |
new_results_list = newResultsList(sw); |
1761 |
|
1762 |
addResultToList(new_results_list,rp); |
1763 |
} |
1764 |
|
1765 |
return new_results_list; |
1766 |
} |
1767 |
|
1768 |
|
1769 |
/* 2001-10 jmruiz - This code was originally at merge.c |
1770 |
** Also made it thread safe |
1771 |
*/ |
1772 |
/* These three routines are only used by notresultlist */ |
1773 |
|
1774 |
struct markentry |
1775 |
{ |
1776 |
struct markentry *next; |
1777 |
int num; |
1778 |
}; |
1779 |
|
1780 |
/* This marks a number as having been printed. |
1781 |
*/ |
1782 |
|
1783 |
void marknum(SWISH *sw, struct markentry **markentrylist, int num) |
1784 |
{ |
1785 |
unsigned hashval; |
1786 |
struct markentry *mp; |
1787 |
|
1788 |
mp = (struct markentry *) Mem_ZoneAlloc(sw->Search->resultSearchZone, sizeof(struct markentry)); |
1789 |
|
1790 |
mp->num = num; |
1791 |
|
1792 |
hashval = bignumhash(num); |
1793 |
mp->next = markentrylist[hashval]; |
1794 |
markentrylist[hashval] = mp; |
1795 |
} |
1796 |
|
1797 |
|
1798 |
/* Has a number been printed? |
1799 |
*/ |
1800 |
|
1801 |
int ismarked(struct markentry **markentrylist, int num) |
1802 |
{ |
1803 |
unsigned hashval; |
1804 |
struct markentry *mp; |
1805 |
|
1806 |
hashval = bignumhash(num); |
1807 |
mp = markentrylist[hashval]; |
1808 |
|
1809 |
while (mp != NULL) |
1810 |
{ |
1811 |
if (mp->num == num) |
1812 |
return 1; |
1813 |
mp = mp->next; |
1814 |
} |
1815 |
return 0; |
1816 |
} |
1817 |
|
1818 |
/* Initialize the marking list. |
1819 |
*/ |
1820 |
|
1821 |
void initmarkentrylist(struct markentry **markentrylist) |
1822 |
{ |
1823 |
int i; |
1824 |
|
1825 |
for (i = 0; i < BIGHASHSIZE; i++) |
1826 |
markentrylist[i] = NULL; |
1827 |
} |
1828 |
|
1829 |
void freemarkentrylist(struct markentry **markentrylist) |
1830 |
{ |
1831 |
int i; |
1832 |
|
1833 |
for (i = 0; i < BIGHASHSIZE; i++) |
1834 |
{ |
1835 |
markentrylist[i] = NULL; |
1836 |
} |
1837 |
} |
1838 |
|
1839 |
/* This performs the NOT unary operation on a result list. |
1840 |
** NOTed files are marked with a default rank of 1000. |
1841 |
** |
1842 |
** Basically it returns all the files that have not been |
1843 |
** marked (GH) |
1844 |
*/ |
1845 |
|
1846 |
RESULT_LIST *notresultlist(SWISH * sw, RESULT_LIST * l_rp, IndexFILE * indexf) |
1847 |
{ |
1848 |
int i, |
1849 |
filenums; |
1850 |
RESULT *rp; |
1851 |
RESULT_LIST *new_results_list = NULL; |
1852 |
struct markentry *markentrylist[BIGHASHSIZE]; |
1853 |
|
1854 |
if(!l_rp) |
1855 |
rp = NULL; |
1856 |
else |
1857 |
rp = l_rp->head; |
1858 |
|
1859 |
initmarkentrylist(markentrylist); |
1860 |
while (rp != NULL) |
1861 |
{ |
1862 |
marknum(sw, markentrylist, rp->filenum); |
1863 |
rp = rp->next; |
1864 |
} |
1865 |
|
1866 |
filenums = indexf->header.totalfiles; |
1867 |
|
1868 |
for (i = 1; i <= filenums; i++) |
1869 |
{ |
1870 |
if (!ismarked(markentrylist, i)) |
1871 |
{ |
1872 |
if(!new_results_list) |
1873 |
new_results_list = newResultsList(sw); |
1874 |
addtoresultlist(new_results_list, i, 1000, 0, 0, indexf, sw); |
1875 |
} |
1876 |
} |
1877 |
|
1878 |
freemarkentrylist(markentrylist); |
1879 |
|
1880 |
new_results_list = sortresultsbyfilenum(new_results_list); |
1881 |
|
1882 |
return new_results_list; |
1883 |
} |
1884 |
|
1885 |
/* Phrase result routine - see distance parameter. For phrase search this |
1886 |
** value must be 1 (consecutive words) |
1887 |
** |
1888 |
** On input, both result lists r1 abd r2 must be sorted by filenum |
1889 |
** On output, the new result list remains sorted |
1890 |
*/ |
1891 |
RESULT_LIST *phraseresultlists(SWISH * sw, RESULT_LIST * l_r1, RESULT_LIST * l_r2, int distance) |
1892 |
{ |
1893 |
int i, |
1894 |
j, |
1895 |
found, |
1896 |
newRank, |
1897 |
*allpositions; |
1898 |
int res = 0; |
1899 |
RESULT_LIST *new_results_list = NULL; |
1900 |
RESULT *r1, *r2; |
1901 |
|
1902 |
if (l_r1 == NULL || l_r2 == NULL) |
1903 |
return NULL; |
1904 |
|
1905 |
for (r1 = l_r1->head, r2 = l_r2->head; r1 && r2;) |
1906 |
{ |
1907 |
res = r1->filenum - r2->filenum; |
1908 |
if (!res) |
1909 |
{ |
1910 |
found = 0; |
1911 |
allpositions = NULL; |
1912 |
for (i = 0; i < r1->frequency; i++) |
1913 |
{ |
1914 |
for (j = 0; j < r2->frequency; j++) |
1915 |
{ |
1916 |
if ((GET_POSITION(r1->posdata[i]) + distance) == GET_POSITION(r2->posdata[j])) |
1917 |
{ |
1918 |
found++; |
1919 |
if (allpositions) |
1920 |
allpositions = (int *) erealloc(allpositions, found * sizeof(int)); |
1921 |
|
1922 |
else |
1923 |
allpositions = (int *) emalloc(found * sizeof(int)); |
1924 |
|
1925 |
allpositions[found - 1] = r2->posdata[j]; |
1926 |
break; |
1927 |
} |
1928 |
} |
1929 |
} |
1930 |
if (found) |
1931 |
{ |
1932 |
/* Compute newrank */ |
1933 |
if(r1->rank == -1) |
1934 |
r1->rank = getrank( sw, r1->frequency, r1->tfrequency, r1->posdata, r1->indexf, r1->filenum ); |
1935 |
if(r2->rank == -1) |
1936 |
r2->rank = getrank( sw, r2->frequency, r1->tfrequency, r2->posdata, r2->indexf, r2->filenum ); |
1937 |
|
1938 |
newRank = (r1->rank + r2->rank) / 2; |
1939 |
/* |
1940 |
* Storing positions is neccesary for further |
1941 |
* operations |
1942 |
*/ |
1943 |
if(!new_results_list) |
1944 |
new_results_list = newResultsList(sw); |
1945 |
|
1946 |
addtoresultlist(new_results_list, r1->filenum, newRank, 0, found, r1->indexf, sw); |
1947 |
|
1948 |
CopyPositions(new_results_list->tail->posdata, 0, allpositions, 0, found); |
1949 |
efree(allpositions); |
1950 |
} |
1951 |
r1 = r1->next; |
1952 |
r2 = r2->next; |
1953 |
} |
1954 |
else if (res > 0) |
1955 |
{ |
1956 |
r2 = r2->next; |
1957 |
} |
1958 |
else |
1959 |
{ |
1960 |
r1 = r1->next; |
1961 |
} |
1962 |
|
1963 |
} |
1964 |
|
1965 |
return new_results_list; |
1966 |
} |
1967 |
|
1968 |
/* Adds a file number and rank to a list of results. |
1969 |
*/ |
1970 |
|
1971 |
|
1972 |
void addtoresultlist(RESULT_LIST * l_rp, int filenum, int rank, int tfrequency, int frequency, IndexFILE * indexf, SWISH * sw) |
1973 |
{ |
1974 |
RESULT *newnode; |
1975 |
int result_size; |
1976 |
|
1977 |
result_size = sizeof(RESULT) + ((frequency - 1) * sizeof(int)); |
1978 |
newnode = (RESULT *) Mem_ZoneAlloc(sw->Search->resultSearchZone, result_size ); |
1979 |
memset( newnode, 0, result_size ); |
1980 |
|
1981 |
newnode->fi.filenum = newnode->filenum = filenum; |
1982 |
|
1983 |
newnode->rank = rank; |
1984 |
newnode->tfrequency = tfrequency; |
1985 |
newnode->frequency = frequency; |
1986 |
newnode->indexf = indexf; |
1987 |
|
1988 |
addResultToList(l_rp, newnode); |
1989 |
} |
1990 |
|
1991 |
|
1992 |
|
1993 |
|
1994 |
RESULT *SwishNext(SWISH * sw) |
1995 |
{ |
1996 |
struct MOD_Search *srch = sw->Search; |
1997 |
RESULT *res = NULL; |
1998 |
RESULT *res2 = NULL; |
1999 |
int rc; |
2000 |
struct DB_RESULTS *db_results = NULL, |
2001 |
*db_results_winner = NULL; |
2002 |
int num; |
2003 |
|
2004 |
if (srch->bigrank) |
2005 |
num = 10000000 / srch->bigrank; |
2006 |
else |
2007 |
num = 10000; |
2008 |
|
2009 |
|
2010 |
/* Seems like we should error here if there are no results */ |
2011 |
if ( !sw->Search->db_results ) |
2012 |
{ |
2013 |
set_progerr(SWISH_LISTRESULTS_EOF, sw, "Attempted to read results before searching"); |
2014 |
return NULL; |
2015 |
} |
2016 |
|
2017 |
/* Check for a unique index file */ |
2018 |
if (!sw->Search->db_results->next) |
2019 |
{ |
2020 |
if ((res = sw->Search->db_results->currentresult)) |
2021 |
{ |
2022 |
/* Increase Pointer */ |
2023 |
sw->Search->db_results->currentresult = res->next; |
2024 |
/* If rank was delayed, compute it now */ |
2025 |
if(res->rank == -1) |
2026 |
res->rank = getrank( sw, res->frequency, res->tfrequency, res->posdata, res->indexf, res->filenum ); |
2027 |
|
2028 |
} |
2029 |
} |
2030 |
|
2031 |
|
2032 |
else /* tape merge to find the next one from all the index files */ |
2033 |
|
2034 |
{ |
2035 |
/* We have more than one index file - can't use pre-sorted index */ |
2036 |
/* Get the lower value */ |
2037 |
db_results_winner = sw->Search->db_results; |
2038 |
|
2039 |
if ((res = db_results_winner->currentresult)) |
2040 |
{ |
2041 |
/* If rank was delayed, compute it now */ |
2042 |
if(res->rank == -1) |
2043 |
res->rank = getrank( sw, res->frequency, res->tfrequency, res->posdata, res->indexf, res->filenum ); |
2044 |
|
2045 |
if ( !res->PropSort ) |
2046 |
res->PropSort = getResultSortProperties(sw, res); |
2047 |
} |
2048 |
|
2049 |
|
2050 |
for (db_results = sw->Search->db_results->next; db_results; db_results = db_results->next) |
2051 |
{ |
2052 |
/* Any more results for this index? */ |
2053 |
if (!(res2 = db_results->currentresult)) |
2054 |
continue; |
2055 |
|
2056 |
else |
2057 |
{ |
2058 |
/* If rank was delayed, compute it now */ |
2059 |
if(res2->rank == -1) |
2060 |
res2->rank = getrank( sw, res2->frequency, res2->tfrequency, res2->posdata, res2->indexf, res2->filenum ); |
2061 |
|
2062 |
if ( !res2->PropSort ) |
2063 |
res2->PropSort = getResultSortProperties(sw, res2); |
2064 |
} |
2065 |
|
2066 |
if (!res) |
2067 |
{ |
2068 |
res = res2; |
2069 |
db_results_winner = db_results; |
2070 |
continue; |
2071 |
} |
2072 |
|
2073 |
rc = (int) compResultsByNonSortedProps(&res, &res2); |
2074 |
if (rc < 0) |
2075 |
{ |
2076 |
res = res2; |
2077 |
db_results_winner = db_results; |
2078 |
} |
2079 |
} |
2080 |
|
2081 |
if ((res = db_results_winner->currentresult)) |
2082 |
db_results_winner->currentresult = res->next; |
2083 |
} |
2084 |
|
2085 |
|
2086 |
|
2087 |
/* Normalize rank */ |
2088 |
if (res) |
2089 |
{ |
2090 |
res->rank = (int) (res->rank * num)/10000; |
2091 |
if (res->rank >= 999) |
2092 |
res->rank = 1000; |
2093 |
else if (res->rank < 1) |
2094 |
res->rank = 1; |
2095 |
} |
2096 |
else |
2097 |
{ |
2098 |
// it's expected to just return null on end of list. |
2099 |
// sw->lasterror = SWISH_LISTRESULTS_EOF; |
2100 |
} |
2101 |
|
2102 |
|
2103 |
return res; |
2104 |
|
2105 |
} |
2106 |
|
2107 |
|
2108 |
|
2109 |
|
2110 |
|
2111 |
/* Checks if the next word is "=" |
2112 |
*/ |
2113 |
|
2114 |
int isMetaNameOpNext(struct swline *searchWord) |
2115 |
{ |
2116 |
if (searchWord == NULL) |
2117 |
return 0; |
2118 |
if (!strcmp(searchWord->line, "=")) |
2119 |
return 1; |
2120 |
return 0; |
2121 |
} |
2122 |
|
2123 |
/* funtion to free all memory of a list of results */ |
2124 |
void freeresultlist(SWISH * sw, struct DB_RESULTS *dbres) |
2125 |
{ |
2126 |
RESULT *rp; |
2127 |
RESULT *tmp; |
2128 |
|
2129 |
#ifdef DUMP_RESULTS |
2130 |
dump_result_lists( sw, "In freeresultlist()" ); |
2131 |
#endif |
2132 |
|
2133 |
if(dbres->resultlist) |
2134 |
rp = dbres->resultlist->head; |
2135 |
else |
2136 |
rp = NULL; |
2137 |
|
2138 |
while (rp) |
2139 |
{ |
2140 |
tmp = rp->next; |
2141 |
freeresult(sw, rp); |
2142 |
rp = tmp; |
2143 |
} |
2144 |
dbres->resultlist = NULL; |
2145 |
dbres->currentresult = NULL; |
2146 |
dbres->sortresultlist = NULL; |
2147 |
} |
2148 |
|
2149 |
/* funtion to free the memory of one result */ |
2150 |
void freeresult(SWISH * sw, RESULT * rp) |
2151 |
{ |
2152 |
int i; |
2153 |
|
2154 |
if (rp) |
2155 |
{ |
2156 |
freefileinfo( &rp->fi ); // may have already been freed |
2157 |
|
2158 |
if (sw->ResultSort->numPropertiesToSort && rp->PropSort) |
2159 |
{ |
2160 |
for (i = 0; i < sw->ResultSort->numPropertiesToSort; i++) |
2161 |
efree(rp->PropSort[i]); |
2162 |
|
2163 |
/* For better performance with thousands of results this is stored in a MemZone |
2164 |
in result_sort.c module |
2165 |
efree(rp->PropSort); |
2166 |
*/ |
2167 |
|
2168 |
} |
2169 |
/* For better performance with thousands of results this is stored in a MemZone |
2170 |
in result_sort.c module |
2171 |
|
2172 |
if (sw->ResultSort->numPropertiesToSort && rp->iPropSort) |
2173 |
{ |
2174 |
efree(rp->iPropSort); |
2175 |
} |
2176 |
*/ |
2177 |
/* For better performance store results in a MemZone |
2178 |
|
2179 |
efree(rp); |
2180 |
*/ |
2181 |
} |
2182 |
} |
2183 |
|
2184 |
|
2185 |
|
2186 |
/* |
2187 |
06/00 Jose Ruiz - Sort results by filenum |
2188 |
Uses an array and qsort for better performance |
2189 |
Used for faster "and" and "phrase" of results |
2190 |
*/ |
2191 |
RESULT_LIST *sortresultsbyfilenum(RESULT_LIST * l_rp) |
2192 |
{ |
2193 |
int i, |
2194 |
j; |
2195 |
RESULT **ptmp; |
2196 |
RESULT *rp; |
2197 |
|
2198 |
/* Very trivial case */ |
2199 |
if (!l_rp) |
2200 |
return NULL; |
2201 |
|
2202 |
|
2203 |
/* Compute results */ |
2204 |
for (i = 0, rp = l_rp->head; rp; rp = rp->next, i++); |
2205 |
/* Another very trivial case */ |
2206 |
if (i == 1) |
2207 |
return l_rp; |
2208 |
/* Compute array size */ |
2209 |
ptmp = (void *) emalloc(i * sizeof(RESULT *)); |
2210 |
/* Build an array with the elements to compare |
2211 |
and pointers to data */ |
2212 |
for (j = 0, rp = l_rp->head; rp; rp = rp->next) |
2213 |
ptmp[j++] = rp; |
2214 |
/* Sort them */ |
2215 |
swish_qsort(ptmp, i, sizeof(RESULT *), &compResultsByFileNum); |
2216 |
/* Build the list */ |
2217 |
for (j = 0, rp = NULL; j < i; j++) |
2218 |
{ |
2219 |
if (!rp) |
2220 |
l_rp->head = ptmp[j]; |
2221 |
else |
2222 |
rp->next = ptmp[j]; |
2223 |
rp = ptmp[j]; |
2224 |
} |
2225 |
rp->next = NULL; |
2226 |
l_rp->tail = rp; |
2227 |
|
2228 |
/* Free the memory of the array */ |
2229 |
efree(ptmp); |
2230 |
|
2231 |
return l_rp; |
2232 |
} |
2233 |
|
2234 |
|
2235 |
/* 06/00 Jose Ruiz |
2236 |
** returns all results in r1 that not contains r2 |
2237 |
** |
2238 |
** On input, both result lists r1 and r2 must be sorted by filenum |
2239 |
** On output, the new result list remains sorted |
2240 |
*/ |
2241 |
RESULT_LIST *notresultlists(SWISH * sw, RESULT_LIST * l_r1, RESULT_LIST * l_r2) |
2242 |
{ |
2243 |
RESULT *rp, *r1, *r2; |
2244 |
RESULT_LIST *new_results_list = NULL; |
2245 |
int res = 0; |
2246 |
|
2247 |
if (!l_r1) |
2248 |
return NULL; |
2249 |
if (l_r1 && !l_r2) |
2250 |
return l_r1; |
2251 |
|
2252 |
for (r1 = l_r1->head, r2 = l_r2->head; r1 && r2;) |
2253 |
{ |
2254 |
res = r1->filenum - r2->filenum; |
2255 |
if (res < 0) |
2256 |
{ |
2257 |
/* |
2258 |
* Storing all positions could be useful |
2259 |
* in the future |
2260 |
*/ |
2261 |
|
2262 |
rp = r1; |
2263 |
r1 = r1->next; |
2264 |
if(!new_results_list) |
2265 |
new_results_list = newResultsList(sw); |
2266 |
addResultToList(new_results_list,rp); |
2267 |
} |
2268 |
else if (res > 0) |
2269 |
{ |
2270 |
r2 = r2->next; |
2271 |
} |
2272 |
else |
2273 |
{ |
2274 |
r1 = r1->next; |
2275 |
r2 = r2->next; |
2276 |
} |
2277 |
} |
2278 |
/* Add remaining results */ |
2279 |
while (r1) |
2280 |
{ |
2281 |
rp = r1; |
2282 |
r1 = r1->next; |
2283 |
if(!new_results_list) |
2284 |
new_results_list = newResultsList(sw); |
2285 |
addResultToList(new_results_list,rp); |
2286 |
} |
2287 |
|
2288 |
return new_results_list; |
2289 |
} |
2290 |
|
2291 |
|
2292 |
/****************************************************************************** |
2293 |
* Remove the stop words from the tokenized query |
2294 |
* rewritten Nov 24, 2001 - moseley |
2295 |
* Still horrible! Need a real parse tree. |
2296 |
*******************************************************************************/ |
2297 |
|
2298 |
|
2299 |
struct swline *ignore_words_in_query(SWISH * sw, IndexFILE * indexf, struct swline *searchwordlist, unsigned char phrase_delimiter) |
2300 |
{ |
2301 |
struct swline *cur_token = searchwordlist; |
2302 |
struct swline *prev_token = NULL; |
2303 |
struct swline *prev_prev_token = NULL; // for removing two things |
2304 |
int in_phrase = 0; |
2305 |
int word_count = 0; /* number of search words found */ |
2306 |
int paren_count = 0; |
2307 |
struct MOD_Search *srch = sw->Search; |
2308 |
|
2309 |
srch->removed_stopwords = 0; |
2310 |
|
2311 |
|
2312 |
|
2313 |
while ( cur_token ) |
2314 |
{ |
2315 |
int remove = 0; |
2316 |
char first_char = cur_token->line[0]; |
2317 |
|
2318 |
if ( cur_token == searchwordlist ) |
2319 |
{ |
2320 |
prev_token = prev_prev_token = NULL; |
2321 |
word_count = 0; |
2322 |
paren_count = 0; |
2323 |
in_phrase = 0; |
2324 |
} |
2325 |
|
2326 |
while ( 1 ) // so we can use break. |
2327 |
{ |
2328 |
|
2329 |
/* Can't backslash here -- (because this code should really be include in swish_words.c) */ |
2330 |
|
2331 |
if ( first_char == phrase_delimiter ) |
2332 |
{ |
2333 |
in_phrase = !in_phrase; |
2334 |
|
2335 |
if ( !in_phrase && prev_token && prev_token->line[0] == phrase_delimiter ) |
2336 |
remove = 2; |
2337 |
|
2338 |
break; |
2339 |
} |
2340 |
|
2341 |
|
2342 |
/* leave everything alone inside a pharse */ |
2343 |
|
2344 |
if ( in_phrase ) |
2345 |
{ |
2346 |
if (isstopword(&indexf->header, cur_token->line)) |
2347 |
{ |
2348 |
srch->removed_stopwords++; |
2349 |
resultHeaderOut(sw, 1, "# Removed stopword: %s\n", cur_token->line); |
2350 |
remove = 1; |
2351 |
} |
2352 |
else |
2353 |
word_count++; |
2354 |
|
2355 |
break; |
2356 |
} |
2357 |
|
2358 |
|
2359 |
/* Allow operators */ |
2360 |
|
2361 |
if ( first_char == '=' ) |
2362 |
break; |
2363 |
|
2364 |
if ( first_char == '(' ) |
2365 |
{ |
2366 |
paren_count++; |
2367 |
break; |
2368 |
} |
2369 |
|
2370 |
if ( first_char == ')' ) |
2371 |
{ |
2372 |
paren_count--; |
2373 |
|
2374 |
if ( prev_token && prev_token->line[0] == '(' ) |
2375 |
remove = 2; |
2376 |
|
2377 |
break; |
2378 |
} |
2379 |
|
2380 |
|
2381 |
|
2382 |
/* Allow all metanames */ |
2383 |
|
2384 |
if ( isMetaNameOpNext(cur_token->next) ) |
2385 |
break; |
2386 |
|
2387 |
|
2388 |
|
2389 |
/* Look for AND OR NOT - remove AND OR at start, and remove second of doubles */ |
2390 |
|
2391 |
if ( u_isrule(sw, cur_token->line) ) |
2392 |
{ |
2393 |
if ( prev_token ) |
2394 |
{ |
2395 |
/* remove double tokens */ |
2396 |
if ( u_isrule( sw, prev_token->line ) ) |
2397 |
remove = 1; |
2398 |
} |
2399 |
/* allow NOT at the start */ |
2400 |
else if ( !u_isnotrule(sw, cur_token->line) ) |
2401 |
remove = 1; |
2402 |
|
2403 |
break; |
2404 |
} |
2405 |
|
2406 |
|
2407 |
|
2408 |
/* Finally, is it a stop word? */ |
2409 |
|
2410 |
if ( isstopword(&indexf->header, cur_token->line) ) |
2411 |
{ |
2412 |
srch->removed_stopwords++; |
2413 |
resultHeaderOut(sw, 1, "# Removed stopword: %s\n", cur_token->line); |
2414 |
remove = 1; |
2415 |
} |
2416 |
else |
2417 |
word_count++; |
2418 |
|
2419 |
|
2420 |
|
2421 |
break; |
2422 |
} |
2423 |
|
2424 |
|
2425 |
/* Catch dangling metanames */ |
2426 |
if ( !remove && !cur_token->next && isMetaNameOpNext( cur_token ) ) |
2427 |
remove = 2; |
2428 |
|
2429 |
|
2430 |
if ( remove ) |
2431 |
{ |
2432 |
struct swline *tmp = cur_token; |
2433 |
|
2434 |
if ( cur_token == searchwordlist ) // we are removing first token |
2435 |
searchwordlist = cur_token->next; |
2436 |
else |
2437 |
{ |
2438 |
prev_token->next = cur_token->next; // remove one in the middle |
2439 |
cur_token = prev_token; // save if remove == 2 |
2440 |
} |
2441 |
|
2442 |
|
2443 |
efree( tmp->line ); |
2444 |
efree( tmp ); |
2445 |
|
2446 |
if ( remove == 2 ) |
2447 |
{ |
2448 |
tmp = cur_token; |
2449 |
|
2450 |
if ( cur_token == searchwordlist ) // we are removing first token |
2451 |
searchwordlist = cur_token->next; |
2452 |
else |
2453 |
prev_prev_token->next = cur_token->next; // remove one in the middle |
2454 |
|
2455 |
efree( tmp->line ); |
2456 |
efree( tmp ); |
2457 |
} |
2458 |
|
2459 |
|
2460 |
/* start at the beginning again */ |
2461 |
|
2462 |
cur_token = searchwordlist; |
2463 |
continue; |
2464 |
} |
2465 |
|
2466 |
if ( prev_token ) |
2467 |
prev_prev_token = prev_token; |
2468 |
|
2469 |
prev_token = cur_token; |
2470 |
cur_token = cur_token->next; |
2471 |
} |
2472 |
|
2473 |
|
2474 |
if ( in_phrase || paren_count ) |
2475 |
sw->lasterror = QUERY_SYNTAX_ERROR; |
2476 |
|
2477 |
else if ( !word_count ) |
2478 |
sw->lasterror = srch->removed_stopwords ? WORDS_TOO_COMMON : NO_WORDS_IN_SEARCH; |
2479 |
|
2480 |
return searchwordlist; |
2481 |
|
2482 |
} |
2483 |
|
2484 |
|
2485 |
|
2486 |
/* Adds a file number to a hash table of results. |
2487 |
** If the entry's already there, add the ranks, |
2488 |
** else make a new entry. |
2489 |
** |
2490 |
** Jose Ruiz 04/00 |
2491 |
** For better performance in large "or" |
2492 |
** keep the lists sorted by filenum |
2493 |
** |
2494 |
** Jose Ruiz 2001/11 Rewritten to get better performance |
2495 |
*/ |
2496 |
RESULT_LIST *mergeresulthashlist(SWISH *sw, RESULT_LIST *l_r) |
2497 |
{ |
2498 |
unsigned hashval; |
2499 |
RESULT *r, |
2500 |
*rp, |
2501 |
*tmp, |
2502 |
*next, |
2503 |
*start, |
2504 |
*newnode = NULL; |
2505 |
RESULT_LIST *new_results_list = NULL; |
2506 |
int i, |
2507 |
tot_frequency, |
2508 |
pos_off, |
2509 |
filenum; |
2510 |
|
2511 |
if(!l_r) |
2512 |
return NULL; |
2513 |
|
2514 |
if(!l_r->head) |
2515 |
return NULL; |
2516 |
|
2517 |
/* Init hash table */ |
2518 |
for (i = 0; i < BIGHASHSIZE; i++) |
2519 |
sw->Search->resulthashlist[i] = NULL; |
2520 |
|
2521 |
for(r = l_r->head, next = NULL; r; r =next) |
2522 |
{ |
2523 |
next = r->next; |
2524 |
|
2525 |
tmp = NULL; |
2526 |
hashval = bignumhash(r->filenum); |
2527 |
|
2528 |
rp = sw->Search->resulthashlist[hashval]; |
2529 |
for(tmp = NULL; rp; ) |
2530 |
{ |
2531 |
if (r->filenum <= rp->filenum) |
2532 |
{ |
2533 |
break; |
2534 |
} |
2535 |
tmp = rp; |
2536 |
rp = rp->next; |
2537 |
} |
2538 |
if (tmp) |
2539 |
{ |
2540 |
tmp->next = r; |
2541 |
} |
2542 |
else |
2543 |
{ |
2544 |
sw->Search->resulthashlist[hashval] = r; |
2545 |
} |
2546 |
r->next = rp; |
2547 |
} |
2548 |
|
2549 |
/* Now coalesce reptitive filenums */ |
2550 |
for (i = 0; i < BIGHASHSIZE; i++) |
2551 |
{ |
2552 |
rp = sw->Search->resulthashlist[i]; |
2553 |
for (filenum = 0, start = NULL; ; ) |
2554 |
{ |
2555 |
if(rp) |
2556 |
next = rp->next; |
2557 |
if(!rp || rp->filenum != filenum) |
2558 |
{ |
2559 |
/* Start of new block, coalesce previous results */ |
2560 |
if(filenum) |
2561 |
{ |
2562 |
int result_size; |
2563 |
|
2564 |
for(tmp = start, tot_frequency = 0; tmp!=rp; tmp = tmp->next) |
2565 |
{ |
2566 |
tot_frequency += tmp->frequency; |
2567 |
} |
2568 |
|
2569 |
result_size = sizeof(RESULT) + ((tot_frequency - 1) * sizeof(int)); |
2570 |
newnode = (RESULT *) Mem_ZoneAlloc(sw->Search->resultSearchZone, result_size ); |
2571 |
memset( newnode, 0, result_size ); |
2572 |
|
2573 |
newnode->fi.filenum = newnode->filenum = filenum; |
2574 |
newnode->rank = 0; |
2575 |
newnode->tfrequency = 0; |
2576 |
newnode->frequency = tot_frequency; |
2577 |
newnode->indexf = start->indexf; |
2578 |
|
2579 |
for(tmp = start, pos_off = 0; tmp!=rp; tmp = tmp->next) |
2580 |
{ |
2581 |
/* Compute rank if not yet computed */ |
2582 |
if(tmp->rank == -1) |
2583 |
tmp->rank = getrank( sw, tmp->frequency, tmp->tfrequency, tmp->posdata, tmp->indexf, tmp->filenum ); |
2584 |
|
2585 |
newnode->rank += tmp->rank; |
2586 |
if (tmp->frequency) |
2587 |
{ |
2588 |
CopyPositions(newnode->posdata, pos_off, tmp->posdata, 0, tmp->frequency); |
2589 |
pos_off += tmp->frequency; |
2590 |
} |
2591 |
|
2592 |
} |
2593 |
/* Add at the end of new_results_list */ |
2594 |
if(!new_results_list) |
2595 |
{ |
2596 |
new_results_list = newResultsList(sw); |
2597 |
} |
2598 |
addResultToList(new_results_list,newnode); |
2599 |
/* Sort positions */ |
2600 |
swish_qsort(newnode->posdata,newnode->frequency,sizeof(int),&icomp_posdata); |
2601 |
} |
2602 |
if(rp) |
2603 |
filenum = rp->filenum; |
2604 |
start = rp; |
2605 |
} |
2606 |
if(!rp) |
2607 |
break; |
2608 |
rp = next; |
2609 |
} |
2610 |
} |
2611 |
/* Sort results by filenum and return */ |
2612 |
return sortresultsbyfilenum(new_results_list); |
2613 |
} |
2614 |
|