1 |
/* |
2 |
$Id: proplimit.c,v 1.32 2002/05/26 15:38:02 whmoseley Exp $ |
3 |
** |
4 |
** This program and library is free software; you can redistribute it and/or |
5 |
** modify it under the terms of the GNU (Library) General Public License |
6 |
** as published by the Free Software Foundation; either version 2 |
7 |
** of the License, or any later version. |
8 |
** |
9 |
** This program is distributed in the hope that it will be useful, |
10 |
** but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 |
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 |
** GNU (Library) General Public License for more details. |
13 |
** |
14 |
** You should have received a copy of the GNU (Library) General Public License |
15 |
** along with this program; if not, write to the Free Software |
16 |
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
17 |
** |
18 |
** module to limit within a range of properties |
19 |
** Created June 10, 2001 - moseley |
20 |
** |
21 |
*/ |
22 |
|
23 |
#include "swish.h" |
24 |
#include "string.h" |
25 |
#include "mem.h" |
26 |
#include "merge.h" // why is this needed for docprop.h??? |
27 |
#include "docprop.h" |
28 |
#include "index.h" |
29 |
#include "metanames.h" |
30 |
#include "compress.h" |
31 |
#include "error.h" |
32 |
#include "db.h" |
33 |
#include "result_sort.h" |
34 |
#include "swish_qsort.h" |
35 |
#include "proplimit.h" |
36 |
|
37 |
|
38 |
// #define DEBUGLIMIT |
39 |
|
40 |
/*==================== These should be in other modules ================*/ |
41 |
|
42 |
/* Should be in docprop.c */ |
43 |
|
44 |
/******************************************************************* |
45 |
* Fetch a doc's properties by file number and metaID |
46 |
* |
47 |
* Call with: |
48 |
* *sw |
49 |
* *indexf |
50 |
* filenum |
51 |
* metaID |
52 |
* |
53 |
* Returns: |
54 |
* pointer to a docPropertyEntry or NULL if not found |
55 |
* |
56 |
********************************************************************/ |
57 |
|
58 |
static propEntry *GetPropertyByFile( SWISH *sw, IndexFILE *indexf, int filenum, int metaID ) |
59 |
{ |
60 |
propEntry *d; |
61 |
FileRec fi; |
62 |
memset(&fi, 0, sizeof( FileRec )); |
63 |
fi.filenum = filenum; |
64 |
|
65 |
|
66 |
d = ReadSingleDocPropertiesFromDisk(sw, indexf, &fi, metaID, MAX_SORT_STRING_LEN ); |
67 |
freefileinfo(&fi); |
68 |
|
69 |
return d; |
70 |
} |
71 |
|
72 |
#ifdef DEBUGLIMIT |
73 |
/* Debug helper: print a property's bytes as text, followed by its length.
 *
 * The bytes are printed directly with a bounded "%.*s" instead of being
 * copied into a fixed 1000-byte stack buffer, so a property longer than
 * the old buffer can no longer overflow it.  As before, the text stops at
 * the first NUL byte inside the value.
 */
static void printdocprop( propEntry *d )
{
    int len = (int)d->propLen;

    /* a negative precision makes printf ignore the bound altogether */
    if ( len < 0 )
        len = 0;

    printf("%.*s (%d)", len, (const char *)d->propValue, d->propLen );
}
85 |
|
86 |
/* Debug helper: fetch property metaID of file filenum and print it, or
 * print a notice when the file has no such property. */
static void printfileprop( SWISH *sw, IndexFILE *indexf, int filenum, int metaID )
{
    propEntry *prop = GetPropertyByFile( sw, indexf, filenum, metaID );

    if ( !prop )
        printf("File %d does not have a property for metaID %d", filenum, metaID );
    else
        printdocprop( prop );

    /* NOTE(review): also reached with prop == NULL -- assumes freeProperty() accepts NULL; confirm */
    freeProperty( prop );
}
97 |
#endif |
98 |
|
99 |
|
100 |
|
101 |
|
102 |
/*==============================================================*/ |
103 |
/* typedefs and structures */ |
104 |
/*==============================================================*/ |
105 |
|
106 |
/* One row of the table used for inverting the metaEntry->sorted_data array */
struct LOOKUP_TABLE
{
    int             filenum;    /* file number of this row (filled in as index + 1) */
    unsigned long   sort;       /* value copied from sorted_data; later overwritten with an in-range flag (0/1) */
};
typedef struct LOOKUP_TABLE LOOKUP_TABLE;
112 |
|
113 |
|
114 |
/* One limit supplied with the query; the limits are kept as a singly linked list */
struct PARAMS
{
    struct PARAMS  *next;        /* next limit in the list, NULL for the last one */
    unsigned char  *propname;    /* name of the property being limited */
    unsigned char  *lowrange;    /* low end of the range as text (may be "<=" or ">=") */
    unsigned char  *highrange;   /* high end of the range as text */
};
typedef struct PARAMS PARAMS;
122 |
|
123 |
|
124 |
|
125 |
|
126 |
struct MOD_PropLimit |
127 |
{ |
128 |
PARAMS *params; /* parameter */ |
129 |
}; |
130 |
|
131 |
|
132 |
|
133 |
/*==============================================================*/ |
134 |
/* Code */ |
135 |
/*==============================================================*/ |
136 |
|
137 |
/* Allocate the module's state, start it with an empty list of limits
 * and attach it to the SWISH handle. */
void initModule_PropLimit (SWISH *sw)
{
    struct MOD_PropLimit *module = (struct MOD_PropLimit *) emalloc( sizeof(struct MOD_PropLimit) );

    module->params = NULL;
    sw->PropLimit = module;
}
146 |
|
147 |
/* Free every stored limit (its three strings and the list node itself),
 * leaving the module with an empty list. */
void ClearLimitParameter (SWISH *sw)
{
    struct MOD_PropLimit *module = sw->PropLimit;
    PARAMS *node;
    PARAMS *following;

    for ( node = module->params; node; node = following )
    {
        /* remember the successor before the node is released */
        following = node->next;

        efree( node->propname );
        efree( node->lowrange );
        efree( node->highrange );
        efree( node );
    }

    module->params = NULL;
}
161 |
|
162 |
|
163 |
|
164 |
/* Tear the module down: release all stored limits, then the module
 * state itself, and detach it from the SWISH handle. */
void freeModule_PropLimit (SWISH *sw)
{
    struct MOD_PropLimit *module;

    ClearLimitParameter( sw );

    module = sw->PropLimit;
    sw->PropLimit = NULL;
    efree( module );
}
171 |
|
172 |
|
173 |
/******************************************************************* |
174 |
* Stores strings away for later processing |
175 |
* called from someplace? |
176 |
* |
177 |
* Call with: |
178 |
* Three strings, first must be metaname. |
179 |
* |
180 |
* Returns: |
181 |
* returns false (0) on failure |
182 |
* pointer to a PARAMS |
183 |
* errors do not return (doesn't do many checks) |
184 |
* |
185 |
* ToDo: |
186 |
* Error checking, and maybe pass in a StringList |
187 |
* |
188 |
********************************************************************/ |
189 |
int SetLimitParameter(SWISH *sw, char *propertyname, char *low, char *hi) |
190 |
{ |
191 |
PARAMS *newparam; |
192 |
PARAMS *params; |
193 |
struct MOD_PropLimit *self = sw->PropLimit; |
194 |
|
195 |
|
196 |
/* Currently, can only limit by one property -- so check that one hasn't already been used */ |
197 |
for ( params = self->params; params && (strcmp( (char *)params->propname, propertyname ) != 0); params = (PARAMS *)params->next); |
198 |
if ( params ) |
199 |
{ |
200 |
set_progerr( PROP_LIMIT_ERROR, sw, "Property '%s' is already limited", propertyname ); |
201 |
return 0; |
202 |
} |
203 |
|
204 |
|
205 |
|
206 |
newparam = emalloc( sizeof( PARAMS ) ); |
207 |
|
208 |
newparam->propname = (unsigned char *)estrdup( propertyname ); |
209 |
newparam->lowrange = (unsigned char *)estrdup( low ); |
210 |
newparam->highrange = (unsigned char *)estrdup( hi ); |
211 |
|
212 |
params = self->params; |
213 |
|
214 |
/* put at head of list */ |
215 |
self->params = newparam; |
216 |
newparam->next = (struct PARAMS *)params; |
217 |
|
218 |
return 1; |
219 |
|
220 |
} |
221 |
|
222 |
|
223 |
|
224 |
/******************************************************************* |
225 |
* This compares the user supplied value with a file's property |
226 |
* The file's property is looked up and then Compare_Properties is called |
227 |
* |
228 |
* Call with: |
229 |
* *SWISH |
230 |
* *indexf |
231 |
* *propEntry key - compare key |
232 |
* *LOOKUP_TABLE - element containing file number |
233 |
* |
234 |
* Returns: |
235 |
* |
236 |
********************************************************************/ |
237 |
static int test_prop( SWISH *sw, IndexFILE *indexf, struct metaEntry *meta_entry, propEntry *key, LOOKUP_TABLE *sort_array) |
238 |
{ |
239 |
propEntry *fileprop; |
240 |
int cmp_value; |
241 |
|
242 |
#ifdef DEBUGLIMIT |
243 |
{ |
244 |
char *p = DecodeDocProperty( meta_entry, key ); |
245 |
printf("test_prop comparing '%s' cmp '%s' with ", meta_entry->metaName, p); |
246 |
efree( p ); |
247 |
} |
248 |
#endif |
249 |
|
250 |
|
251 |
|
252 |
if ( !(fileprop = GetPropertyByFile( sw, indexf, sort_array->filenum, meta_entry->metaID )) ) |
253 |
{ |
254 |
#ifdef DEBUGLIMIT |
255 |
printf("(no prop found for filenum %d) - return +1\n", sort_array->filenum ); |
256 |
#endif |
257 |
|
258 |
/* No property found, assume it's very, very, small */ |
259 |
return +1; |
260 |
} |
261 |
|
262 |
#ifdef DEBUGLIMIT |
263 |
{ |
264 |
char *p = DecodeDocProperty( meta_entry, fileprop ); |
265 |
int i = Compare_Properties( meta_entry, key, fileprop ); |
266 |
printf("'%s' returning %d\n", p, i ); |
267 |
efree( p ); |
268 |
} |
269 |
#endif |
270 |
|
271 |
|
272 |
cmp_value = Compare_Properties( meta_entry, key, fileprop ); |
273 |
freeProperty( fileprop ); |
274 |
return cmp_value; |
275 |
|
276 |
} |
277 |
|
278 |
|
279 |
|
280 |
|
281 |
/************************************************************************ |
282 |
* Adapted from: msdn, I believe... |
283 |
* |
284 |
* Call with: |
285 |
* See below |
286 |
* |
287 |
* Returns: |
288 |
* Exact match, true (but could be more than one match location |
289 |
* Between two, returns false and the lower position |
290 |
* Below list, returns false and -1 |
291 |
* Above list, return false and numelements (one past end of array) |
292 |
* |
293 |
* ToDo: |
294 |
* Check for out of bounds on entry as that may be reasonably common |
295 |
* |
296 |
***************************************************************************/ |
297 |
|
298 |
static int binary_search( |
299 |
SWISH *sw, // needed to lookup a file entry |
300 |
IndexFILE *indexf, // |
301 |
LOOKUP_TABLE *sort_array, // table to search through |
302 |
int numelements, // size of table |
303 |
propEntry *key, // property to compare against |
304 |
struct metaEntry *meta_entry, // associated meta entry (for metaType) |
305 |
int *result, // result is stored here |
306 |
int direction, // looking up (positive) looking down (negative) |
307 |
int *exact_match) // last exact match found |
308 |
{ |
309 |
int low = 0; |
310 |
int high = numelements - 1; |
311 |
int num = numelements; |
312 |
int mid; |
313 |
int cmp; |
314 |
unsigned int half; |
315 |
|
316 |
*exact_match = -1; |
317 |
|
318 |
#ifdef DEBUGLIMIT |
319 |
printf("\nbinary_search looking for %s entry\n", ( direction > 0 ? "high" : "low" ) ); |
320 |
#endif |
321 |
|
322 |
while ( low <= high ) |
323 |
{ |
324 |
if ( (half = num / 2) ) |
325 |
{ |
326 |
mid = low + (num & 1 ? half : half - 1); |
327 |
|
328 |
|
329 |
if ( (cmp = test_prop( sw, indexf, meta_entry, key, &sort_array[mid] )) == 0 ) |
330 |
{ |
331 |
*exact_match = mid; // exact match |
332 |
cmp = direction; // but still look for the lowest/highest exact match. |
333 |
} |
334 |
|
335 |
if ( cmp < 0 ) |
336 |
{ |
337 |
high = mid - 1; |
338 |
num = (num & 1 ? half : half - 1); |
339 |
} |
340 |
|
341 |
else // cmp > 0 |
342 |
{ |
343 |
low = mid + 1; |
344 |
num = half; |
345 |
} |
346 |
} |
347 |
else if (num) |
348 |
{ |
349 |
if( (cmp = test_prop( sw, indexf, meta_entry, key, &sort_array[low] )) ==0) |
350 |
{ |
351 |
*result = low; |
352 |
return 1; |
353 |
} |
354 |
if ( cmp < 0 ) // this breaks need another compare |
355 |
{ |
356 |
/* less than current, but is is greater */ |
357 |
if ( low > 0 && (test_prop( sw, indexf, meta_entry, key, &sort_array[low-1] ) < 0)) |
358 |
*result = low - 1; |
359 |
else |
360 |
*result = low; |
361 |
return 0; |
362 |
} |
363 |
else |
364 |
{ |
365 |
*result = low + 1; |
366 |
return 0; |
367 |
} |
368 |
} |
369 |
else // if !num |
370 |
{ |
371 |
/* I can't think of a case for this to match?? */ |
372 |
progwarn("Binary Sort issue - please report to swish-e list"); |
373 |
*result = -1; |
374 |
return 0; |
375 |
} |
376 |
} |
377 |
*result = low; // was high, but wasn't returning expected results |
378 |
return 0; |
379 |
} |
380 |
|
381 |
|
382 |
/******************************************************************* |
383 |
* This takes a *sort_array and the low/hi range of limits and marks |
384 |
* which files are in that range |
385 |
* |
386 |
* Call with: |
387 |
* pointer to SWISH |
388 |
* pointer to the IndexFile |
389 |
* pointer to the LOOKUP_TABLE |
390 |
* *metaEntry |
391 |
* PARAMS (low/hi range) |
392 |
* |
393 |
* Returns: |
394 |
* true if any in range, otherwise false |
395 |
* |
396 |
********************************************************************/ |
397 |
static int find_prop(SWISH *sw, IndexFILE *indexf, LOOKUP_TABLE *sort_array, int num, struct metaEntry *meta_entry ) |
398 |
{ |
399 |
int low, high, j; |
400 |
int foundLo, foundHi; |
401 |
int some_selected = 0; |
402 |
int exact_match; |
403 |
|
404 |
|
405 |
if ( !meta_entry->loPropRange ) |
406 |
{ |
407 |
foundLo = 1; /* signal exact match */ |
408 |
low = 0; /* and start at beginning */ |
409 |
} |
410 |
else |
411 |
{ |
412 |
foundLo = binary_search(sw, indexf, sort_array, num, meta_entry->loPropRange, meta_entry, &low, -1, &exact_match); |
413 |
|
414 |
if ( !foundLo && exact_match >= 0 ) |
415 |
{ |
416 |
low = exact_match; |
417 |
foundLo = 1; /* mark as an exact match */ |
418 |
} |
419 |
} |
420 |
|
421 |
|
422 |
|
423 |
if ( !meta_entry->hiPropRange ) |
424 |
{ |
425 |
foundHi = 1; /* signal exact match */ |
426 |
high = num -1; /* and end very end */ |
427 |
} |
428 |
else |
429 |
{ |
430 |
foundHi = binary_search(sw, indexf, sort_array, num, meta_entry->hiPropRange, meta_entry, &high, +1, &exact_match); |
431 |
|
432 |
if ( !foundHi && exact_match >= 0 ) |
433 |
{ |
434 |
high = exact_match; |
435 |
foundHi = 1; |
436 |
} |
437 |
} |
438 |
|
439 |
#ifdef DEBUGLIMIT |
440 |
printf("Returned range %d - %d (exact: %d %d) cnt: %u\n", low, high, foundLo, foundHi, num ); |
441 |
#endif |
442 |
|
443 |
/* both inbetween two adjacent entries */ |
444 |
if ( !foundLo && !foundHi && low == high ) |
445 |
{ |
446 |
for ( j = 0; j < num; j++ ) |
447 |
sort_array[j].sort = 0; |
448 |
|
449 |
return 0; |
450 |
} |
451 |
|
452 |
|
453 |
/* now, if not an exact match for the high range, decrease high by one |
454 |
* because high is pointing to the *next* higher element, which is TOO high |
455 |
*/ |
456 |
|
457 |
if ( !foundHi && low < high ) |
458 |
high--; |
459 |
|
460 |
|
461 |
for ( j = 0; j < num; j++ ) |
462 |
{ |
463 |
if ( j >= low && j <= high ) |
464 |
{ |
465 |
sort_array[j].sort = 1; |
466 |
some_selected++; |
467 |
} |
468 |
else |
469 |
sort_array[j].sort = 0; |
470 |
} |
471 |
|
472 |
return some_selected; |
473 |
|
474 |
} |
475 |
|
476 |
/* These sort the LOOKUP_TABLE */ |
477 |
int sortbysort(const void *s1, const void *s2) |
478 |
{ |
479 |
LOOKUP_TABLE *a = (LOOKUP_TABLE *)s1; |
480 |
LOOKUP_TABLE *b = (LOOKUP_TABLE *)s2; |
481 |
|
482 |
return a->sort - b->sort; |
483 |
} |
484 |
|
485 |
int sortbyfile(const void *s1, const void *s2) |
486 |
{ |
487 |
LOOKUP_TABLE *a = (LOOKUP_TABLE *)s1; |
488 |
LOOKUP_TABLE *b = (LOOKUP_TABLE *)s2; |
489 |
|
490 |
return a->filenum - b->filenum; |
491 |
} |
492 |
|
493 |
|
494 |
/******************************************************************* |
495 |
* This creates the lookup table for the range of values selected |
496 |
* and stores it in the MetaEntry |
497 |
* |
498 |
* Call with: |
499 |
* pointer to SWISH |
500 |
* pointer to the IndexFile |
501 |
* *metaEntry |
502 |
* PARAMS (low/hi range) |
503 |
* |
504 |
* Returns: |
505 |
* true if any were marked as found |
506 |
* false means no match |
507 |
* |
508 |
********************************************************************/ |
509 |
|
510 |
static int create_lookup_array( SWISH *sw, IndexFILE *indexf, struct metaEntry *meta_entry ) |
511 |
{ |
512 |
LOOKUP_TABLE *sort_array; |
513 |
int i; |
514 |
int size = indexf->header.totalfiles; |
515 |
int some_found; |
516 |
|
517 |
/* Now do the work of creating the lookup table */ |
518 |
|
519 |
/* Create memory -- probably could do this once and use it over and over */ |
520 |
sort_array = (LOOKUP_TABLE *) emalloc( size * sizeof(LOOKUP_TABLE) ); |
521 |
|
522 |
/* copy in the data to the sort array */ |
523 |
for (i = 0; i < size; i++) |
524 |
{ |
525 |
sort_array[i].filenum = i+1; |
526 |
sort_array[i].sort = meta_entry->sorted_data[i]; |
527 |
} |
528 |
|
529 |
|
530 |
/* now sort by it's sort value */ |
531 |
swish_qsort(sort_array, size, sizeof(LOOKUP_TABLE), &sortbysort); |
532 |
|
533 |
/* This marks in the new array which ones are in range */ |
534 |
some_found = find_prop( sw, indexf, sort_array, size, meta_entry ); |
535 |
|
536 |
|
537 |
#ifdef DEBUGLIMIT |
538 |
for (i = 0; i < size; i++) |
539 |
{ |
540 |
printf("%d File: %d Sort: %lu : ", i, sort_array[i].filenum, sort_array[i].sort ); |
541 |
printfileprop( sw, indexf, sort_array[i].filenum, meta_entry->metaID ); |
542 |
printf("\n"); |
543 |
} |
544 |
#endif |
545 |
|
546 |
/* If everything in range, then don't even bother creating the lookup array */ |
547 |
if ( some_found && sort_array[0].sort && sort_array[size-1].sort ) |
548 |
{ |
549 |
efree( sort_array ); |
550 |
return 1; |
551 |
} |
552 |
|
553 |
|
554 |
/* sort back by file number */ |
555 |
swish_qsort(sort_array, size, sizeof(LOOKUP_TABLE), &sortbyfile); |
556 |
|
557 |
|
558 |
/* allocate a place to save the lookup table */ |
559 |
meta_entry->inPropRange = (int *) emalloc( size * sizeof(int) ); |
560 |
|
561 |
/* populate the array in the metaEntry */ |
562 |
for (i = 0; i < size; i++) |
563 |
meta_entry->inPropRange[i] = sort_array[i].sort; |
564 |
|
565 |
efree( sort_array ); |
566 |
|
567 |
return some_found; |
568 |
|
569 |
} |
570 |
|
571 |
/******************************************************************* |
572 |
* Encode parameters specified on -L command line into two propEntry's |
573 |
* which can be used to compare with a file's property |
574 |
* |
575 |
* Call with: |
576 |
* *metaEntry -> current meta entry |
577 |
* *PARAMS -> associated parameters |
578 |
* |
579 |
* Returns: |
580 |
* True if a range was found, otherwise false. |
581 |
* sets sw->lasterror on failure |
582 |
* |
583 |
* |
584 |
********************************************************************/ |
585 |
static int params_to_props( SWISH *sw, struct metaEntry *meta_entry, PARAMS *param ) |
586 |
{ |
587 |
int error_flag; |
588 |
unsigned char *lowrange = param->lowrange; |
589 |
unsigned char *highrange = param->highrange; |
590 |
|
591 |
/* properties do not have leading white space */ |
592 |
|
593 |
|
594 |
/* Allow <= and >= in limits. A NULL property means very low/very high */ |
595 |
|
596 |
if ( (strcmp( "<=", (char *)lowrange ) == 0) ) |
597 |
{ |
598 |
meta_entry->loPropRange = NULL; /* indicates very small */ |
599 |
meta_entry->hiPropRange = CreateProperty( meta_entry, highrange, strlen( (char *)highrange ), 0, &error_flag ); |
600 |
} |
601 |
|
602 |
else if ( (strcmp( ">=", (char *)lowrange ) == 0) ) |
603 |
{ |
604 |
meta_entry->loPropRange = CreateProperty( meta_entry, highrange, strlen( (char *)highrange ), 0, &error_flag ); |
605 |
meta_entry->hiPropRange = NULL; /* indicates very big */ |
606 |
} |
607 |
|
608 |
else |
609 |
{ |
610 |
meta_entry->loPropRange = CreateProperty( meta_entry, lowrange, strlen( (char *)lowrange ), 0, &error_flag ); |
611 |
meta_entry->hiPropRange = CreateProperty( meta_entry, highrange, strlen( (char *)highrange ), 0, &error_flag ); |
612 |
|
613 |
|
614 |
if ( !(meta_entry->loPropRange && meta_entry->hiPropRange) ) |
615 |
{ |
616 |
set_progerr(PROP_LIMIT_ERROR, sw, "Failed to set range for property '%s' values '%s' and '%s'", meta_entry->metaName, lowrange, highrange ); |
617 |
return 0; |
618 |
} |
619 |
|
620 |
/* Validate range */ |
621 |
|
622 |
if ( Compare_Properties( meta_entry, meta_entry->loPropRange, meta_entry->hiPropRange ) > 0 ) |
623 |
{ |
624 |
set_progerr(PROP_LIMIT_ERROR, sw, "Property '%s' value '%s' must be <= '%s'", meta_entry->metaName, lowrange, highrange ); |
625 |
return 0; |
626 |
} |
627 |
} |
628 |
|
629 |
|
630 |
return ( meta_entry->loPropRange || meta_entry->hiPropRange ); |
631 |
} |
632 |
|
633 |
|
634 |
/******************************************************************* |
635 |
* Scans all the meta entries to see if any are limited, and if so, creates the lookup array |
636 |
* |
637 |
* Call with: |
638 |
* pointer to SWISH |
639 |
* poinger to an IndexFile |
640 |
* user supplied limit parameters |
641 |
* |
642 |
* Returns: |
643 |
* false if any arrays are all zero |
644 |
* no point in even searching. |
645 |
* (meaning that no possible matches exist) |
646 |
* but also return false on errors, caller must check sw->lasterror |
647 |
* |
648 |
* ToDo: |
649 |
* This ONLY works if the limits are absolute -- that is |
650 |
* that you can't OR limits. Will need fixing at some point |
651 |
* |
652 |
********************************************************************/ |
653 |
static int load_index( SWISH *sw, IndexFILE *indexf, PARAMS *params ) |
654 |
{ |
655 |
struct metaEntry *meta_entry; |
656 |
PARAMS *curp; |
657 |
int found; |
658 |
|
659 |
|
660 |
curp = params; |
661 |
|
662 |
/* Look at each parameter */ |
663 |
for (curp = params; curp; curp = curp->next ) |
664 |
{ |
665 |
found = 0; |
666 |
|
667 |
if ( !(meta_entry = getPropNameByName( &indexf->header, (char *)curp->propname ))) |
668 |
{ |
669 |
set_progerr( PROP_LIMIT_ERROR, sw, "Specified limit name '%s' is not a PropertyName", curp->propname ); |
670 |
return 0; |
671 |
} |
672 |
|
673 |
|
674 |
/* This, of course, is not the truth -- but the only slightly useful would be filenum */ |
675 |
/* indexfile can be specified on the command line, rank and reccount is not really known */ |
676 |
|
677 |
if ( is_meta_internal( meta_entry ) ) |
678 |
{ |
679 |
set_progerr( PROP_LIMIT_ERROR, sw, "Cannot limit by swish result property '%s'", curp->propname ); |
680 |
return 0; |
681 |
} |
682 |
|
683 |
|
684 |
/* see if array has already been allocated (cached) */ |
685 |
if ( meta_entry->inPropRange ) |
686 |
continue; |
687 |
|
688 |
|
689 |
/* Encode the parameters into properties for comparing, and store in the metaEntry */ |
690 |
|
691 |
if ( !params_to_props( sw, meta_entry, curp ) ) |
692 |
{ |
693 |
if ( sw->lasterror ) // check for failure |
694 |
return 0; |
695 |
|
696 |
continue; /* This means that it failed to set a range */ |
697 |
} |
698 |
|
699 |
|
700 |
/* load the sorted_data array, if not already done */ |
701 |
if ( !meta_entry->sorted_data ) |
702 |
if( !LoadSortedProps( sw, indexf, meta_entry ) ) |
703 |
continue; /* thus it will sort manually without pre-sorted index */ |
704 |
|
705 |
|
706 |
/* Now create the lookup table in the metaEntry */ |
707 |
/* A false return means that an array was built but it was all zero */ |
708 |
/* No need to check anything else at this time, since can only AND -L options */ |
709 |
/* i.e. = return No Results right away */ |
710 |
/* This allows search.c to bail out early */ |
711 |
|
712 |
if ( !create_lookup_array( sw, indexf, meta_entry ) ) |
713 |
return 0; |
714 |
} |
715 |
|
716 |
return 1; // ** flag that it's ok to continue the search. |
717 |
|
718 |
} |
719 |
|
720 |
/******************************************************************* |
721 |
* Prepares the lookup tables for every index |
722 |
* |
723 |
* Call with: |
724 |
* pointer to SWISH |
725 |
* |
726 |
* Returns: |
727 |
* true if ok to continue search |
728 |
* false indicates that a lookup array was created, but it is all zero |
729 |
* indicating there will never be a match |
730 |
* ( this falls apart if allow OR limits ) |
731 |
* |
732 |
* ToDo: |
733 |
* How to deal with non-presorted properties? |
734 |
* |
735 |
********************************************************************/ |
736 |
|
737 |
int Prepare_PropLookup(SWISH *sw ) |
738 |
{ |
739 |
IndexFILE *indexf; |
740 |
struct MOD_PropLimit *self = sw->PropLimit; |
741 |
int total_indexes = 0; |
742 |
int total_no_docs = 0; |
743 |
|
744 |
|
745 |
|
746 |
/* nothing to limit by */ |
747 |
if ( !self->params ) |
748 |
return 1; |
749 |
|
750 |
|
751 |
|
752 |
|
753 |
/* process each index file */ |
754 |
for( indexf = sw->indexlist; indexf; indexf = indexf->next) |
755 |
{ |
756 |
total_indexes++; |
757 |
|
758 |
if ( !load_index( sw, indexf, self->params ) ) |
759 |
{ |
760 |
if ( sw->lasterror ) // check for error |
761 |
return 0; |
762 |
|
763 |
total_no_docs++; |
764 |
} |
765 |
} |
766 |
|
767 |
/* if all indexes are all no docs within limits, then return false */ |
768 |
return total_indexes != total_no_docs; |
769 |
|
770 |
} |
771 |
|
772 |
/******************************************************************* |
773 |
* Removes results that don't fit within the limit |
774 |
* |
775 |
* Call with: |
776 |
* *SWISH - to read a file entry if pre-sorted data not available |
777 |
* IndexFILE = current index file |
778 |
* File number |
779 |
* |
780 |
* Returns |
781 |
* true if file should NOT be included in results |
782 |
* |
783 |
* |
784 |
********************************************************************/ |
785 |
int LimitByProperty( SWISH *sw, IndexFILE *indexf, int filenum ) |
786 |
{ |
787 |
int j; |
788 |
struct metaEntry *meta_entry; |
789 |
for ( j = 0; j < indexf->header.metaCounter; j++) |
790 |
{ |
791 |
/* Look at all the properties */ |
792 |
|
793 |
/* Should cache this in the index file, or is this fast enough? */ |
794 |
if ( !(meta_entry = getPropNameByID( &indexf->header, indexf->header.metaEntryArray[j]->metaID ))) |
795 |
continue; /* continue if it's not a property */ |
796 |
|
797 |
/* anything to check? */ |
798 |
if ( !meta_entry->loPropRange && !meta_entry->hiPropRange ) |
799 |
continue; |
800 |
|
801 |
|
802 |
|
803 |
|
804 |
/* If inPropRange is allocated then there is an array for limiting already created from the presorted data */ |
805 |
|
806 |
if ( meta_entry->inPropRange ) |
807 |
return !meta_entry->inPropRange[filenum-1]; |
808 |
|
809 |
|
810 |
|
811 |
|
812 |
/* Otherwise, if either range is set, then use a manual lookup of the property */ |
813 |
|
814 |
{ |
815 |
int limit = 0; |
816 |
propEntry *prop = GetPropertyByFile( sw, indexf, filenum, meta_entry->metaID ); |
817 |
|
818 |
/* Return true (i.e. limit) if the file's prop is less than the low range */ |
819 |
/* or if its property is greater than the high range */ |
820 |
if ( |
821 |
(Compare_Properties( meta_entry, prop, meta_entry->loPropRange ) < 0 ) || |
822 |
(meta_entry->hiPropRange && (Compare_Properties( meta_entry, prop, meta_entry->hiPropRange ) > 0 )) |
823 |
) |
824 |
limit = 1; |
825 |
|
826 |
freeProperty( prop ); |
827 |
/* If limit by this property, then return to limit right away */ |
828 |
if ( limit ) |
829 |
return 1; |
830 |
} |
831 |
} |
832 |
|
833 |
return 0; /* don't limit by default */ |
834 |
} |
835 |
|
836 |
/******************************************************************* |
837 |
* Checks to see if ANY -L parameters were set |
838 |
* |
839 |
* This is just to avoid processing each result in the result list. |
840 |
* |
841 |
********************************************************************/ |
842 |
int is_prop_limit_used( SWISH *sw ) |
843 |
{ |
844 |
struct MOD_PropLimit *self = sw->PropLimit; |
845 |
|
846 |
return self->params ? 1 : 0; |
847 |
} |
848 |
|