1 |
adcroft |
1.1 |
/* |
2 |
|
|
$Id: proplimit.c,v 1.32 2002/05/26 15:38:02 whmoseley Exp $ |
3 |
|
|
** |
4 |
|
|
** This program and library is free software; you can redistribute it and/or |
5 |
|
|
** modify it under the terms of the GNU (Library) General Public License |
6 |
|
|
** as published by the Free Software Foundation; either version 2 |
7 |
|
|
** of the License, or any later version. |
8 |
|
|
** |
9 |
|
|
** This program is distributed in the hope that it will be useful, |
10 |
|
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 |
|
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 |
|
|
** GNU (Library) General Public License for more details. |
13 |
|
|
** |
14 |
|
|
** You should have received a copy of the GNU (Library) General Public License |
15 |
|
|
** along with this program; if not, write to the Free Software |
16 |
|
|
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
17 |
|
|
** |
18 |
|
|
** module to limit within a range of properties |
19 |
|
|
** Created June 10, 2001 - moseley |
20 |
|
|
** |
21 |
|
|
*/ |
22 |
|
|
|
23 |
|
|
#include "swish.h" |
24 |
|
|
#include "string.h" |
25 |
|
|
#include "mem.h" |
26 |
|
|
#include "merge.h" // why is this needed for docprop.h??? |
27 |
|
|
#include "docprop.h" |
28 |
|
|
#include "index.h" |
29 |
|
|
#include "metanames.h" |
30 |
|
|
#include "compress.h" |
31 |
|
|
#include "error.h" |
32 |
|
|
#include "db.h" |
33 |
|
|
#include "result_sort.h" |
34 |
|
|
#include "swish_qsort.h" |
35 |
|
|
#include "proplimit.h" |
36 |
|
|
|
37 |
|
|
|
38 |
|
|
// #define DEBUGLIMIT |
39 |
|
|
|
40 |
|
|
/*==================== These should be in other modules ================*/ |
41 |
|
|
|
42 |
|
|
/* Should be in docprop.c */ |
43 |
|
|
|
44 |
|
|
/******************************************************************* |
45 |
|
|
* Fetch a doc's properties by file number and metaID |
46 |
|
|
* |
47 |
|
|
* Call with: |
48 |
|
|
* *sw |
49 |
|
|
* *indexf |
50 |
|
|
* filenum |
51 |
|
|
* metaID |
52 |
|
|
* |
53 |
|
|
* Returns: |
54 |
|
|
* pointer to a docPropertyEntry or NULL if not found |
55 |
|
|
* |
56 |
|
|
********************************************************************/ |
57 |
|
|
|
58 |
|
|
static propEntry *GetPropertyByFile( SWISH *sw, IndexFILE *indexf, int filenum, int metaID ) |
59 |
|
|
{ |
60 |
|
|
propEntry *d; |
61 |
|
|
FileRec fi; |
62 |
|
|
memset(&fi, 0, sizeof( FileRec )); |
63 |
|
|
fi.filenum = filenum; |
64 |
|
|
|
65 |
|
|
|
66 |
|
|
d = ReadSingleDocPropertiesFromDisk(sw, indexf, &fi, metaID, MAX_SORT_STRING_LEN ); |
67 |
|
|
freefileinfo(&fi); |
68 |
|
|
|
69 |
|
|
return d; |
70 |
|
|
} |
71 |
|
|
|
72 |
|
|
#ifdef DEBUGLIMIT |
73 |
|
|
/*******************************************************************
*   Debug helper (DEBUGLIMIT only): print a property's raw value
*   followed by its length in parentheses.
*
*   Call with:
*       *d - property to print (must not be NULL)
*
*   Notes:
*       The value used to be copied into a fixed char[1000] first, which
*       overflowed the stack for properties of 1000 bytes or more.
*       Printing with a precision of propLen emits the same text (up to
*       propLen bytes, stopping early at an embedded NUL) without any
*       intermediate buffer.
*
********************************************************************/
static void printdocprop( propEntry *d )
{
    printf("%.*s (%d)", (int)d->propLen, (const char *)d->propValue, (int)d->propLen );
}
85 |
|
|
|
86 |
|
|
/*******************************************************************
*   Debug helper (DEBUGLIMIT only): look up one property of one file
*   and print it, or print a note when the file has no such property.
*
*   Call with:
*       *sw, *indexf  - passed to GetPropertyByFile
*       filenum       - file number to look up
*       metaID        - property ID to look up
*
********************************************************************/
static void printfileprop( SWISH *sw, IndexFILE *indexf, int filenum, int metaID )
{
    propEntry *prop = GetPropertyByFile( sw, indexf, filenum, metaID );

    if ( !prop )
        printf("File %d does not have a property for metaID %d", filenum, metaID );
    else
        printdocprop( prop );

    /* freeProperty is called on both paths, i.e. also with a NULL pointer */
    freeProperty( prop );
}
97 |
|
|
#endif |
98 |
|
|
|
99 |
|
|
|
100 |
|
|
|
101 |
|
|
|
102 |
|
|
/*==============================================================*/ |
103 |
|
|
/* typedefs and structures */ |
104 |
|
|
/*==============================================================*/ |
105 |
|
|
|
106 |
|
|
/* One row of the table used for inverting the metaEntry->sorted_data array.
 * create_lookup_array fills `sort` with the presorted value of the file,
 * and find_prop later overwrites it with a 0/1 "in range" flag. */
struct LOOKUP_TABLE
{
    int             filenum;    /* file number (create_lookup_array stores index + 1) */
    unsigned long   sort;       /* sort value, later reused as the in-range flag */
};

typedef struct LOOKUP_TABLE LOOKUP_TABLE;
112 |
|
|
|
113 |
|
|
|
114 |
|
|
/* Singly linked list of the limit parameters supplied with the query.
 * Each node owns its three strings (see SetLimitParameter / ClearLimitParameter). */
typedef struct PARAMS PARAMS;

struct PARAMS
{
    PARAMS         *next;       /* next parameter in the list, or NULL */
    unsigned char  *propname;   /* name of the property being limited */
    unsigned char  *lowrange;   /* low end of the range (or "<=" / ">=") */
    unsigned char  *highrange;  /* high end of the range */
};
122 |
|
|
|
123 |
|
|
|
124 |
|
|
|
125 |
|
|
|
126 |
|
|
struct MOD_PropLimit |
127 |
|
|
{ |
128 |
|
|
PARAMS *params; /* parameter */ |
129 |
|
|
}; |
130 |
|
|
|
131 |
|
|
|
132 |
|
|
|
133 |
|
|
/*==============================================================*/ |
134 |
|
|
/* Code */ |
135 |
|
|
/*==============================================================*/ |
136 |
|
|
|
137 |
|
|
/* Allocate this module's state, start with an empty parameter list,
 * and attach it to the SWISH handle as sw->PropLimit. */
void initModule_PropLimit (SWISH *sw)
{
    struct MOD_PropLimit *module = (struct MOD_PropLimit *) emalloc( sizeof( *module ) );

    module->params = NULL;      /* no limits set yet */
    sw->PropLimit  = module;
}
146 |
|
|
|
147 |
|
|
/* Free every stored limit parameter (the node and its three strings)
 * and leave sw->PropLimit with an empty list. */
void ClearLimitParameter (SWISH *sw)
{
    struct MOD_PropLimit *self = sw->PropLimit;
    PARAMS *node;
    PARAMS *following;

    for ( node = self->params; node; node = following )
    {
        /* remember the successor before the node itself is released */
        following = (PARAMS *)node->next;

        efree( node->propname );
        efree( node->lowrange );
        efree( node->highrange );
        efree( node );
    }

    self->params = NULL;
}
161 |
|
|
|
162 |
|
|
|
163 |
|
|
|
164 |
|
|
/* Tear down this module: drop all stored limit parameters, then free
 * the module state itself and clear the pointer in the SWISH handle. */
void freeModule_PropLimit (SWISH *sw)
{
    /* the list must go first -- it hangs off the struct freed below */
    ClearLimitParameter( sw );

    efree( sw->PropLimit );
    sw->PropLimit = NULL;
}
171 |
|
|
|
172 |
|
|
|
173 |
|
|
/******************************************************************* |
174 |
|
|
* Stores strings away for later processing |
175 |
|
|
* called from someplace? |
176 |
|
|
* |
177 |
|
|
* Call with: |
178 |
|
|
* Three strings, first must be metaname. |
179 |
|
|
* |
180 |
|
|
* Returns: |
181 |
|
|
* returns false (0) on failure |
182 |
|
|
* pointer to a PARAMS |
183 |
|
|
* errors do not return (doesn't do many checks) |
184 |
|
|
* |
185 |
|
|
* ToDo: |
186 |
|
|
* Error checking, and maybe pass in a StringList |
187 |
|
|
* |
188 |
|
|
********************************************************************/ |
189 |
|
|
int SetLimitParameter(SWISH *sw, char *propertyname, char *low, char *hi) |
190 |
|
|
{ |
191 |
|
|
PARAMS *newparam; |
192 |
|
|
PARAMS *params; |
193 |
|
|
struct MOD_PropLimit *self = sw->PropLimit; |
194 |
|
|
|
195 |
|
|
|
196 |
|
|
/* Currently, can only limit by one property -- so check that one hasn't already been used */ |
197 |
|
|
for ( params = self->params; params && (strcmp( (char *)params->propname, propertyname ) != 0); params = (PARAMS *)params->next); |
198 |
|
|
if ( params ) |
199 |
|
|
{ |
200 |
|
|
set_progerr( PROP_LIMIT_ERROR, sw, "Property '%s' is already limited", propertyname ); |
201 |
|
|
return 0; |
202 |
|
|
} |
203 |
|
|
|
204 |
|
|
|
205 |
|
|
|
206 |
|
|
newparam = emalloc( sizeof( PARAMS ) ); |
207 |
|
|
|
208 |
|
|
newparam->propname = (unsigned char *)estrdup( propertyname ); |
209 |
|
|
newparam->lowrange = (unsigned char *)estrdup( low ); |
210 |
|
|
newparam->highrange = (unsigned char *)estrdup( hi ); |
211 |
|
|
|
212 |
|
|
params = self->params; |
213 |
|
|
|
214 |
|
|
/* put at head of list */ |
215 |
|
|
self->params = newparam; |
216 |
|
|
newparam->next = (struct PARAMS *)params; |
217 |
|
|
|
218 |
|
|
return 1; |
219 |
|
|
|
220 |
|
|
} |
221 |
|
|
|
222 |
|
|
|
223 |
|
|
|
224 |
|
|
/******************************************************************* |
225 |
|
|
* This compares the user supplied value with a file's property |
226 |
|
|
* The file's property is looked up and then Compare_Properties is called |
227 |
|
|
* |
228 |
|
|
* Call with: |
229 |
|
|
* *SWISH |
230 |
|
|
* *indexf |
231 |
|
|
* *propEntry key - compare key |
232 |
|
|
* *LOOKUP_TABLE - element containing file number |
233 |
|
|
* |
234 |
|
|
* Returns: |
235 |
|
|
* |
236 |
|
|
********************************************************************/ |
237 |
|
|
static int test_prop( SWISH *sw, IndexFILE *indexf, struct metaEntry *meta_entry, propEntry *key, LOOKUP_TABLE *sort_array) |
238 |
|
|
{ |
239 |
|
|
propEntry *fileprop; |
240 |
|
|
int cmp_value; |
241 |
|
|
|
242 |
|
|
#ifdef DEBUGLIMIT |
243 |
|
|
{ |
244 |
|
|
char *p = DecodeDocProperty( meta_entry, key ); |
245 |
|
|
printf("test_prop comparing '%s' cmp '%s' with ", meta_entry->metaName, p); |
246 |
|
|
efree( p ); |
247 |
|
|
} |
248 |
|
|
#endif |
249 |
|
|
|
250 |
|
|
|
251 |
|
|
|
252 |
|
|
if ( !(fileprop = GetPropertyByFile( sw, indexf, sort_array->filenum, meta_entry->metaID )) ) |
253 |
|
|
{ |
254 |
|
|
#ifdef DEBUGLIMIT |
255 |
|
|
printf("(no prop found for filenum %d) - return +1\n", sort_array->filenum ); |
256 |
|
|
#endif |
257 |
|
|
|
258 |
|
|
/* No property found, assume it's very, very, small */ |
259 |
|
|
return +1; |
260 |
|
|
} |
261 |
|
|
|
262 |
|
|
#ifdef DEBUGLIMIT |
263 |
|
|
{ |
264 |
|
|
char *p = DecodeDocProperty( meta_entry, fileprop ); |
265 |
|
|
int i = Compare_Properties( meta_entry, key, fileprop ); |
266 |
|
|
printf("'%s' returning %d\n", p, i ); |
267 |
|
|
efree( p ); |
268 |
|
|
} |
269 |
|
|
#endif |
270 |
|
|
|
271 |
|
|
|
272 |
|
|
cmp_value = Compare_Properties( meta_entry, key, fileprop ); |
273 |
|
|
freeProperty( fileprop ); |
274 |
|
|
return cmp_value; |
275 |
|
|
|
276 |
|
|
} |
277 |
|
|
|
278 |
|
|
|
279 |
|
|
|
280 |
|
|
|
281 |
|
|
/************************************************************************ |
282 |
|
|
* Adapted from: msdn, I believe... |
283 |
|
|
* |
284 |
|
|
* Call with: |
285 |
|
|
* See below |
286 |
|
|
* |
287 |
|
|
* Returns: |
288 |
|
|
* Exact match, true (but could be more than one match location |
289 |
|
|
* Between two, returns false and the lower position |
290 |
|
|
* Below list, returns false and -1 |
291 |
|
|
* Above list, return false and numelements (one past end of array) |
292 |
|
|
* |
293 |
|
|
* ToDo: |
294 |
|
|
* Check for out of bounds on entry as that may be reasonably common |
295 |
|
|
* |
296 |
|
|
***************************************************************************/ |
297 |
|
|
|
298 |
|
|
static int binary_search( |
299 |
|
|
SWISH *sw, // needed to lookup a file entry |
300 |
|
|
IndexFILE *indexf, // |
301 |
|
|
LOOKUP_TABLE *sort_array, // table to search through |
302 |
|
|
int numelements, // size of table |
303 |
|
|
propEntry *key, // property to compare against |
304 |
|
|
struct metaEntry *meta_entry, // associated meta entry (for metaType) |
305 |
|
|
int *result, // result is stored here |
306 |
|
|
int direction, // looking up (positive) looking down (negative) |
307 |
|
|
int *exact_match) // last exact match found |
308 |
|
|
{ |
309 |
|
|
int low = 0; |
310 |
|
|
int high = numelements - 1; |
311 |
|
|
int num = numelements; |
312 |
|
|
int mid; |
313 |
|
|
int cmp; |
314 |
|
|
unsigned int half; |
315 |
|
|
|
316 |
|
|
*exact_match = -1; |
317 |
|
|
|
318 |
|
|
#ifdef DEBUGLIMIT |
319 |
|
|
printf("\nbinary_search looking for %s entry\n", ( direction > 0 ? "high" : "low" ) ); |
320 |
|
|
#endif |
321 |
|
|
|
322 |
|
|
while ( low <= high ) |
323 |
|
|
{ |
324 |
|
|
if ( (half = num / 2) ) |
325 |
|
|
{ |
326 |
|
|
mid = low + (num & 1 ? half : half - 1); |
327 |
|
|
|
328 |
|
|
|
329 |
|
|
if ( (cmp = test_prop( sw, indexf, meta_entry, key, &sort_array[mid] )) == 0 ) |
330 |
|
|
{ |
331 |
|
|
*exact_match = mid; // exact match |
332 |
|
|
cmp = direction; // but still look for the lowest/highest exact match. |
333 |
|
|
} |
334 |
|
|
|
335 |
|
|
if ( cmp < 0 ) |
336 |
|
|
{ |
337 |
|
|
high = mid - 1; |
338 |
|
|
num = (num & 1 ? half : half - 1); |
339 |
|
|
} |
340 |
|
|
|
341 |
|
|
else // cmp > 0 |
342 |
|
|
{ |
343 |
|
|
low = mid + 1; |
344 |
|
|
num = half; |
345 |
|
|
} |
346 |
|
|
} |
347 |
|
|
else if (num) |
348 |
|
|
{ |
349 |
|
|
if( (cmp = test_prop( sw, indexf, meta_entry, key, &sort_array[low] )) ==0) |
350 |
|
|
{ |
351 |
|
|
*result = low; |
352 |
|
|
return 1; |
353 |
|
|
} |
354 |
|
|
if ( cmp < 0 ) // this breaks need another compare |
355 |
|
|
{ |
356 |
|
|
/* less than current, but is is greater */ |
357 |
|
|
if ( low > 0 && (test_prop( sw, indexf, meta_entry, key, &sort_array[low-1] ) < 0)) |
358 |
|
|
*result = low - 1; |
359 |
|
|
else |
360 |
|
|
*result = low; |
361 |
|
|
return 0; |
362 |
|
|
} |
363 |
|
|
else |
364 |
|
|
{ |
365 |
|
|
*result = low + 1; |
366 |
|
|
return 0; |
367 |
|
|
} |
368 |
|
|
} |
369 |
|
|
else // if !num |
370 |
|
|
{ |
371 |
|
|
/* I can't think of a case for this to match?? */ |
372 |
|
|
progwarn("Binary Sort issue - please report to swish-e list"); |
373 |
|
|
*result = -1; |
374 |
|
|
return 0; |
375 |
|
|
} |
376 |
|
|
} |
377 |
|
|
*result = low; // was high, but wasn't returning expected results |
378 |
|
|
return 0; |
379 |
|
|
} |
380 |
|
|
|
381 |
|
|
|
382 |
|
|
/******************************************************************* |
383 |
|
|
* This takes a *sort_array and the low/hi range of limits and marks |
384 |
|
|
* which files are in that range |
385 |
|
|
* |
386 |
|
|
* Call with: |
387 |
|
|
* pointer to SWISH |
388 |
|
|
* pointer to the IndexFile |
389 |
|
|
* pointer to the LOOKUP_TABLE |
390 |
|
|
* *metaEntry |
391 |
|
|
* PARAMS (low/hi range) |
392 |
|
|
* |
393 |
|
|
* Returns: |
394 |
|
|
* true if any in range, otherwise false |
395 |
|
|
* |
396 |
|
|
********************************************************************/ |
397 |
|
|
static int find_prop(SWISH *sw, IndexFILE *indexf, LOOKUP_TABLE *sort_array, int num, struct metaEntry *meta_entry ) |
398 |
|
|
{ |
399 |
|
|
int low, high, j; |
400 |
|
|
int foundLo, foundHi; |
401 |
|
|
int some_selected = 0; |
402 |
|
|
int exact_match; |
403 |
|
|
|
404 |
|
|
|
405 |
|
|
if ( !meta_entry->loPropRange ) |
406 |
|
|
{ |
407 |
|
|
foundLo = 1; /* signal exact match */ |
408 |
|
|
low = 0; /* and start at beginning */ |
409 |
|
|
} |
410 |
|
|
else |
411 |
|
|
{ |
412 |
|
|
foundLo = binary_search(sw, indexf, sort_array, num, meta_entry->loPropRange, meta_entry, &low, -1, &exact_match); |
413 |
|
|
|
414 |
|
|
if ( !foundLo && exact_match >= 0 ) |
415 |
|
|
{ |
416 |
|
|
low = exact_match; |
417 |
|
|
foundLo = 1; /* mark as an exact match */ |
418 |
|
|
} |
419 |
|
|
} |
420 |
|
|
|
421 |
|
|
|
422 |
|
|
|
423 |
|
|
if ( !meta_entry->hiPropRange ) |
424 |
|
|
{ |
425 |
|
|
foundHi = 1; /* signal exact match */ |
426 |
|
|
high = num -1; /* and end very end */ |
427 |
|
|
} |
428 |
|
|
else |
429 |
|
|
{ |
430 |
|
|
foundHi = binary_search(sw, indexf, sort_array, num, meta_entry->hiPropRange, meta_entry, &high, +1, &exact_match); |
431 |
|
|
|
432 |
|
|
if ( !foundHi && exact_match >= 0 ) |
433 |
|
|
{ |
434 |
|
|
high = exact_match; |
435 |
|
|
foundHi = 1; |
436 |
|
|
} |
437 |
|
|
} |
438 |
|
|
|
439 |
|
|
#ifdef DEBUGLIMIT |
440 |
|
|
printf("Returned range %d - %d (exact: %d %d) cnt: %u\n", low, high, foundLo, foundHi, num ); |
441 |
|
|
#endif |
442 |
|
|
|
443 |
|
|
/* both inbetween two adjacent entries */ |
444 |
|
|
if ( !foundLo && !foundHi && low == high ) |
445 |
|
|
{ |
446 |
|
|
for ( j = 0; j < num; j++ ) |
447 |
|
|
sort_array[j].sort = 0; |
448 |
|
|
|
449 |
|
|
return 0; |
450 |
|
|
} |
451 |
|
|
|
452 |
|
|
|
453 |
|
|
/* now, if not an exact match for the high range, decrease high by one |
454 |
|
|
* because high is pointing to the *next* higher element, which is TOO high |
455 |
|
|
*/ |
456 |
|
|
|
457 |
|
|
if ( !foundHi && low < high ) |
458 |
|
|
high--; |
459 |
|
|
|
460 |
|
|
|
461 |
|
|
for ( j = 0; j < num; j++ ) |
462 |
|
|
{ |
463 |
|
|
if ( j >= low && j <= high ) |
464 |
|
|
{ |
465 |
|
|
sort_array[j].sort = 1; |
466 |
|
|
some_selected++; |
467 |
|
|
} |
468 |
|
|
else |
469 |
|
|
sort_array[j].sort = 0; |
470 |
|
|
} |
471 |
|
|
|
472 |
|
|
return some_selected; |
473 |
|
|
|
474 |
|
|
} |
475 |
|
|
|
476 |
|
|
/* These sort the LOOKUP_TABLE */ |
477 |
|
|
int sortbysort(const void *s1, const void *s2) |
478 |
|
|
{ |
479 |
|
|
LOOKUP_TABLE *a = (LOOKUP_TABLE *)s1; |
480 |
|
|
LOOKUP_TABLE *b = (LOOKUP_TABLE *)s2; |
481 |
|
|
|
482 |
|
|
return a->sort - b->sort; |
483 |
|
|
} |
484 |
|
|
|
485 |
|
|
int sortbyfile(const void *s1, const void *s2) |
486 |
|
|
{ |
487 |
|
|
LOOKUP_TABLE *a = (LOOKUP_TABLE *)s1; |
488 |
|
|
LOOKUP_TABLE *b = (LOOKUP_TABLE *)s2; |
489 |
|
|
|
490 |
|
|
return a->filenum - b->filenum; |
491 |
|
|
} |
492 |
|
|
|
493 |
|
|
|
494 |
|
|
/******************************************************************* |
495 |
|
|
* This creates the lookup table for the range of values selected |
496 |
|
|
* and stores it in the MetaEntry |
497 |
|
|
* |
498 |
|
|
* Call with: |
499 |
|
|
* pointer to SWISH |
500 |
|
|
* pointer to the IndexFile |
501 |
|
|
* *metaEntry |
502 |
|
|
* PARAMS (low/hi range) |
503 |
|
|
* |
504 |
|
|
* Returns: |
505 |
|
|
* true if any were marked as found |
506 |
|
|
* false means no match |
507 |
|
|
* |
508 |
|
|
********************************************************************/ |
509 |
|
|
|
510 |
|
|
static int create_lookup_array( SWISH *sw, IndexFILE *indexf, struct metaEntry *meta_entry ) |
511 |
|
|
{ |
512 |
|
|
LOOKUP_TABLE *sort_array; |
513 |
|
|
int i; |
514 |
|
|
int size = indexf->header.totalfiles; |
515 |
|
|
int some_found; |
516 |
|
|
|
517 |
|
|
/* Now do the work of creating the lookup table */ |
518 |
|
|
|
519 |
|
|
/* Create memory -- probably could do this once and use it over and over */ |
520 |
|
|
sort_array = (LOOKUP_TABLE *) emalloc( size * sizeof(LOOKUP_TABLE) ); |
521 |
|
|
|
522 |
|
|
/* copy in the data to the sort array */ |
523 |
|
|
for (i = 0; i < size; i++) |
524 |
|
|
{ |
525 |
|
|
sort_array[i].filenum = i+1; |
526 |
|
|
sort_array[i].sort = meta_entry->sorted_data[i]; |
527 |
|
|
} |
528 |
|
|
|
529 |
|
|
|
530 |
|
|
/* now sort by it's sort value */ |
531 |
|
|
swish_qsort(sort_array, size, sizeof(LOOKUP_TABLE), &sortbysort); |
532 |
|
|
|
533 |
|
|
/* This marks in the new array which ones are in range */ |
534 |
|
|
some_found = find_prop( sw, indexf, sort_array, size, meta_entry ); |
535 |
|
|
|
536 |
|
|
|
537 |
|
|
#ifdef DEBUGLIMIT |
538 |
|
|
for (i = 0; i < size; i++) |
539 |
|
|
{ |
540 |
|
|
printf("%d File: %d Sort: %lu : ", i, sort_array[i].filenum, sort_array[i].sort ); |
541 |
|
|
printfileprop( sw, indexf, sort_array[i].filenum, meta_entry->metaID ); |
542 |
|
|
printf("\n"); |
543 |
|
|
} |
544 |
|
|
#endif |
545 |
|
|
|
546 |
|
|
/* If everything in range, then don't even bother creating the lookup array */ |
547 |
|
|
if ( some_found && sort_array[0].sort && sort_array[size-1].sort ) |
548 |
|
|
{ |
549 |
|
|
efree( sort_array ); |
550 |
|
|
return 1; |
551 |
|
|
} |
552 |
|
|
|
553 |
|
|
|
554 |
|
|
/* sort back by file number */ |
555 |
|
|
swish_qsort(sort_array, size, sizeof(LOOKUP_TABLE), &sortbyfile); |
556 |
|
|
|
557 |
|
|
|
558 |
|
|
/* allocate a place to save the lookup table */ |
559 |
|
|
meta_entry->inPropRange = (int *) emalloc( size * sizeof(int) ); |
560 |
|
|
|
561 |
|
|
/* populate the array in the metaEntry */ |
562 |
|
|
for (i = 0; i < size; i++) |
563 |
|
|
meta_entry->inPropRange[i] = sort_array[i].sort; |
564 |
|
|
|
565 |
|
|
efree( sort_array ); |
566 |
|
|
|
567 |
|
|
return some_found; |
568 |
|
|
|
569 |
|
|
} |
570 |
|
|
|
571 |
|
|
/******************************************************************* |
572 |
|
|
* Encode parameters specified on -L command line into two propEntry's |
573 |
|
|
* which can be used to compare with a file's property |
574 |
|
|
* |
575 |
|
|
* Call with: |
576 |
|
|
* *metaEntry -> current meta entry |
577 |
|
|
* *PARAMS -> associated parameters |
578 |
|
|
* |
579 |
|
|
* Returns: |
580 |
|
|
* True if a range was found, otherwise false. |
581 |
|
|
* sets sw->lasterror on failure |
582 |
|
|
* |
583 |
|
|
* |
584 |
|
|
********************************************************************/ |
585 |
|
|
static int params_to_props( SWISH *sw, struct metaEntry *meta_entry, PARAMS *param ) |
586 |
|
|
{ |
587 |
|
|
int error_flag; |
588 |
|
|
unsigned char *lowrange = param->lowrange; |
589 |
|
|
unsigned char *highrange = param->highrange; |
590 |
|
|
|
591 |
|
|
/* properties do not have leading white space */ |
592 |
|
|
|
593 |
|
|
|
594 |
|
|
/* Allow <= and >= in limits. A NULL property means very low/very high */ |
595 |
|
|
|
596 |
|
|
if ( (strcmp( "<=", (char *)lowrange ) == 0) ) |
597 |
|
|
{ |
598 |
|
|
meta_entry->loPropRange = NULL; /* indicates very small */ |
599 |
|
|
meta_entry->hiPropRange = CreateProperty( meta_entry, highrange, strlen( (char *)highrange ), 0, &error_flag ); |
600 |
|
|
} |
601 |
|
|
|
602 |
|
|
else if ( (strcmp( ">=", (char *)lowrange ) == 0) ) |
603 |
|
|
{ |
604 |
|
|
meta_entry->loPropRange = CreateProperty( meta_entry, highrange, strlen( (char *)highrange ), 0, &error_flag ); |
605 |
|
|
meta_entry->hiPropRange = NULL; /* indicates very big */ |
606 |
|
|
} |
607 |
|
|
|
608 |
|
|
else |
609 |
|
|
{ |
610 |
|
|
meta_entry->loPropRange = CreateProperty( meta_entry, lowrange, strlen( (char *)lowrange ), 0, &error_flag ); |
611 |
|
|
meta_entry->hiPropRange = CreateProperty( meta_entry, highrange, strlen( (char *)highrange ), 0, &error_flag ); |
612 |
|
|
|
613 |
|
|
|
614 |
|
|
if ( !(meta_entry->loPropRange && meta_entry->hiPropRange) ) |
615 |
|
|
{ |
616 |
|
|
set_progerr(PROP_LIMIT_ERROR, sw, "Failed to set range for property '%s' values '%s' and '%s'", meta_entry->metaName, lowrange, highrange ); |
617 |
|
|
return 0; |
618 |
|
|
} |
619 |
|
|
|
620 |
|
|
/* Validate range */ |
621 |
|
|
|
622 |
|
|
if ( Compare_Properties( meta_entry, meta_entry->loPropRange, meta_entry->hiPropRange ) > 0 ) |
623 |
|
|
{ |
624 |
|
|
set_progerr(PROP_LIMIT_ERROR, sw, "Property '%s' value '%s' must be <= '%s'", meta_entry->metaName, lowrange, highrange ); |
625 |
|
|
return 0; |
626 |
|
|
} |
627 |
|
|
} |
628 |
|
|
|
629 |
|
|
|
630 |
|
|
return ( meta_entry->loPropRange || meta_entry->hiPropRange ); |
631 |
|
|
} |
632 |
|
|
|
633 |
|
|
|
634 |
|
|
/******************************************************************* |
635 |
|
|
* Scans all the meta entries to see if any are limited, and if so, creates the lookup array |
636 |
|
|
* |
637 |
|
|
* Call with: |
638 |
|
|
* pointer to SWISH |
639 |
|
|
* poinger to an IndexFile |
640 |
|
|
* user supplied limit parameters |
641 |
|
|
* |
642 |
|
|
* Returns: |
643 |
|
|
* false if any arrays are all zero |
644 |
|
|
* no point in even searching. |
645 |
|
|
* (meaning that no possible matches exist) |
646 |
|
|
* but also return false on errors, caller must check sw->lasterror |
647 |
|
|
* |
648 |
|
|
* ToDo: |
649 |
|
|
* This ONLY works if the limits are absolute -- that is |
650 |
|
|
* that you can't OR limits. Will need fixing at some point |
651 |
|
|
* |
652 |
|
|
********************************************************************/ |
653 |
|
|
static int load_index( SWISH *sw, IndexFILE *indexf, PARAMS *params ) |
654 |
|
|
{ |
655 |
|
|
struct metaEntry *meta_entry; |
656 |
|
|
PARAMS *curp; |
657 |
|
|
int found; |
658 |
|
|
|
659 |
|
|
|
660 |
|
|
curp = params; |
661 |
|
|
|
662 |
|
|
/* Look at each parameter */ |
663 |
|
|
for (curp = params; curp; curp = curp->next ) |
664 |
|
|
{ |
665 |
|
|
found = 0; |
666 |
|
|
|
667 |
|
|
if ( !(meta_entry = getPropNameByName( &indexf->header, (char *)curp->propname ))) |
668 |
|
|
{ |
669 |
|
|
set_progerr( PROP_LIMIT_ERROR, sw, "Specified limit name '%s' is not a PropertyName", curp->propname ); |
670 |
|
|
return 0; |
671 |
|
|
} |
672 |
|
|
|
673 |
|
|
|
674 |
|
|
/* This, of course, is not the truth -- but the only slightly useful would be filenum */ |
675 |
|
|
/* indexfile can be specified on the command line, rank and reccount is not really known */ |
676 |
|
|
|
677 |
|
|
if ( is_meta_internal( meta_entry ) ) |
678 |
|
|
{ |
679 |
|
|
set_progerr( PROP_LIMIT_ERROR, sw, "Cannot limit by swish result property '%s'", curp->propname ); |
680 |
|
|
return 0; |
681 |
|
|
} |
682 |
|
|
|
683 |
|
|
|
684 |
|
|
/* see if array has already been allocated (cached) */ |
685 |
|
|
if ( meta_entry->inPropRange ) |
686 |
|
|
continue; |
687 |
|
|
|
688 |
|
|
|
689 |
|
|
/* Encode the parameters into properties for comparing, and store in the metaEntry */ |
690 |
|
|
|
691 |
|
|
if ( !params_to_props( sw, meta_entry, curp ) ) |
692 |
|
|
{ |
693 |
|
|
if ( sw->lasterror ) // check for failure |
694 |
|
|
return 0; |
695 |
|
|
|
696 |
|
|
continue; /* This means that it failed to set a range */ |
697 |
|
|
} |
698 |
|
|
|
699 |
|
|
|
700 |
|
|
/* load the sorted_data array, if not already done */ |
701 |
|
|
if ( !meta_entry->sorted_data ) |
702 |
|
|
if( !LoadSortedProps( sw, indexf, meta_entry ) ) |
703 |
|
|
continue; /* thus it will sort manually without pre-sorted index */ |
704 |
|
|
|
705 |
|
|
|
706 |
|
|
/* Now create the lookup table in the metaEntry */ |
707 |
|
|
/* A false return means that an array was built but it was all zero */ |
708 |
|
|
/* No need to check anything else at this time, since can only AND -L options */ |
709 |
|
|
/* i.e. = return No Results right away */ |
710 |
|
|
/* This allows search.c to bail out early */ |
711 |
|
|
|
712 |
|
|
if ( !create_lookup_array( sw, indexf, meta_entry ) ) |
713 |
|
|
return 0; |
714 |
|
|
} |
715 |
|
|
|
716 |
|
|
return 1; // ** flag that it's ok to continue the search. |
717 |
|
|
|
718 |
|
|
} |
719 |
|
|
|
720 |
|
|
/******************************************************************* |
721 |
|
|
* Prepares the lookup tables for every index |
722 |
|
|
* |
723 |
|
|
* Call with: |
724 |
|
|
* pointer to SWISH |
725 |
|
|
* |
726 |
|
|
* Returns: |
727 |
|
|
* true if ok to continue search |
728 |
|
|
* false indicates that a lookup array was created, but it is all zero |
729 |
|
|
* indicating there will never be a match |
730 |
|
|
* ( this falls apart if allow OR limits ) |
731 |
|
|
* |
732 |
|
|
* ToDo: |
733 |
|
|
* How to deal with non-presorted properties? |
734 |
|
|
* |
735 |
|
|
********************************************************************/ |
736 |
|
|
|
737 |
|
|
int Prepare_PropLookup(SWISH *sw ) |
738 |
|
|
{ |
739 |
|
|
IndexFILE *indexf; |
740 |
|
|
struct MOD_PropLimit *self = sw->PropLimit; |
741 |
|
|
int total_indexes = 0; |
742 |
|
|
int total_no_docs = 0; |
743 |
|
|
|
744 |
|
|
|
745 |
|
|
|
746 |
|
|
/* nothing to limit by */ |
747 |
|
|
if ( !self->params ) |
748 |
|
|
return 1; |
749 |
|
|
|
750 |
|
|
|
751 |
|
|
|
752 |
|
|
|
753 |
|
|
/* process each index file */ |
754 |
|
|
for( indexf = sw->indexlist; indexf; indexf = indexf->next) |
755 |
|
|
{ |
756 |
|
|
total_indexes++; |
757 |
|
|
|
758 |
|
|
if ( !load_index( sw, indexf, self->params ) ) |
759 |
|
|
{ |
760 |
|
|
if ( sw->lasterror ) // check for error |
761 |
|
|
return 0; |
762 |
|
|
|
763 |
|
|
total_no_docs++; |
764 |
|
|
} |
765 |
|
|
} |
766 |
|
|
|
767 |
|
|
/* if all indexes are all no docs within limits, then return false */ |
768 |
|
|
return total_indexes != total_no_docs; |
769 |
|
|
|
770 |
|
|
} |
771 |
|
|
|
772 |
|
|
/******************************************************************* |
773 |
|
|
* Removes results that don't fit within the limit |
774 |
|
|
* |
775 |
|
|
* Call with: |
776 |
|
|
* *SWISH - to read a file entry if pre-sorted data not available |
777 |
|
|
* IndexFILE = current index file |
778 |
|
|
* File number |
779 |
|
|
* |
780 |
|
|
* Returns |
781 |
|
|
* true if file should NOT be included in results |
782 |
|
|
* |
783 |
|
|
* |
784 |
|
|
********************************************************************/ |
785 |
|
|
int LimitByProperty( SWISH *sw, IndexFILE *indexf, int filenum ) |
786 |
|
|
{ |
787 |
|
|
int j; |
788 |
|
|
struct metaEntry *meta_entry; |
789 |
|
|
for ( j = 0; j < indexf->header.metaCounter; j++) |
790 |
|
|
{ |
791 |
|
|
/* Look at all the properties */ |
792 |
|
|
|
793 |
|
|
/* Should cache this in the index file, or is this fast enough? */ |
794 |
|
|
if ( !(meta_entry = getPropNameByID( &indexf->header, indexf->header.metaEntryArray[j]->metaID ))) |
795 |
|
|
continue; /* continue if it's not a property */ |
796 |
|
|
|
797 |
|
|
/* anything to check? */ |
798 |
|
|
if ( !meta_entry->loPropRange && !meta_entry->hiPropRange ) |
799 |
|
|
continue; |
800 |
|
|
|
801 |
|
|
|
802 |
|
|
|
803 |
|
|
|
804 |
|
|
/* If inPropRange is allocated then there is an array for limiting already created from the presorted data */ |
805 |
|
|
|
806 |
|
|
if ( meta_entry->inPropRange ) |
807 |
|
|
return !meta_entry->inPropRange[filenum-1]; |
808 |
|
|
|
809 |
|
|
|
810 |
|
|
|
811 |
|
|
|
812 |
|
|
/* Otherwise, if either range is set, then use a manual lookup of the property */ |
813 |
|
|
|
814 |
|
|
{ |
815 |
|
|
int limit = 0; |
816 |
|
|
propEntry *prop = GetPropertyByFile( sw, indexf, filenum, meta_entry->metaID ); |
817 |
|
|
|
818 |
|
|
/* Return true (i.e. limit) if the file's prop is less than the low range */ |
819 |
|
|
/* or if its property is greater than the high range */ |
820 |
|
|
if ( |
821 |
|
|
(Compare_Properties( meta_entry, prop, meta_entry->loPropRange ) < 0 ) || |
822 |
|
|
(meta_entry->hiPropRange && (Compare_Properties( meta_entry, prop, meta_entry->hiPropRange ) > 0 )) |
823 |
|
|
) |
824 |
|
|
limit = 1; |
825 |
|
|
|
826 |
|
|
freeProperty( prop ); |
827 |
|
|
/* If limit by this property, then return to limit right away */ |
828 |
|
|
if ( limit ) |
829 |
|
|
return 1; |
830 |
|
|
} |
831 |
|
|
} |
832 |
|
|
|
833 |
|
|
return 0; /* don't limit by default */ |
834 |
|
|
} |
835 |
|
|
|
836 |
|
|
/******************************************************************* |
837 |
|
|
* Checks to see if ANY -L parameters were set |
838 |
|
|
* |
839 |
|
|
* This is just to avoid processing each result in the result list. |
840 |
|
|
* |
841 |
|
|
********************************************************************/ |
842 |
|
|
int is_prop_limit_used( SWISH *sw ) |
843 |
|
|
{ |
844 |
|
|
struct MOD_PropLimit *self = sw->PropLimit; |
845 |
|
|
|
846 |
|
|
return self->params ? 1 : 0; |
847 |
|
|
} |
848 |
|
|
|