1 |
/* |
2 |
$Id: docprop.c,v 1.96 2002/08/22 22:58:39 whmoseley Exp $ |
3 |
** DocProperties.c, DocProperties.h |
4 |
** |
5 |
** Functions to manage the index's Document Properties |
6 |
** |
7 |
** File Created. |
8 |
** Mark Gaulin 11/24/98 |
9 |
** |
10 |
** change sprintf to snprintf to avoid corruption, |
11 |
** and use MAXSTRLEN from swish.h instead of literal "200" |
12 |
** SRE 11/17/99 |
13 |
** |
14 |
** 04/00 Jose ruiz |
15 |
** storeDocProperties and readNextDocPropEntry modified to store |
16 |
** the int numbers compressed. This makes integers "portable" |
17 |
** |
18 |
** 04/00 Jose Ruiz |
19 |
** Added sorting results by property |
20 |
** |
21 |
** 07/00 and 08/00 - Jose Ruiz |
22 |
** Many modifications to make all functions thread safe |
23 |
** |
24 |
** 08/00 - Added ascending and descending capabilities in results sorting |
25 |
** |
26 |
** 2001-01 rasc getResultPropertyByName rewritten, datatypes for properties. |
27 |
** 2001-02 rasc isAutoProperty |
28 |
** printSearchResultProperties changed |
29 |
** 2001-03-15 rasc Outputdelimiter var name changed |
30 |
** 2001-06-08 wsm Store propValue at end of docPropertyEntry to save memory |
31 |
** 2001-06-14 moseley Most of the code rewritten, and propfile added |
32 |
** |
33 |
** 2001-09 jmruiz - ReadAllDocPropertiesFromDisk rewritten to be used |
34 |
** by merge.c |
35 |
** |
36 |
*/ |
37 |
|
38 |
#include <limits.h> // for ULONG_MAX |
39 |
#include "swish.h" |
40 |
#include "string.h" |
41 |
#include "file.h" |
42 |
#include "hash.h" |
43 |
#include "mem.h" |
44 |
#include "merge.h" |
45 |
#include "error.h" |
46 |
#include "search.h" |
47 |
#include "index.h" |
48 |
#include "docprop.h" |
49 |
#include "error.h" |
50 |
#include "compress.h" |
51 |
#include "metanames.h" |
52 |
#include "result_output.h" |
53 |
#include "result_sort.h" |
54 |
#include "entities.h" |
55 |
#include "db.h" |
56 |
#ifdef HAVE_ZLIB |
57 |
#include <zlib.h> |
58 |
#endif |
59 |
|
60 |
|
61 |
|
62 |
|
63 |
/******************************************************************* |
64 |
* Free a property entry |
65 |
* |
66 |
********************************************************************/ |
67 |
|
68 |
void freeProperty( propEntry *prop ) |
69 |
{ |
70 |
if ( prop ) |
71 |
efree(prop); |
72 |
} |
73 |
|
74 |
|
75 |
|
76 |
|
77 |
/******************************************************************* |
78 |
* Free all properties in the docProperties structure |
79 |
* |
80 |
********************************************************************/ |
81 |
|
82 |
|
83 |
void freeDocProperties(docProperties *docProperties) |
84 |
{ |
85 |
int i; |
86 |
|
87 |
for( i = 0; i < docProperties->n; i++ ) |
88 |
{ |
89 |
freeProperty( docProperties->propEntry[i] ); |
90 |
docProperties->propEntry[i] = NULL; |
91 |
} |
92 |
|
93 |
efree(docProperties); |
94 |
docProperties = NULL; |
95 |
|
96 |
} |
97 |
|
98 |
|
99 |
/******************************************************************* |
100 |
* Frees a FileRec (struct file), which just frees |
101 |
* the properties and property index |
102 |
* Doesn't free the FileRec itself. |
103 |
* |
104 |
* move here from swish.c since all FileRec really holds is property info |
105 |
* |
106 |
********************************************************************/ |
107 |
|
108 |
void freefileinfo(FileRec *fi) |
109 |
{ |
110 |
|
111 |
if ( fi->docProperties ) |
112 |
{ |
113 |
freeDocProperties( fi->docProperties ); |
114 |
fi->docProperties = NULL; |
115 |
} |
116 |
|
117 |
if ( fi->prop_index ) |
118 |
{ |
119 |
efree( fi->prop_index ); |
120 |
fi->prop_index = NULL; |
121 |
} |
122 |
} |
123 |
|
124 |
/******************************************************************* |
125 |
* Converts a property into a string, based on it's type. |
126 |
* Numbers are zero filled |
127 |
* |
128 |
* Call with: |
129 |
* *metaEntry |
130 |
* *propEntry |
131 |
* |
132 |
* Returns: |
133 |
* malloc's a new string. Caller must call free(). |
134 |
* |
135 |
* |
136 |
********************************************************************/ |
137 |
|
138 |
char *DecodeDocProperty( struct metaEntry *meta_entry, propEntry *prop ) |
139 |
{ |
140 |
char *s; |
141 |
unsigned long i; |
142 |
|
143 |
if ( is_meta_string(meta_entry) ) /* check for ascii/string data */ |
144 |
return (char *)bin2string(prop->propValue,prop->propLen); |
145 |
|
146 |
|
147 |
if ( is_meta_date(meta_entry) ) |
148 |
{ |
149 |
s=emalloc(20); |
150 |
i = *(unsigned long *) prop->propValue; /* read binary */ |
151 |
i = UNPACKLONG(i); /* Convert the portable number */ |
152 |
strftime(s,20,"%Y-%m-%d %H:%M:%S",(struct tm *)localtime((time_t *)&i)); |
153 |
return s; |
154 |
} |
155 |
|
156 |
|
157 |
|
158 |
if ( is_meta_number(meta_entry) ) |
159 |
{ |
160 |
s=emalloc(14); |
161 |
i=*(unsigned long *)prop->propValue; /* read binary */ |
162 |
i = UNPACKLONG(i); /* Convert the portable number */ |
163 |
sprintf(s,"%.013lu",i); |
164 |
return s; |
165 |
} |
166 |
|
167 |
progwarn("Invalid property type for property '%s'\n", meta_entry->metaName ); |
168 |
return estrdup(""); |
169 |
} |
170 |
|
171 |
/******************************************************************* |
172 |
* Returns a property (really the head of the list) |
173 |
* for the specified property |
174 |
* |
175 |
* Call with: |
176 |
* *RESULT |
177 |
* *metaEntry - pointer to related meta entry |
178 |
* metaID - OR, if metaEntry is NULL uses this to lookup metaEntry |
179 |
* |
180 |
* Returns: |
181 |
* *propEntry |
182 |
* |
183 |
* Warning: |
184 |
* Only returns first property in list (which is the last property added) |
185 |
* |
186 |
* Notes: |
187 |
* with PROPFILE, caller is expected to destroy the property |
188 |
* |
189 |
* |
190 |
********************************************************************/ |
191 |
|
192 |
static propEntry *getDocProperty( SWISH *sw, RESULT *result, struct metaEntry **meta_entry, int metaID ) |
193 |
{ |
194 |
IndexFILE *indexf = result->indexf; |
195 |
int error_flag; |
196 |
unsigned long num; |
197 |
|
198 |
|
199 |
/* Grab the meta structure for this ID, unless one was passed in */ |
200 |
|
201 |
if ( *meta_entry ) |
202 |
metaID = (*meta_entry)->metaID; |
203 |
|
204 |
else if ( !(*meta_entry = getPropNameByID(&indexf->header, metaID )) ) |
205 |
return NULL; |
206 |
|
207 |
|
208 |
/* This is a memory leak if not using PROPFILE */ |
209 |
|
210 |
/* Some properties are generated during a search */ |
211 |
if ( is_meta_internal( *meta_entry ) ) |
212 |
{ |
213 |
if ( is_meta_entry( *meta_entry, AUTOPROPERTY_RESULT_RANK ) ) |
214 |
{ |
215 |
num = PACKLONG( (unsigned long)result->rank ); |
216 |
return CreateProperty( *meta_entry, (unsigned char *)&num, sizeof( num ), 1, &error_flag ); |
217 |
} |
218 |
|
219 |
if ( is_meta_entry( *meta_entry, AUTOPROPERTY_REC_COUNT ) ) |
220 |
{ |
221 |
num = PACKLONG( (unsigned long)result->count ); |
222 |
return CreateProperty( *meta_entry, (unsigned char *)&num, sizeof( num ), 1, &error_flag ); |
223 |
} |
224 |
|
225 |
if ( is_meta_entry( *meta_entry, AUTOPROPERTY_FILENUM ) ) |
226 |
{ |
227 |
num = PACKLONG( (unsigned long)result->filenum ); |
228 |
return CreateProperty( *meta_entry, (unsigned char *)&num, sizeof( num ), 1, &error_flag ); |
229 |
} |
230 |
|
231 |
|
232 |
if ( is_meta_entry( *meta_entry, AUTOPROPERTY_INDEXFILE ) ) |
233 |
return CreateProperty( *meta_entry, (unsigned char *)result->indexf->line, strlen( result->indexf->line ), 0, &error_flag ); |
234 |
} |
235 |
|
236 |
|
237 |
return ReadSingleDocPropertiesFromDisk(sw, indexf, &result->fi, metaID, 0 ); |
238 |
} |
239 |
|
240 |
|
241 |
/******************************************************************* |
242 |
* Returns a string for the property ID supplied |
243 |
* Numbers are zero filled |
244 |
* |
245 |
* Call with: |
246 |
* *RESULT |
247 |
* metaID |
248 |
* |
249 |
* Returns: |
250 |
* malloc's a new string. Caller must call free(). |
251 |
* |
252 |
* Bugs: |
253 |
* Only returns first property in list (which is the last property) |
254 |
* |
255 |
* |
256 |
********************************************************************/ |
257 |
|
258 |
char *getResultPropAsString(SWISH *sw, RESULT *result, int ID) |
259 |
{ |
260 |
char *s = NULL; |
261 |
propEntry *prop; |
262 |
struct metaEntry *meta_entry = NULL; |
263 |
|
264 |
|
265 |
if( !result ) |
266 |
return estrdup(""); // when would this happen? |
267 |
|
268 |
|
269 |
|
270 |
if ( !(prop = getDocProperty(sw, result, &meta_entry, ID )) ) |
271 |
return estrdup(""); |
272 |
|
273 |
/* $$$ Ignores possible other properties that are linked to this one */ |
274 |
s = DecodeDocProperty( meta_entry, prop ); |
275 |
|
276 |
freeProperty( prop ); |
277 |
|
278 |
return s; |
279 |
} |
280 |
|
281 |
/******************************************************************* |
282 |
* SwishResultPropertyStr - Returns a string for the property *name* supplied |
283 |
* Numbers are zero filled |
284 |
* |
285 |
* ** Library interface call ** |
286 |
* |
287 |
* Call with: |
288 |
* *sw |
289 |
* *RESULT |
290 |
* char * property name |
291 |
* |
292 |
* Returns: |
293 |
* A string -- caller does not need to free as the strings are |
294 |
* cleaned up on every call |
295 |
* |
296 |
* |
297 |
********************************************************************/ |
298 |
|
299 |
char *SwishResultPropertyStr(SWISH *sw, RESULT *result, char *pname) |
300 |
{ |
301 |
char *s = NULL; |
302 |
propEntry *prop; |
303 |
struct metaEntry *meta_entry = NULL; |
304 |
IndexFILE *indexf; |
305 |
|
306 |
if( !result ) |
307 |
{ |
308 |
sw->lasterror = SWISH_LISTRESULTS_EOF; |
309 |
return ""; // when would this happen? |
310 |
} |
311 |
|
312 |
|
313 |
indexf = result->indexf; |
314 |
|
315 |
|
316 |
/* Ok property name? */ |
317 |
|
318 |
if ( !(meta_entry = getPropNameByName( &indexf->header, pname )) ) |
319 |
{ |
320 |
set_progerr(UNKNOWN_PROPERTY_NAME_IN_SEARCH_DISPLAY, sw, "Invalid property name '%s'", pname ); |
321 |
return "(null)"; |
322 |
} |
323 |
|
324 |
|
325 |
|
326 |
|
327 |
/* Does this results have this property? */ |
328 |
|
329 |
if ( !(prop = getDocProperty(sw, result, &meta_entry, 0 )) ) |
330 |
return ""; |
331 |
|
332 |
s = DecodeDocProperty( meta_entry, prop ); |
333 |
|
334 |
freeProperty( prop ); |
335 |
|
336 |
if ( !*s ) |
337 |
{ |
338 |
efree( s ); |
339 |
return ""; |
340 |
} |
341 |
|
342 |
/* create a place to store the strings */ |
343 |
|
344 |
if ( ! indexf->prop_string_cache ) |
345 |
{ |
346 |
indexf->prop_string_cache = (char **)emalloc( indexf->header.metaCounter * sizeof( char *) ); |
347 |
memset( indexf->prop_string_cache, 0, indexf->header.metaCounter * sizeof( char *) ); |
348 |
} |
349 |
|
350 |
/* Free previous, if needed -- note the metaIDs start at one */ |
351 |
|
352 |
if ( indexf->prop_string_cache[ meta_entry->metaID-1 ] ) |
353 |
efree( indexf->prop_string_cache[ meta_entry->metaID-1 ] ); |
354 |
|
355 |
indexf->prop_string_cache[ meta_entry->metaID-1 ] = s; |
356 |
return s; |
357 |
} |
358 |
|
359 |
|
360 |
/******************************************************************* |
361 |
* SwishResultPropertyULong - Returns an unsigned long for the property *name* supplied |
362 |
* |
363 |
* ** Library interface call ** |
364 |
* |
365 |
* Call with: |
366 |
* *sw |
367 |
* *RESULT |
368 |
* char * property name |
369 |
* |
370 |
* Returns: |
371 |
* unsigned long |
372 |
* ULONG_MAX on error |
373 |
* |
374 |
* |
375 |
********************************************************************/ |
376 |
|
377 |
unsigned long SwishResultPropertyULong(SWISH *sw, RESULT *result, char *pname) |
378 |
{ |
379 |
struct metaEntry *meta_entry = NULL; |
380 |
IndexFILE *indexf; |
381 |
PropValue *pv; |
382 |
unsigned long value; |
383 |
|
384 |
if( !result ) |
385 |
{ |
386 |
sw->lasterror = SWISH_LISTRESULTS_EOF; |
387 |
return ULONG_MAX; |
388 |
} |
389 |
|
390 |
|
391 |
indexf = result->indexf; |
392 |
|
393 |
|
394 |
/* Ok property name? */ |
395 |
|
396 |
if ( !(meta_entry = getPropNameByName( &indexf->header, pname )) ) |
397 |
{ |
398 |
set_progerr(UNKNOWN_PROPERTY_NAME_IN_SEARCH_DISPLAY, sw, "Invalid property name '%s'", pname ); |
399 |
return ULONG_MAX; |
400 |
} |
401 |
|
402 |
|
403 |
/* make sure it's a numeric prop */ |
404 |
if ( !is_meta_number(meta_entry) && !is_meta_date(meta_entry) ) |
405 |
{ |
406 |
set_progerr(INVALID_PROPERTY_TYPE, sw, "Property '%s' is not numeric", pname ); |
407 |
return ULONG_MAX; |
408 |
} |
409 |
|
410 |
pv = getResultPropValue (sw, result, pname, 0 ); |
411 |
|
412 |
value = pv->value.v_ulong; |
413 |
|
414 |
efree( pv ); |
415 |
|
416 |
return value; |
417 |
} |
418 |
|
419 |
|
420 |
|
421 |
/******************************************************************* |
422 |
* Returns a property as a *propValue, which is a union of different |
423 |
* data types, with a flag to indicate the type |
424 |
* Can be called with either a metaname, or a metaID. |
425 |
* |
426 |
* Call with: |
427 |
* *SWISH |
428 |
* *RESULT |
429 |
* *metaName -- String name of meta entry |
430 |
* metaID -- OR - meta ID number |
431 |
* |
432 |
* Returns: |
433 |
* pointer to a propValue structure if found -- caller MUST free |
434 |
* Returns NULL if propertyName doesn't exist. |
435 |
* |
436 |
* Note: |
437 |
* Feb 13, 2002 - now defined properties that just don't exist |
438 |
* for the document return a blank *string* even for numeric |
439 |
* and date properties. This it to prevent "(NULL)" from displaying. |
440 |
* They used to return NULL, but since currently only result_output.c |
441 |
* uses this function, it's not a problem. |
442 |
* |
443 |
* |
444 |
********************************************************************/ |
445 |
|
446 |
PropValue *getResultPropValue (SWISH *sw, RESULT *r, char *pname, int ID ) |
447 |
{ |
448 |
PropValue *pv; |
449 |
struct metaEntry *meta_entry = NULL; |
450 |
propEntry *prop; |
451 |
|
452 |
|
453 |
/* Lookup by property name, if supplied */ |
454 |
if ( pname ) |
455 |
if ( !(meta_entry = getPropNameByName( &r->indexf->header, pname )) ) |
456 |
return NULL; |
457 |
|
458 |
|
459 |
/* create a propvalue to return to caller */ |
460 |
pv = (PropValue *) emalloc (sizeof (PropValue)); |
461 |
pv->datatype = PROP_UNDEFINED; |
462 |
pv->destroy = 0; |
463 |
|
464 |
|
465 |
|
466 |
/* This may return false */ |
467 |
prop = getDocProperty( sw, r, &meta_entry, ID ); |
468 |
|
469 |
if ( !prop ) |
470 |
{ |
471 |
pv->datatype = PROP_STRING; |
472 |
pv->value.v_str = ""; |
473 |
return pv; |
474 |
} |
475 |
|
476 |
|
477 |
if ( is_meta_string(meta_entry) ) /* check for ascii/string data */ |
478 |
{ |
479 |
pv->datatype = PROP_STRING; |
480 |
pv->destroy++; // caller must free this |
481 |
pv->value.v_str = (char *)bin2string(prop->propValue,prop->propLen); |
482 |
freeProperty( prop ); |
483 |
return pv; |
484 |
} |
485 |
|
486 |
|
487 |
/* dates and numbers should return null to tell apart from zero */ |
488 |
/* This is a slight problem with display, as blank properties show "(NULL)" */ |
489 |
/* but is needed since other parts of swish (like sorting) need to see NULL. */ |
490 |
|
491 |
/**************** |
492 |
if ( !prop ) |
493 |
{ |
494 |
efree( pv ); |
495 |
return NULL; |
496 |
} |
497 |
****************/ |
498 |
|
499 |
|
500 |
if ( is_meta_number(meta_entry) ) |
501 |
{ |
502 |
unsigned int i; |
503 |
i = *(unsigned int *) prop->propValue; /* read binary */ |
504 |
i = UNPACKLONG(i); /* Convert the portable number */ |
505 |
pv->datatype = PROP_ULONG; |
506 |
pv->value.v_ulong = (long)i; |
507 |
freeProperty( prop ); |
508 |
return pv; |
509 |
} |
510 |
|
511 |
|
512 |
if ( is_meta_date(meta_entry) ) |
513 |
{ |
514 |
unsigned long i; |
515 |
i = *(unsigned long *) prop->propValue; /* read binary */ |
516 |
i = UNPACKLONG(i); /* Convert the portable number */ |
517 |
pv->datatype = PROP_DATE; |
518 |
pv->value.v_date = (time_t)i; |
519 |
freeProperty( prop ); |
520 |
return pv; |
521 |
} |
522 |
|
523 |
freeProperty( prop ); |
524 |
|
525 |
|
526 |
|
527 |
if (pv->datatype == PROP_UNDEFINED) { /* nothing found */ |
528 |
efree (pv); |
529 |
pv = NULL; |
530 |
} |
531 |
|
532 |
return pv; |
533 |
} |
534 |
|
535 |
/******************************************************************* |
536 |
* Destroys a "pv" returned from getResultPropValue |
537 |
* |
538 |
* |
539 |
********************************************************************/ |
540 |
void freeResultPropValue(PropValue *pv) |
541 |
{ |
542 |
if ( !pv ) return; |
543 |
|
544 |
if ( pv->datatype == PROP_STRING && pv->destroy ) |
545 |
efree( pv->value.v_str ); |
546 |
|
547 |
efree(pv); |
548 |
} |
549 |
|
550 |
|
551 |
/******************************************************************* |
552 |
* Displays the "old" style properties for -p |
553 |
* |
554 |
* Call with: |
555 |
* *RESULT |
556 |
* |
557 |
* I think this could be done in result_output.c by creating a standard |
558 |
* -x format (plus properites) for use when there isn't one already. |
559 |
* |
560 |
* |
561 |
********************************************************************/ |
562 |
void printStandardResultProperties(SWISH *sw, FILE *f, RESULT *r) |
563 |
{ |
564 |
int i; |
565 |
struct MOD_Search *srch = sw->Search; |
566 |
char *s; |
567 |
char *propValue; |
568 |
int *metaIDs; |
569 |
|
570 |
metaIDs = r->indexf->propIDToDisplay; |
571 |
|
572 |
if (srch->numPropertiesToDisplay == 0) |
573 |
return; |
574 |
|
575 |
for ( i = 0; i < srch->numPropertiesToDisplay; i++ ) |
576 |
{ |
577 |
propValue = s = getResultPropAsString( sw, r, metaIDs[ i ] ); |
578 |
|
579 |
if (sw->ResultOutput->stdResultFieldDelimiter) |
580 |
fprintf(f, "%s", sw->ResultOutput->stdResultFieldDelimiter); |
581 |
else |
582 |
fprintf(f, " \""); /* default is to quote the string, with leading space */ |
583 |
|
584 |
/* print value, handling newlines and quotes */ |
585 |
while (*propValue) |
586 |
{ |
587 |
if (*propValue == '\n') |
588 |
fprintf(f, " "); |
589 |
|
590 |
else if (*propValue == '\"') /* should not happen */ |
591 |
fprintf(f,"""); |
592 |
|
593 |
else |
594 |
fprintf(f,"%c", *propValue); |
595 |
|
596 |
propValue++; |
597 |
} |
598 |
|
599 |
//fprintf(f,"%s", propValue); |
600 |
|
601 |
if (!sw->ResultOutput->stdResultFieldDelimiter) |
602 |
fprintf(f,"\""); /* default is to quote the string */ |
603 |
|
604 |
efree( s ); |
605 |
} |
606 |
} |
607 |
|
608 |
|
609 |
/*******************************************************************
*   Converts a string into the form that is saved as a property:
*   either a cleaned-up copy of the string, or a packed unsigned long
*   (for number and date properties).
*
*   Call with:
*       *metaEntry
*       **encodedStr (destination)
*       *string      - text to encode; NULL is treated as blank
*       *error_flag  - integer to indicate the difference between an
*                      error and a blank property
*
*   Returns:
*       Length of the encoded data, with a newly allocated buffer
*       stored in *encodedStr.  Caller must free it.
*
*       Zero if there is nothing to store.  *encodedStr is NOT set in
*       that case and nothing is left allocated.  *error_flag is
*       non-zero if this was caused by an invalid number, and zero for
*       a blank value or an unknown property type.
*
*   Details:
*       - leading and trailing white space is removed
*       - numbers/dates: parsed as base 10 with strtoul(); any
*         trailing junk, or a result of ULONG_MAX, is an error.
*         The packed value is followed by a '\0' that is not counted
*         in the returned length.
*       - strings: every run of characters <= ' ' is replaced by a
*         single space
*
*   QUESTION: ???
*       should this return a *docproperty instead?
*       numbers are unsigned longs.  What if someone
*       wanted to store signed numbers?
*
*   ToDO:
*       What about convert entities here?
*
********************************************************************/
static int EncodeProperty( struct metaEntry *meta_entry, char **encodedStr, char *propstring, int *error_flag )
{
    char   *string = propstring;
    size_t  len;

    *error_flag = 0;

    /* skip leading white space -- <ctype.h> needs an unsigned char value */
    if ( string )
        while ( isspace( (unsigned char)*string ) )
            string++;

    if ( !string || !*string )
    {
#ifdef BLANK_PROP_VALUE
        string = BLANK_PROP_VALUE;  // gets dup'ed below
#else
        return 0;
#endif
    }


    /* make a working copy */
    string = estrdup( string );

    /* remove trailing white space */
    len = strlen( string );
    while ( len && isspace( (unsigned char)string[len-1] ) )
        string[--len] = '\0';


    if ( is_meta_number( meta_entry ) || is_meta_date( meta_entry ) )
    {
        unsigned long  num;
        char          *badchar;
        char          *newstr;

        num = strtoul( string, &badchar, 10 ); // would base zero be more flexible?

        if ( num == ULONG_MAX )
        {
            progwarnno("EncodeProperty - Attempted to convert '%s' to a number", string );
            efree(string);
            (*error_flag)++;
            return 0;
        }

        if ( *badchar )   /* strtoul stopped before the end of the string */
        {
            progwarn("EncodeProperty - Invalid char '%c' found in string '%s'", badchar[0], string);
            efree(string);
            (*error_flag)++;
            return 0;
        }

        /* Only allocate once the value is known to be good */
        num = PACKLONG(num);

        newstr = emalloc( sizeof( num ) + 1 );
        memcpy( newstr, &num, sizeof( num ) );
        newstr[ sizeof(num) ] = '\0';

        *encodedStr = newstr;

        efree(string);

        return (int)sizeof(num);
    }


    if ( is_meta_string(meta_entry) )
    {
        /* replace all non-printing chars with a space -- this is questionable */
        // yep, sure is questionable -- isprint() kills 8859-1 chars.

        char *source;
        char *dest = string;

        for ( source = string; *source; source++ )
        {
            if ( (int)((unsigned char)*source) <= (int)' ' )
            {
                /* collapse the run into one space, never at the start */
                if ( dest > string && *(dest - 1) != ' ' )
                    *dest++ = ' ';

                continue;
            }

            *dest++ = *source;
        }
        *dest = '\0';

        /* Nothing but control characters: don't hand back a buffer
        *  the caller would never free */
        if ( dest == string )
        {
            efree( string );
            return 0;
        }

        *encodedStr = string;
        return (int)( dest - string );
    }


    efree( string );
    progwarn("EncodeProperty called but doesn't know the property type :(");
    return 0;
}
745 |
|
746 |
/******************************************************************* |
747 |
* Creates a document property |
748 |
* |
749 |
* Call with: |
750 |
* *metaEntry |
751 |
* *propValue - string to add |
752 |
* *propLen - length of string to add, but can be limited by metaEntry->max_size |
753 |
* preEncoded - flag saying the data is already encoded |
754 |
* (that's for filesize, last modified, start position) |
755 |
* *error_flag - integer to indicate the difference between an error and a blank property |
756 |
* |
757 |
* Returns: |
758 |
* pointer to a newly created document property |
759 |
* NULL indicates property could not be created |
760 |
* |
761 |
* |
762 |
********************************************************************/ |
763 |
propEntry *CreateProperty(struct metaEntry *meta_entry, unsigned char *propValue, int propLen, int preEncoded, int *error_flag ) |
764 |
{ |
765 |
propEntry *docProp; |
766 |
|
767 |
|
768 |
/* limit length */ |
769 |
if ( !preEncoded && meta_entry->max_len && propLen > meta_entry->max_len ) |
770 |
propLen = meta_entry->max_len; |
771 |
|
772 |
/* convert string to a document property, if not already encoded */ |
773 |
if ( !preEncoded ) |
774 |
{ |
775 |
char *tmp; |
776 |
|
777 |
propLen = EncodeProperty( meta_entry, &tmp, (char *)propValue, error_flag ); |
778 |
|
779 |
if ( !propLen ) /* Error detected in encode */ |
780 |
return NULL; |
781 |
|
782 |
/* Limit length */ |
783 |
if ( is_meta_string(meta_entry) && meta_entry->max_len && propLen > meta_entry->max_len ) |
784 |
propLen = meta_entry->max_len; |
785 |
|
786 |
propValue = (unsigned char *)tmp; |
787 |
} |
788 |
|
789 |
/* Now create the property $$ could be -1 */ |
790 |
docProp=(propEntry *) emalloc(sizeof(propEntry) + propLen); |
791 |
|
792 |
memcpy(docProp->propValue, propValue, propLen); |
793 |
docProp->propLen = propLen; |
794 |
|
795 |
|
796 |
/* EncodeProperty creates a new string */ |
797 |
if ( !preEncoded ) |
798 |
efree( propValue ); |
799 |
|
800 |
return docProp; |
801 |
} |
802 |
|
803 |
/******************************************************************* |
804 |
* Appends a string onto a current property |
805 |
* |
806 |
* Call with: |
807 |
* *propEntry |
808 |
* *string |
809 |
* length of string |
810 |
* |
811 |
* Will limit property length, if needed. |
812 |
* |
813 |
*******************************************************************/ |
814 |
propEntry *append_property( struct metaEntry *meta_entry, propEntry *p, char *txt, int length ) |
815 |
{ |
816 |
int newlen; |
817 |
int add_a_space = 0; |
818 |
char *str = NULL; |
819 |
int error_flag = 0; |
820 |
|
821 |
length = EncodeProperty( meta_entry, &str, txt, &error_flag ); |
822 |
|
823 |
if ( !length ) |
824 |
return p; |
825 |
|
826 |
/* When appending, we separate by a space -- could be a config setting */ |
827 |
if ( !isspace( (int)*str ) && !isspace( (int)p->propValue[p->propLen-1] ) ) |
828 |
add_a_space++; |
829 |
|
830 |
|
831 |
/* Any room to add the property? */ |
832 |
if ( meta_entry->max_len && p->propLen + add_a_space >= meta_entry->max_len ) |
833 |
{ |
834 |
if ( str ) |
835 |
efree( str ); |
836 |
|
837 |
return p; |
838 |
} |
839 |
|
840 |
|
841 |
newlen = p->propLen + length + add_a_space; |
842 |
|
843 |
/* limit length */ |
844 |
if ( meta_entry->max_len && newlen >= meta_entry->max_len ) |
845 |
{ |
846 |
newlen = meta_entry->max_len; |
847 |
length = meta_entry->max_len - p->propLen - add_a_space; |
848 |
} |
849 |
|
850 |
|
851 |
/* Now reallocate the property */ |
852 |
p = (propEntry *) erealloc(p, sizeof(propEntry) + newlen); |
853 |
|
854 |
if ( add_a_space ) |
855 |
p->propValue[p->propLen++] = ' '; |
856 |
|
857 |
memcpy( (void *)&(p->propValue[p->propLen]), str, length ); |
858 |
p->propLen = newlen; |
859 |
|
860 |
if (str) |
861 |
efree(str); |
862 |
|
863 |
return p; |
864 |
} |
865 |
|
866 |
|
867 |
/******************************************************************* |
868 |
* Scans the properties (metaEntry's), and adds a doc property to any that are flagged |
869 |
* Limits size, if needed (for StoreDescription) |
870 |
* Pass in text properties (not pre-encoded binary properties) |
871 |
* |
872 |
* Call with: |
873 |
* *INDEXDATAHEADER (to get to the list of metanames) |
874 |
* **docProperties - pointer to list of properties |
875 |
* *propValue - string to add |
876 |
* *propLen - length of string to add |
877 |
* |
878 |
* Returns: |
879 |
* void, but will warn on failed properties |
880 |
* |
881 |
* |
882 |
********************************************************************/ |
883 |
void addDocProperties( INDEXDATAHEADER *header, docProperties **docProperties, unsigned char *propValue, int propLen, char *filename ) |
884 |
{ |
885 |
struct metaEntry *m; |
886 |
int i; |
887 |
|
888 |
for ( i = 0; i < header->metaCounter; i++) |
889 |
{ |
890 |
m = header->metaEntryArray[i]; |
891 |
|
892 |
if ( (m->metaType & META_PROP) && m->in_tag ) |
893 |
if ( !addDocProperty( docProperties, m, propValue, propLen, 0 ) ) |
894 |
progwarn("Failed to add property '%s' in file '%s'", m->metaName, filename ); |
895 |
} |
896 |
} |
897 |
|
898 |
|
899 |
|
900 |
|
901 |
/******************************************************************* |
902 |
* Adds a document property to the list of properties. |
903 |
* Creates or extends the list, as necessary |
904 |
* |
905 |
* Call with: |
906 |
* **docProperties - pointer to list of properties |
907 |
* *metaEntry |
908 |
* *propValue - string to add |
909 |
* *propLen - length of string to add |
910 |
* preEncoded - flag saying the data is already encoded |
911 |
* (that's for filesize, last modified, start position) |
912 |
* |
913 |
* Returns: |
914 |
* true if added property |
915 |
* sets address of **docProperties, if list changes size |
916 |
* |
917 |
* |
918 |
********************************************************************/ |
919 |
|
920 |
int addDocProperty( docProperties **docProperties, struct metaEntry *meta_entry, unsigned char *propValue, int propLen, int preEncoded ) |
921 |
{ |
922 |
struct docProperties *dp = *docProperties; |
923 |
propEntry *docProp; |
924 |
int i; |
925 |
int error_flag; |
926 |
|
927 |
|
928 |
/* Allocate or extend the property array, if needed */ |
929 |
|
930 |
if( !dp ) |
931 |
{ |
932 |
dp = (struct docProperties *) emalloc(sizeof(struct docProperties) + (meta_entry->metaID + 1) * sizeof(propEntry *)); |
933 |
*docProperties = dp; |
934 |
|
935 |
dp->n = meta_entry->metaID + 1; |
936 |
|
937 |
for( i = 0; i < dp->n; i++ ) |
938 |
dp->propEntry[i] = NULL; |
939 |
} |
940 |
|
941 |
else /* reallocate if needed */ |
942 |
{ |
943 |
if( dp->n <= meta_entry->metaID ) |
944 |
{ |
945 |
dp = (struct docProperties *) erealloc(dp,sizeof(struct docProperties) + (meta_entry->metaID + 1) * sizeof(propEntry *)); |
946 |
|
947 |
*docProperties = dp; |
948 |
for( i = dp->n; i <= meta_entry->metaID; i++ ) |
949 |
dp->propEntry[i] = NULL; |
950 |
|
951 |
dp->n = meta_entry->metaID + 1; |
952 |
} |
953 |
} |
954 |
|
955 |
/* Un-encoded STRINGS get appended to existing properties */ |
956 |
/* Others generate a warning */ |
957 |
if ( dp->propEntry[meta_entry->metaID] ) |
958 |
{ |
959 |
if ( is_meta_string(meta_entry) ) |
960 |
{ |
961 |
dp->propEntry[meta_entry->metaID] = append_property( meta_entry, dp->propEntry[meta_entry->metaID], (char *)propValue, propLen ); |
962 |
return 1; |
963 |
} |
964 |
else // Will this come back and bite me? |
965 |
{ |
966 |
progwarn("Warning: Attempt to add duplicate property." ); |
967 |
return 0; |
968 |
} |
969 |
} |
970 |
|
971 |
|
972 |
/* create the document property */ |
973 |
/* Ignore some errors */ |
974 |
|
975 |
if ( !(docProp = CreateProperty( meta_entry, propValue, propLen, preEncoded, &error_flag )) ) |
976 |
return error_flag ? 0 : 1; |
977 |
|
978 |
dp->propEntry[meta_entry->metaID] = docProp; |
979 |
|
980 |
return 1; |
981 |
} |
982 |
|
983 |
// #define DEBUGPROP 1 |
984 |
#ifdef DEBUGPROP |
985 |
static int insidecompare = 0; |
986 |
#endif |
987 |
|
988 |
/******************************************************************* |
989 |
* Compares two properties for sorting |
990 |
* |
991 |
* Call with: |
992 |
* *metaEntry |
993 |
* *docPropertyEntry1 |
994 |
* *docPropertyEntry2 |
995 |
* |
996 |
* Returns: |
997 |
* 0 - two properties are the same |
998 |
* -1 - docPropertyEntry1 < docPropertyEntry2 |
999 |
* +1 - docPropertyEntry1 > docPropertyEntry2 |
1000 |
* |
1001 |
* |
1002 |
********************************************************************/ |
1003 |
int Compare_Properties( struct metaEntry *meta_entry, propEntry *p1, propEntry *p2 ) |
1004 |
{ |
1005 |
|
1006 |
|
1007 |
#ifdef DEBUGPROP |
1008 |
if ( !insidecompare++ ) |
1009 |
{ |
1010 |
printf("comparing properties for meta %s: returning: %d\n", meta_entry->metaName, Compare_Properties( meta_entry, p1, p2) ); |
1011 |
dump_single_property( p1, meta_entry ); |
1012 |
dump_single_property( p2, meta_entry ); |
1013 |
insidecompare = 0; |
1014 |
} |
1015 |
#endif |
1016 |
|
1017 |
|
1018 |
if ( !p1 && p2 ) |
1019 |
return -1; |
1020 |
|
1021 |
|
1022 |
if ( !p1 && !p2 ) |
1023 |
return 0; |
1024 |
|
1025 |
if ( p1 && !p2 ) |
1026 |
return +1; |
1027 |
|
1028 |
|
1029 |
if (is_meta_number( meta_entry ) || is_meta_date( meta_entry )) |
1030 |
return memcmp( (const void *)p1->propValue, (const void *)p2->propValue, p1->propLen ); |
1031 |
|
1032 |
|
1033 |
if ( is_meta_string(meta_entry) ) |
1034 |
{ |
1035 |
int rc; |
1036 |
int len = Min( p1->propLen, p2->propLen ); |
1037 |
|
1038 |
rc = is_meta_ignore_case( meta_entry) |
1039 |
? strncasecmp( (char *)p1->propValue, (char *)p2->propValue, len ) |
1040 |
: strncmp( (char *)p1->propValue, (char *)p2->propValue, len ); |
1041 |
|
1042 |
if ( rc != 0 ) |
1043 |
return rc; |
1044 |
|
1045 |
return p1->propLen - p2->propLen; |
1046 |
} |
1047 |
|
1048 |
return 0; |
1049 |
|
1050 |
} |
1051 |
|
1052 |
/******************************************************************* |
1053 |
* Duplicate a property that's already in memory and return it. |
1054 |
* |
1055 |
* Caller must destroy |
1056 |
* |
1057 |
*********************************************************************/ |
1058 |
|
1059 |
static propEntry *duplicate_in_mem_property( docProperties *props, int metaID, int max_size ) |
1060 |
{ |
1061 |
propEntry *docProp; |
1062 |
struct metaEntry meta_entry; |
1063 |
int propLen; |
1064 |
int error_flag; |
1065 |
|
1066 |
if ( metaID >= props->n ) |
1067 |
return NULL; |
1068 |
|
1069 |
if ( !(docProp = props->propEntry[ metaID ]) ) |
1070 |
return NULL; |
1071 |
|
1072 |
|
1073 |
meta_entry.metaName = "(default)"; /* for error message, I think */ |
1074 |
meta_entry.metaID = metaID; |
1075 |
|
1076 |
|
1077 |
/* Duplicate the property */ |
1078 |
propLen = docProp->propLen; |
1079 |
|
1080 |
/* Limit size,if possible - should really check if it's a string */ |
1081 |
if ( max_size && (max_size >= 8) && (max_size < propLen )) |
1082 |
propLen = max_size; |
1083 |
|
1084 |
/* Duplicate the property */ |
1085 |
return CreateProperty( &meta_entry, docProp->propValue, propLen, 1, &error_flag ); |
1086 |
} |
1087 |
|
1088 |
|
1089 |
#ifdef HAVE_ZLIB

/*******************************************************************
*   Hand out the shared property I/O buffer, growing it when needed
*
*   The buffer lives in the SWISH structure so it is not reallocated
*   for every property of every document.
*
*   Call with:
*       *sw        - owns Prop_IO_Buf and PropIO_allocated
*       buf_needed - minimum number of bytes required (zero is reported
*                    through progerr)
*
*   Returns:
*       pointer to sw->Prop_IO_Buf, which is at least buf_needed bytes.
*       Previous contents are NOT preserved when the buffer grows.
*
*********************************************************************/

static unsigned char *allocatePropIOBuffer(SWISH *sw, unsigned long buf_needed )
{
    unsigned long   grown_size;

    if ( !buf_needed )
        progerr("Asked for too small of a buffer size!");


    /* Existing buffer is already large enough */
    if ( sw->Prop_IO_Buf && buf_needed <= sw->PropIO_allocated )
        return sw->Prop_IO_Buf;


    /* free + malloc instead of realloc: the old contents are not needed, */
    /* so there is no point paying for a copy                             */
    if ( sw->Prop_IO_Buf )
        efree( sw->Prop_IO_Buf );


    /* Grow by at least RD_BUFFER_SIZE, or more if the request demands it */
    grown_size = sw->PropIO_allocated + RD_BUFFER_SIZE;
    if ( buf_needed > grown_size )
        grown_size = buf_needed;

    sw->Prop_IO_Buf      = emalloc( grown_size );
    sw->PropIO_allocated = grown_size;   /* keep track of buffer size */

    return sw->Prop_IO_Buf;
}

#endif
1128 |
|
1129 |
/******************************************************************* |
1130 |
* Compress a Property |
1131 |
* |
1132 |
* Call with: |
1133 |
* propEntry - the in data and its length |
1134 |
* propID - current property |
1135 |
* SWISH - to get access to the common buffer |
1136 |
* *uncompress_len - returns the length of the original buffer, or zero if not compressed |
1137 |
* *buf_len - the length of the returned buffer |
1138 |
* |
1139 |
* Returns: |
1140 |
* pointer the buffer of buf_len size |
1141 |
* |
1142 |
* |
1143 |
*********************************************************************/ |
1144 |
|
1145 |
static unsigned char *compress_property( propEntry *prop, int propID, SWISH *sw, int *buf_len, int *uncompressed_len ) |
1146 |
{ |
1147 |
#ifndef HAVE_ZLIB |
1148 |
*buf_len = prop->propLen; |
1149 |
*uncompressed_len = 0; |
1150 |
return prop->propValue; |
1151 |
|
1152 |
#else |
1153 |
unsigned char *PropBuf; /* For compressing and uncompressing */ |
1154 |
int dest_size; |
1155 |
|
1156 |
|
1157 |
/* Don't bother compressing smaller items */ |
1158 |
if ( prop->propLen < MIN_PROP_COMPRESS_SIZE ) |
1159 |
{ |
1160 |
*buf_len = prop->propLen; |
1161 |
*uncompressed_len = 0; |
1162 |
return prop->propValue; |
1163 |
} |
1164 |
|
1165 |
/* Buffer should be +1% + a few bytes. */ |
1166 |
dest_size = prop->propLen + ( prop->propLen / 100 ) + 1000; // way more than should be needed |
1167 |
|
1168 |
|
1169 |
/* Get an output buffer */ |
1170 |
PropBuf = allocatePropIOBuffer( sw, dest_size ); |
1171 |
|
1172 |
|
1173 |
if ( compress2( (Bytef *)PropBuf, (uLongf *)&dest_size, prop->propValue, prop->propLen, sw->PropCompressionLevel) != Z_OK) |
1174 |
progerr("Property Compression Error"); |
1175 |
|
1176 |
|
1177 |
/* Make sure it's compressed enough */ |
1178 |
if ( dest_size >= prop->propLen ) |
1179 |
{ |
1180 |
*buf_len = prop->propLen; |
1181 |
*uncompressed_len = 0; |
1182 |
return prop->propValue; |
1183 |
} |
1184 |
|
1185 |
*buf_len = dest_size; |
1186 |
*uncompressed_len = prop->propLen; |
1187 |
|
1188 |
return PropBuf; |
1189 |
|
1190 |
#endif |
1191 |
} |
1192 |
|
1193 |
/******************************************************************* |
1194 |
* Uncompress a Property |
1195 |
* |
1196 |
* Call with: |
1197 |
* SWISH |
1198 |
* *input_buf - buffer address |
1199 |
* buf_len - size of buffer |
1200 |
* *uncompressed_size - size of original prop, or zero if not compressed. |
1201 |
* |
1202 |
* Returns: |
1203 |
* buffer address of uncompressed property |
1204 |
* uncompressed_size is set to length of buffer |
1205 |
* |
1206 |
* |
1207 |
*********************************************************************/ |
1208 |
|
1209 |
static unsigned char *uncompress_property( SWISH *sw, unsigned char *input_buf, int buf_len, int *uncompressed_size ) |
1210 |
{ |
1211 |
|
1212 |
#ifndef HAVE_ZLIB |
1213 |
|
1214 |
if ( *uncompressed_size ) |
1215 |
progerr("The index was created with zlib compression.\nThis version of swish was not compiled with zlib"); |
1216 |
|
1217 |
*uncompressed_size = buf_len; |
1218 |
return input_buf; |
1219 |
|
1220 |
#else |
1221 |
unsigned char *PropBuf; |
1222 |
|
1223 |
|
1224 |
if ( *uncompressed_size == 0 ) /* wasn't compressed */ |
1225 |
{ |
1226 |
*uncompressed_size = buf_len; |
1227 |
return input_buf; |
1228 |
} |
1229 |
|
1230 |
|
1231 |
|
1232 |
/* make sure we have enough space */ |
1233 |
|
1234 |
PropBuf = allocatePropIOBuffer( sw, *uncompressed_size ); |
1235 |
|
1236 |
|
1237 |
if ( uncompress(PropBuf, (uLongf *)uncompressed_size, input_buf, buf_len ) != Z_OK ) |
1238 |
{ |
1239 |
progwarn("Failed to uncompress Property\n"); |
1240 |
return NULL; |
1241 |
} |
1242 |
|
1243 |
|
1244 |
return PropBuf; |
1245 |
|
1246 |
|
1247 |
#endif |
1248 |
|
1249 |
} |
1250 |
|
1251 |
|
1252 |
|
1253 |
/******************************************************************* |
1254 |
* Write Properties to disk, and save seek pointers |
1255 |
* |
1256 |
* DB_WriteProperty - should write filenum:propID as the key |
1257 |
* DB_WritePropPositions - writes the stored positions |
1258 |
* |
1259 |
* |
1260 |
* |
1261 |
*********************************************************************/ |
1262 |
void WritePropertiesToDisk( SWISH *sw , FileRec *fi ) |
1263 |
{ |
1264 |
IndexFILE *indexf = sw->indexlist; |
1265 |
INDEXDATAHEADER *header = &indexf->header; |
1266 |
docProperties *docProperties = fi->docProperties; |
1267 |
propEntry *prop; |
1268 |
int uncompressed_len; |
1269 |
unsigned char *buf; |
1270 |
int buf_len; |
1271 |
int count; |
1272 |
int i; |
1273 |
|
1274 |
|
1275 |
/* initialize the first time called */ |
1276 |
if ( header->property_count == 0 ) |
1277 |
{ |
1278 |
/* Get the current seek position in the index, since will now write the file info */ |
1279 |
DB_InitWriteFiles(sw, indexf->DB); |
1280 |
|
1281 |
/* build a list of properties that are in use */ |
1282 |
/* And create the prop index to propID (metaID) mapping arrays */ |
1283 |
init_property_list(header); |
1284 |
} |
1285 |
|
1286 |
|
1287 |
if ( (count = header->property_count) <= 0) |
1288 |
return; |
1289 |
|
1290 |
|
1291 |
/* any props exist, unlikely, but need to save a space. */ |
1292 |
if ( !docProperties ) |
1293 |
{ |
1294 |
DB_WritePropPositions( sw, indexf, fi, indexf->DB); |
1295 |
return; |
1296 |
} |
1297 |
|
1298 |
|
1299 |
for( i = 0; i < count; i++ ) |
1300 |
{ |
1301 |
/* convert the count to a propID */ |
1302 |
int propID = header->propIDX_to_metaID[i]; // here's the array created in init_property_list() |
1303 |
|
1304 |
|
1305 |
/* Here's why I need to redo the properties so it's always header->property_count size in the fi rec */ |
1306 |
/* The mapping is all a temporary kludge */ |
1307 |
if ( propID >= docProperties->n ) // Does this file have this many properties? |
1308 |
continue; |
1309 |
|
1310 |
|
1311 |
if ( !(prop = docProperties->propEntry[propID])) // does this file have this prop? |
1312 |
continue; |
1313 |
|
1314 |
buf = compress_property( prop, propID, sw, &buf_len, &uncompressed_len ); |
1315 |
|
1316 |
DB_WriteProperty( sw, indexf, fi, propID, (char *)buf, buf_len, uncompressed_len, indexf->DB ); |
1317 |
} |
1318 |
|
1319 |
|
1320 |
|
1321 |
|
1322 |
/* Write the position data */ |
1323 |
DB_WritePropPositions( sw, indexf, fi, indexf->DB); |
1324 |
|
1325 |
freeDocProperties( docProperties ); |
1326 |
fi->docProperties = NULL; |
1327 |
|
1328 |
|
1329 |
|
1330 |
} |
1331 |
|
1332 |
/******************************************************************* |
1333 |
* Reads a single doc property - this is used for sorting |
1334 |
* |
1335 |
* Caller needs to destroy returned property |
1336 |
* |
1337 |
* Call with: |
1338 |
* sw - everyone needs a sw |
1339 |
* indexf - which index to read from |
1340 |
* FileRec - which contains filenum (key part 1) |
1341 |
* metaID - which prop (key part 2) |
1342 |
* max_size- to limit size of property |
1343 |
* |
1344 |
* Returns: |
1345 |
* *propEntry - caller *must* destroy |
1346 |
* |
1347 |
* |
1348 |
*********************************************************************/ |
1349 |
propEntry *ReadSingleDocPropertiesFromDisk( SWISH *sw, IndexFILE *indexf, FileRec *fi, int metaID, int max_size ) |
1350 |
{ |
1351 |
int propLen; |
1352 |
int error_flag; |
1353 |
struct metaEntry meta_entry; |
1354 |
unsigned char *buf; |
1355 |
int buf_len; /* size on disk */ |
1356 |
int uncompressed_len; /* size uncompressed */ |
1357 |
propEntry *docProp; |
1358 |
unsigned char *propbuf; |
1359 |
INDEXDATAHEADER *header = &indexf->header; |
1360 |
int count; |
1361 |
int propIDX; |
1362 |
|
1363 |
|
1364 |
/* initialize the first time called */ |
1365 |
if ( header->property_count == 0 ) |
1366 |
init_property_list(header); |
1367 |
|
1368 |
if ( (count = header->property_count) <= 0) |
1369 |
return NULL; |
1370 |
|
1371 |
|
1372 |
/* Map the propID to an index number */ |
1373 |
propIDX = header->metaID_to_PropIDX[metaID]; |
1374 |
|
1375 |
if ( propIDX < 0 ) |
1376 |
progerr("Mapped propID %d to invalid property index", metaID ); |
1377 |
|
1378 |
|
1379 |
|
1380 |
/* already loaded? -- if so, duplicate the property for the given length */ |
1381 |
/* This should only happen if ReadAllDocPropertiesFromDisk() was called, and only with db_native.c */ |
1382 |
|
1383 |
if ( fi->docProperties ) |
1384 |
return duplicate_in_mem_property( fi->docProperties, metaID, max_size ); |
1385 |
|
1386 |
|
1387 |
/* Otherwise, read from disk */ |
1388 |
|
1389 |
if ( !(buf = (unsigned char*)DB_ReadProperty( sw, indexf, fi, metaID, &buf_len, &uncompressed_len, indexf->DB ))) |
1390 |
return NULL; |
1391 |
|
1392 |
propbuf = uncompress_property( sw, buf, buf_len, &uncompressed_len ); |
1393 |
|
1394 |
propLen = uncompressed_len; /* just to be clear ;) */ |
1395 |
|
1396 |
/* Limit size,if possible - should really check if it's a string */ |
1397 |
if ( max_size && (max_size >= 8) && (max_size < propLen )) |
1398 |
propLen = max_size; |
1399 |
|
1400 |
|
1401 |
meta_entry.metaName = "(default)"; /* for error message, I think */ |
1402 |
meta_entry.metaID = metaID; |
1403 |
|
1404 |
docProp = CreateProperty( &meta_entry, propbuf, propLen, 1, &error_flag ); |
1405 |
|
1406 |
efree( buf ); |
1407 |
return docProp; |
1408 |
} |
1409 |
|
1410 |
|
1411 |
|
1412 |
/******************************************************************* |
1413 |
* Reads the doc properties from disk |
1414 |
* |
1415 |
* Maybe should return void, and just set? |
1416 |
* Or maybe should take a filenum, and instead take a position? |
1417 |
* |
1418 |
* The original idea (and the way it was written) was to use the seek |
1419 |
* position of the first property, and the total length of all properties |
1420 |
* then read all the properties in one fread call. |
1421 |
* The plan was to call it in result_output.c, so all the props would get loaded |
1422 |
* in one shot. |
1423 |
* That design probably has little effect on performance. Now we just call |
1424 |
* ReadSingleDocPropertiesFromDisk for each prop. |
1425 |
* |
1426 |
* Now, this is really just a way to populate the fi->docProperties structure. |
1427 |
* |
1428 |
* 2001-09 jmruiz Modified to be used by merge.c |
1429 |
*********************************************************************/ |
1430 |
|
1431 |
docProperties *ReadAllDocPropertiesFromDisk( SWISH *sw, IndexFILE *indexf, int filenum ) |
1432 |
{ |
1433 |
FileRec fi; |
1434 |
propEntry *new_prop; |
1435 |
int count; |
1436 |
struct metaEntry meta_entry; |
1437 |
docProperties *docProperties=NULL; |
1438 |
INDEXDATAHEADER *header = &indexf->header; |
1439 |
int propIDX; |
1440 |
|
1441 |
|
1442 |
|
1443 |
/* Get a place to cache the pointers */ |
1444 |
memset(&fi,0, sizeof( FileRec )); |
1445 |
fi.filenum = filenum; |
1446 |
|
1447 |
|
1448 |
meta_entry.metaName = "(default)"; /* for error message, I think */ |
1449 |
|
1450 |
|
1451 |
/* initialize the first time called */ |
1452 |
if ( header->property_count == 0 ) |
1453 |
init_property_list(header); |
1454 |
|
1455 |
if ( (count = header->property_count) <= 0) |
1456 |
return NULL; |
1457 |
|
1458 |
|
1459 |
for ( propIDX = 0; propIDX < count; propIDX++ ) |
1460 |
{ |
1461 |
meta_entry.metaID = header->propIDX_to_metaID[propIDX]; |
1462 |
|
1463 |
new_prop = ReadSingleDocPropertiesFromDisk( sw, indexf, &fi, meta_entry.metaID, 0); |
1464 |
|
1465 |
if ( !new_prop ) |
1466 |
continue; |
1467 |
|
1468 |
// would be better if we didn't need to create a new property just to free one |
1469 |
// this routine is currently only used by merge and dump.c |
1470 |
|
1471 |
addDocProperty(&docProperties, &meta_entry, new_prop->propValue, new_prop->propLen, 1 ); |
1472 |
|
1473 |
efree( new_prop ); |
1474 |
} |
1475 |
|
1476 |
/* Free the prop seek location cache */ |
1477 |
if ( fi.prop_index ) |
1478 |
efree( fi.prop_index ); |
1479 |
|
1480 |
return docProperties; |
1481 |
} |
1482 |
|
1483 |
|
1484 |
|
1485 |
|
1486 |
|
1487 |
|
1488 |
/*******************************************************************
*   Append a property name to the list of properties to display
*
*   Call with:
*       *sw       - the list lives in sw->Search
*       *propName - name to add; a private copy (estrdup) is stored
*
*   The name array starts with room for 5 entries and grows by 2 each
*   time it fills up.
*
*********************************************************************/
void addSearchResultDisplayProperty(SWISH *sw, char *propName)
{
    struct MOD_Search *srch = sw->Search;

    /* Make room when the array is full (or has not been allocated yet) */
    if ( srch->numPropertiesToDisplay >= srch->currentMaxPropertiesToDisplay )
    {
        if ( srch->currentMaxPropertiesToDisplay == 0 )
        {
            srch->currentMaxPropertiesToDisplay = 5;
            srch->propNameToDisplay = (char **)emalloc( srch->currentMaxPropertiesToDisplay * sizeof(char *) );
        }
        else
        {
            srch->currentMaxPropertiesToDisplay += 2;
            srch->propNameToDisplay = (char **)erealloc( srch->propNameToDisplay, srch->currentMaxPropertiesToDisplay * sizeof(char *) );
        }
    }

    srch->propNameToDisplay[ srch->numPropertiesToDisplay ] = estrdup(propName);
    srch->numPropertiesToDisplay++;
}
1505 |
|
1506 |
|
1507 |
|
1508 |
|
1509 |
|
1510 |
/* For faster proccess, get de ID of the properties to sort */ |
1511 |
int initSearchResultProperties(SWISH *sw) |
1512 |
{ |
1513 |
IndexFILE *indexf; |
1514 |
int i; |
1515 |
struct MOD_Search *srch = sw->Search; |
1516 |
struct metaEntry *meta_entry; |
1517 |
|
1518 |
|
1519 |
/* lookup selected property names */ |
1520 |
|
1521 |
if (srch->numPropertiesToDisplay == 0) |
1522 |
return RC_OK; |
1523 |
|
1524 |
for( indexf = sw->indexlist; indexf; indexf = indexf->next ) |
1525 |
indexf->propIDToDisplay=(int *) emalloc(srch->numPropertiesToDisplay*sizeof(int)); |
1526 |
|
1527 |
for (i = 0; i < srch->numPropertiesToDisplay; i++) |
1528 |
{ |
1529 |
makeItLow(srch->propNameToDisplay[i]); |
1530 |
|
1531 |
/* Get ID for each index file */ |
1532 |
for( indexf = sw->indexlist; indexf; indexf = indexf->next ) |
1533 |
{ |
1534 |
if ( !(meta_entry = getPropNameByName( &indexf->header, srch->propNameToDisplay[i]))) |
1535 |
{ |
1536 |
progerr ("Unknown Display property name \"%s\"", srch->propNameToDisplay[i]); |
1537 |
return (sw->lasterror=UNKNOWN_PROPERTY_NAME_IN_SEARCH_DISPLAY); |
1538 |
} |
1539 |
else |
1540 |
indexf->propIDToDisplay[i] = meta_entry->metaID; |
1541 |
} |
1542 |
} |
1543 |
return RC_OK; |
1544 |
} |
1545 |
|
1546 |
|
1547 |
|
1548 |
/***************************************************************
*   Prints one property to stdout, for debugging
*
*   Call with:
*       *prop       - property to print, may be NULL
*       *meta_entry - supplies the name, ID and type of the property
*
*   Output is one line: name, metaID, stored length, a type letter
*   (S = string, D = date, N = number, ? = none of these) and then the
*   decoded value in double quotes.  At most 301 characters of the value
*   are printed, followed by " ..." when that limit was reached.
*
**************************************************************/
void dump_single_property( propEntry *prop, struct metaEntry *meta_entry )
{
    enum { MAX_DUMP_CHARS = 300 };

    char   *propstr;
    char    proptype = '?';
    int     stored_len;
    size_t  text_len;
    size_t  i;


    if ( is_meta_string(meta_entry) )
        proptype = 'S';

    else if ( is_meta_date(meta_entry) )
        proptype = 'D';

    else if ( is_meta_number(meta_entry) )
        proptype = 'N';


    stored_len = prop ? prop->propLen : 0;

    printf(" %20s:%2d (%3d) %c:", meta_entry->metaName, meta_entry->metaID, stored_len, proptype );


    if ( !prop )
    {
        printf(" propEntry=NULL\n");
        return;
    }

    /* DecodeDocProperty hands back a string we own; it is freed below */
    propstr = DecodeDocProperty( meta_entry, prop );
    printf(" \"");

    /* Measure once instead of calling strlen() on every iteration */
    text_len = strlen( propstr );

    i = 0;
    while ( i < text_len )
    {
        printf("%c", propstr[i] );

        i++;
        if ( i > MAX_DUMP_CHARS )
        {
            printf(" ...");
            break;
        }
    }
    printf("\"\n");

    efree( propstr );
}
1602 |
|
1603 |
/*************************************************************** |
1604 |
* Dumps what's currently in the fi->docProperties structure |
1605 |
* |
1606 |
**************************************************************/ |
1607 |
|
1608 |
void dump_file_properties(IndexFILE * indexf, FileRec *fi ) |
1609 |
{ |
1610 |
int j; |
1611 |
propEntry *prop; |
1612 |
struct metaEntry *meta_entry; |
1613 |
|
1614 |
if ( !fi->docProperties ) /* may not be any properties */ |
1615 |
{ |
1616 |
printf(" (No Properties)\n"); |
1617 |
return; |
1618 |
} |
1619 |
|
1620 |
for (j = 0; j < fi->docProperties->n; j++) |
1621 |
{ |
1622 |
if ( !fi->docProperties->propEntry[j] ) |
1623 |
continue; |
1624 |
|
1625 |
meta_entry = getPropNameByID( &indexf->header, j ); |
1626 |
prop = fi->docProperties->propEntry[j]; |
1627 |
|
1628 |
dump_single_property( prop, meta_entry ); |
1629 |
} |
1630 |
} |
1631 |
|