/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/docprop.c
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/src/docprop.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Error occurred while calculating annotation data.
Importing web-site building process.

1 /*
2 $Id: docprop.c,v 1.96 2002/08/22 22:58:39 whmoseley Exp $
3 ** DocProperties.c, DocProperties.h
4 **
5 ** Functions to manage the index's Document Properties
6 **
7 ** File Created.
8 ** Mark Gaulin 11/24/98
9 **
10 ** change sprintf to snprintf to avoid corruption,
11 ** and use MAXSTRLEN from swish.h instead of literal "200"
12 ** SRE 11/17/99
13 **
14 ** 04/00 Jose ruiz
15 ** storeDocProperties and readNextDocPropEntry modified to store
16 ** the int numbers compressed. This makes integers "portable"
17 **
18 ** 04/00 Jose Ruiz
19 ** Added sorting results by property
20 **
21 ** 07/00 and 08/00 - Jose Ruiz
22 ** Many modifications to make all functions thread safe
23 **
24 ** 08/00 - Added ascending and descending capabilities in results sorting
25 **
26 ** 2001-01 rasc getResultPropertyByName rewritten, datatypes for properties.
27 ** 2001-02 rasc isAutoProperty
28 ** printSearchResultProperties changed
29 ** 2001-03-15 rasc Outputdelimiter var name changed
30 ** 2001-06-08 wsm Store propValue at end of docPropertyEntry to save memory
31 ** 2001-06-14 moseley Most of the code rewritten, and propfile added
32 **
33 ** 2001-09 jmruiz - ReadAllDocPropertiesFromDisk rewriten to be used
34 ** by merge.c
35 **
36 */
37
38 #include <limits.h> // for ULONG_MAX
39 #include "swish.h"
40 #include "string.h"
41 #include "file.h"
42 #include "hash.h"
43 #include "mem.h"
44 #include "merge.h"
45 #include "error.h"
46 #include "search.h"
47 #include "index.h"
48 #include "docprop.h"
49 #include "error.h"
50 #include "compress.h"
51 #include "metanames.h"
52 #include "result_output.h"
53 #include "result_sort.h"
54 #include "entities.h"
55 #include "db.h"
56 #ifdef HAVE_ZLIB
57 #include <zlib.h>
58 #endif
59
60
61
62
63 /*******************************************************************
64 * Free a property entry
65 *
66 ********************************************************************/
67
68 void freeProperty( propEntry *prop )
69 {
70 if ( prop )
71 efree(prop);
72 }
73
74
75
76
77 /*******************************************************************
78 * Free all properties in the docProperties structure
79 *
80 ********************************************************************/
81
82
83 void freeDocProperties(docProperties *docProperties)
84 {
85 int i;
86
87 for( i = 0; i < docProperties->n; i++ )
88 {
89 freeProperty( docProperties->propEntry[i] );
90 docProperties->propEntry[i] = NULL;
91 }
92
93 efree(docProperties);
94 docProperties = NULL;
95
96 }
97
98
99 /*******************************************************************
100 * Frees a FileRec (struct file), which just frees
101 * the properties and property index
102 * Doesn't free the FileRec itself.
103 *
104 * move here from swish.c since all FileRec really holds is property info
105 *
106 ********************************************************************/
107
108 void freefileinfo(FileRec *fi)
109 {
110
111 if ( fi->docProperties )
112 {
113 freeDocProperties( fi->docProperties );
114 fi->docProperties = NULL;
115 }
116
117 if ( fi->prop_index )
118 {
119 efree( fi->prop_index );
120 fi->prop_index = NULL;
121 }
122 }
123
124 /*******************************************************************
125 * Converts a property into a string, based on it's type.
126 * Numbers are zero filled
127 *
128 * Call with:
129 * *metaEntry
130 * *propEntry
131 *
132 * Returns:
133 * malloc's a new string. Caller must call free().
134 *
135 *
136 ********************************************************************/
137
138 char *DecodeDocProperty( struct metaEntry *meta_entry, propEntry *prop )
139 {
140 char *s;
141 unsigned long i;
142
143 if ( is_meta_string(meta_entry) ) /* check for ascii/string data */
144 return (char *)bin2string(prop->propValue,prop->propLen);
145
146
147 if ( is_meta_date(meta_entry) )
148 {
149 s=emalloc(20);
150 i = *(unsigned long *) prop->propValue; /* read binary */
151 i = UNPACKLONG(i); /* Convert the portable number */
152 strftime(s,20,"%Y-%m-%d %H:%M:%S",(struct tm *)localtime((time_t *)&i));
153 return s;
154 }
155
156
157
158 if ( is_meta_number(meta_entry) )
159 {
160 s=emalloc(14);
161 i=*(unsigned long *)prop->propValue; /* read binary */
162 i = UNPACKLONG(i); /* Convert the portable number */
163 sprintf(s,"%.013lu",i);
164 return s;
165 }
166
167 progwarn("Invalid property type for property '%s'\n", meta_entry->metaName );
168 return estrdup("");
169 }
170
171 /*******************************************************************
172 * Returns a property (really the head of the list)
173 * for the specified property
174 *
175 * Call with:
176 * *RESULT
177 * *metaEntry - pointer to related meta entry
178 * metaID - OR, if metaEntry is NULL uses this to lookup metaEntry
179 *
180 * Returns:
181 * *propEntry
182 *
183 * Warning:
184 * Only returns first property in list (which is the last property added)
185 *
186 * Notes:
187 * with PROPFILE, caller is expected to destroy the property
188 *
189 *
190 ********************************************************************/
191
192 static propEntry *getDocProperty( SWISH *sw, RESULT *result, struct metaEntry **meta_entry, int metaID )
193 {
194 IndexFILE *indexf = result->indexf;
195 int error_flag;
196 unsigned long num;
197
198
199 /* Grab the meta structure for this ID, unless one was passed in */
200
201 if ( *meta_entry )
202 metaID = (*meta_entry)->metaID;
203
204 else if ( !(*meta_entry = getPropNameByID(&indexf->header, metaID )) )
205 return NULL;
206
207
208 /* This is a memory leak if not using PROPFILE */
209
210 /* Some properties are generated during a search */
211 if ( is_meta_internal( *meta_entry ) )
212 {
213 if ( is_meta_entry( *meta_entry, AUTOPROPERTY_RESULT_RANK ) )
214 {
215 num = PACKLONG( (unsigned long)result->rank );
216 return CreateProperty( *meta_entry, (unsigned char *)&num, sizeof( num ), 1, &error_flag );
217 }
218
219 if ( is_meta_entry( *meta_entry, AUTOPROPERTY_REC_COUNT ) )
220 {
221 num = PACKLONG( (unsigned long)result->count );
222 return CreateProperty( *meta_entry, (unsigned char *)&num, sizeof( num ), 1, &error_flag );
223 }
224
225 if ( is_meta_entry( *meta_entry, AUTOPROPERTY_FILENUM ) )
226 {
227 num = PACKLONG( (unsigned long)result->filenum );
228 return CreateProperty( *meta_entry, (unsigned char *)&num, sizeof( num ), 1, &error_flag );
229 }
230
231
232 if ( is_meta_entry( *meta_entry, AUTOPROPERTY_INDEXFILE ) )
233 return CreateProperty( *meta_entry, (unsigned char *)result->indexf->line, strlen( result->indexf->line ), 0, &error_flag );
234 }
235
236
237 return ReadSingleDocPropertiesFromDisk(sw, indexf, &result->fi, metaID, 0 );
238 }
239
240
241 /*******************************************************************
242 * Returns a string for the property ID supplied
243 * Numbers are zero filled
244 *
245 * Call with:
246 * *RESULT
247 * metaID
248 *
249 * Returns:
250 * malloc's a new string. Caller must call free().
251 *
252 * Bugs:
253 * Only returns first property in list (which is the last property)
254 *
255 *
256 ********************************************************************/
257
258 char *getResultPropAsString(SWISH *sw, RESULT *result, int ID)
259 {
260 char *s = NULL;
261 propEntry *prop;
262 struct metaEntry *meta_entry = NULL;
263
264
265 if( !result )
266 return estrdup(""); // when would this happen?
267
268
269
270 if ( !(prop = getDocProperty(sw, result, &meta_entry, ID )) )
271 return estrdup("");
272
273 /* $$$ Ignores possible other properties that are linked to this one */
274 s = DecodeDocProperty( meta_entry, prop );
275
276 freeProperty( prop );
277
278 return s;
279 }
280
281 /*******************************************************************
282 * SwishResultPropertyStr - Returns a string for the property *name* supplied
283 * Numbers are zero filled
284 *
285 * ** Library interface call **
286 *
287 * Call with:
288 * *sw
289 * *RESULT
290 * char * property name
291 *
292 * Returns:
293 * A string -- caller does not need to free as the strings are
294 * cleaned up on every call
295 *
296 *
297 ********************************************************************/
298
299 char *SwishResultPropertyStr(SWISH *sw, RESULT *result, char *pname)
300 {
301 char *s = NULL;
302 propEntry *prop;
303 struct metaEntry *meta_entry = NULL;
304 IndexFILE *indexf;
305
306 if( !result )
307 {
308 sw->lasterror = SWISH_LISTRESULTS_EOF;
309 return ""; // when would this happen?
310 }
311
312
313 indexf = result->indexf;
314
315
316 /* Ok property name? */
317
318 if ( !(meta_entry = getPropNameByName( &indexf->header, pname )) )
319 {
320 set_progerr(UNKNOWN_PROPERTY_NAME_IN_SEARCH_DISPLAY, sw, "Invalid property name '%s'", pname );
321 return "(null)";
322 }
323
324
325
326
327 /* Does this results have this property? */
328
329 if ( !(prop = getDocProperty(sw, result, &meta_entry, 0 )) )
330 return "";
331
332 s = DecodeDocProperty( meta_entry, prop );
333
334 freeProperty( prop );
335
336 if ( !*s )
337 {
338 efree( s );
339 return "";
340 }
341
342 /* create a place to store the strings */
343
344 if ( ! indexf->prop_string_cache )
345 {
346 indexf->prop_string_cache = (char **)emalloc( indexf->header.metaCounter * sizeof( char *) );
347 memset( indexf->prop_string_cache, 0, indexf->header.metaCounter * sizeof( char *) );
348 }
349
350 /* Free previous, if needed -- note the metaIDs start at one */
351
352 if ( indexf->prop_string_cache[ meta_entry->metaID-1 ] )
353 efree( indexf->prop_string_cache[ meta_entry->metaID-1 ] );
354
355 indexf->prop_string_cache[ meta_entry->metaID-1 ] = s;
356 return s;
357 }
358
359
360 /*******************************************************************
361 * SwishResultPropertyULong - Returns an unsigned long for the property *name* supplied
362 *
363 * ** Library interface call **
364 *
365 * Call with:
366 * *sw
367 * *RESULT
368 * char * property name
369 *
370 * Returns:
371 * unsigned long
372 * ULONG_MAX on error
373 *
374 *
375 ********************************************************************/
376
377 unsigned long SwishResultPropertyULong(SWISH *sw, RESULT *result, char *pname)
378 {
379 struct metaEntry *meta_entry = NULL;
380 IndexFILE *indexf;
381 PropValue *pv;
382 unsigned long value;
383
384 if( !result )
385 {
386 sw->lasterror = SWISH_LISTRESULTS_EOF;
387 return ULONG_MAX;
388 }
389
390
391 indexf = result->indexf;
392
393
394 /* Ok property name? */
395
396 if ( !(meta_entry = getPropNameByName( &indexf->header, pname )) )
397 {
398 set_progerr(UNKNOWN_PROPERTY_NAME_IN_SEARCH_DISPLAY, sw, "Invalid property name '%s'", pname );
399 return ULONG_MAX;
400 }
401
402
403 /* make sure it's a numeric prop */
404 if ( !is_meta_number(meta_entry) && !is_meta_date(meta_entry) )
405 {
406 set_progerr(INVALID_PROPERTY_TYPE, sw, "Property '%s' is not numeric", pname );
407 return ULONG_MAX;
408 }
409
410 pv = getResultPropValue (sw, result, pname, 0 );
411
412 value = pv->value.v_ulong;
413
414 efree( pv );
415
416 return value;
417 }
418
419
420
421 /*******************************************************************
422 * Returns a property as a *propValue, which is a union of different
423 * data types, with a flag to indicate the type
424 * Can be called with either a metaname, or a metaID.
425 *
426 * Call with:
427 * *SWISH
428 * *RESULT
429 * *metaName -- String name of meta entry
430 * metaID -- OR - meta ID number
431 *
432 * Returns:
433 * pointer to a propValue structure if found -- caller MUST free
434 * Returns NULL if propertyName doesn't exist.
435 *
436 * Note:
437 * Feb 13, 2002 - now defined properties that just don't exist
438 * for the document return a blank *string* even for numeric
439 * and date properties. This it to prevent "(NULL)" from displaying.
440 * They used to return NULL, but since currently only result_output.c
441 * uses this function, it's not a problem.
442 *
443 *
444 ********************************************************************/
445
446 PropValue *getResultPropValue (SWISH *sw, RESULT *r, char *pname, int ID )
447 {
448 PropValue *pv;
449 struct metaEntry *meta_entry = NULL;
450 propEntry *prop;
451
452
453 /* Lookup by property name, if supplied */
454 if ( pname )
455 if ( !(meta_entry = getPropNameByName( &r->indexf->header, pname )) )
456 return NULL;
457
458
459 /* create a propvalue to return to caller */
460 pv = (PropValue *) emalloc (sizeof (PropValue));
461 pv->datatype = PROP_UNDEFINED;
462 pv->destroy = 0;
463
464
465
466 /* This may return false */
467 prop = getDocProperty( sw, r, &meta_entry, ID );
468
469 if ( !prop )
470 {
471 pv->datatype = PROP_STRING;
472 pv->value.v_str = "";
473 return pv;
474 }
475
476
477 if ( is_meta_string(meta_entry) ) /* check for ascii/string data */
478 {
479 pv->datatype = PROP_STRING;
480 pv->destroy++; // caller must free this
481 pv->value.v_str = (char *)bin2string(prop->propValue,prop->propLen);
482 freeProperty( prop );
483 return pv;
484 }
485
486
487 /* dates and numbers should return null to tell apart from zero */
488 /* This is a slight problem with display, as blank properties show "(NULL)" */
489 /* but is needed since other parts of swish (like sorting) need to see NULL. */
490
491 /****************
492 if ( !prop )
493 {
494 efree( pv );
495 return NULL;
496 }
497 ****************/
498
499
500 if ( is_meta_number(meta_entry) )
501 {
502 unsigned int i;
503 i = *(unsigned int *) prop->propValue; /* read binary */
504 i = UNPACKLONG(i); /* Convert the portable number */
505 pv->datatype = PROP_ULONG;
506 pv->value.v_ulong = (long)i;
507 freeProperty( prop );
508 return pv;
509 }
510
511
512 if ( is_meta_date(meta_entry) )
513 {
514 unsigned long i;
515 i = *(unsigned long *) prop->propValue; /* read binary */
516 i = UNPACKLONG(i); /* Convert the portable number */
517 pv->datatype = PROP_DATE;
518 pv->value.v_date = (time_t)i;
519 freeProperty( prop );
520 return pv;
521 }
522
523 freeProperty( prop );
524
525
526
527 if (pv->datatype == PROP_UNDEFINED) { /* nothing found */
528 efree (pv);
529 pv = NULL;
530 }
531
532 return pv;
533 }
534
535 /*******************************************************************
536 * Destroys a "pv" returned from getResultPropValue
537 *
538 *
539 ********************************************************************/
540 void freeResultPropValue(PropValue *pv)
541 {
542 if ( !pv ) return;
543
544 if ( pv->datatype == PROP_STRING && pv->destroy )
545 efree( pv->value.v_str );
546
547 efree(pv);
548 }
549
550
551 /*******************************************************************
552 * Displays the "old" style properties for -p
553 *
554 * Call with:
555 * *RESULT
556 *
557 * I think this could be done in result_output.c by creating a standard
558 * -x format (plus properites) for use when there isn't one already.
559 *
560 *
561 ********************************************************************/
562 void printStandardResultProperties(SWISH *sw, FILE *f, RESULT *r)
563 {
564 int i;
565 struct MOD_Search *srch = sw->Search;
566 char *s;
567 char *propValue;
568 int *metaIDs;
569
570 metaIDs = r->indexf->propIDToDisplay;
571
572 if (srch->numPropertiesToDisplay == 0)
573 return;
574
575 for ( i = 0; i < srch->numPropertiesToDisplay; i++ )
576 {
577 propValue = s = getResultPropAsString( sw, r, metaIDs[ i ] );
578
579 if (sw->ResultOutput->stdResultFieldDelimiter)
580 fprintf(f, "%s", sw->ResultOutput->stdResultFieldDelimiter);
581 else
582 fprintf(f, " \""); /* default is to quote the string, with leading space */
583
584 /* print value, handling newlines and quotes */
585 while (*propValue)
586 {
587 if (*propValue == '\n')
588 fprintf(f, " ");
589
590 else if (*propValue == '\"') /* should not happen */
591 fprintf(f,"&quot;");
592
593 else
594 fprintf(f,"%c", *propValue);
595
596 propValue++;
597 }
598
599 //fprintf(f,"%s", propValue);
600
601 if (!sw->ResultOutput->stdResultFieldDelimiter)
602 fprintf(f,"\""); /* default is to quote the string */
603
604 efree( s );
605 }
606 }
607
608
/*******************************************************************
*   Converts a text value into the form stored as a property:
*
*     number / date - parsed with strtoul (base 10) and stored as a
*                     packed unsigned long (sizeof(unsigned long) bytes,
*                     followed by a NUL that is not counted)
*     string        - leading/trailing white space removed; every run of
*                     characters <= ' ' collapsed to a single space
*
*   Call with:
*       *metaEntry    - gives the property type
*       **encodedStr  - destination for the newly allocated result
*       *propstring   - text to encode; NULL is treated like a blank string
*       *error_flag   - set to zero on entry, incremented on a conversion
*                       error.  Lets the caller tell an error (flag set)
*                       from a blank property (flag clear) when zero is
*                       returned.
*
*   Returns:
*       Length of the encoded data, with *encodedStr pointing at it.
*       The caller owns that buffer and releases it with efree().
*
*       Zero if nothing was encoded; in that case *encodedStr is left
*       untouched and nothing needs to be freed.
*
*   Notes:
*       A number that parses to ULONG_MAX is rejected as a failed
*       conversion.
*
********************************************************************/
static int EncodeProperty( struct metaEntry *meta_entry, char **encodedStr, char *propstring, int *error_flag )
{
    char *string = propstring;

    *error_flag = 0;

    /* skip leading white space -- but only if there is a string at all */
    if ( string )
        while ( isspace( (unsigned char)*string ) )
            string++;

    if ( !string || !*string )
    {
#ifdef BLANK_PROP_VALUE
        string = BLANK_PROP_VALUE;  /* gets dup'ed below */
#else
        return 0;
#endif
    }


    /* make a working copy */
    string = estrdup( string );

    /* remove trailing white space */
    {
        size_t len = strlen( string );

        while ( len && isspace( (unsigned char)string[len-1] ) )
            string[--len] = '\0';
    }


    if (is_meta_number( meta_entry ) || is_meta_date( meta_entry ))
    {
        unsigned long int   num;
        char               *badchar;
        char               *newstr;

        num = strtoul( string, &badchar, 10 );

        if ( num == ULONG_MAX )
        {
            progwarnno("EncodeProperty - Attempted to convert '%s' to a number", string );
            efree(string);
            (*error_flag)++;
            return 0;
        }

        if ( *badchar )     /* strtoul stopped before the end of the string */
        {
            progwarn("EncodeProperty - Invalid char '%c' found in string '%s'", badchar[0], string);
            efree(string);
            (*error_flag)++;
            return 0;
        }

        /* Allocate only once the value is known to be good, */
        /* so the error returns above have nothing to leak.  */
        num    = PACKLONG(num);
        newstr = emalloc( sizeof( num ) + 1 );

        memcpy( newstr, &num, sizeof( num ) );
        newstr[ sizeof(num) ] = '\0';

        *encodedStr = newstr;

        efree(string);

        return (int)sizeof(num);
    }


    if ( is_meta_string(meta_entry) )
    {
        /* Replace each run of control chars / spaces with one space, in place. */
        /* The test is on the unsigned value so 8859-1 chars are kept.          */

        char *source, *dest;
        dest = string;
        for( source = string; *source; source++ )
        {
            if ( (int)((unsigned char)*source) <= (int)' ' )
            {
                if ( dest > string && *(dest - 1) != ' ' )
                {
                    *dest = ' ';
                    dest++;
                }
                continue;
            }

            *dest = *source;
            dest++;
        }
        *dest = '\0';

        /* Nothing but control characters: report a blank property and */
        /* don't hand out a buffer the caller has no reason to free.   */
        if ( dest == string )
        {
            efree( string );
            return 0;
        }

        *encodedStr = string;
        return (int)( dest - string );
    }


    progwarn("EncodeProperty called but doesn't know the property type :(");
    efree( string );
    return 0;
}
745
746 /*******************************************************************
747 * Creates a document property
748 *
749 * Call with:
750 * *metaEntry
751 * *propValue - string to add
752 * *propLen - length of string to add, but can be limited by metaEntry->max_size
753 * preEncoded - flag saying the data is already encoded
754 * (that's for filesize, last modified, start position)
755 * *error_flag - integer to indicate the difference between an error and a blank property
756 *
757 * Returns:
758 * pointer to a newly created document property
759 * NULL indicates property could not be created
760 *
761 *
762 ********************************************************************/
763 propEntry *CreateProperty(struct metaEntry *meta_entry, unsigned char *propValue, int propLen, int preEncoded, int *error_flag )
764 {
765 propEntry *docProp;
766
767
768 /* limit length */
769 if ( !preEncoded && meta_entry->max_len && propLen > meta_entry->max_len )
770 propLen = meta_entry->max_len;
771
772 /* convert string to a document property, if not already encoded */
773 if ( !preEncoded )
774 {
775 char *tmp;
776
777 propLen = EncodeProperty( meta_entry, &tmp, (char *)propValue, error_flag );
778
779 if ( !propLen ) /* Error detected in encode */
780 return NULL;
781
782 /* Limit length */
783 if ( is_meta_string(meta_entry) && meta_entry->max_len && propLen > meta_entry->max_len )
784 propLen = meta_entry->max_len;
785
786 propValue = (unsigned char *)tmp;
787 }
788
789 /* Now create the property $$ could be -1 */
790 docProp=(propEntry *) emalloc(sizeof(propEntry) + propLen);
791
792 memcpy(docProp->propValue, propValue, propLen);
793 docProp->propLen = propLen;
794
795
796 /* EncodeProperty creates a new string */
797 if ( !preEncoded )
798 efree( propValue );
799
800 return docProp;
801 }
802
803 /*******************************************************************
804 * Appends a string onto a current property
805 *
806 * Call with:
807 * *propEntry
808 * *string
809 * length of string
810 *
811 * Will limit property length, if needed.
812 *
813 *******************************************************************/
814 propEntry *append_property( struct metaEntry *meta_entry, propEntry *p, char *txt, int length )
815 {
816 int newlen;
817 int add_a_space = 0;
818 char *str = NULL;
819 int error_flag = 0;
820
821 length = EncodeProperty( meta_entry, &str, txt, &error_flag );
822
823 if ( !length )
824 return p;
825
826 /* When appending, we separate by a space -- could be a config setting */
827 if ( !isspace( (int)*str ) && !isspace( (int)p->propValue[p->propLen-1] ) )
828 add_a_space++;
829
830
831 /* Any room to add the property? */
832 if ( meta_entry->max_len && p->propLen + add_a_space >= meta_entry->max_len )
833 {
834 if ( str )
835 efree( str );
836
837 return p;
838 }
839
840
841 newlen = p->propLen + length + add_a_space;
842
843 /* limit length */
844 if ( meta_entry->max_len && newlen >= meta_entry->max_len )
845 {
846 newlen = meta_entry->max_len;
847 length = meta_entry->max_len - p->propLen - add_a_space;
848 }
849
850
851 /* Now reallocate the property */
852 p = (propEntry *) erealloc(p, sizeof(propEntry) + newlen);
853
854 if ( add_a_space )
855 p->propValue[p->propLen++] = ' ';
856
857 memcpy( (void *)&(p->propValue[p->propLen]), str, length );
858 p->propLen = newlen;
859
860 if (str)
861 efree(str);
862
863 return p;
864 }
865
866
867 /*******************************************************************
868 * Scans the properties (metaEntry's), and adds a doc property to any that are flagged
869 * Limits size, if needed (for StoreDescription)
870 * Pass in text properties (not pre-encoded binary properties)
871 *
872 * Call with:
873 * *INDEXDATAHEADER (to get to the list of metanames)
874 * **docProperties - pointer to list of properties
875 * *propValue - string to add
876 * *propLen - length of string to add
877 *
878 * Returns:
879 * void, but will warn on failed properties
880 *
881 *
882 ********************************************************************/
883 void addDocProperties( INDEXDATAHEADER *header, docProperties **docProperties, unsigned char *propValue, int propLen, char *filename )
884 {
885 struct metaEntry *m;
886 int i;
887
888 for ( i = 0; i < header->metaCounter; i++)
889 {
890 m = header->metaEntryArray[i];
891
892 if ( (m->metaType & META_PROP) && m->in_tag )
893 if ( !addDocProperty( docProperties, m, propValue, propLen, 0 ) )
894 progwarn("Failed to add property '%s' in file '%s'", m->metaName, filename );
895 }
896 }
897
898
899
900
901 /*******************************************************************
902 * Adds a document property to the list of properties.
903 * Creates or extends the list, as necessary
904 *
905 * Call with:
906 * **docProperties - pointer to list of properties
907 * *metaEntry
908 * *propValue - string to add
909 * *propLen - length of string to add
910 * preEncoded - flag saying the data is already encoded
911 * (that's for filesize, last modified, start position)
912 *
913 * Returns:
914 * true if added property
915 * sets address of **docProperties, if list changes size
916 *
917 *
918 ********************************************************************/
919
920 int addDocProperty( docProperties **docProperties, struct metaEntry *meta_entry, unsigned char *propValue, int propLen, int preEncoded )
921 {
922 struct docProperties *dp = *docProperties;
923 propEntry *docProp;
924 int i;
925 int error_flag;
926
927
928 /* Allocate or extend the property array, if needed */
929
930 if( !dp )
931 {
932 dp = (struct docProperties *) emalloc(sizeof(struct docProperties) + (meta_entry->metaID + 1) * sizeof(propEntry *));
933 *docProperties = dp;
934
935 dp->n = meta_entry->metaID + 1;
936
937 for( i = 0; i < dp->n; i++ )
938 dp->propEntry[i] = NULL;
939 }
940
941 else /* reallocate if needed */
942 {
943 if( dp->n <= meta_entry->metaID )
944 {
945 dp = (struct docProperties *) erealloc(dp,sizeof(struct docProperties) + (meta_entry->metaID + 1) * sizeof(propEntry *));
946
947 *docProperties = dp;
948 for( i = dp->n; i <= meta_entry->metaID; i++ )
949 dp->propEntry[i] = NULL;
950
951 dp->n = meta_entry->metaID + 1;
952 }
953 }
954
955 /* Un-encoded STRINGS get appended to existing properties */
956 /* Others generate a warning */
957 if ( dp->propEntry[meta_entry->metaID] )
958 {
959 if ( is_meta_string(meta_entry) )
960 {
961 dp->propEntry[meta_entry->metaID] = append_property( meta_entry, dp->propEntry[meta_entry->metaID], (char *)propValue, propLen );
962 return 1;
963 }
964 else // Will this come back and bite me?
965 {
966 progwarn("Warning: Attempt to add duplicate property." );
967 return 0;
968 }
969 }
970
971
972 /* create the document property */
973 /* Ignore some errors */
974
975 if ( !(docProp = CreateProperty( meta_entry, propValue, propLen, preEncoded, &error_flag )) )
976 return error_flag ? 0 : 1;
977
978 dp->propEntry[meta_entry->metaID] = docProp;
979
980 return 1;
981 }
982
983 // #define DEBUGPROP 1
984 #ifdef DEBUGPROP
985 static int insidecompare = 0;
986 #endif
987
988 /*******************************************************************
989 * Compares two properties for sorting
990 *
991 * Call with:
992 * *metaEntry
993 * *docPropertyEntry1
994 * *docPropertyEntry2
995 *
996 * Returns:
997 * 0 - two properties are the same
998 * -1 - docPropertyEntry1 < docPropertyEntry2
999 * +1 - docPropertyEntry1 > docPropertyEntry2
1000 *
1001 *
1002 ********************************************************************/
1003 int Compare_Properties( struct metaEntry *meta_entry, propEntry *p1, propEntry *p2 )
1004 {
1005
1006
1007 #ifdef DEBUGPROP
1008 if ( !insidecompare++ )
1009 {
1010 printf("comparing properties for meta %s: returning: %d\n", meta_entry->metaName, Compare_Properties( meta_entry, p1, p2) );
1011 dump_single_property( p1, meta_entry );
1012 dump_single_property( p2, meta_entry );
1013 insidecompare = 0;
1014 }
1015 #endif
1016
1017
1018 if ( !p1 && p2 )
1019 return -1;
1020
1021
1022 if ( !p1 && !p2 )
1023 return 0;
1024
1025 if ( p1 && !p2 )
1026 return +1;
1027
1028
1029 if (is_meta_number( meta_entry ) || is_meta_date( meta_entry ))
1030 return memcmp( (const void *)p1->propValue, (const void *)p2->propValue, p1->propLen );
1031
1032
1033 if ( is_meta_string(meta_entry) )
1034 {
1035 int rc;
1036 int len = Min( p1->propLen, p2->propLen );
1037
1038 rc = is_meta_ignore_case( meta_entry)
1039 ? strncasecmp( (char *)p1->propValue, (char *)p2->propValue, len )
1040 : strncmp( (char *)p1->propValue, (char *)p2->propValue, len );
1041
1042 if ( rc != 0 )
1043 return rc;
1044
1045 return p1->propLen - p2->propLen;
1046 }
1047
1048 return 0;
1049
1050 }
1051
1052 /*******************************************************************
1053 * Duplicate a property that's already in memory and return it.
1054 *
1055 * Caller must destroy
1056 *
1057 *********************************************************************/
1058
1059 static propEntry *duplicate_in_mem_property( docProperties *props, int metaID, int max_size )
1060 {
1061 propEntry *docProp;
1062 struct metaEntry meta_entry;
1063 int propLen;
1064 int error_flag;
1065
1066 if ( metaID >= props->n )
1067 return NULL;
1068
1069 if ( !(docProp = props->propEntry[ metaID ]) )
1070 return NULL;
1071
1072
1073 meta_entry.metaName = "(default)"; /* for error message, I think */
1074 meta_entry.metaID = metaID;
1075
1076
1077 /* Duplicate the property */
1078 propLen = docProp->propLen;
1079
1080 /* Limit size,if possible - should really check if it's a string */
1081 if ( max_size && (max_size >= 8) && (max_size < propLen ))
1082 propLen = max_size;
1083
1084 /* Duplicate the property */
1085 return CreateProperty( &meta_entry, docProp->propValue, propLen, 1, &error_flag );
1086 }
1087
1088
#ifdef HAVE_ZLIB

/*******************************************************************
*   Hands out the shared property I/O buffer (sw->Prop_IO_Buf), making
*   it bigger first when it is missing or smaller than buf_needed.
*
*   The buffer is kept around to avoid reallocating for every prop of
*   every doc.  When it has to grow, the old contents are thrown away
*   and the new size is the larger of buf_needed and the old size plus
*   RD_BUFFER_SIZE.
*
*   A request for zero bytes is reported through progerr().
*
*********************************************************************/

static unsigned char *allocatePropIOBuffer(SWISH *sw, unsigned long buf_needed )
{
    unsigned long grown;

    if ( buf_needed == 0 )
        progerr("Asked for too small of a buffer size!");


    /* The buffer we already have will do */
    if ( sw->Prop_IO_Buf && buf_needed <= sw->PropIO_allocated )
        return sw->Prop_IO_Buf;


    /* free + malloc rather than realloc: the old contents aren't needed */
    if ( sw->Prop_IO_Buf )
        efree( sw->Prop_IO_Buf );

    grown = sw->PropIO_allocated + RD_BUFFER_SIZE;

    if ( buf_needed > grown )
        grown = buf_needed;

    sw->Prop_IO_Buf = emalloc( grown );
    sw->PropIO_allocated = grown;   /* keep track of structure size */

    return sw->Prop_IO_Buf;
}

#endif
1128
1129 /*******************************************************************
1130 * Compress a Property
1131 *
1132 * Call with:
1133 * propEntry - the in data and its length
1134 * propID - current property
1135 * SWISH - to get access to the common buffer
1136 * *uncompress_len - returns the length of the original buffer, or zero if not compressed
1137 * *buf_len - the length of the returned buffer
1138 *
1139 * Returns:
1140 * pointer the buffer of buf_len size
1141 *
1142 *
1143 *********************************************************************/
1144
1145 static unsigned char *compress_property( propEntry *prop, int propID, SWISH *sw, int *buf_len, int *uncompressed_len )
1146 {
1147 #ifndef HAVE_ZLIB
1148 *buf_len = prop->propLen;
1149 *uncompressed_len = 0;
1150 return prop->propValue;
1151
1152 #else
1153 unsigned char *PropBuf; /* For compressing and uncompressing */
1154 int dest_size;
1155
1156
1157 /* Don't bother compressing smaller items */
1158 if ( prop->propLen < MIN_PROP_COMPRESS_SIZE )
1159 {
1160 *buf_len = prop->propLen;
1161 *uncompressed_len = 0;
1162 return prop->propValue;
1163 }
1164
1165 /* Buffer should be +1% + a few bytes. */
1166 dest_size = prop->propLen + ( prop->propLen / 100 ) + 1000; // way more than should be needed
1167
1168
1169 /* Get an output buffer */
1170 PropBuf = allocatePropIOBuffer( sw, dest_size );
1171
1172
1173 if ( compress2( (Bytef *)PropBuf, (uLongf *)&dest_size, prop->propValue, prop->propLen, sw->PropCompressionLevel) != Z_OK)
1174 progerr("Property Compression Error");
1175
1176
1177 /* Make sure it's compressed enough */
1178 if ( dest_size >= prop->propLen )
1179 {
1180 *buf_len = prop->propLen;
1181 *uncompressed_len = 0;
1182 return prop->propValue;
1183 }
1184
1185 *buf_len = dest_size;
1186 *uncompressed_len = prop->propLen;
1187
1188 return PropBuf;
1189
1190 #endif
1191 }
1192
1193 /*******************************************************************
1194 * Uncompress a Property
1195 *
1196 * Call with:
1197 * SWISH
1198 * *input_buf - buffer address
1199 * buf_len - size of buffer
1200 * *uncompressed_size - size of original prop, or zero if not compressed.
1201 *
1202 * Returns:
1203 * buffer address of uncompressed property
1204 * uncompressed_size is set to length of buffer
1205 *
1206 *
1207 *********************************************************************/
1208
1209 static unsigned char *uncompress_property( SWISH *sw, unsigned char *input_buf, int buf_len, int *uncompressed_size )
1210 {
1211
1212 #ifndef HAVE_ZLIB
1213
1214 if ( *uncompressed_size )
1215 progerr("The index was created with zlib compression.\nThis version of swish was not compiled with zlib");
1216
1217 *uncompressed_size = buf_len;
1218 return input_buf;
1219
1220 #else
1221 unsigned char *PropBuf;
1222
1223
1224 if ( *uncompressed_size == 0 ) /* wasn't compressed */
1225 {
1226 *uncompressed_size = buf_len;
1227 return input_buf;
1228 }
1229
1230
1231
1232 /* make sure we have enough space */
1233
1234 PropBuf = allocatePropIOBuffer( sw, *uncompressed_size );
1235
1236
1237 if ( uncompress(PropBuf, (uLongf *)uncompressed_size, input_buf, buf_len ) != Z_OK )
1238 {
1239 progwarn("Failed to uncompress Property\n");
1240 return NULL;
1241 }
1242
1243
1244 return PropBuf;
1245
1246
1247 #endif
1248
1249 }
1250
1251
1252
1253 /*******************************************************************
1254 * Write Properties to disk, and save seek pointers
1255 *
1256 * DB_WriteProperty - should write filenum:propID as the key
1257 * DB_WritePropPositions - writes the stored positions
1258 *
1259 *
1260 *
1261 *********************************************************************/
1262 void WritePropertiesToDisk( SWISH *sw , FileRec *fi )
1263 {
1264 IndexFILE *indexf = sw->indexlist;
1265 INDEXDATAHEADER *header = &indexf->header;
1266 docProperties *docProperties = fi->docProperties;
1267 propEntry *prop;
1268 int uncompressed_len;
1269 unsigned char *buf;
1270 int buf_len;
1271 int count;
1272 int i;
1273
1274
1275 /* initialize the first time called */
1276 if ( header->property_count == 0 )
1277 {
1278 /* Get the current seek position in the index, since will now write the file info */
1279 DB_InitWriteFiles(sw, indexf->DB);
1280
1281 /* build a list of properties that are in use */
1282 /* And create the prop index to propID (metaID) mapping arrays */
1283 init_property_list(header);
1284 }
1285
1286
1287 if ( (count = header->property_count) <= 0)
1288 return;
1289
1290
1291 /* any props exist, unlikely, but need to save a space. */
1292 if ( !docProperties )
1293 {
1294 DB_WritePropPositions( sw, indexf, fi, indexf->DB);
1295 return;
1296 }
1297
1298
1299 for( i = 0; i < count; i++ )
1300 {
1301 /* convert the count to a propID */
1302 int propID = header->propIDX_to_metaID[i]; // here's the array created in init_property_list()
1303
1304
1305 /* Here's why I need to redo the properties so it's always header->property_count size in the fi rec */
1306 /* The mapping is all a temporary kludge */
1307 if ( propID >= docProperties->n ) // Does this file have this many properties?
1308 continue;
1309
1310
1311 if ( !(prop = docProperties->propEntry[propID])) // does this file have this prop?
1312 continue;
1313
1314 buf = compress_property( prop, propID, sw, &buf_len, &uncompressed_len );
1315
1316 DB_WriteProperty( sw, indexf, fi, propID, (char *)buf, buf_len, uncompressed_len, indexf->DB );
1317 }
1318
1319
1320
1321
1322 /* Write the position data */
1323 DB_WritePropPositions( sw, indexf, fi, indexf->DB);
1324
1325 freeDocProperties( docProperties );
1326 fi->docProperties = NULL;
1327
1328
1329
1330 }
1331
1332 /*******************************************************************
1333 * Reads a single doc property - this is used for sorting
1334 *
1335 * Caller needs to destroy returned property
1336 *
1337 * Call with:
1338 * sw - everyone needs a sw
1339 * indexf - which index to read from
1340 * FileRec - which contains filenum (key part 1)
1341 * metaID - which prop (key part 2)
1342 * max_size- to limit size of property
1343 *
1344 * Returns:
1345 * *propEntry - caller *must* destroy
1346 *
1347 *
1348 *********************************************************************/
1349 propEntry *ReadSingleDocPropertiesFromDisk( SWISH *sw, IndexFILE *indexf, FileRec *fi, int metaID, int max_size )
1350 {
1351 int propLen;
1352 int error_flag;
1353 struct metaEntry meta_entry;
1354 unsigned char *buf;
1355 int buf_len; /* size on disk */
1356 int uncompressed_len; /* size uncompressed */
1357 propEntry *docProp;
1358 unsigned char *propbuf;
1359 INDEXDATAHEADER *header = &indexf->header;
1360 int count;
1361 int propIDX;
1362
1363
1364 /* initialize the first time called */
1365 if ( header->property_count == 0 )
1366 init_property_list(header);
1367
1368 if ( (count = header->property_count) <= 0)
1369 return NULL;
1370
1371
1372 /* Map the propID to an index number */
1373 propIDX = header->metaID_to_PropIDX[metaID];
1374
1375 if ( propIDX < 0 )
1376 progerr("Mapped propID %d to invalid property index", metaID );
1377
1378
1379
1380 /* already loaded? -- if so, duplicate the property for the given length */
1381 /* This should only happen if ReadAllDocPropertiesFromDisk() was called, and only with db_native.c */
1382
1383 if ( fi->docProperties )
1384 return duplicate_in_mem_property( fi->docProperties, metaID, max_size );
1385
1386
1387 /* Otherwise, read from disk */
1388
1389 if ( !(buf = (unsigned char*)DB_ReadProperty( sw, indexf, fi, metaID, &buf_len, &uncompressed_len, indexf->DB )))
1390 return NULL;
1391
1392 propbuf = uncompress_property( sw, buf, buf_len, &uncompressed_len );
1393
1394 propLen = uncompressed_len; /* just to be clear ;) */
1395
1396 /* Limit size,if possible - should really check if it's a string */
1397 if ( max_size && (max_size >= 8) && (max_size < propLen ))
1398 propLen = max_size;
1399
1400
1401 meta_entry.metaName = "(default)"; /* for error message, I think */
1402 meta_entry.metaID = metaID;
1403
1404 docProp = CreateProperty( &meta_entry, propbuf, propLen, 1, &error_flag );
1405
1406 efree( buf );
1407 return docProp;
1408 }
1409
1410
1411
1412 /*******************************************************************
1413 * Reads the doc properties from disk
1414 *
1415 * Maybe should return void, and just set?
1416 * Or maybe should take a filenum, and instead take a position?
1417 *
1418 * The original idea (and the way it was written) was to use the seek
1419 * position of the first property, and the total length of all properties
1420 * then read all the properties in one fread call.
1421 * The plan was to call it in result_output.c, so all the props would get loaded
1422 * in one shot.
1423 * That design probably has little effect on performance. Now we just call
1424 * ReadSingleDocPropertiesFromDisk for each prop.
1425 *
1426 * Now, this is really just a way to populate the fi->docProperties structure.
1427 *
1428 * 2001-09 jmruiz Modified to be used by merge.c
1429 *********************************************************************/
1430
1431 docProperties *ReadAllDocPropertiesFromDisk( SWISH *sw, IndexFILE *indexf, int filenum )
1432 {
1433 FileRec fi;
1434 propEntry *new_prop;
1435 int count;
1436 struct metaEntry meta_entry;
1437 docProperties *docProperties=NULL;
1438 INDEXDATAHEADER *header = &indexf->header;
1439 int propIDX;
1440
1441
1442
1443 /* Get a place to cache the pointers */
1444 memset(&fi,0, sizeof( FileRec ));
1445 fi.filenum = filenum;
1446
1447
1448 meta_entry.metaName = "(default)"; /* for error message, I think */
1449
1450
1451 /* initialize the first time called */
1452 if ( header->property_count == 0 )
1453 init_property_list(header);
1454
1455 if ( (count = header->property_count) <= 0)
1456 return NULL;
1457
1458
1459 for ( propIDX = 0; propIDX < count; propIDX++ )
1460 {
1461 meta_entry.metaID = header->propIDX_to_metaID[propIDX];
1462
1463 new_prop = ReadSingleDocPropertiesFromDisk( sw, indexf, &fi, meta_entry.metaID, 0);
1464
1465 if ( !new_prop )
1466 continue;
1467
1468 // would be better if we didn't need to create a new property just to free one
1469 // this routine is currently only used by merge and dump.c
1470
1471 addDocProperty(&docProperties, &meta_entry, new_prop->propValue, new_prop->propLen, 1 );
1472
1473 efree( new_prop );
1474 }
1475
1476 /* Free the prop seek location cache */
1477 if ( fi.prop_index )
1478 efree( fi.prop_index );
1479
1480 return docProperties;
1481 }
1482
1483
1484
1485
1486
1487
1488 void addSearchResultDisplayProperty(SWISH *sw, char *propName)
1489 {
1490 struct MOD_Search *srch = sw->Search;
1491
1492 /* add a property to the list of properties that will be displayed */
1493 if (srch->numPropertiesToDisplay >= srch->currentMaxPropertiesToDisplay)
1494 {
1495 if(srch->currentMaxPropertiesToDisplay) {
1496 srch->currentMaxPropertiesToDisplay+=2;
1497 srch->propNameToDisplay=(char **)erealloc(srch->propNameToDisplay,srch->currentMaxPropertiesToDisplay*sizeof(char *));
1498 } else {
1499 srch->currentMaxPropertiesToDisplay=5;
1500 srch->propNameToDisplay=(char **)emalloc(srch->currentMaxPropertiesToDisplay*sizeof(char *));
1501 }
1502 }
1503 srch->propNameToDisplay[srch->numPropertiesToDisplay++] = estrdup(propName);
1504 }
1505
1506
1507
1508
1509
1510 /* For faster proccess, get de ID of the properties to sort */
1511 int initSearchResultProperties(SWISH *sw)
1512 {
1513 IndexFILE *indexf;
1514 int i;
1515 struct MOD_Search *srch = sw->Search;
1516 struct metaEntry *meta_entry;
1517
1518
1519 /* lookup selected property names */
1520
1521 if (srch->numPropertiesToDisplay == 0)
1522 return RC_OK;
1523
1524 for( indexf = sw->indexlist; indexf; indexf = indexf->next )
1525 indexf->propIDToDisplay=(int *) emalloc(srch->numPropertiesToDisplay*sizeof(int));
1526
1527 for (i = 0; i < srch->numPropertiesToDisplay; i++)
1528 {
1529 makeItLow(srch->propNameToDisplay[i]);
1530
1531 /* Get ID for each index file */
1532 for( indexf = sw->indexlist; indexf; indexf = indexf->next )
1533 {
1534 if ( !(meta_entry = getPropNameByName( &indexf->header, srch->propNameToDisplay[i])))
1535 {
1536 progerr ("Unknown Display property name \"%s\"", srch->propNameToDisplay[i]);
1537 return (sw->lasterror=UNKNOWN_PROPERTY_NAME_IN_SEARCH_DISPLAY);
1538 }
1539 else
1540 indexf->propIDToDisplay[i] = meta_entry->metaID;
1541 }
1542 }
1543 return RC_OK;
1544 }
1545
1546
1547
1548 void dump_single_property( propEntry *prop, struct metaEntry *meta_entry )
1549 {
1550 char *propstr;
1551 char proptype = '?';
1552 int i;
1553
1554
1555 if ( is_meta_string(meta_entry) )
1556 proptype = 'S';
1557
1558 else if ( is_meta_date(meta_entry) )
1559 proptype = 'D';
1560
1561 else if ( is_meta_number(meta_entry) )
1562 proptype = 'N';
1563
1564
1565 i = prop ? prop->propLen : 0;
1566
1567 printf(" %20s:%2d (%3d) %c:", meta_entry->metaName, meta_entry->metaID, i, proptype );
1568
1569
1570 if ( !prop )
1571 {
1572 printf(" propEntry=NULL\n");
1573 return;
1574 }
1575
1576 propstr = DecodeDocProperty( meta_entry, prop );
1577 i = 0;
1578 printf(" \"");
1579
1580 while ( i < strlen( propstr ) )
1581 {
1582 if ( 1 ) // ( isprint( (int)propstr[i] ))
1583 printf("%c", propstr[i] );
1584
1585 else if ( propstr[i] == '\n' )
1586 printf("\n");
1587
1588 else
1589 printf("..");
1590
1591 i++;
1592 if ( i > 300 )
1593 {
1594 printf(" ...");
1595 break;
1596 }
1597 }
1598 printf("\"\n");
1599
1600 efree( propstr );
1601 }
1602
1603 /***************************************************************
1604 * Dumps what's currently in the fi->docProperties structure
1605 *
1606 **************************************************************/
1607
1608 void dump_file_properties(IndexFILE * indexf, FileRec *fi )
1609 {
1610 int j;
1611 propEntry *prop;
1612 struct metaEntry *meta_entry;
1613
1614 if ( !fi->docProperties ) /* may not be any properties */
1615 {
1616 printf(" (No Properties)\n");
1617 return;
1618 }
1619
1620 for (j = 0; j < fi->docProperties->n; j++)
1621 {
1622 if ( !fi->docProperties->propEntry[j] )
1623 continue;
1624
1625 meta_entry = getPropNameByID( &indexf->header, j );
1626 prop = fi->docProperties->propEntry[j];
1627
1628 dump_single_property( prop, meta_entry );
1629 }
1630 }
1631

  ViewVC Help
Powered by ViewVC 1.1.22