/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/dump.c
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/src/dump.c

Parent Directory | Revision Log | View Revision Graph


Revision 1.1 - (hide annotations) (download)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch point for: Import, MAIN
File MIME type: text/plain
Initial revision

1 adcroft 1.1 /*
2     **
3     ** This program and library is free software; you can redistribute it and/or
4     ** modify it under the terms of the GNU (Library) General Public License
5     ** as published by the Free Software Foundation; either version 2
6     ** of the License, or any later version.
7     **
8     ** This program is distributed in the hope that it will be useful,
9     ** but WITHOUT ANY WARRANTY; without even the implied warranty of
10     ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11     ** GNU (Library) General Public License for more details.
12     **
13     ** You should have received a copy of the GNU (Library) General Public License
14     ** along with this program; if not, write to the Free Software
15     ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16     **
17     **
18     **
19     ** 2001-05-07 jmruiz init coding
20     **
21     */
22    
23     #include "swish.h"
24     #include "merge.h"
25     #include "docprop.h"
26     #include "hash.h"
27     #include "string.h"
28     #include "mem.h"
29     #include "db.h"
30     #include "compress.h"
31     #include "index.h"
32     #include "search.h"
33     #include "result_output.h"
34     #include "metanames.h"
35     #include "dump.h"
36    
37    
38    
39    
40     void dump_index_file_list( SWISH *sw, IndexFILE *indexf )
41     {
42     int i;
43     int end = indexf->header.totalfiles;
44    
45     i = sw->Search->beginhits ? sw->Search->beginhits - 1 : 0;
46    
47     if ( i >= indexf->header.totalfiles )
48     {
49     printf("Hey, there are only %d files\n", indexf->header.totalfiles );
50     exit(-1);
51     }
52    
53     end = indexf->header.totalfiles;
54    
55     if ( sw->Search->maxhits > 0 )
56     {
57     end = i + sw->Search->maxhits;
58     if ( end > indexf->header.totalfiles )
59     end = indexf->header.totalfiles;
60     }
61    
62    
63     printf("\n\n-----> FILES in index %s <-----\n", indexf->line );
64    
65     for (; i < end; i++)
66     {
67     FileRec fi;
68    
69     memset( &fi, 0, sizeof( FileRec ) );
70    
71     fi.filenum = i+1;
72    
73     fflush(stdout);
74     printf("Dumping File Properties for File Number: %d\n", i+1);
75    
76    
77     dump_file_properties( indexf, &fi );
78     printf("\n");
79    
80    
81     printf("ReadAllDocProperties:\n");
82     fi.docProperties = ReadAllDocPropertiesFromDisk( sw, indexf, i+1 );
83     dump_file_properties( indexf, &fi );
84     freefileinfo( &fi );
85    
86     printf("\n");
87    
88    
89     /* dump one at a time */
90     {
91     propEntry *p;
92     int j;
93     struct metaEntry *meta_entry;
94     INDEXDATAHEADER *header = &indexf->header;
95     int count = header->property_count;
96    
97     printf("ReadSingleDocPropertiesFromDisk:\n");
98    
99     for (j=0; j< count; j++) // just for testing
100     {
101     int metaID = header->propIDX_to_metaID[j];
102    
103     if ( !(p = ReadSingleDocPropertiesFromDisk(sw, indexf, &fi, metaID, 0 )) )
104     continue;
105    
106     meta_entry = getPropNameByID( &indexf->header, metaID );
107     dump_single_property( p, meta_entry );
108    
109     { // show compression
110     char *buffer;
111     int uncompressed_len;
112     int buf_len;
113    
114     if ( (buffer = DB_ReadProperty( sw, indexf, &fi, meta_entry->metaID, &buf_len, &uncompressed_len, indexf->DB )))
115     {
116     if ( uncompressed_len )
117     printf(" %20s: %d -> %d (%4.2f%%)\n", "**Compressed**", uncompressed_len , buf_len, (float)buf_len/(float)uncompressed_len * 100.00f );
118    
119     efree(buffer);
120     }
121     }
122    
123    
124    
125     freeProperty( p );
126     }
127     }
128     printf("\n");
129    
130    
131     freefileinfo(&fi);
132     }
133     printf("\nNumber of File Entries: %d\n", indexf->header.totalfiles);
134     fflush(stdout);
135     }
136    
137    
138    
139    
140     /* Prints out the data in an index DB */
141     void DB_decompress(SWISH * sw, IndexFILE * indexf)
142     {
143     int i,
144     j,
145     c,
146     fieldnum,
147     frequency,
148     metaname,
149     tmpval,
150     filenum,
151     *posdata;
152     unsigned long nextposmetaname;
153     char word[2];
154     char *resultword;
155     unsigned char *worddata, *s, flag;
156     int sz_worddata;
157     long wordID;
158    
159    
160    
161     indexf->DB = DB_Open(sw, indexf->line,DB_READ);
162    
163     metaname = 0;
164    
165     nextposmetaname = 0L;
166    
167     c = 0;
168    
169     frequency = 0;
170    
171     /* Read header */
172     read_header(sw, &indexf->header, indexf->DB);
173    
174    
175     if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_HEADER) )
176     resultPrintHeader(sw, 0, &indexf->header, indexf->line, 0);
177    
178     fieldnum = 0;
179    
180    
181     /* Do metanames first as that will be helpful for decoding next */
182     if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_METANAMES) )
183     dump_metanames( sw, indexf, 1 );
184    
185     if (DEBUG_MASK & DEBUG_INDEX_WORDS_ONLY)
186     {
187     DB_InitReadWords(sw, indexf->DB);
188    
189     for( j = 0; j < 256; j++ )
190     {
191     word[0] = (unsigned char) j;
192     word[1] = '\0';
193     DB_ReadFirstWordInvertedIndex(sw, word,&resultword,&wordID,indexf->DB);
194    
195     while(wordID)
196     {
197     printf("%s\n",resultword);
198    
199    
200     efree(resultword);
201     DB_ReadNextWordInvertedIndex(sw, word,&resultword,&wordID,indexf->DB);
202    
203     }
204     }
205     DB_EndReadWords(sw, indexf->DB);
206     }
207    
208    
209     else if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_WORDS | DEBUG_INDEX_WORDS_FULL | DEBUG_INDEX_WORDS_META) )
210     {
211     int *meta_used;
212     int end_meta = 0;
213    
214     printf("\n-----> WORD INFO in index %s <-----\n", indexf->line);
215    
216     for(i = 0; i < indexf->header.metaCounter; i++)
217     if ( indexf->header.metaEntryArray[i]->metaID > end_meta )
218     end_meta = indexf->header.metaEntryArray[i]->metaID;
219    
220     meta_used = emalloc( sizeof(int) * ( end_meta + 1) );
221    
222     /* _META only reports which tags the words are found in */
223     for(i = 0; i <= end_meta; i++)
224     meta_used[i] = 0;
225    
226    
227     DB_InitReadWords(sw, indexf->DB);
228    
229     for(j=1;j<256;j++)
230     {
231     word[0] = (unsigned char) j; word[1] = '\0';
232     DB_ReadFirstWordInvertedIndex(sw, word,&resultword,&wordID,indexf->DB);
233    
234     while(wordID && (((int)((unsigned char)resultword[0]))== j))
235     {
236     printf("\n%s",resultword);
237    
238     /* Read Word's data */
239     DB_ReadWordData(sw, wordID, &worddata, &sz_worddata, indexf->DB);
240    
241     /* parse and print word's data */
242     s = worddata;
243    
244     tmpval = uncompress2(&s); /* tfrequency */
245     metaname = uncompress2(&s); /* metaname */
246     if (metaname)
247     {
248     nextposmetaname = UNPACKLONG2(s);
249     s += sizeof(long);
250     }
251    
252     filenum = 0;
253     while(1)
254     { /* Read on all items */
255     uncompress_location_values(&s,&flag,&tmpval,&frequency);
256     filenum += tmpval;
257     posdata = (int *) emalloc(frequency * sizeof(int));
258     uncompress_location_positions(&s,flag,frequency,posdata);
259    
260    
261     // if (sw->verbose >= 4)
262     if (DEBUG_MASK & (DEBUG_INDEX_ALL|DEBUG_INDEX_WORDS_FULL))
263     {
264     struct metaEntry *m;
265    
266     printf("\n Meta:%d", metaname);
267    
268    
269     /* Get path from property list */
270     if ( (m = getPropNameByName( &sw->indexlist->header, AUTOPROPERTY_DOCPATH )) )
271     {
272     RESULT r;
273     char *s;
274    
275     memset( &r, 0, sizeof( RESULT ) );
276    
277     r.indexf = indexf;
278     r.filenum = filenum;
279     r.fi.filenum = filenum;
280    
281     s = getResultPropAsString( sw, &r, m->metaID);
282    
283     printf(" %s", s );
284     efree( s );
285    
286     }
287     else
288     printf(" Failed to lookup meta entry");
289    
290    
291     printf(" Freq:%d", frequency);
292     printf(" Pos/Struct:");
293     }
294     else if ( DEBUG_MASK & DEBUG_INDEX_WORDS_META)
295     meta_used[ metaname ]++;
296     else
297     {
298     printf(" [%d", metaname);
299     printf(" %d", filenum);
300     printf(" %d (", frequency);
301     }
302    
303    
304     for (i = 0; i < frequency; i++)
305     {
306     if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_WORDS_FULL))
307     //if (sw->verbose >= 4)
308     {
309     if (i)
310     printf(",%d/%x", GET_POSITION(posdata[i]),GET_STRUCTURE(posdata[i]));
311     else
312     printf("%d/%x", GET_POSITION(posdata[i]), GET_STRUCTURE(posdata[i]));
313     }
314     else if ( DEBUG_MASK & DEBUG_INDEX_WORDS)
315     {
316     if (i)
317     printf(" %d/%x", GET_POSITION(posdata[i]),GET_STRUCTURE(posdata[i]));
318     else
319     printf("%d/%x", GET_POSITION(posdata[i]),GET_STRUCTURE(posdata[i]));
320     }
321     }
322    
323     efree(posdata);
324    
325     if ( DEBUG_MASK & DEBUG_INDEX_WORDS )
326     printf(")]");
327    
328     if ((s - worddata) == sz_worddata)
329     break; /* End of worddata */
330    
331     if ((unsigned long)(s - worddata) == nextposmetaname)
332     {
333     filenum = 0;
334     metaname = uncompress2(&s);
335     if (metaname)
336     {
337     nextposmetaname = UNPACKLONG2(s);
338     s += sizeof(long);
339     }
340     else
341     nextposmetaname = 0L;
342     }
343     }
344    
345     if ( DEBUG_MASK & DEBUG_INDEX_WORDS_META)
346     {
347     for(i = 0; i <= end_meta; i++)
348     {
349     if ( meta_used[i] )
350     printf( "\t%d", i );
351     meta_used[i] = 0;
352     }
353     }
354    
355    
356     if ( !( DEBUG_MASK & DEBUG_INDEX_WORDS_META ))
357     printf("\n");
358    
359     efree(worddata);
360     efree(resultword);
361     DB_ReadNextWordInvertedIndex(sw, word,&resultword,&wordID,indexf->DB);
362     }
363     }
364     DB_EndReadWords(sw, indexf->DB);
365    
366     efree( meta_used );
367     }
368    
369    
370    
371     /* Decode Stop Words: All them are in just one line */
372     if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_STOPWORDS) )
373     {
374     printf("\n\n-----> STOP WORDS in %s <-----\n" , indexf->line);
375     for(i=0;i<indexf->header.stopPos;i++)
376     printf("%s ",indexf->header.stopList[i]);
377     printf("\n");
378     }
379    
380    
381    
382     /* Decode File Info */
383     if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_FILES) )
384     dump_index_file_list( sw, indexf );
385    
386    
387     DB_Close(sw, indexf->DB);
388    
389     }
390    
391    
392     int check_sorted_index( SWISH *sw, IndexFILE *indexf, struct metaEntry *m )
393     {
394     unsigned char *buffer;
395     int sz_buffer;
396    
397     DB_InitReadSortedIndex(sw, indexf->DB);
398    
399     /* Get the sorted index of the property */
400     DB_ReadSortedIndex(sw, m->metaID, &buffer, &sz_buffer, indexf->DB);
401    
402     if ( sz_buffer )
403     efree( buffer );
404    
405     /* Table doesn't exist */
406     return sz_buffer;
407     }
408    
409    
410     void dump_metanames( SWISH *sw, IndexFILE *indexf, int check_presorted )
411     {
412     struct metaEntry *meta_entry;
413     int i;
414    
415     printf("\n\n-----> METANAMES for %s <-----\n", indexf->line );
416     for(i = 0; i < indexf->header.metaCounter; i++)
417     {
418     meta_entry = indexf->header.metaEntryArray[i];
419    
420     printf("%20s : id=%2d type=%2d ",meta_entry->metaName, meta_entry->metaID, meta_entry->metaType);
421    
422     if ( is_meta_index( meta_entry ) )
423     printf(" META_INDEX Rank Bias=%3d", meta_entry->rank_bias );
424    
425    
426    
427     if ( is_meta_internal( meta_entry ) )
428     printf(" META_INTERNAL");
429    
430    
431     if ( is_meta_property( meta_entry ) )
432     {
433     printf(" META_PROP:");
434    
435     if ( is_meta_string(meta_entry) )
436     printf("STRING(case:%s)", is_meta_ignore_case(meta_entry)? "ignore" : "compare");
437    
438     else if ( is_meta_date(meta_entry) )
439     printf("DATE");
440    
441     else if ( is_meta_number(meta_entry) )
442     printf("NUMBER");
443    
444     else
445     printf("unknown!");
446     }
447    
448    
449     if ( check_presorted && check_sorted_index( sw, indexf, meta_entry) )
450     printf(" *presorted*");
451    
452    
453     if ( meta_entry->alias )
454     {
455     struct metaEntry *m = is_meta_index( meta_entry )
456     ? getMetaNameByID( &indexf->header, meta_entry->alias )
457     : getPropNameByID( &indexf->header, meta_entry->alias );
458    
459     printf(" [Alias for %s (%d)]", m->metaName, m->metaID );
460     }
461    
462    
463     printf("\n");
464    
465     }
466     printf("\n");
467     }
468    
469    

  ViewVC Help
Powered by ViewVC 1.1.22