/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/dump.c
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/src/dump.c

Parent Directory | Revision Log | View Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Importing web-site building process.

1 /*
2 **
3 ** This program and library is free software; you can redistribute it and/or
4 ** modify it under the terms of the GNU (Library) General Public License
5 ** as published by the Free Software Foundation; either version 2
6 ** of the License, or any later version.
7 **
8 ** This program is distributed in the hope that it will be useful,
9 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
10 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 ** GNU (Library) General Public License for more details.
12 **
13 ** You should have received a copy of the GNU (Library) General Public License
14 ** along with this program; if not, write to the Free Software
15 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 **
17 **
18 **
19 ** 2001-05-07 jmruiz init coding
20 **
21 */
22
23 #include "swish.h"
24 #include "merge.h"
25 #include "docprop.h"
26 #include "hash.h"
27 #include "string.h"
28 #include "mem.h"
29 #include "db.h"
30 #include "compress.h"
31 #include "index.h"
32 #include "search.h"
33 #include "result_output.h"
34 #include "metanames.h"
35 #include "dump.h"
36
37
38
39
40 void dump_index_file_list( SWISH *sw, IndexFILE *indexf )
41 {
42 int i;
43 int end = indexf->header.totalfiles;
44
45 i = sw->Search->beginhits ? sw->Search->beginhits - 1 : 0;
46
47 if ( i >= indexf->header.totalfiles )
48 {
49 printf("Hey, there are only %d files\n", indexf->header.totalfiles );
50 exit(-1);
51 }
52
53 end = indexf->header.totalfiles;
54
55 if ( sw->Search->maxhits > 0 )
56 {
57 end = i + sw->Search->maxhits;
58 if ( end > indexf->header.totalfiles )
59 end = indexf->header.totalfiles;
60 }
61
62
63 printf("\n\n-----> FILES in index %s <-----\n", indexf->line );
64
65 for (; i < end; i++)
66 {
67 FileRec fi;
68
69 memset( &fi, 0, sizeof( FileRec ) );
70
71 fi.filenum = i+1;
72
73 fflush(stdout);
74 printf("Dumping File Properties for File Number: %d\n", i+1);
75
76
77 dump_file_properties( indexf, &fi );
78 printf("\n");
79
80
81 printf("ReadAllDocProperties:\n");
82 fi.docProperties = ReadAllDocPropertiesFromDisk( sw, indexf, i+1 );
83 dump_file_properties( indexf, &fi );
84 freefileinfo( &fi );
85
86 printf("\n");
87
88
89 /* dump one at a time */
90 {
91 propEntry *p;
92 int j;
93 struct metaEntry *meta_entry;
94 INDEXDATAHEADER *header = &indexf->header;
95 int count = header->property_count;
96
97 printf("ReadSingleDocPropertiesFromDisk:\n");
98
99 for (j=0; j< count; j++) // just for testing
100 {
101 int metaID = header->propIDX_to_metaID[j];
102
103 if ( !(p = ReadSingleDocPropertiesFromDisk(sw, indexf, &fi, metaID, 0 )) )
104 continue;
105
106 meta_entry = getPropNameByID( &indexf->header, metaID );
107 dump_single_property( p, meta_entry );
108
109 { // show compression
110 char *buffer;
111 int uncompressed_len;
112 int buf_len;
113
114 if ( (buffer = DB_ReadProperty( sw, indexf, &fi, meta_entry->metaID, &buf_len, &uncompressed_len, indexf->DB )))
115 {
116 if ( uncompressed_len )
117 printf(" %20s: %d -> %d (%4.2f%%)\n", "**Compressed**", uncompressed_len , buf_len, (float)buf_len/(float)uncompressed_len * 100.00f );
118
119 efree(buffer);
120 }
121 }
122
123
124
125 freeProperty( p );
126 }
127 }
128 printf("\n");
129
130
131 freefileinfo(&fi);
132 }
133 printf("\nNumber of File Entries: %d\n", indexf->header.totalfiles);
134 fflush(stdout);
135 }
136
137
138
139
140 /* Prints out the data in an index DB */
141 void DB_decompress(SWISH * sw, IndexFILE * indexf)
142 {
143 int i,
144 j,
145 c,
146 fieldnum,
147 frequency,
148 metaname,
149 tmpval,
150 filenum,
151 *posdata;
152 unsigned long nextposmetaname;
153 char word[2];
154 char *resultword;
155 unsigned char *worddata, *s, flag;
156 int sz_worddata;
157 long wordID;
158
159
160
161 indexf->DB = DB_Open(sw, indexf->line,DB_READ);
162
163 metaname = 0;
164
165 nextposmetaname = 0L;
166
167 c = 0;
168
169 frequency = 0;
170
171 /* Read header */
172 read_header(sw, &indexf->header, indexf->DB);
173
174
175 if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_HEADER) )
176 resultPrintHeader(sw, 0, &indexf->header, indexf->line, 0);
177
178 fieldnum = 0;
179
180
181 /* Do metanames first as that will be helpful for decoding next */
182 if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_METANAMES) )
183 dump_metanames( sw, indexf, 1 );
184
185 if (DEBUG_MASK & DEBUG_INDEX_WORDS_ONLY)
186 {
187 DB_InitReadWords(sw, indexf->DB);
188
189 for( j = 0; j < 256; j++ )
190 {
191 word[0] = (unsigned char) j;
192 word[1] = '\0';
193 DB_ReadFirstWordInvertedIndex(sw, word,&resultword,&wordID,indexf->DB);
194
195 while(wordID)
196 {
197 printf("%s\n",resultword);
198
199
200 efree(resultword);
201 DB_ReadNextWordInvertedIndex(sw, word,&resultword,&wordID,indexf->DB);
202
203 }
204 }
205 DB_EndReadWords(sw, indexf->DB);
206 }
207
208
209 else if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_WORDS | DEBUG_INDEX_WORDS_FULL | DEBUG_INDEX_WORDS_META) )
210 {
211 int *meta_used;
212 int end_meta = 0;
213
214 printf("\n-----> WORD INFO in index %s <-----\n", indexf->line);
215
216 for(i = 0; i < indexf->header.metaCounter; i++)
217 if ( indexf->header.metaEntryArray[i]->metaID > end_meta )
218 end_meta = indexf->header.metaEntryArray[i]->metaID;
219
220 meta_used = emalloc( sizeof(int) * ( end_meta + 1) );
221
222 /* _META only reports which tags the words are found in */
223 for(i = 0; i <= end_meta; i++)
224 meta_used[i] = 0;
225
226
227 DB_InitReadWords(sw, indexf->DB);
228
229 for(j=1;j<256;j++)
230 {
231 word[0] = (unsigned char) j; word[1] = '\0';
232 DB_ReadFirstWordInvertedIndex(sw, word,&resultword,&wordID,indexf->DB);
233
234 while(wordID && (((int)((unsigned char)resultword[0]))== j))
235 {
236 printf("\n%s",resultword);
237
238 /* Read Word's data */
239 DB_ReadWordData(sw, wordID, &worddata, &sz_worddata, indexf->DB);
240
241 /* parse and print word's data */
242 s = worddata;
243
244 tmpval = uncompress2(&s); /* tfrequency */
245 metaname = uncompress2(&s); /* metaname */
246 if (metaname)
247 {
248 nextposmetaname = UNPACKLONG2(s);
249 s += sizeof(long);
250 }
251
252 filenum = 0;
253 while(1)
254 { /* Read on all items */
255 uncompress_location_values(&s,&flag,&tmpval,&frequency);
256 filenum += tmpval;
257 posdata = (int *) emalloc(frequency * sizeof(int));
258 uncompress_location_positions(&s,flag,frequency,posdata);
259
260
261 // if (sw->verbose >= 4)
262 if (DEBUG_MASK & (DEBUG_INDEX_ALL|DEBUG_INDEX_WORDS_FULL))
263 {
264 struct metaEntry *m;
265
266 printf("\n Meta:%d", metaname);
267
268
269 /* Get path from property list */
270 if ( (m = getPropNameByName( &sw->indexlist->header, AUTOPROPERTY_DOCPATH )) )
271 {
272 RESULT r;
273 char *s;
274
275 memset( &r, 0, sizeof( RESULT ) );
276
277 r.indexf = indexf;
278 r.filenum = filenum;
279 r.fi.filenum = filenum;
280
281 s = getResultPropAsString( sw, &r, m->metaID);
282
283 printf(" %s", s );
284 efree( s );
285
286 }
287 else
288 printf(" Failed to lookup meta entry");
289
290
291 printf(" Freq:%d", frequency);
292 printf(" Pos/Struct:");
293 }
294 else if ( DEBUG_MASK & DEBUG_INDEX_WORDS_META)
295 meta_used[ metaname ]++;
296 else
297 {
298 printf(" [%d", metaname);
299 printf(" %d", filenum);
300 printf(" %d (", frequency);
301 }
302
303
304 for (i = 0; i < frequency; i++)
305 {
306 if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_WORDS_FULL))
307 //if (sw->verbose >= 4)
308 {
309 if (i)
310 printf(",%d/%x", GET_POSITION(posdata[i]),GET_STRUCTURE(posdata[i]));
311 else
312 printf("%d/%x", GET_POSITION(posdata[i]), GET_STRUCTURE(posdata[i]));
313 }
314 else if ( DEBUG_MASK & DEBUG_INDEX_WORDS)
315 {
316 if (i)
317 printf(" %d/%x", GET_POSITION(posdata[i]),GET_STRUCTURE(posdata[i]));
318 else
319 printf("%d/%x", GET_POSITION(posdata[i]),GET_STRUCTURE(posdata[i]));
320 }
321 }
322
323 efree(posdata);
324
325 if ( DEBUG_MASK & DEBUG_INDEX_WORDS )
326 printf(")]");
327
328 if ((s - worddata) == sz_worddata)
329 break; /* End of worddata */
330
331 if ((unsigned long)(s - worddata) == nextposmetaname)
332 {
333 filenum = 0;
334 metaname = uncompress2(&s);
335 if (metaname)
336 {
337 nextposmetaname = UNPACKLONG2(s);
338 s += sizeof(long);
339 }
340 else
341 nextposmetaname = 0L;
342 }
343 }
344
345 if ( DEBUG_MASK & DEBUG_INDEX_WORDS_META)
346 {
347 for(i = 0; i <= end_meta; i++)
348 {
349 if ( meta_used[i] )
350 printf( "\t%d", i );
351 meta_used[i] = 0;
352 }
353 }
354
355
356 if ( !( DEBUG_MASK & DEBUG_INDEX_WORDS_META ))
357 printf("\n");
358
359 efree(worddata);
360 efree(resultword);
361 DB_ReadNextWordInvertedIndex(sw, word,&resultword,&wordID,indexf->DB);
362 }
363 }
364 DB_EndReadWords(sw, indexf->DB);
365
366 efree( meta_used );
367 }
368
369
370
371 /* Decode Stop Words: All them are in just one line */
372 if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_STOPWORDS) )
373 {
374 printf("\n\n-----> STOP WORDS in %s <-----\n" , indexf->line);
375 for(i=0;i<indexf->header.stopPos;i++)
376 printf("%s ",indexf->header.stopList[i]);
377 printf("\n");
378 }
379
380
381
382 /* Decode File Info */
383 if (DEBUG_MASK & (DEBUG_INDEX_ALL | DEBUG_INDEX_FILES) )
384 dump_index_file_list( sw, indexf );
385
386
387 DB_Close(sw, indexf->DB);
388
389 }
390
391
392 int check_sorted_index( SWISH *sw, IndexFILE *indexf, struct metaEntry *m )
393 {
394 unsigned char *buffer;
395 int sz_buffer;
396
397 DB_InitReadSortedIndex(sw, indexf->DB);
398
399 /* Get the sorted index of the property */
400 DB_ReadSortedIndex(sw, m->metaID, &buffer, &sz_buffer, indexf->DB);
401
402 if ( sz_buffer )
403 efree( buffer );
404
405 /* Table doesn't exist */
406 return sz_buffer;
407 }
408
409
410 void dump_metanames( SWISH *sw, IndexFILE *indexf, int check_presorted )
411 {
412 struct metaEntry *meta_entry;
413 int i;
414
415 printf("\n\n-----> METANAMES for %s <-----\n", indexf->line );
416 for(i = 0; i < indexf->header.metaCounter; i++)
417 {
418 meta_entry = indexf->header.metaEntryArray[i];
419
420 printf("%20s : id=%2d type=%2d ",meta_entry->metaName, meta_entry->metaID, meta_entry->metaType);
421
422 if ( is_meta_index( meta_entry ) )
423 printf(" META_INDEX Rank Bias=%3d", meta_entry->rank_bias );
424
425
426
427 if ( is_meta_internal( meta_entry ) )
428 printf(" META_INTERNAL");
429
430
431 if ( is_meta_property( meta_entry ) )
432 {
433 printf(" META_PROP:");
434
435 if ( is_meta_string(meta_entry) )
436 printf("STRING(case:%s)", is_meta_ignore_case(meta_entry)? "ignore" : "compare");
437
438 else if ( is_meta_date(meta_entry) )
439 printf("DATE");
440
441 else if ( is_meta_number(meta_entry) )
442 printf("NUMBER");
443
444 else
445 printf("unknown!");
446 }
447
448
449 if ( check_presorted && check_sorted_index( sw, indexf, meta_entry) )
450 printf(" *presorted*");
451
452
453 if ( meta_entry->alias )
454 {
455 struct metaEntry *m = is_meta_index( meta_entry )
456 ? getMetaNameByID( &indexf->header, meta_entry->alias )
457 : getPropNameByID( &indexf->header, meta_entry->alias );
458
459 printf(" [Alias for %s (%d)]", m->metaName, m->metaID );
460 }
461
462
463 printf("\n");
464
465 }
466 printf("\n");
467 }
468
469

  ViewVC Help
Powered by ViewVC 1.1.22