1 |
adcroft |
1.1 |
/* |
2 |
|
|
** |
3 |
|
|
** This program and library is free software; you can redistribute it and/or |
4 |
|
|
** modify it under the terms of the GNU (Library) General Public License |
5 |
|
|
** as published by the Free Software Foundation; either version 2 |
6 |
|
|
** of the License, or any later version. |
7 |
|
|
** |
8 |
|
|
** This program is distributed in the hope that it will be useful, |
9 |
|
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 |
|
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 |
|
|
** GNU (Library) General Public License for more details. |
12 |
|
|
** |
13 |
|
|
** You should have received a copy of the GNU (Library) General Public License |
14 |
|
|
** along with this program; if not, write to the Free Software |
15 |
|
|
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
16 |
|
|
** |
17 |
|
|
** |
18 |
|
|
** |
19 |
|
|
** 2001-05-07 jmruiz init coding |
20 |
|
|
** |
21 |
|
|
*/ |
22 |
|
|
|
23 |
|
|
#include <time.h> |
24 |
|
|
#include "swish.h" |
25 |
|
|
#include "mem.h" |
26 |
|
|
#include "file.h" |
27 |
|
|
#include "error.h" |
28 |
|
|
#include "string.h" |
29 |
|
|
#include "compress.h" |
30 |
|
|
#include "hash.h" |
31 |
|
|
#include "db.h" |
32 |
|
|
#include "swish_qsort.h" |
33 |
|
|
#include "ramdisk.h" |
34 |
|
|
#include "db_native.h" |
35 |
|
|
|
36 |
|
|
#ifdef USE_BTREE |
37 |
|
|
#define WRITE_WORDS_RAMDISK 0 |
38 |
|
|
#else |
39 |
|
|
#define WRITE_WORDS_RAMDISK 1 |
40 |
|
|
#endif |
41 |
|
|
|
42 |
|
|
// #define DEBUG_PROP 1 |
43 |
|
|
|
44 |
|
|
/* |
45 |
|
|
-- init structures for this module |
46 |
|
|
*/ |
47 |
|
|
|
48 |
|
|
/*
** initModule_DBNative
**
** Allocates the MOD_DB dispatch table for the "native" index format,
** points every DB_* hook at its *_Native implementation and attaches
** the finished table to sw->Db.
*/
void initModule_DBNative(SWISH * sw)
{
    struct MOD_DB *native;

    native = (struct MOD_DB *) emalloc(sizeof(struct MOD_DB));
    native->DB_name = (char *) estrdup("native");

    /* Whole-database operations */
    native->DB_Create = DB_Create_Native;
    native->DB_Open = DB_Open_Native;
    native->DB_Close = DB_Close_Native;
    native->DB_Remove = DB_Remove_Native;

    /* Header, write side */
    native->DB_InitWriteHeader = DB_InitWriteHeader_Native;
    native->DB_WriteHeaderData = DB_WriteHeaderData_Native;
    native->DB_EndWriteHeader = DB_EndWriteHeader_Native;

    /* Header, read side */
    native->DB_InitReadHeader = DB_InitReadHeader_Native;
    native->DB_ReadHeaderData = DB_ReadHeaderData_Native;
    native->DB_EndReadHeader = DB_EndReadHeader_Native;

    /* Words, write side */
    native->DB_InitWriteWords = DB_InitWriteWords_Native;
    native->DB_GetWordID = DB_GetWordID_Native;
    native->DB_WriteWord = DB_WriteWord_Native;

#ifndef USE_BTREE
    native->DB_WriteWordHash = DB_WriteWordHash_Native;
#else
    native->DB_UpdateWordID = DB_UpdateWordID_Native;
    native->DB_DeleteWordData = DB_DeleteWordData_Native;
#endif

    native->DB_WriteWordData = DB_WriteWordData_Native;
    native->DB_EndWriteWords = DB_EndWriteWords_Native;

    /* Words, read side */
    native->DB_InitReadWords = DB_InitReadWords_Native;
    native->DB_ReadWordHash = DB_ReadWordHash_Native;
    native->DB_ReadFirstWordInvertedIndex = DB_ReadFirstWordInvertedIndex_Native;
    native->DB_ReadNextWordInvertedIndex = DB_ReadNextWordInvertedIndex_Native;
    native->DB_ReadWordData = DB_ReadWordData_Native;
    native->DB_EndReadWords = DB_EndReadWords_Native;

    /* File entries */
    native->DB_InitWriteFiles = DB_InitWriteFiles_Native;
    native->DB_WriteFile = DB_WriteFile_Native;
    native->DB_EndWriteFiles = DB_EndWriteFiles_Native;

    native->DB_InitReadFiles = DB_InitReadFiles_Native;
    native->DB_ReadFile = DB_ReadFile_Native;
    native->DB_EndReadFiles = DB_EndReadFiles_Native;

    /* Sorted indexes */
    native->DB_InitWriteSortedIndex = DB_InitWriteSortedIndex_Native;
    native->DB_WriteSortedIndex = DB_WriteSortedIndex_Native;
    native->DB_EndWriteSortedIndex = DB_EndWriteSortedIndex_Native;

    native->DB_InitReadSortedIndex = DB_InitReadSortedIndex_Native;
    native->DB_ReadSortedIndex = DB_ReadSortedIndex_Native;
    native->DB_ReadSortedData = DB_ReadSortedData_Native;
    native->DB_EndReadSortedIndex = DB_EndReadSortedIndex_Native;

    /* Properties */
    native->DB_WriteProperty = DB_WriteProperty_Native;
    native->DB_WritePropPositions = DB_WritePropPositions_Native;
    native->DB_ReadProperty = DB_ReadProperty_Native;
    native->DB_ReadPropPositions = DB_ReadPropPositions_Native;
    native->DB_Reopen_PropertiesForRead = DB_Reopen_PropertiesForRead_Native;

#ifdef USE_BTREE
    native->DB_WriteTotalWordsPerFile = DB_WriteTotalWordsPerFile_Native;
    native->DB_ReadTotalWordsPerFile = DB_ReadTotalWordsPerFile_Native;
#endif

    sw->Db = native;
}
122 |
|
|
|
123 |
|
|
|
124 |
|
|
/* |
125 |
|
|
-- release all wired memory for this module |
126 |
|
|
*/ |
127 |
|
|
|
128 |
|
|
/*
** freeModule_DBNative
**
** Releases the dispatch table installed by initModule_DBNative: first
** the duplicated module name, then the table itself, and finally clears
** sw->Db.
**
** Calling it when sw->Db is already NULL (for example a second time) is
** a no-op instead of a NULL dereference.
*/
void freeModule_DBNative(SWISH * sw)
{
    if (!sw->Db)
        return;

    efree(sw->Db->DB_name);
    efree(sw->Db);
    sw->Db = NULL;
}
135 |
|
|
|
136 |
|
|
|
137 |
|
|
|
138 |
|
|
/* ---------------------------------------------- */ |
139 |
|
|
|
140 |
|
|
|
141 |
|
|
|
142 |
|
|
/* |
143 |
|
|
-- Config Directives |
144 |
|
|
-- Configuration directives for this Module |
145 |
|
|
-- return: 0/1 = none/config applied |
146 |
|
|
*/ |
147 |
|
|
|
148 |
|
|
/*
** configModule_DBNative
**
** Configuration hook for this module.  No directives are handled here
** yet, so both arguments are ignored and the result is always 0
** ("none applied"; 1 would mean a directive was consumed).
*/
int configModule_DBNative(SWISH * sw, StringList * sl)
{
    (void) sw;
    (void) sl;

    return 0;
}
159 |
|
|
|
160 |
|
|
|
161 |
|
|
|
162 |
|
|
|
163 |
|
|
|
164 |
|
|
|
165 |
|
|
/* Does an index file have a readable format? |
166 |
|
|
*/ |
167 |
|
|
|
168 |
|
|
static void DB_CheckHeader(struct Handle_DBNative *DB) |
169 |
|
|
{ |
170 |
|
|
long swish_magic; |
171 |
|
|
|
172 |
|
|
fseek(DB->fp, 0, 0); |
173 |
|
|
swish_magic = readlong(DB->fp, fread); |
174 |
|
|
|
175 |
|
|
if (swish_magic != SWISH_MAGIC) |
176 |
|
|
{ |
177 |
|
|
set_progerr(INDEX_FILE_ERROR, DB->sw, "File \"%s\" has an unknown format.", DB->cur_index_file); |
178 |
|
|
return; |
179 |
|
|
} |
180 |
|
|
|
181 |
|
|
|
182 |
|
|
|
183 |
|
|
{ |
184 |
|
|
long |
185 |
|
|
#ifdef USE_BTREE |
186 |
|
|
btree, |
187 |
|
|
worddata, |
188 |
|
|
array, |
189 |
|
|
presorted, |
190 |
|
|
#endif |
191 |
|
|
prop; |
192 |
|
|
|
193 |
|
|
DB->unique_ID = readlong(DB->fp, fread); |
194 |
|
|
prop = readlong(DB->prop, fread); |
195 |
|
|
|
196 |
|
|
if (DB->unique_ID != prop) |
197 |
|
|
{ |
198 |
|
|
set_progerr(INDEX_FILE_ERROR, DB->sw, "Index file '%s' and property file '%s' are not related.", DB->cur_index_file, DB->cur_prop_file); |
199 |
|
|
return; |
200 |
|
|
} |
201 |
|
|
|
202 |
|
|
#ifdef USE_BTREE |
203 |
|
|
btree = readlong(DB->fp_btree, fread); |
204 |
|
|
if (DB->unique_ID != btree) |
205 |
|
|
{ |
206 |
|
|
set_progerr(INDEX_FILE_ERROR, DB->sw, "Index file '%s' and btree file '%s' are not related.", DB->cur_index_file, DB->cur_btree_file); |
207 |
|
|
return; |
208 |
|
|
} |
209 |
|
|
|
210 |
|
|
worddata = readlong(DB->fp_worddata, fread); |
211 |
|
|
if (DB->unique_ID != worddata) |
212 |
|
|
{ |
213 |
|
|
set_progerr(INDEX_FILE_ERROR, DB->sw, "Index file '%s' and worddata file '%s' are not related.", DB->cur_index_file, DB->cur_worddata_file); |
214 |
|
|
return; |
215 |
|
|
} |
216 |
|
|
|
217 |
|
|
array = readlong(DB->fp_array, fread); |
218 |
|
|
if (DB->unique_ID != array) |
219 |
|
|
{ |
220 |
|
|
set_progerr(INDEX_FILE_ERROR, DB->sw, "Index file '%s' and array file '%s' are not related.", DB->cur_index_file, DB->cur_array_file); |
221 |
|
|
return; |
222 |
|
|
} |
223 |
|
|
|
224 |
|
|
presorted = readlong(DB->fp_presorted, fread); |
225 |
|
|
|
226 |
|
|
if (DB->unique_ID != presorted) |
227 |
|
|
{ |
228 |
|
|
set_progerr(INDEX_FILE_ERROR, DB->sw, "Index file '%s' and presorted index file '%s' are not related.", DB->cur_index_file, DB->cur_presorted_file); |
229 |
|
|
return; |
230 |
|
|
} |
231 |
|
|
#endif |
232 |
|
|
} |
233 |
|
|
|
234 |
|
|
} |
235 |
|
|
|
236 |
|
|
/*
** newNativeDBHandle
**
** Allocates a Handle_DBNative and puts it into its "nothing opened yet"
** state: counters and offsets are zeroed, file pointers and file names
** are NULL, and the temporary-file flags are cleared.
**
** The w_* I/O hooks are bound either to the ramdisk_* routines or to the
** stdio ones depending on WRITE_WORDS_RAMDISK.  A private copy of dbname
** is stored in the handle.
**
** Returns the new handle.
*/
static struct Handle_DBNative *newNativeDBHandle(SWISH *sw, char *dbname)
{
    struct Handle_DBNative *h;

    h = (struct Handle_DBNative *) emalloc(sizeof(struct Handle_DBNative));

    h->sw = sw;                 /* kept for error reporting */

    /* Positions and counters */
    h->offsetstart = 0;
    h->nextwordoffset = 0;
    h->num_words = 0;
    h->worddata_counter = 0;
    h->lastsortedindex = 0;
    h->next_sortedindex = 0;

#ifndef USE_BTREE
    /* State that only exists in the hash-based layout */
    h->hashstart = 0;
    h->wordhash_counter = 0;
    h->wordhashdata = NULL;
    h->rd = NULL;
#endif

    /* Index and property files; tmp_* flags mark files created under a temporary name */
    h->tmp_index = 0;
    h->tmp_prop = 0;
    h->cur_index_file = NULL;
    h->cur_prop_file = NULL;
    h->fp = NULL;
    h->prop = NULL;

#ifdef USE_BTREE
    /* B-tree file */
    h->bt = NULL;
    h->fp_btree = NULL;
    h->tmp_btree = 0;
    h->cur_btree_file = NULL;

    /* Word data file */
    h->worddata = NULL;
    h->fp_worddata = NULL;
    h->tmp_worddata = 0;
    h->cur_worddata_file = NULL;

    /* Array file */
    h->fp_array = NULL;
    h->tmp_array = 0;
    h->cur_array_file = NULL;

    /* Presorted index file */
    h->presorted_array = NULL;
    h->presorted_root_node = NULL;
    h->presorted_propid = NULL;
    h->n_presorted_array = 0;
    h->tmp_presorted = 0;
    h->cur_presorted_file = NULL;
    h->fp_presorted = NULL;
    h->cur_presorted_array = NULL;
    h->cur_presorted_propid = 0;

    h->totwords_array = NULL;
    h->props_array = NULL;
#endif

    /* Select the I/O routines used through the w_* hooks */
    if (WRITE_WORDS_RAMDISK)
    {
        h->w_tell = ramdisk_tell;
        h->w_write = ramdisk_write;
        h->w_seek = ramdisk_seek;
        h->w_read = ramdisk_read;
        h->w_close = ramdisk_close;
        h->w_putc = ramdisk_putc;
        h->w_getc = ramdisk_getc;
    }
    else
    {
        h->w_tell = ftell;
        h->w_write = fwrite;
        h->w_seek = fseek;
        h->w_read = fread;
        h->w_close = fclose;
        h->w_putc = fputc;
        h->w_getc = fgetc;
    }

    h->dbname = estrdup(dbname);

    return h;
}
327 |
|
|
|
328 |
|
|
|
329 |
|
|
/*
** Builds the on-disk name used while creating one of the index files:
** base, then ext (skipped when NULL), then USE_TEMPFILE_EXTENSION when
** that macro is defined.  dest must be large enough for the result.
*/
static void DB_FillCreateName_Native(char *dest, const char *base, const char *ext)
{
    strcpy(dest, base);
    if (ext)
        strcat(dest, ext);
#ifdef USE_TEMPFILE_EXTENSION
    strcat(dest, USE_TEMPFILE_EXTENSION);
#endif
}

/*
** DB_Create_Native
**
** Creates a new native index: allocates a handle in DB_CREATE mode,
** stamps it with a unique ID taken from time(), then creates the index
** file and the property file (plus, with USE_BTREE, the btree, worddata,
** array and presorted files).  The index file starts with SWISH_MAGIC
** followed by the unique ID; every other file starts with the unique ID.
**
** When USE_TEMPFILE_EXTENSION is defined the files are created under a
** temporary name and the matching tmp_* flag is set on the handle.
**
** Finally the in-memory offset tables are cleared and zero-filled
** placeholders for them are written to the index file, with their file
** positions remembered in offsetstart (and hashstart without USE_BTREE).
**
** A file that cannot be opened is reported through progerrno().
** Returns the new handle as a void pointer.
*/
void *DB_Create_Native(SWISH *sw, char *dbname)
{
    struct Handle_DBNative *DB;
    char   *filename;
    size_t  name_max;
    long    magic = SWISH_MAGIC;
    int     slot;

    DB = (struct Handle_DBNative *) newNativeDBHandle(sw, dbname);
    DB->mode = DB_CREATE;
    DB->unique_ID = (long) time(NULL);  /* Ok, so if more than one index is created the second... */

    /* One scratch buffer, sized to hold any of the names built below */
    name_max = strlen(dbname) + strlen(PROPFILE_EXTENSION) + strlen(BTREE_EXTENSION) + strlen(WORDDATA_EXTENSION) + strlen(ARRAY_EXTENSION) + strlen(PRESORTED_EXTENSION) + 1;
#ifdef USE_TEMPFILE_EXTENSION
    name_max += strlen(USE_TEMPFILE_EXTENSION);
#endif
    filename = emalloc(name_max);


    /* Index file */
    DB_FillCreateName_Native(filename, dbname, NULL);
#ifdef USE_TEMPFILE_EXTENSION
    DB->tmp_index = 1;
#endif

    CreateEmptyFile(filename);
    DB->fp = openIndexFILEForReadAndWrite(filename);
    if (!DB->fp)
        progerrno("Couldn't create the index file \"%s\": ", filename);

    DB->cur_index_file = estrdup(filename);
    printlong(DB->fp, magic, fwrite);
    printlong(DB->fp, DB->unique_ID, fwrite);


    /* Property file */
    DB_FillCreateName_Native(filename, dbname, PROPFILE_EXTENSION);
#ifdef USE_TEMPFILE_EXTENSION
    DB->tmp_prop = 1;
#endif

    CreateEmptyFile(filename);
    DB->prop = openIndexFILEForWrite(filename);
    if (!DB->prop)
        progerrno("Couldn't create the property file \"%s\": ", filename);

    DB->cur_prop_file = estrdup(filename);
    printlong(DB->prop, DB->unique_ID, fwrite);


#ifdef USE_BTREE
    /* Btree file */
    DB_FillCreateName_Native(filename, dbname, BTREE_EXTENSION);
#ifdef USE_TEMPFILE_EXTENSION
    DB->tmp_btree = 1;
#endif

    CreateEmptyFile(filename);
    DB->fp_btree = openIndexFILEForReadAndWrite(filename);
    if (!DB->fp_btree)
        progerrno("Couldn't create the btree file \"%s\": ", filename);

    DB->cur_btree_file = estrdup(filename);
    printlong(DB->fp_btree, DB->unique_ID, fwrite);
    DB->bt = BTREE_Create(DB->fp_btree, 4096);


    /* WordData file */
    DB_FillCreateName_Native(filename, dbname, WORDDATA_EXTENSION);
#ifdef USE_TEMPFILE_EXTENSION
    DB->tmp_worddata = 1;
#endif

    CreateEmptyFile(filename);
    DB->fp_worddata = openIndexFILEForReadAndWrite(filename);
    if (!DB->fp_worddata)
        progerrno("Couldn't create the worddata file \"%s\": ", filename);

    printlong(DB->fp_worddata, DB->unique_ID, fwrite);
    DB->cur_worddata_file = estrdup(filename);
    DB->worddata = WORDDATA_Open(DB->fp_worddata);


    /* Array file; it backs both the total-words and the props arrays */
    DB_FillCreateName_Native(filename, dbname, ARRAY_EXTENSION);
#ifdef USE_TEMPFILE_EXTENSION
    DB->tmp_array = 1;
#endif

    CreateEmptyFile(filename);
    DB->fp_array = openIndexFILEForReadAndWrite(filename);
    if (!DB->fp_array)
        progerrno("Couldn't create the array file \"%s\": ", filename);

    printlong(DB->fp_array, DB->unique_ID, fwrite);
    DB->cur_array_file = estrdup(filename);
    DB->totwords_array = ARRAY_Create(DB->fp_array);
    DB->props_array = ARRAY_Create(DB->fp_array);


    /* PreSorted index file */
    DB_FillCreateName_Native(filename, dbname, PRESORTED_EXTENSION);
#ifdef USE_TEMPFILE_EXTENSION
    DB->tmp_presorted = 1;
#endif

    CreateEmptyFile(filename);
    DB->fp_presorted = openIndexFILEForWrite(filename);
    if (!DB->fp_presorted)
        progerrno("Couldn't create the presorted index file \"%s\": ", filename);

    DB->cur_presorted_file = estrdup(filename);
    printlong(DB->fp_presorted, DB->unique_ID, fwrite);
#endif

    efree(filename);


    /* Clear the in-memory lookup tables */
    for (slot = 0; slot < MAXCHARS; slot++)
        DB->offsets[slot] = 0L;

#ifndef USE_BTREE
    for (slot = 0; slot < VERYBIGHASHSIZE; slot++)
    {
        DB->hashoffsets[slot] = 0L;
        DB->lasthashval[slot] = 0L;
    }
#endif


    /* Reserve space in the index file for the offset pointers */
    DB->offsetstart = ftell(DB->fp);
    for (slot = 0; slot < MAXCHARS; slot++)
        printlong(DB->fp, (long) 0, fwrite);

#ifndef USE_BTREE
    /* ...and for the hash offsets that follow them */
    DB->hashstart = ftell(DB->fp);
    for (slot = 0; slot < VERYBIGHASHSIZE; slot++)
        printlong(DB->fp, (long) 0, fwrite);
#endif

    return (void *) DB;
}
479 |
|
|
|
480 |
|
|
|
481 |
|
|
/******************************************************************* |
482 |
|
|
* DB_Open_Native |
483 |
|
|
* |
484 |
|
|
*******************************************************************/ |
485 |
|
|
|
486 |
|
|
/*
** Returns a newly allocated string holding dbname followed by ext.
** The caller owns the result and releases it with efree().
*/
static char *DB_JoinName_Native(const char *dbname, const char *ext)
{
    char   *name = emalloc(strlen(dbname) + strlen(ext) + 1);

    strcpy(name, dbname);
    strcat(name, ext);

    return name;
}

/*******************************************************************
*   DB_Open_Native
*
*   Opens an existing native index.
*
*   mode selects how the files are opened: DB_READWRITE opens them for
*   reading and writing, DB_READ and any other value open them for
*   reading only.
*
*   The index file and the property file are opened (plus, with
*   USE_BTREE, the btree, presorted, worddata and array files), the
*   headers are cross-checked with DB_CheckHeader() and the offset
*   lookup tables are loaded.
*
*   A handle is ALWAYS returned.  When a file cannot be opened the
*   error is recorded with set_progerrno() and the partly initialised
*   handle is returned straight away; a header mismatch is detected
*   through DB->sw->lasterror and handled the same way.
*
*   Each companion file name is stored in the handle only once its file
*   has been opened; on failure the name is freed here.  The presorted
*   file is opened exactly once.
*
*******************************************************************/

void *DB_Open_Native(SWISH *sw, char *dbname,int mode)
{
    struct Handle_DBNative *DB;
    FILE   *(*openRoutine)(char *) = NULL;
    char   *s;
    int     i;

    switch (mode)
    {
    case DB_READWRITE:
        openRoutine = openIndexFILEForReadAndWrite;
        break;

    case DB_READ:
    default:
        openRoutine = openIndexFILEForRead;
        break;
    }

    DB = (struct Handle_DBNative *) newNativeDBHandle(sw, dbname);
    DB->mode = mode;

    /* Open index File */
    if (!(DB->fp = openRoutine(dbname)))
    {
        set_progerrno(INDEX_FILE_ERROR, DB->sw, "Could not open the index file '%s': ", dbname);
        return (void *) DB;
    }

    DB->cur_index_file = estrdup(dbname);

    /* Open property file */
    s = DB_JoinName_Native(dbname, PROPFILE_EXTENSION);

    if (!(DB->prop = openRoutine(s)))
    {
        set_progerrno(INDEX_FILE_ERROR, DB->sw, "Couldn't open the property file \"%s\": ", s);
        efree(s);               /* not stored in the handle, so release it here */
        return (void *) DB;
    }

    DB->cur_prop_file = s;

#ifdef USE_BTREE

    /* Open btree file */
    s = DB_JoinName_Native(dbname, BTREE_EXTENSION);

    if (!(DB->fp_btree = openRoutine(s)))
    {
        set_progerrno(INDEX_FILE_ERROR, DB->sw, "Couldn't open the btree file \"%s\": ", s);
        efree(s);
        return (void *) DB;
    }

    DB->cur_btree_file = s;

    /* Open presorted index file */
    s = DB_JoinName_Native(dbname, PRESORTED_EXTENSION);

    if (!(DB->fp_presorted = openRoutine(s)))
    {
        set_progerrno(INDEX_FILE_ERROR, DB->sw, "Couldn't open the presorted index file \"%s\": ", s);
        efree(s);
        return (void *) DB;
    }

    DB->cur_presorted_file = s;

    /* Open worddata file */
    s = DB_JoinName_Native(dbname, WORDDATA_EXTENSION);

    if (!(DB->fp_worddata = openRoutine(s)))
    {
        set_progerrno(INDEX_FILE_ERROR, DB->sw, "Couldn't open the worddata file \"%s\": ", s);
        efree(s);
        return (void *) DB;
    }

    DB->cur_worddata_file = s;

    /* Open array file */
    s = DB_JoinName_Native(dbname, ARRAY_EXTENSION);

    if (!(DB->fp_array = openRoutine(s)))
    {
        set_progerrno(INDEX_FILE_ERROR, DB->sw, "Couldn't open the array file \"%s\": ", s);
        efree(s);
        return (void *) DB;
    }

    DB->cur_array_file = s;

#endif

    /* Validate index files */
    DB_CheckHeader(DB);
    if ( DB->sw->lasterror )
        return (void *) DB;

    /* Read offsets lookuptable */
    DB->offsetstart = ftell(DB->fp);
    for (i = 0; i < MAXCHARS; i++)
        DB->offsets[i] = readlong(DB->fp, fread);

#ifndef USE_BTREE
    /* Read hashoffsets lookuptable */
    DB->hashstart = ftell(DB->fp);
    for (i = 0; i < VERYBIGHASHSIZE; i++)
        DB->hashoffsets[i] = readlong(DB->fp, fread);
#else
    DB->bt = BTREE_Open(DB->fp_btree,4096,DB->offsets[WORDPOS]);
    DB->worddata = WORDDATA_Open(DB->fp_worddata);
    DB->totwords_array = ARRAY_Open(DB->fp_array,DB->offsets[TOTALWORDSPERFILEPOS]);
    DB->props_array = ARRAY_Open(DB->fp_array,DB->offsets[FILEOFFSETPOS]);

    /* Put the file pointer of props file at the end of the file
    ** This is very important because if we are in update mode
    ** we must avoid the properties to be overwritten
    */
    fseek(DB->prop,0,SEEK_END);
#endif

    return (void *) DB;
}
641 |
|
|
|
642 |
|
|
/**************************************************************** |
643 |
|
|
* This closes a file, and will rename if flagged as such |
644 |
|
|
* Frees the associated current file name |
645 |
|
|
* |
646 |
|
|
*****************************************************************/ |
647 |
|
|
|
648 |
|
|
/*
** DB_Close_File_Native
**
** Closes *fp and resets it to NULL.  With USE_TEMPFILE_EXTENSION, if
** *tempflag is set the file is then renamed to its name without the
** temporary extension and the flag is cleared (on _WIN32 and __VMS an
** existing file with the final name is removed first).  Finally
** *filename is freed and reset to NULL.
**
** A NULL *fp, a failed fclose(), a failed remove() and a failed
** rename() are each reported through progerr()/progerrno().
*/
static void DB_Close_File_Native(FILE ** fp, char **filename, int *tempflag)
{
    if (*fp == NULL)
        progerr("Called close on non-opened file '%s'", *filename);

    if (fclose(*fp) != 0)
        progerrno("Failed to close file '%s': ", *filename);

    *fp = NULL;

#ifdef USE_TEMPFILE_EXTENSION
    if (*tempflag)
    {
        char   *final_name = estrdup(*filename);
        size_t  keep = strlen(final_name) - strlen(USE_TEMPFILE_EXTENSION);

        /* Chop the temporary extension off the end of the copy */
        final_name[keep] = '\0';

#if defined(_WIN32) || defined (__VMS)
        if (isfile(final_name) && remove(final_name))
            progerrno("Failed to unlink '%s' before renaming. : ", final_name);
#endif

        if (rename(*filename, final_name) != 0)
            progerrno("Failed to rename '%s' to '%s' : ", *filename, final_name);

        *tempflag = 0;          /* no longer opened as a temporary file */
        efree(final_name);
    }
#endif

    efree(*filename);
    *filename = NULL;
}
682 |
|
|
|
683 |
|
|
|
684 |
|
|
|
685 |
|
|
|
686 |
|
|
void DB_Close_Native(void *db) |
687 |
|
|
{ |
688 |
|
|
int i; |
689 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
690 |
|
|
FILE *fp = DB->fp; |
691 |
|
|
|
692 |
|
|
/* Close (and rename) property file, if it's open */ |
693 |
|
|
DB_Close_File_Native(&DB->prop, &DB->cur_prop_file, &DB->tmp_prop); |
694 |
|
|
|
695 |
|
|
#ifdef USE_BTREE |
696 |
|
|
/* Close (and rename) array file, if it's open */ |
697 |
|
|
if(DB->fp_array) |
698 |
|
|
{ |
699 |
|
|
if(DB->totwords_array) |
700 |
|
|
{ |
701 |
|
|
DB->offsets[TOTALWORDSPERFILEPOS] = ARRAY_Close(DB->totwords_array); |
702 |
|
|
DB->totwords_array = NULL; |
703 |
|
|
} |
704 |
|
|
if(DB->props_array) |
705 |
|
|
{ |
706 |
|
|
DB->offsets[FILEOFFSETPOS] = ARRAY_Close(DB->props_array); |
707 |
|
|
DB->props_array = NULL; |
708 |
|
|
} |
709 |
|
|
DB_Close_File_Native(&DB->fp_array, &DB->cur_array_file, &DB->tmp_array); |
710 |
|
|
} |
711 |
|
|
/* Close (and rename) worddata file, if it's open */ |
712 |
|
|
if(DB->worddata) |
713 |
|
|
{ |
714 |
|
|
WORDDATA_Close(DB->worddata); |
715 |
|
|
DB_Close_File_Native(&DB->fp_worddata, &DB->cur_worddata_file, &DB->tmp_worddata); |
716 |
|
|
DB->worddata = NULL; |
717 |
|
|
} |
718 |
|
|
/* Close (and rename) btree file, if it's open */ |
719 |
|
|
if(DB->bt) |
720 |
|
|
{ |
721 |
|
|
DB->offsets[WORDPOS] = BTREE_Close(DB->bt); |
722 |
|
|
DB_Close_File_Native(&DB->fp_btree, &DB->cur_btree_file, &DB->tmp_btree); |
723 |
|
|
DB->bt = NULL; |
724 |
|
|
} |
725 |
|
|
|
726 |
|
|
/* Close (and rename) presorted index file, if it's open */ |
727 |
|
|
if(DB->fp_presorted) |
728 |
|
|
{ |
729 |
|
|
DB_Close_File_Native(&DB->fp_presorted, &DB->cur_presorted_file, &DB->tmp_presorted); |
730 |
|
|
} |
731 |
|
|
if(DB->presorted_array) |
732 |
|
|
{ |
733 |
|
|
for(i = 0; i < DB->n_presorted_array; i++) |
734 |
|
|
{ |
735 |
|
|
if(DB->presorted_array[i]) |
736 |
|
|
ARRAY_Close(DB->presorted_array[i]); |
737 |
|
|
DB->presorted_array[i] = NULL; |
738 |
|
|
} |
739 |
|
|
efree(DB->presorted_array); |
740 |
|
|
} |
741 |
|
|
if(DB->presorted_root_node) |
742 |
|
|
efree(DB->presorted_root_node); |
743 |
|
|
if(DB->presorted_propid) |
744 |
|
|
efree(DB->presorted_propid); |
745 |
|
|
#endif |
746 |
|
|
|
747 |
|
|
if (DB->mode == DB_CREATE) /* If we are indexing update offsets to words and files */ |
748 |
|
|
{ |
749 |
|
|
/* Update internal pointers */ |
750 |
|
|
|
751 |
|
|
fseek(fp, DB->offsetstart, 0); |
752 |
|
|
for (i = 0; i < MAXCHARS; i++) |
753 |
|
|
printlong(fp, DB->offsets[i], fwrite); |
754 |
|
|
|
755 |
|
|
#ifndef USE_BTREE |
756 |
|
|
fseek(fp, DB->hashstart, 0); |
757 |
|
|
for (i = 0; i < VERYBIGHASHSIZE; i++) |
758 |
|
|
printlong(fp, DB->hashoffsets[i], fwrite); |
759 |
|
|
#endif |
760 |
|
|
} |
761 |
|
|
|
762 |
|
|
/* Close (and rename) the index file */ |
763 |
|
|
DB_Close_File_Native(&DB->fp, &DB->cur_index_file, &DB->tmp_index); |
764 |
|
|
|
765 |
|
|
|
766 |
|
|
if (DB->dbname) |
767 |
|
|
efree(DB->dbname); |
768 |
|
|
efree(DB); |
769 |
|
|
} |
770 |
|
|
|
771 |
|
|
void DB_Remove_Native(void *db) |
772 |
|
|
{ |
773 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
774 |
|
|
|
775 |
|
|
|
776 |
|
|
/* this is currently not used */ |
777 |
|
|
/* $$$ remove the prop file too */ |
778 |
|
|
fclose(DB->fp); |
779 |
|
|
remove(DB->dbname); |
780 |
|
|
efree(DB->dbname); |
781 |
|
|
efree(DB); |
782 |
|
|
} |
783 |
|
|
|
784 |
|
|
|
785 |
|
|
/*--------------------------------------------*/ |
786 |
|
|
/*--------------------------------------------*/ |
787 |
|
|
/* Header stuff */ |
788 |
|
|
/*--------------------------------------------*/ |
789 |
|
|
/*--------------------------------------------*/ |
790 |
|
|
|
791 |
|
|
int DB_InitWriteHeader_Native(void *db) |
792 |
|
|
{ |
793 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
794 |
|
|
|
795 |
|
|
if(DB->offsets[HEADERPOS]) |
796 |
|
|
{ |
797 |
|
|
/* If DB->offsets[HEADERPOS] is not 0 we are in update mode |
798 |
|
|
** So, put the pointer file in the header start position to overwrite |
799 |
|
|
** the header |
800 |
|
|
*/ |
801 |
|
|
fseek(DB->fp,DB->offsets[HEADERPOS],SEEK_SET); |
802 |
|
|
} |
803 |
|
|
else |
804 |
|
|
{ |
805 |
|
|
/* The index file is being created. So put the header in the |
806 |
|
|
** current file position (coincides with the end of the file |
807 |
|
|
*/ |
808 |
|
|
DB->offsets[HEADERPOS] = ftell(DB->fp); |
809 |
|
|
} |
810 |
|
|
return 0; |
811 |
|
|
} |
812 |
|
|
|
813 |
|
|
|
814 |
|
|
int DB_EndWriteHeader_Native(void *db) |
815 |
|
|
{ |
816 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
817 |
|
|
FILE *fp = DB->fp; |
818 |
|
|
|
819 |
|
|
/* End of header delimiter */ |
820 |
|
|
fputc(0, fp); |
821 |
|
|
|
822 |
|
|
return 0; |
823 |
|
|
} |
824 |
|
|
|
825 |
|
|
int DB_WriteHeaderData_Native(int id, unsigned char *s, int len, void *db) |
826 |
|
|
{ |
827 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
828 |
|
|
|
829 |
|
|
FILE *fp = DB->fp; |
830 |
|
|
|
831 |
|
|
compress1(id, fp, fputc); |
832 |
|
|
compress1(len, fp, fputc); |
833 |
|
|
fwrite(s, len, sizeof(char), fp); |
834 |
|
|
|
835 |
|
|
return 0; |
836 |
|
|
} |
837 |
|
|
|
838 |
|
|
|
839 |
|
|
int DB_InitReadHeader_Native(void *db) |
840 |
|
|
{ |
841 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
842 |
|
|
|
843 |
|
|
fseek(DB->fp, DB->offsets[HEADERPOS], 0); |
844 |
|
|
return 0; |
845 |
|
|
} |
846 |
|
|
|
847 |
|
|
int DB_ReadHeaderData_Native(int *id, unsigned char **s, int *len, void *db) |
848 |
|
|
{ |
849 |
|
|
int tmp; |
850 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
851 |
|
|
FILE *fp = DB->fp; |
852 |
|
|
|
853 |
|
|
tmp = uncompress1(fp, fgetc); |
854 |
|
|
*id = tmp; |
855 |
|
|
if (tmp) |
856 |
|
|
{ |
857 |
|
|
tmp = uncompress1(fp, fgetc); |
858 |
|
|
*s = (unsigned char *) emalloc(tmp + 1); |
859 |
|
|
*len = tmp; |
860 |
|
|
fread(*s, *len, sizeof(char), fp); |
861 |
|
|
} |
862 |
|
|
else |
863 |
|
|
{ |
864 |
|
|
len = 0; |
865 |
|
|
*s = NULL; |
866 |
|
|
} |
867 |
|
|
return 0; |
868 |
|
|
} |
869 |
|
|
|
870 |
|
|
/*
** DB_EndReadHeader_Native
**
** Nothing has to be done after reading the header of a native index;
** db is ignored.  Always returns 0.
*/
int DB_EndReadHeader_Native(void *db)
{
    (void) db;

    return 0;
}
874 |
|
|
|
875 |
|
|
/*--------------------------------------------*/ |
876 |
|
|
/*--------------------------------------------*/ |
877 |
|
|
/* Word Stuff */ |
878 |
|
|
/*--------------------------------------------*/ |
879 |
|
|
/*--------------------------------------------*/ |
880 |
|
|
|
881 |
|
|
int DB_InitWriteWords_Native(void *db) |
882 |
|
|
{ |
883 |
|
|
|
884 |
|
|
#ifndef USE_BTREE |
885 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
886 |
|
|
DB->offsets[WORDPOS] = ftell(DB->fp); |
887 |
|
|
#endif |
888 |
|
|
|
889 |
|
|
return 0; |
890 |
|
|
} |
891 |
|
|
|
892 |
|
|
/*
** cmp_wordhashdata
**
** Sort comparator for the wordhashdata table.  Each record handed to
** swish_qsort() is 3 * sizeof(long) bytes wide and is ordered by its
** first long.
**
** The keys are compared as longs (not ints) and without subtraction, so
** the result is correct where long is wider than int and cannot
** overflow.
**
** Returns a negative value, zero or a positive value when the key of s1
** is smaller than, equal to or larger than the key of s2.
*/
int cmp_wordhashdata(const void *s1, const void *s2)
{
    const long *a = (const long *) s1;
    const long *b = (const long *) s2;

    return (*a > *b) - (*a < *b);
}
899 |
|
|
|
900 |
|
|
int DB_EndWriteWords_Native(void *db) |
901 |
|
|
{ |
902 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
903 |
|
|
FILE *fp = (FILE *) DB->fp; |
904 |
|
|
#ifndef USE_BTREE |
905 |
|
|
int i, |
906 |
|
|
wordlen; |
907 |
|
|
long wordID, |
908 |
|
|
f_hash_offset, |
909 |
|
|
f_offset, |
910 |
|
|
word_pos; |
911 |
|
|
#else |
912 |
|
|
FILE *fp_tmp; |
913 |
|
|
#endif |
914 |
|
|
|
915 |
|
|
#ifdef USE_BTREE |
916 |
|
|
|
917 |
|
|
/* If we close the BTREE here we can save some memory bytes */ |
918 |
|
|
/* Close (and rename) worddata file, if it's open */ |
919 |
|
|
fp_tmp =DB->worddata->fp; |
920 |
|
|
WORDDATA_Close(DB->worddata); |
921 |
|
|
DB->worddata=NULL; |
922 |
|
|
DB_Close_File_Native(&fp_tmp, &DB->cur_worddata_file, &DB->tmp_worddata); |
923 |
|
|
|
924 |
|
|
fp_tmp = DB->bt->fp; |
925 |
|
|
DB->offsets[WORDPOS] = BTREE_Close(DB->bt); |
926 |
|
|
DB->bt = NULL; |
927 |
|
|
DB_Close_File_Native(&fp_tmp, &DB->cur_btree_file, &DB->tmp_btree); |
928 |
|
|
|
929 |
|
|
/* Restore file pointer at the end of file */ |
930 |
|
|
fseek(DB->fp, 0, SEEK_END); |
931 |
|
|
#else |
932 |
|
|
|
933 |
|
|
/* Free hash zone */ |
934 |
|
|
Mem_ZoneFree(&DB->hashzone); |
935 |
|
|
|
936 |
|
|
/* Now update word's data offset into the list of words */ |
937 |
|
|
/* Simple check words and worddata must match */ |
938 |
|
|
|
939 |
|
|
if (DB->num_words != DB->wordhash_counter) |
940 |
|
|
progerrno("Internal DB_native error - DB->num_words != DB->wordhash_counter: "); |
941 |
|
|
|
942 |
|
|
if (DB->num_words != DB->worddata_counter) |
943 |
|
|
progerrno("Internal DB_native error - DB->num_words != DB->worddata_counter: "); |
944 |
|
|
|
945 |
|
|
/* Sort wordhashdata to be writte to allow sequential writes */ |
946 |
|
|
swish_qsort(DB->wordhashdata, DB->num_words, 3 * sizeof(long), cmp_wordhashdata); |
947 |
|
|
|
948 |
|
|
if (WRITE_WORDS_RAMDISK) |
949 |
|
|
{ |
950 |
|
|
fp = (FILE *) DB->rd; |
951 |
|
|
} |
952 |
|
|
for (i = 0; i < DB->num_words; i++) |
953 |
|
|
{ |
954 |
|
|
wordID = DB->wordhashdata[3 * i]; |
955 |
|
|
f_hash_offset = DB->wordhashdata[3 * i + 1]; |
956 |
|
|
f_offset = DB->wordhashdata[3 * i + 2]; |
957 |
|
|
|
958 |
|
|
word_pos = wordID; |
959 |
|
|
if (WRITE_WORDS_RAMDISK) |
960 |
|
|
{ |
961 |
|
|
word_pos -= DB->offsets[WORDPOS]; |
962 |
|
|
} |
963 |
|
|
/* Position file pointer in word */ |
964 |
|
|
DB->w_seek(fp, word_pos, SEEK_SET); |
965 |
|
|
/* Jump over word length and word */ |
966 |
|
|
wordlen = uncompress1(fp, DB->w_getc); /* Get Word length */ |
967 |
|
|
DB->w_seek(fp, (long) wordlen, SEEK_CUR); /* Jump Word */ |
968 |
|
|
/* Write offset to next chain */ |
969 |
|
|
printlong(fp, f_hash_offset, DB->w_write); |
970 |
|
|
/* Write offset to word data */ |
971 |
|
|
printlong(fp, f_offset, DB->w_write); |
972 |
|
|
} |
973 |
|
|
|
974 |
|
|
efree(DB->wordhashdata); |
975 |
|
|
DB->wordhashdata = NULL; |
976 |
|
|
DB->worddata_counter = 0; |
977 |
|
|
DB->wordhash_counter = 0; |
978 |
|
|
|
979 |
|
|
if (WRITE_WORDS_RAMDISK) |
980 |
|
|
{ |
981 |
|
|
unsigned char buffer[4096]; |
982 |
|
|
long ramdisk_size; |
983 |
|
|
long read = 0; |
984 |
|
|
|
985 |
|
|
ramdisk_seek((FILE *) DB->rd, 0, SEEK_END); |
986 |
|
|
ramdisk_size = ramdisk_tell((FILE *) DB->rd); |
987 |
|
|
/* Write ramdisk to fp end free it */ |
988 |
|
|
fseek((FILE *) DB->fp, DB->offsets[WORDPOS], SEEK_SET); |
989 |
|
|
ramdisk_seek((FILE *) DB->rd, 0, SEEK_SET); |
990 |
|
|
while (ramdisk_size) |
991 |
|
|
{ |
992 |
|
|
read = ramdisk_read(buffer, 4096, 1, (FILE *) DB->rd); |
993 |
|
|
fwrite(buffer, read, 1, DB->fp); |
994 |
|
|
ramdisk_size -= read; |
995 |
|
|
} |
996 |
|
|
ramdisk_close((FILE *) DB->rd); |
997 |
|
|
} |
998 |
|
|
/* Restore file pointer at the end of file */ |
999 |
|
|
fseek(DB->fp, 0, SEEK_END); |
1000 |
|
|
fputc(0, DB->fp); /* End of words mark */ |
1001 |
|
|
|
1002 |
|
|
#endif |
1003 |
|
|
|
1004 |
|
|
return 0; |
1005 |
|
|
} |
1006 |
|
|
|
1007 |
|
|
#ifndef USE_BTREE |
1008 |
|
|
long DB_GetWordID_Native(void *db) |
1009 |
|
|
{ |
1010 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1011 |
|
|
FILE *fp = DB->fp; |
1012 |
|
|
long pos = 0; |
1013 |
|
|
|
1014 |
|
|
if (WRITE_WORDS_RAMDISK) |
1015 |
|
|
{ |
1016 |
|
|
if (!DB->rd) |
1017 |
|
|
{ |
1018 |
|
|
/* ramdisk size as suggested by Bill Meier */ |
1019 |
|
|
DB->rd = ramdisk_create("RAM Disk: write words", 32 * 4096); |
1020 |
|
|
} |
1021 |
|
|
pos = DB->offsets[WORDPOS]; |
1022 |
|
|
fp = (FILE *) DB->rd; |
1023 |
|
|
} |
1024 |
|
|
pos += DB->w_tell(fp); |
1025 |
|
|
|
1026 |
|
|
return pos; /* Native database uses position as a Word ID */ |
1027 |
|
|
} |
1028 |
|
|
|
1029 |
|
|
int DB_WriteWord_Native(char *word, long wordID, void *db) |
1030 |
|
|
{ |
1031 |
|
|
int i, |
1032 |
|
|
wordlen; |
1033 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1034 |
|
|
|
1035 |
|
|
FILE *fp = DB->fp; |
1036 |
|
|
|
1037 |
|
|
i = (int) ((unsigned char) word[0]); |
1038 |
|
|
|
1039 |
|
|
if (!DB->offsets[i]) |
1040 |
|
|
DB->offsets[i] = wordID; |
1041 |
|
|
|
1042 |
|
|
|
1043 |
|
|
/* Write word length, word and a NULL offset */ |
1044 |
|
|
wordlen = strlen(word); |
1045 |
|
|
|
1046 |
|
|
if (WRITE_WORDS_RAMDISK) |
1047 |
|
|
{ |
1048 |
|
|
fp = (FILE *) DB->rd; |
1049 |
|
|
} |
1050 |
|
|
compress1(wordlen, fp, DB->w_putc); |
1051 |
|
|
DB->w_write(word, wordlen, sizeof(char), fp); |
1052 |
|
|
|
1053 |
|
|
printlong(fp, (long) 0, DB->w_write); /* hash chain */ |
1054 |
|
|
printlong(fp, (long) 0, DB->w_write); /* word's data pointer */ |
1055 |
|
|
|
1056 |
|
|
DB->num_words++; |
1057 |
|
|
|
1058 |
|
|
return 0; |
1059 |
|
|
} |
1060 |
|
|
|
1061 |
|
|
long DB_WriteWordData_Native(long wordID, unsigned char *worddata, int lendata, void *db) |
1062 |
|
|
{ |
1063 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1064 |
|
|
FILE *fp = DB->fp; |
1065 |
|
|
struct numhash *numhash; |
1066 |
|
|
int numhashval; |
1067 |
|
|
|
1068 |
|
|
/* We must be at the end of the file */ |
1069 |
|
|
|
1070 |
|
|
if (!DB->worddata_counter) |
1071 |
|
|
{ |
1072 |
|
|
/* We are starting writing worddata */ |
1073 |
|
|
/* If inside a ramdisk we must preserve its space */ |
1074 |
|
|
if (WRITE_WORDS_RAMDISK) |
1075 |
|
|
{ |
1076 |
|
|
long ramdisk_size; |
1077 |
|
|
|
1078 |
|
|
ramdisk_seek((FILE *) DB->rd, 0, SEEK_END); |
1079 |
|
|
ramdisk_size = ramdisk_tell((FILE *) DB->rd); |
1080 |
|
|
/* Preserve ramdisk size in DB file */ |
1081 |
|
|
/* it will be write later */ |
1082 |
|
|
fseek((FILE *) DB->fp, ramdisk_size, SEEK_END); |
1083 |
|
|
} |
1084 |
|
|
} |
1085 |
|
|
/* Search for word's ID */ |
1086 |
|
|
numhashval = bignumhash(wordID); |
1087 |
|
|
for (numhash = DB->hash[numhashval]; numhash; numhash = numhash->next) |
1088 |
|
|
if (DB->wordhashdata[3 * numhash->index] == wordID) |
1089 |
|
|
break; |
1090 |
|
|
if (!numhash) |
1091 |
|
|
progerrno("Internal db_native.c error in DB_WriteWordData_Native: "); |
1092 |
|
|
DB->wordhashdata[3 * numhash->index + 2] = ftell(fp); |
1093 |
|
|
|
1094 |
|
|
DB->worddata_counter++; |
1095 |
|
|
|
1096 |
|
|
/* Write the worddata to disk */ |
1097 |
|
|
compress1(lendata, fp, fputc); |
1098 |
|
|
fwrite(worddata, lendata, 1, fp); |
1099 |
|
|
|
1100 |
|
|
/* A NULL byte to indicate end of word data */ |
1101 |
|
|
fputc(0, fp); |
1102 |
|
|
|
1103 |
|
|
return 0; |
1104 |
|
|
} |
1105 |
|
|
|
1106 |
|
|
#else |
1107 |
|
|
|
1108 |
|
|
long DB_GetWordID_Native(void *db) |
1109 |
|
|
{ |
1110 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1111 |
|
|
|
1112 |
|
|
return DB->worddata->lastid; |
1113 |
|
|
} |
1114 |
|
|
|
1115 |
|
|
int DB_WriteWord_Native(char *word, long wordID, void *db) |
1116 |
|
|
{ |
1117 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1118 |
|
|
|
1119 |
|
|
BTREE_Insert(DB->bt, (unsigned char *)word, strlen(word), (unsigned long) wordID); |
1120 |
|
|
|
1121 |
|
|
DB->num_words++; |
1122 |
|
|
|
1123 |
|
|
return 0; |
1124 |
|
|
} |
1125 |
|
|
|
1126 |
|
|
int DB_UpdateWordID_Native(char *word, long new_wordID, void *db) |
1127 |
|
|
{ |
1128 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1129 |
|
|
|
1130 |
|
|
BTREE_Update(DB->bt, (unsigned char *)word, strlen(word), (unsigned long) new_wordID); |
1131 |
|
|
|
1132 |
|
|
return 0; |
1133 |
|
|
} |
1134 |
|
|
|
1135 |
|
|
int DB_DeleteWordData_Native(long wordID, void *db) |
1136 |
|
|
{ |
1137 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1138 |
|
|
int dummy; |
1139 |
|
|
|
1140 |
|
|
WORDDATA_Del(DB->worddata, wordID, &dummy); |
1141 |
|
|
|
1142 |
|
|
return 0; |
1143 |
|
|
} |
1144 |
|
|
|
1145 |
|
|
long DB_WriteWordData_Native(long wordID, unsigned char *worddata, int lendata, void *db) |
1146 |
|
|
{ |
1147 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1148 |
|
|
DB->worddata_counter++; |
1149 |
|
|
|
1150 |
|
|
/* Write the worddata to disk */ |
1151 |
|
|
WORDDATA_Put(DB->worddata,lendata,worddata); |
1152 |
|
|
|
1153 |
|
|
return 0; |
1154 |
|
|
} |
1155 |
|
|
|
1156 |
|
|
#endif |
1157 |
|
|
|
1158 |
|
|
#ifndef USE_BTREE |
1159 |
|
|
int DB_WriteWordHash_Native(char *word, long wordID, void *db) |
1160 |
|
|
{ |
1161 |
|
|
int i, |
1162 |
|
|
hashval, |
1163 |
|
|
numhashval; |
1164 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1165 |
|
|
struct numhash *numhash; |
1166 |
|
|
|
1167 |
|
|
if (!DB->wordhash_counter) |
1168 |
|
|
{ |
1169 |
|
|
/* Init hash array */ |
1170 |
|
|
for (i = 0; i < BIGHASHSIZE; i++) |
1171 |
|
|
DB->hash[i] = NULL; |
1172 |
|
|
DB->hashzone = Mem_ZoneCreate("WriteWordHash", DB->num_words * sizeof(struct numhash), 0); |
1173 |
|
|
|
1174 |
|
|
/* If we are here we have finished WriteWord_Native */ |
1175 |
|
|
/* If using ramdisk - Reserve space upto the size of the ramdisk */ |
1176 |
|
|
if (WRITE_WORDS_RAMDISK) |
1177 |
|
|
{ |
1178 |
|
|
long ram_size = DB->w_seek((FILE *) DB->rd, 0, SEEK_END); |
1179 |
|
|
|
1180 |
|
|
fseek(DB->fp, ram_size, SEEK_SET); |
1181 |
|
|
} |
1182 |
|
|
|
1183 |
|
|
DB->wordhashdata = emalloc(3 * DB->num_words * sizeof(long)); |
1184 |
|
|
} |
1185 |
|
|
|
1186 |
|
|
hashval = verybighash(word); |
1187 |
|
|
|
1188 |
|
|
if (!DB->hashoffsets[hashval]) |
1189 |
|
|
{ |
1190 |
|
|
DB->hashoffsets[hashval] = wordID; |
1191 |
|
|
} |
1192 |
|
|
|
1193 |
|
|
DB->wordhashdata[3 * DB->wordhash_counter] = wordID; |
1194 |
|
|
DB->wordhashdata[3 * DB->wordhash_counter + 1] = (long) 0; |
1195 |
|
|
|
1196 |
|
|
|
1197 |
|
|
/* Add to the hash */ |
1198 |
|
|
numhash = (struct numhash *) Mem_ZoneAlloc(DB->hashzone, sizeof(struct numhash)); |
1199 |
|
|
|
1200 |
|
|
numhashval = bignumhash(wordID); |
1201 |
|
|
numhash->index = DB->wordhash_counter; |
1202 |
|
|
numhash->next = DB->hash[numhashval]; |
1203 |
|
|
DB->hash[numhashval] = numhash; |
1204 |
|
|
|
1205 |
|
|
DB->wordhash_counter++; |
1206 |
|
|
|
1207 |
|
|
/* Update previous word in hashlist */ |
1208 |
|
|
if (DB->lasthashval[hashval]) |
1209 |
|
|
{ |
1210 |
|
|
/* Search for DB->lasthashval[hashval] */ |
1211 |
|
|
numhashval = bignumhash(DB->lasthashval[hashval]); |
1212 |
|
|
for (numhash = DB->hash[numhashval]; numhash; numhash = numhash->next) |
1213 |
|
|
if (DB->wordhashdata[3 * numhash->index] == DB->lasthashval[hashval]) |
1214 |
|
|
break; |
1215 |
|
|
if (!numhash) |
1216 |
|
|
progerrno("Internal db_native.c error in DB_WriteWordHash_Native: "); |
1217 |
|
|
DB->wordhashdata[3 * numhash->index + 1] = (long) wordID; |
1218 |
|
|
} |
1219 |
|
|
DB->lasthashval[hashval] = wordID; |
1220 |
|
|
|
1221 |
|
|
return 0; |
1222 |
|
|
} |
1223 |
|
|
#endif |
1224 |
|
|
|
1225 |
|
|
/*
** Called before words are read.  Nothing to prepare in this backend:
** `db` is ignored and 0 is returned.
*/
int DB_InitReadWords_Native(void *db)
{
    (void) db;                  /* unused */

    return 0;
}
1229 |
|
|
|
1230 |
|
|
/*
** Called after words have been read.  Nothing to clean up in this
** backend: `db` is ignored and 0 is returned.
*/
int DB_EndReadWords_Native(void *db)
{
    (void) db;                  /* unused */

    return 0;
}
1234 |
|
|
|
1235 |
|
|
#ifndef USE_BTREE |
1236 |
|
|
int DB_ReadWordHash_Native(char *word, long *wordID, void *db) |
1237 |
|
|
{ |
1238 |
|
|
int wordlen, |
1239 |
|
|
res, |
1240 |
|
|
hashval; |
1241 |
|
|
long offset, |
1242 |
|
|
dataoffset; |
1243 |
|
|
char *fileword = NULL; |
1244 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1245 |
|
|
FILE *fp = DB->fp; |
1246 |
|
|
|
1247 |
|
|
|
1248 |
|
|
/* If there is not a star use the hash approach ... */ |
1249 |
|
|
res = 1; |
1250 |
|
|
|
1251 |
|
|
/* Get hash file offset */ |
1252 |
|
|
hashval = verybighash(word); |
1253 |
|
|
if (!(offset = DB->hashoffsets[hashval])) |
1254 |
|
|
{ |
1255 |
|
|
*wordID = 0; |
1256 |
|
|
return 0; |
1257 |
|
|
} |
1258 |
|
|
/* Search for word */ |
1259 |
|
|
while (res) |
1260 |
|
|
{ |
1261 |
|
|
/* Position in file */ |
1262 |
|
|
fseek(fp, offset, SEEK_SET); |
1263 |
|
|
/* Get word */ |
1264 |
|
|
wordlen = uncompress1(fp, fgetc); |
1265 |
|
|
fileword = emalloc(wordlen + 1); |
1266 |
|
|
fread(fileword, 1, wordlen, fp); |
1267 |
|
|
fileword[wordlen] = '\0'; |
1268 |
|
|
offset = readlong(fp, fread); /* Next hash */ |
1269 |
|
|
dataoffset = readlong(fp, fread); /* Offset to Word data */ |
1270 |
|
|
|
1271 |
|
|
res = strcmp(word, fileword); |
1272 |
|
|
efree(fileword); |
1273 |
|
|
|
1274 |
|
|
if (!res) |
1275 |
|
|
break; /* Found !! */ |
1276 |
|
|
else if (!offset) |
1277 |
|
|
{ |
1278 |
|
|
dataoffset = 0; |
1279 |
|
|
break; |
1280 |
|
|
} |
1281 |
|
|
} |
1282 |
|
|
*wordID = dataoffset; |
1283 |
|
|
return 0; |
1284 |
|
|
} |
1285 |
|
|
|
1286 |
|
|
int DB_ReadFirstWordInvertedIndex_Native(char *word, char **resultword, long *wordID, void *db) |
1287 |
|
|
{ |
1288 |
|
|
int wordlen, |
1289 |
|
|
i, |
1290 |
|
|
res, |
1291 |
|
|
len, |
1292 |
|
|
found; |
1293 |
|
|
long dataoffset = 0; |
1294 |
|
|
char *fileword = NULL; |
1295 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1296 |
|
|
FILE *fp = DB->fp; |
1297 |
|
|
|
1298 |
|
|
|
1299 |
|
|
len = strlen(word); |
1300 |
|
|
|
1301 |
|
|
i = (int) ((unsigned char) word[0]); |
1302 |
|
|
|
1303 |
|
|
if (!DB->offsets[i]) |
1304 |
|
|
{ |
1305 |
|
|
*resultword = NULL; |
1306 |
|
|
*wordID = 0; |
1307 |
|
|
return 0; |
1308 |
|
|
} |
1309 |
|
|
found = 1; |
1310 |
|
|
fseek(fp, DB->offsets[i], 0); |
1311 |
|
|
|
1312 |
|
|
/* Look for first occurrence */ |
1313 |
|
|
wordlen = uncompress1(fp, fgetc); |
1314 |
|
|
fileword = (char *) emalloc(wordlen + 1); |
1315 |
|
|
|
1316 |
|
|
while (wordlen) |
1317 |
|
|
{ |
1318 |
|
|
fread(fileword, 1, wordlen, fp); |
1319 |
|
|
fileword[wordlen] = '\0'; |
1320 |
|
|
readlong(fp, fread); /* jump hash offset */ |
1321 |
|
|
dataoffset = readlong(fp, fread); /* Get offset to word's data */ |
1322 |
|
|
if (!(res = strncmp(word, fileword, len))) /*Found!! */ |
1323 |
|
|
{ |
1324 |
|
|
DB->nextwordoffset = ftell(fp); /* preserve next word pos */ |
1325 |
|
|
break; |
1326 |
|
|
} |
1327 |
|
|
if (res < 0) |
1328 |
|
|
{ |
1329 |
|
|
dataoffset = 0; |
1330 |
|
|
break; |
1331 |
|
|
} |
1332 |
|
|
/* Go to next value */ |
1333 |
|
|
wordlen = uncompress1(fp, fgetc); /* Next word */ |
1334 |
|
|
if (!wordlen) |
1335 |
|
|
{ |
1336 |
|
|
dataoffset = 0; |
1337 |
|
|
break; |
1338 |
|
|
} |
1339 |
|
|
efree(fileword); |
1340 |
|
|
fileword = (char *) emalloc(wordlen + 1); |
1341 |
|
|
} |
1342 |
|
|
if (!dataoffset) |
1343 |
|
|
{ |
1344 |
|
|
efree(fileword); |
1345 |
|
|
*resultword = NULL; |
1346 |
|
|
} |
1347 |
|
|
else |
1348 |
|
|
*resultword = fileword; |
1349 |
|
|
*wordID = dataoffset; |
1350 |
|
|
|
1351 |
|
|
return 0; |
1352 |
|
|
} |
1353 |
|
|
|
1354 |
|
|
int DB_ReadNextWordInvertedIndex_Native(char *word, char **resultword, long *wordID, void *db) |
1355 |
|
|
{ |
1356 |
|
|
int len, |
1357 |
|
|
wordlen; |
1358 |
|
|
long dataoffset; |
1359 |
|
|
char *fileword; |
1360 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1361 |
|
|
FILE *fp = DB->fp; |
1362 |
|
|
|
1363 |
|
|
if (!DB->nextwordoffset) |
1364 |
|
|
{ |
1365 |
|
|
*resultword = NULL; |
1366 |
|
|
*wordID = 0; |
1367 |
|
|
return 0; |
1368 |
|
|
} |
1369 |
|
|
|
1370 |
|
|
len = strlen(word); |
1371 |
|
|
|
1372 |
|
|
|
1373 |
|
|
fseek(fp, DB->nextwordoffset, SEEK_SET); |
1374 |
|
|
|
1375 |
|
|
wordlen = uncompress1(fp, fgetc); |
1376 |
|
|
fileword = (char *) emalloc(wordlen + 1); |
1377 |
|
|
|
1378 |
|
|
fread(fileword, 1, wordlen, fp); |
1379 |
|
|
fileword[wordlen] = '\0'; |
1380 |
|
|
if (strncmp(word, fileword, len)) |
1381 |
|
|
{ |
1382 |
|
|
efree(fileword); |
1383 |
|
|
fileword = NULL; |
1384 |
|
|
dataoffset = 0; /* No more data */ |
1385 |
|
|
DB->nextwordoffset = 0; |
1386 |
|
|
} |
1387 |
|
|
else |
1388 |
|
|
{ |
1389 |
|
|
readlong(fp, fread); /* jump hash offset */ |
1390 |
|
|
dataoffset = readlong(fp, fread); /* Get data offset */ |
1391 |
|
|
DB->nextwordoffset = ftell(fp); |
1392 |
|
|
} |
1393 |
|
|
*resultword = fileword; |
1394 |
|
|
*wordID = dataoffset; |
1395 |
|
|
|
1396 |
|
|
return 0; |
1397 |
|
|
|
1398 |
|
|
} |
1399 |
|
|
|
1400 |
|
|
|
1401 |
|
|
long DB_ReadWordData_Native(long wordID, unsigned char **worddata, int *lendata, void *db) |
1402 |
|
|
{ |
1403 |
|
|
int len; |
1404 |
|
|
unsigned char *buffer; |
1405 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1406 |
|
|
FILE *fp = DB->fp; |
1407 |
|
|
|
1408 |
|
|
fseek(fp, wordID, 0); |
1409 |
|
|
len = uncompress1(fp, fgetc); |
1410 |
|
|
buffer = emalloc(len); |
1411 |
|
|
fread(buffer, len, 1, fp); |
1412 |
|
|
|
1413 |
|
|
*worddata = buffer; |
1414 |
|
|
*lendata = len; |
1415 |
|
|
|
1416 |
|
|
return 0; |
1417 |
|
|
} |
1418 |
|
|
|
1419 |
|
|
|
1420 |
|
|
#else |
1421 |
|
|
int DB_ReadWordHash_Native(char *word, long *wordID, void *db) |
1422 |
|
|
{ |
1423 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1424 |
|
|
unsigned char *dummy; |
1425 |
|
|
int dummy2; |
1426 |
|
|
|
1427 |
|
|
if((*wordID = (long)BTREE_Search(DB->bt,word,strlen(word),&dummy,&dummy2,1)) < 0) |
1428 |
|
|
*wordID = 0; |
1429 |
|
|
else |
1430 |
|
|
efree(dummy); |
1431 |
|
|
return 0; |
1432 |
|
|
} |
1433 |
|
|
|
1434 |
|
|
int DB_ReadFirstWordInvertedIndex_Native(char *word, char **resultword, long *wordID, void *db) |
1435 |
|
|
{ |
1436 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1437 |
|
|
unsigned char *found; |
1438 |
|
|
int found_len; |
1439 |
|
|
|
1440 |
|
|
|
1441 |
|
|
if((*wordID = (long)BTREE_Search(DB->bt,word,strlen(word), &found, &found_len, 0)) < 0) |
1442 |
|
|
{ |
1443 |
|
|
*resultword = NULL; |
1444 |
|
|
*wordID = 0; |
1445 |
|
|
} |
1446 |
|
|
else |
1447 |
|
|
{ |
1448 |
|
|
|
1449 |
|
|
*resultword = emalloc(found_len + 1); |
1450 |
|
|
memcpy(*resultword,found,found_len); |
1451 |
|
|
(*resultword)[found_len]='\0'; |
1452 |
|
|
efree(found); |
1453 |
|
|
if (strncmp(word, *resultword, strlen(word))>0) |
1454 |
|
|
return DB_ReadNextWordInvertedIndex_Native(word, resultword, wordID, db); |
1455 |
|
|
} |
1456 |
|
|
|
1457 |
|
|
return 0; |
1458 |
|
|
} |
1459 |
|
|
|
1460 |
|
|
int DB_ReadNextWordInvertedIndex_Native(char *word, char **resultword, long *wordID, void *db) |
1461 |
|
|
{ |
1462 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1463 |
|
|
unsigned char *found; |
1464 |
|
|
int found_len; |
1465 |
|
|
|
1466 |
|
|
if((*wordID = (long)BTREE_Next(DB->bt, &found, &found_len)) < 0) |
1467 |
|
|
{ |
1468 |
|
|
*resultword = NULL; |
1469 |
|
|
*wordID = 0; |
1470 |
|
|
} |
1471 |
|
|
else |
1472 |
|
|
{ |
1473 |
|
|
*resultword = emalloc(found_len + 1); |
1474 |
|
|
memcpy(*resultword,found,found_len); |
1475 |
|
|
(*resultword)[found_len]='\0'; |
1476 |
|
|
efree(found); |
1477 |
|
|
if (strncmp(word, *resultword, strlen(word))) |
1478 |
|
|
{ |
1479 |
|
|
efree(*resultword); |
1480 |
|
|
*resultword = NULL; |
1481 |
|
|
*wordID = 0; /* No more data */ |
1482 |
|
|
} |
1483 |
|
|
} |
1484 |
|
|
return 0; |
1485 |
|
|
} |
1486 |
|
|
|
1487 |
|
|
long DB_ReadWordData_Native(long wordID, unsigned char **worddata, int *lendata, void *db) |
1488 |
|
|
{ |
1489 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1490 |
|
|
|
1491 |
|
|
*worddata = WORDDATA_Get(DB->worddata,wordID,lendata); |
1492 |
|
|
|
1493 |
|
|
return 0; |
1494 |
|
|
} |
1495 |
|
|
|
1496 |
|
|
#endif |
1497 |
|
|
|
1498 |
|
|
|
1499 |
|
|
/*--------------------------------------------*/ |
1500 |
|
|
/*--------------------------------------------*/ |
1501 |
|
|
/* FileList Stuff */ |
1502 |
|
|
/*--------------------------------------------*/ |
1503 |
|
|
/*--------------------------------------------*/ |
1504 |
|
|
|
1505 |
|
|
|
1506 |
|
|
/*
** Called when the file list has been written.  No work is done here:
** `db` is ignored and 0 is returned.
*/
int DB_EndWriteFiles_Native(void *db)
{
    (void) db;                  /* unused */

    return 0;
}
1510 |
|
|
|
1511 |
|
|
/*
** Write-file entry point.  This implementation stores nothing: every
** argument is ignored and 0 is returned.
*/
int DB_WriteFile_Native(int filenum, unsigned char *filedata, int sz_filedata, void *db)
{
    (void) filenum;             /* unused */
    (void) filedata;            /* unused */
    (void) sz_filedata;         /* unused */
    (void) db;                  /* unused */

    return 0;
}
1515 |
|
|
|
1516 |
|
|
/*
** Called before the file list is read.  No work is done here: `db` is
** ignored and 0 is returned.
*/
int DB_InitReadFiles_Native(void *db)
{
    (void) db;                  /* unused */

    return 0;
}
1520 |
|
|
|
1521 |
|
|
/*
** Read-file entry point.  This implementation reads nothing: the output
** arguments *filedata and *sz_filedata are left untouched and 0 is
** returned.
*/
int DB_ReadFile_Native(int filenum, unsigned char **filedata, int *sz_filedata, void *db)
{
    (void) filenum;             /* unused */
    (void) filedata;            /* unused */
    (void) sz_filedata;         /* unused */
    (void) db;                  /* unused */

    return 0;
}
1525 |
|
|
|
1526 |
|
|
|
1527 |
|
|
/*
** Called after the file list has been read.  No work is done here:
** `db` is ignored and 0 is returned.
*/
int DB_EndReadFiles_Native(void *db)
{
    (void) db;                  /* unused */

    return 0;
}
1532 |
|
|
|
1533 |
|
|
|
1534 |
|
|
/*--------------------------------------------*/ |
1535 |
|
|
/*--------------------------------------------*/ |
1536 |
|
|
/* Sorted data Stuff */ |
1537 |
|
|
/*--------------------------------------------*/ |
1538 |
|
|
/*--------------------------------------------*/ |
1539 |
|
|
|
1540 |
|
|
#ifdef USE_BTREE |
1541 |
|
|
int DB_InitWriteSortedIndex_Native(void *db, int n_props) |
1542 |
|
|
{ |
1543 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1544 |
|
|
FILE *fp = DB->fp_presorted; |
1545 |
|
|
int i; |
1546 |
|
|
|
1547 |
|
|
DB->offsets[SORTEDINDEX] = ftell(fp); |
1548 |
|
|
|
1549 |
|
|
/* Write number of properties */ |
1550 |
|
|
printlong(fp,(unsigned long) n_props, fwrite); |
1551 |
|
|
|
1552 |
|
|
DB->n_presorted_array = n_props; |
1553 |
|
|
DB->presorted_array = (ARRAY **)emalloc(n_props * sizeof(ARRAY *)); |
1554 |
|
|
DB->presorted_root_node = (unsigned long *)emalloc(n_props * sizeof(unsigned long)); |
1555 |
|
|
for(i = 0; i < n_props ; i++) |
1556 |
|
|
{ |
1557 |
|
|
DB->presorted_array[i] = NULL; |
1558 |
|
|
DB->presorted_root_node[i] = 0; |
1559 |
|
|
/* Reserve space for propidx and Array Pointer */ |
1560 |
|
|
printlong(fp,(unsigned long) 0, fwrite); |
1561 |
|
|
printlong(fp,(unsigned long) 0, fwrite); |
1562 |
|
|
} |
1563 |
|
|
DB->next_sortedindex = 0; |
1564 |
|
|
return 0; |
1565 |
|
|
} |
1566 |
|
|
|
1567 |
|
|
int DB_WriteSortedIndex_Native(int propID, int *data, int n,void *db) |
1568 |
|
|
{ |
1569 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1570 |
|
|
FILE *fp = DB->fp_presorted; |
1571 |
|
|
ARRAY *arr; |
1572 |
|
|
int i; |
1573 |
|
|
|
1574 |
|
|
arr = ARRAY_Create(fp); |
1575 |
|
|
for(i = 0 ; i < n ; i++) |
1576 |
|
|
{ |
1577 |
|
|
ARRAY_Put(arr,i,data[i]); |
1578 |
|
|
/* |
1579 |
|
|
if(!(i%10000)) |
1580 |
|
|
{ |
1581 |
|
|
ARRAY_FlushCache(arr); |
1582 |
|
|
printf("%d %d \r",propID,i); |
1583 |
|
|
} |
1584 |
|
|
*/ |
1585 |
|
|
} |
1586 |
|
|
|
1587 |
|
|
DB->presorted_root_node[DB->next_sortedindex] = ARRAY_Close(arr); |
1588 |
|
|
|
1589 |
|
|
fseek(fp,DB->offsets[SORTEDINDEX] + (1 + 2 * DB->next_sortedindex) *sizeof(unsigned long),SEEK_SET); |
1590 |
|
|
printlong(fp,(unsigned long) propID, fwrite); |
1591 |
|
|
printlong(fp,(unsigned long) DB->presorted_root_node[DB->next_sortedindex], fwrite); |
1592 |
|
|
|
1593 |
|
|
DB->next_sortedindex++; |
1594 |
|
|
|
1595 |
|
|
return 0; |
1596 |
|
|
} |
1597 |
|
|
|
1598 |
|
|
/*
** USE_BTREE build: nothing is left to do once the presorted tables are
** written.  `db` is ignored and 0 is returned.
*/
int DB_EndWriteSortedIndex_Native(void *db)
{
    (void) db;                  /* unused */

    return 0;
}
1602 |
|
|
|
1603 |
|
|
|
1604 |
|
|
int DB_InitReadSortedIndex_Native(void *db) |
1605 |
|
|
{ |
1606 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1607 |
|
|
FILE *fp = DB->fp_presorted; |
1608 |
|
|
int i; |
1609 |
|
|
|
1610 |
|
|
fseek(fp,DB->offsets[SORTEDINDEX],SEEK_SET); |
1611 |
|
|
|
1612 |
|
|
/* Read number of properties */ |
1613 |
|
|
DB->n_presorted_array = readlong(fp,fread); |
1614 |
|
|
|
1615 |
|
|
DB->presorted_array = (ARRAY **)emalloc(DB->n_presorted_array * sizeof(ARRAY *)); |
1616 |
|
|
DB->presorted_root_node = (unsigned long *)emalloc(DB->n_presorted_array * sizeof(unsigned long)); |
1617 |
|
|
DB->presorted_propid = (unsigned long *)emalloc(DB->n_presorted_array * sizeof(unsigned long)); |
1618 |
|
|
for(i = 0; i < DB->n_presorted_array ; i++) |
1619 |
|
|
{ |
1620 |
|
|
DB->presorted_array[i] = NULL; |
1621 |
|
|
DB->presorted_propid[i] = readlong(fp,fread); |
1622 |
|
|
DB->presorted_root_node[i] = readlong(fp,fread); |
1623 |
|
|
} |
1624 |
|
|
return 0; |
1625 |
|
|
|
1626 |
|
|
} |
1627 |
|
|
|
1628 |
|
|
int DB_ReadSortedIndex_Native(int propID, unsigned char **data, int *sz_data,void *db) |
1629 |
|
|
{ |
1630 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1631 |
|
|
FILE *fp = DB->fp_presorted; |
1632 |
|
|
int i; |
1633 |
|
|
|
1634 |
|
|
if(!DB->cur_presorted_array || DB->cur_presorted_propid != (unsigned long)propID) |
1635 |
|
|
{ |
1636 |
|
|
for(i = 0; i < DB->n_presorted_array ; i++) |
1637 |
|
|
{ |
1638 |
|
|
if((unsigned long)propID == DB->presorted_propid[i]) |
1639 |
|
|
{ |
1640 |
|
|
DB->cur_presorted_propid = propID; |
1641 |
|
|
DB->cur_presorted_array = DB->presorted_array[i] = ARRAY_Open(fp,DB->presorted_root_node[i]); |
1642 |
|
|
break; |
1643 |
|
|
} |
1644 |
|
|
} |
1645 |
|
|
} |
1646 |
|
|
if(DB->cur_presorted_array) |
1647 |
|
|
{ |
1648 |
|
|
*data = (unsigned char *)DB->cur_presorted_array; |
1649 |
|
|
*sz_data = sizeof(DB->cur_presorted_array); |
1650 |
|
|
} |
1651 |
|
|
else |
1652 |
|
|
{ |
1653 |
|
|
*data = NULL; |
1654 |
|
|
*sz_data = 0; |
1655 |
|
|
} |
1656 |
|
|
|
1657 |
|
|
return 0; |
1658 |
|
|
} |
1659 |
|
|
|
1660 |
|
|
|
1661 |
|
|
int DB_ReadSortedData_Native(int *data,int index, int *value, void *db) |
1662 |
|
|
{ |
1663 |
|
|
*value = ARRAY_Get((ARRAY *)data,index); |
1664 |
|
|
return 0; |
1665 |
|
|
} |
1666 |
|
|
|
1667 |
|
|
/*
** USE_BTREE build: nothing is released when sorted-index reading ends.
** `db` is ignored and 0 is returned.
*/
int DB_EndReadSortedIndex_Native(void *db)
{
    (void) db;                  /* unused */

    return 0;
}
1671 |
|
|
|
1672 |
|
|
#else |
1673 |
|
|
int DB_InitWriteSortedIndex_Native(void *db) |
1674 |
|
|
{ |
1675 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1676 |
|
|
|
1677 |
|
|
DB->offsets[SORTEDINDEX] = ftell(DB->fp); |
1678 |
|
|
DB->next_sortedindex = DB->offsets[SORTEDINDEX]; |
1679 |
|
|
return 0; |
1680 |
|
|
} |
1681 |
|
|
|
1682 |
|
|
int DB_WriteSortedIndex_Native(int propID, unsigned char *data, int sz_data,void *db) |
1683 |
|
|
{ |
1684 |
|
|
long tmp1,tmp2; |
1685 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1686 |
|
|
FILE *fp = DB->fp; |
1687 |
|
|
|
1688 |
|
|
|
1689 |
|
|
fseek(fp, DB->next_sortedindex, 0); |
1690 |
|
|
|
1691 |
|
|
|
1692 |
|
|
tmp1 = ftell(fp); |
1693 |
|
|
|
1694 |
|
|
printlong(fp,(long)0,fwrite); /* Pointer to next table if any */ |
1695 |
|
|
|
1696 |
|
|
/* Write ID */ |
1697 |
|
|
compress1(propID,fp,fputc); |
1698 |
|
|
|
1699 |
|
|
/* Write len of data */ |
1700 |
|
|
compress1(sz_data,fp,putc); |
1701 |
|
|
|
1702 |
|
|
/* Write data */ |
1703 |
|
|
fwrite(data,sz_data,1,fp); |
1704 |
|
|
|
1705 |
|
|
DB->next_sortedindex = tmp2 = ftell(fp); |
1706 |
|
|
|
1707 |
|
|
|
1708 |
|
|
if(DB->lastsortedindex) |
1709 |
|
|
{ |
1710 |
|
|
fseek(fp,DB->lastsortedindex,0); |
1711 |
|
|
printlong(fp,tmp1,fwrite); |
1712 |
|
|
fseek(fp,tmp2,0); |
1713 |
|
|
} |
1714 |
|
|
DB->lastsortedindex = tmp1; |
1715 |
|
|
return 0; |
1716 |
|
|
} |
1717 |
|
|
|
1718 |
|
|
int DB_EndWriteSortedIndex_Native(void *db) |
1719 |
|
|
{ |
1720 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1721 |
|
|
FILE *fp = DB->fp; |
1722 |
|
|
|
1723 |
|
|
printlong(fp,(long)0,fwrite); /* No next table mark - Useful if no presorted indexes */ |
1724 |
|
|
/* NULL meta id- Only useful if no presorted indexes */ |
1725 |
|
|
fputc(0, fp); |
1726 |
|
|
|
1727 |
|
|
return 0; |
1728 |
|
|
} |
1729 |
|
|
|
1730 |
|
|
|
1731 |
|
|
/*
** Nothing to prepare before sorted tables are read from the main index
** file.  `db` is ignored and 0 is returned.
*/
int DB_InitReadSortedIndex_Native(void *db)
{
    (void) db;                  /* unused */

    return 0;
}
1735 |
|
|
|
1736 |
|
|
int DB_ReadSortedIndex_Native(int propID, unsigned char **data, int *sz_data,void *db) |
1737 |
|
|
{ |
1738 |
|
|
long next, id, tmp; |
1739 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1740 |
|
|
FILE *fp = DB->fp; |
1741 |
|
|
|
1742 |
|
|
fseek(fp,DB->offsets[SORTEDINDEX],0); |
1743 |
|
|
|
1744 |
|
|
|
1745 |
|
|
next = readlong(fp,fread); |
1746 |
|
|
/* read propID */ |
1747 |
|
|
id = uncompress1(fp,fgetc); |
1748 |
|
|
|
1749 |
|
|
|
1750 |
|
|
while(1) |
1751 |
|
|
{ |
1752 |
|
|
if(id == propID) |
1753 |
|
|
{ |
1754 |
|
|
tmp = uncompress1(fp,fgetc); |
1755 |
|
|
*sz_data = tmp; |
1756 |
|
|
|
1757 |
|
|
*data = emalloc(*sz_data); |
1758 |
|
|
fread(*data,*sz_data,1,fp); |
1759 |
|
|
return 0; |
1760 |
|
|
} |
1761 |
|
|
if(next) |
1762 |
|
|
{ |
1763 |
|
|
fseek(fp,next,0); |
1764 |
|
|
next = readlong(fp,fread); |
1765 |
|
|
id = uncompress1(fp,fgetc); |
1766 |
|
|
} |
1767 |
|
|
else |
1768 |
|
|
{ |
1769 |
|
|
*sz_data = 0; |
1770 |
|
|
*data = NULL; |
1771 |
|
|
return 0; |
1772 |
|
|
} |
1773 |
|
|
} |
1774 |
|
|
return 0; |
1775 |
|
|
} |
1776 |
|
|
|
1777 |
|
|
/*
** Fetch element `index` of an in-memory sorted table into *value.
** `db` is not used.  No bounds checking is done.  Always returns 0.
*/
int DB_ReadSortedData_Native(int *data,int index, int *value, void *db)
{
    const int *slot = data + index;

    (void) db;                  /* unused */

    *value = *slot;

    return 0;
}
1782 |
|
|
|
1783 |
|
|
/*
** Nothing to release after sorted tables have been read.  `db` is
** ignored and 0 is returned.
*/
int DB_EndReadSortedIndex_Native(void *db)
{
    (void) db;                  /* unused */

    return 0;
}
1787 |
|
|
|
1788 |
|
|
|
1789 |
|
|
#endif |
1790 |
|
|
|
1791 |
|
|
|
1792 |
|
|
|
1793 |
|
|
|
1794 |
|
|
/* |
1795 |
|
|
** Jose Ruiz 04/00 |
1796 |
|
|
** Store a portable long with just four bytes |
1797 |
|
|
*/ |
1798 |
|
|
void printlong(FILE * fp, unsigned long num, size_t(*f_write) (const void *, size_t, size_t, FILE *)) |
1799 |
|
|
{ |
1800 |
|
|
num = PACKLONG(num); /* Make the number portable */ |
1801 |
|
|
f_write(&num, MAXLONGLEN, 1, fp); |
1802 |
|
|
} |
1803 |
|
|
|
1804 |
|
|
/* |
1805 |
|
|
** Jose Ruiz 04/00 |
1806 |
|
|
** Read a portable long (just four bytes) |
1807 |
|
|
*/ |
1808 |
|
|
unsigned long readlong(FILE * fp, size_t(*f_read) (void *, size_t, size_t, FILE *)) |
1809 |
|
|
{ |
1810 |
|
|
unsigned long num; |
1811 |
|
|
|
1812 |
|
|
f_read(&num, MAXLONGLEN, 1, fp); |
1813 |
|
|
return UNPACKLONG(num); /* Make the number readable */ |
1814 |
|
|
} |
1815 |
|
|
|
1816 |
|
|
|
1817 |
|
|
|
1818 |
|
|
/**************************************************************************** |
1819 |
|
|
* Writing Properties (not for USE_BTREE) |
1820 |
|
|
* |
1821 |
|
|
* Properties are written sequentially to the .prop file. |
1822 |
|
|
* Fixed length records of the file length and seek position into the |
1823 |
|
|
* property file are written sequentially to the main index (which is why |
1824 |
|
|
* there's a separate .prop file). |
1825 |
|
|
* |
1826 |
|
|
* DB_InitWriteFiles is called first time a property is written |
1827 |
|
|
* to save the offset of the property index table in the main index. |
1828 |
|
|
* It's simply a ftell() of the current position in the index and that |
1829 |
|
|
* seek position is stored in the main index "offsets" table. |
1830 |
|
|
* |
1831 |
|
|
* DB_WriteProperty writes a property. |
1832 |
|
|
* |
1833 |
|
|
* DB_WritePropPositions writes the seek pointers to the main index and |
1834 |
|
|
* *must* be called after processing each file. |
1835 |
|
|
* This is all done in WritePropertiesToDisk(). |
1836 |
|
|
* |
1837 |
|
|
* The index tables are all based on the count of properties in the index. |
1838 |
|
|
* So, to read you find the start of the prop pointers table by the value |
1839 |
|
|
* stored in the offsets table. Since we have a fixed number of properties |
1840 |
|
|
* we know the size of an entry in the prop pointers table, one record per filenum. |
1841 |
|
|
* Index into the prop seek positions table and grab the pointers to the properties. |
1842 |
|
|
* |
1843 |
|
|
* |
1844 |
|
|
*****************************************************************************/ |
1845 |
|
|
|
1846 |
|
|
|
1847 |
|
|
int DB_InitWriteFiles_Native(void *db) |
1848 |
|
|
{ |
1849 |
|
|
#ifndef USE_BTREE |
1850 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
1851 |
|
|
|
1852 |
|
|
DB->offsets[FILELISTPOS] = ftell(DB->fp); |
1853 |
|
|
|
1854 |
|
|
#ifdef DEBUG_PROP |
1855 |
|
|
printf("InitWriteFiles: Start of property table in main index at offset: %ld\n", DB->offsets[FILELISTPOS] ); |
1856 |
|
|
#endif |
1857 |
|
|
|
1858 |
|
|
#endif |
1859 |
|
|
|
1860 |
|
|
return 0; |
1861 |
|
|
} |
1862 |
|
|
|
1863 |
|
|
|
1864 |
|
|
/**************************************************************************** |
1865 |
|
|
* Writes a property to the property file |
1866 |
|
|
* |
1867 |
|
|
* Creates a PROP_INDEX structure in the file entry that caches all |
1868 |
|
|
* the seek pointers into the .prop file, if it doesn't already exist. |
1869 |
|
|
* |
1870 |
|
|
* Stores in the fi->prop_index structure the seek address of this property and the physical length |
1871 |
|
|
* |
1872 |
|
|
* Writes to the prop file: |
1873 |
|
|
* <uncompressed length><property (possibly compressed)> |
1874 |
|
|
* |
1875 |
|
|
* |
1876 |
|
|
*****************************************************************************/ |
1877 |
|
|
|
1878 |
|
|
/*
 * Appends one property to the property file and records where it went.
 *
 *   indexf            index whose header supplies property_count and the
 *                     metaID -> property index map
 *   fi                file record; fi->prop_index is allocated and zeroed
 *                     on the first call for the file
 *   propID            metaID of the property being written
 *   buffer, buf_len   bytes to store and how many of them
 *   uncompressed_len  written (plus one) ahead of the data via compress1()
 *   db                opaque handle; used as a struct Handle_DBNative *
 *
 * Returns immediately when property_count is not positive.  The seek
 * position of the record and buf_len are stored in the slot of
 * fi->prop_index selected by the metaID map.
 *
 * Errors (property file not open, map entry outside the table, ftell or
 * fwrite failure) are reported through progerr()/progerrno().
 */
void DB_WriteProperty_Native( IndexFILE *indexf, FileRec *fi, int propID, char *buffer, int buf_len, int uncompressed_len, void *db)
{
    struct Handle_DBNative *DB = (struct Handle_DBNative *) db;
    INDEXDATAHEADER *header = &indexf->header;
    int     count = header->property_count;
    PROP_INDEX *pindex = fi->prop_index;
    PROP_LOCATION *prop_loc;
    size_t  written_bytes;
    int     index_size;
    int     propIDX;
#ifdef DEBUG_PROP
    long    prop_start_pos;
#endif

    if ( count <= 0 )
        return;

    if (!DB->prop)
        progerr("Property database file not opened\n");

    /*
     * Consult the map only once we know properties exist, and refuse a slot
     * outside the table: the table holds exactly `count` entries, so any
     * other index would write outside the allocation.
     */
    propIDX = header->metaID_to_PropIDX[propID];
    if ( propIDX < 0 || propIDX >= count )
        progerr("Property index out of range while writing property\n");

    /* Create place to store seek positions and lengths on first call for this file */
    if ( !pindex )
    {
        index_size = sizeof( PROP_INDEX ) + sizeof( PROP_LOCATION ) * (count - 1);
        pindex = fi->prop_index = emalloc( index_size );
        memset( pindex, 0, index_size );
    }

    /* make an alias */
    prop_loc = &pindex->prop_position[ propIDX ];

    /* The record starts at the current end of what has been written */
    if ((prop_loc->seek = ftell(DB->prop)) == -1)
        progerrno("O/S failed to tell me where I am - file number %d metaID %d : ", fi->filenum, propID);

    /* First write the uncompressed size */
    compress1( uncompressed_len+1, DB->prop, putc);

#ifdef DEBUG_PROP
    prop_start_pos = ftell(DB->prop);
#endif

    /* Write data; %lu with a cast replaces the non-standard %Zu conversion */
    written_bytes = fwrite(buffer, 1, buf_len, DB->prop);
    if (written_bytes != (size_t) buf_len)
        progerrno("Failed to write file number %d metaID %d to property file. Tried to write %d, wrote %lu : ", fi->filenum, propID, buf_len,
                  (unsigned long) written_bytes);

    prop_loc->length = buf_len;     /* length of this prop */

#ifdef DEBUG_PROP
    printf("Write Prop: file %d PropIDX %d (meta %d) at file offset %ld (data at %ld) <uncompressed_len> %ld bytes <prop> %lu bytes\n",
           fi->filenum, propIDX, propID, prop_loc->seek, prop_start_pos, prop_start_pos - prop_loc->seek, (unsigned long) written_bytes);
#endif
}
1941 |
|
|
|
1942 |
|
|
|
1943 |
|
|
/**************************************************************************** |
1944 |
|
|
* Writes out the seek positions for the properties |
1945 |
|
|
* |
1946 |
|
|
* This writes out a fixed-size record for the file being processed. Each |
1947 |
|
|
* record is a list of <length>:<seek pos> entries, one for |
1948 |
|
|
* each property defined. Length is null if this file doesn't have a |
1949 |
|
|
* property. |
1950 |
|
|
* |
1951 |
|
|
* The advantage of the fixed width records is that they can be written |
1952 |
|
|
* to disk after each file, saving RAM, and more importantly, all the |
1953 |
|
|
* files don't need to be read when searching. Can just seek to the |
1954 |
|
|
* file of interest, read the table, then read the property file. |
1955 |
|
|
* |
1956 |
|
|
* This comes at a cost of disk space (and maybe disk access speed), |
1957 |
|
|
* since much of the data in the table written to disk could be compressed. |
1958 |
|
|
* |
1959 |
|
|
* An optional approach would be to only save the seek positions, plus |
1960 |
|
|
* an extra seek position at the end (the next position after the last |
1961 |
|
|
* property). Then could calculate length by comparing the various start |
1962 |
|
|
* positions. |
1963 |
|
|
* |
1964 |
|
|
* For, say, five properties this would save 5 x 4(bytes/int) 20 bytes per |
1965 |
|
|
* file. But we need an extra position, so that's 20 - 4 = 16. So, for |
1966 |
|
|
* 100,000 files that's only 1.6M of disk space. Probably not worth the trouble. |
1967 |
|
|
* |
1968 |
|
|
* |
1969 |
|
|
*****************************************************************************/ |
1970 |
|
|
/*
 * Writes the <length, seek> pair of every property of one file, then
 * releases the file's in-memory position table.
 *
 *   indexf  index whose header supplies property_count
 *   fi      file record; fi->prop_index is freed and set to NULL on return
 *   db      opaque handle; used as a struct Handle_DBNative *
 *
 * Without USE_BTREE the pairs are appended to DB->fp with printlong().
 * With USE_BTREE they are stored in DB->props_array starting at element
 * (filenum - 1) * count * 2.
 *
 * A file for which no property was written still gets a record: an
 * all-zero table is created for it first.  When property_count is not
 * positive there is no record to write, so only the cleanup is done.
 */
void DB_WritePropPositions_Native(IndexFILE *indexf, FileRec *fi, void *db)
{
    struct Handle_DBNative *DB = (struct Handle_DBNative *) db;
    PROP_INDEX *pindex = fi->prop_index;
    INDEXDATAHEADER *header = &indexf->header;
    int     count = header->property_count;
    int     index_size;
    int     i;
#ifdef USE_BTREE
    long    seek_pos;
#endif

    /*
     * Nothing to write without properties.  Bail out before the size
     * computation below, which is only meaningful for count >= 1, but keep
     * the "table is released on return" behavior.
     */
    if ( count <= 0 )
    {
        if ( pindex )
        {
            efree( pindex );
            fi->prop_index = NULL;
        }
        return;
    }

    /* Just in case there were no properties for this file */
    if ( !pindex )
    {
        index_size = sizeof( PROP_INDEX ) + sizeof( PROP_LOCATION ) * (count - 1);
        pindex = fi->prop_index = emalloc( index_size );
        memset( pindex, 0, index_size );
    }

#ifdef USE_BTREE
    /* now calculate index */
    seek_pos = ((fi->filenum - 1) * count) * 2;
#endif

#ifdef DEBUG_PROP
    printf("Writing seek positions to index for file %d\n", fi->filenum );
#endif

    /* Write out the prop index */
    for ( i = 0; i < count; i++ )
    {
        /* make an alias */
        PROP_LOCATION *prop_loc = &pindex->prop_position[ i ];

#ifndef USE_BTREE

#ifdef DEBUG_PROP
        printf("  PropIDX: %d  length: %ld,  seek: %ld  main index location: %ld\n",
               i, prop_loc->length, prop_loc->seek, ftell( DB->fp ) );
#endif

        /* Write in portable format */
        printlong( DB->fp, prop_loc->length, fwrite );
        printlong( DB->fp, prop_loc->seek, fwrite );

#else
        ARRAY_Put( DB->props_array,seek_pos++, prop_loc->length);
        ARRAY_Put( DB->props_array,seek_pos++, prop_loc->seek);
#endif
    }

    efree( pindex );
    fi->prop_index = NULL;
}
2029 |
|
|
|
2030 |
|
|
/**************************************************************************** |
2031 |
|
|
* Reads in the seek positions for the properties |
2032 |
|
|
* |
2033 |
|
|
* |
2034 |
|
|
*****************************************************************************/ |
2035 |
|
|
/*
 * Loads the <length, seek> pair of every property of one file into a
 * freshly allocated table attached to fi->prop_index.
 *
 *   indexf  index whose header supplies property_count
 *   fi      file record; fi->filenum selects the record to load
 *   db      opaque handle; used as a struct Handle_DBNative *
 *
 * Returns without touching fi when property_count is not positive.
 *
 * Without USE_BTREE the record is read from DB->fp with readlong(),
 * starting at DB->offsets[FILELISTPOS] + (filenum - 1) * 8 * count.
 * With USE_BTREE the values come from DB->props_array starting at element
 * (filenum - 1) * count * 2.
 *
 * NOTE(review): a table already attached to fi->prop_index is replaced
 * without being freed -- confirm callers only call this when it is NULL.
 */
void DB_ReadPropPositions_Native(IndexFILE *indexf, FileRec *fi, void *db)
{
    struct Handle_DBNative *DB = (struct Handle_DBNative *) db;
    INDEXDATAHEADER *header = &indexf->header;
    int     count = header->property_count;
    PROP_INDEX *pindex;
    int     index_size;
    long    seek_pos;
    int     i;

    if ( count <= 0 )
        return;

    /* create a place to store them */
    index_size = sizeof( PROP_INDEX ) + sizeof( PROP_LOCATION ) * (count - 1);

    pindex = fi->prop_index = emalloc( index_size );
    memset( pindex, 0, index_size );

#ifndef USE_BTREE
    /*
     * now calculate seek_pos
     * printlong currently always writes 4 bytes, so 8 bytes for length and seek.
     * The product is formed in long so it cannot overflow int arithmetic
     * on large indexes.
     */
    seek_pos = ((long) (fi->filenum - 1) * 8L * count) + DB->offsets[FILELISTPOS];

    /* and seek to table (fseek reports failure with any non-zero value) */
    if (fseek(DB->fp, seek_pos, SEEK_SET) != 0)
        progerrno("Failed to seek to property index located at %ld for file number %d : ", seek_pos, fi->filenum);

#ifdef DEBUG_PROP
    printf("Fetching seek positions for file %d\n", fi->filenum );
    printf(" property index table at %ld, this file at %ld\n", DB->offsets[FILELISTPOS], seek_pos );
#endif

    /* Read in the prop indexes */
    for ( i = 0; i < count; i++ )
    {
        /* make an alias */
        PROP_LOCATION *prop_loc = &pindex->prop_position[ i ];

        prop_loc->length = readlong( DB->fp, fread );
        prop_loc->seek = readlong( DB->fp, fread );

#ifdef DEBUG_PROP
        printf("   PropIDX: %d  Length: %ld  Seek: %ld\n", i, prop_loc->length, prop_loc->seek );
#endif
    }
#else

    /* now calculate index */
    seek_pos = (long) (fi->filenum - 1) * count * 2;

    /* Read in the prop indexes */
    for ( i = 0; i < count; i++ )
    {
        /* make an alias */
        PROP_LOCATION *prop_loc = &pindex->prop_position[ i ];

        prop_loc->length = ARRAY_Get(DB->props_array, seek_pos++);
        prop_loc->seek = ARRAY_Get(DB->props_array, seek_pos++);
    }
#endif
}
2106 |
|
|
|
2107 |
|
|
|
2108 |
|
|
|
2109 |
|
|
/**************************************************************************** |
2110 |
|
|
* Reads a property from the property file |
2111 |
|
|
* |
2112 |
|
|
* Returns: |
2113 |
|
|
* *char (buffer -- must be destroyed by caller) |
2114 |
|
|
* |
2115 |
|
|
*****************************************************************************/ |
2116 |
|
|
/*
 * Reads the stored bytes of one property of one file.
 *
 *   indexf            index whose header supplies property_count and the
 *                     metaID -> property index map
 *   fi                file record; its position table is loaded with
 *                     DB_ReadPropPositions_Native() if not yet present
 *   propID            metaID of the wanted property
 *   buf_len           out: stored length of the property
 *   uncompressed_len  out: value read with uncompress1(), minus one
 *   db                opaque handle; used as a struct Handle_DBNative *
 *
 * Returns a buffer obtained from emalloc() holding *buf_len bytes as they
 * are stored in the property file; the caller must release it.
 *
 * Returns NULL when the index has no properties, when propID does not map
 * to a slot of the position table, or when the file has no data for the
 * property.  In the latter two cases *buf_len is set to 0.
 * *uncompressed_len is only set when a buffer is returned.
 *
 * Seek and read failures are reported through progerrno().
 */
char *DB_ReadProperty_Native(IndexFILE *indexf, FileRec *fi, int propID, int *buf_len, int *uncompressed_len, void *db)
{
    struct Handle_DBNative *DB = (struct Handle_DBNative *) db;
    INDEXDATAHEADER *header = &indexf->header;
    int     count = header->property_count;
    PROP_INDEX *pindex = fi->prop_index;
    PROP_LOCATION *prop_loc;
    long    seek_pos;
    long    length;
    int     propIDX;
    char   *buffer;

    if ( count <= 0 )
        return NULL;

    /*
     * Consult the map only once we know properties exist.  The position
     * table holds exactly `count` entries; an index outside it is treated
     * as "no such property" instead of reading outside the table.
     */
    propIDX = header->metaID_to_PropIDX[propID];
    if ( propIDX < 0 || propIDX >= count )
    {
        *buf_len = 0;
        return NULL;
    }

    /* read in the index pointers if not already loaded */
    if ( !pindex )
    {
        DB_ReadPropPositions_Native( indexf, fi, db);
        pindex = fi->prop_index;
    }

    if ( !pindex )
        progerr("Failed to call DB_ReadProperty_Native with seek positions");

    prop_loc = &pindex->prop_position[ propIDX ];

    seek_pos = prop_loc->seek;
    length   = prop_loc->length;

    *buf_len = length;      /* pass the length back */

    /* Any for this metaID? */
    if ( !length )
        return NULL;

    /* fseek reports failure with any non-zero value */
    if (fseek(DB->prop, seek_pos, SEEK_SET) != 0)
        progerrno("Failed to seek to properties located at %ld for file number %d : ", seek_pos, fi->filenum);

#ifdef DEBUG_PROP
    printf("Fetching filenum: %d propIDX: %d at seek: %ld (length is %ld)\n", fi->filenum, propIDX, seek_pos, length);
#endif

    /* read uncompressed size (for use in zlib uncompression) */
    *uncompressed_len = uncompress1( DB->prop, fgetc ) - 1;

#ifdef DEBUG_PROP
    printf(" Fetched uncompressed length of %d (%ld bytes storage), now fetching %ld prop bytes from %ld\n",
           *uncompressed_len, ftell( DB->prop ) - seek_pos, length, ftell( DB->prop ) );
#endif

    /* allocate a read buffer */
    buffer = emalloc(length);

    if (fread(buffer, 1, length, DB->prop) != (size_t) length)
        progerrno("Failed to read properties located at %ld for file number %d : ", seek_pos, fi->filenum);

    return buffer;
}
2189 |
|
|
|
2190 |
|
|
|
2191 |
|
|
/**************************************************************** |
2192 |
|
|
* This routine closes the property file and reopens it as |
2193 |
|
|
* readonly to improve seek times. |
2194 |
|
|
* Note: It does not rename the property file. |
2195 |
|
|
*****************************************************************/ |
2196 |
|
|
|
2197 |
|
|
void DB_Reopen_PropertiesForRead_Native(void *db) |
2198 |
|
|
{ |
2199 |
|
|
struct Handle_DBNative *DB = (struct Handle_DBNative *) db; |
2200 |
|
|
int no_rename = 0; |
2201 |
|
|
char *s = estrdup(DB->cur_prop_file); |
2202 |
|
|
|
2203 |
|
|
|
2204 |
|
|
/* Close property file */ |
2205 |
|
|
DB_Close_File_Native(&DB->prop, &DB->cur_prop_file, &no_rename); |
2206 |
|
|
|
2207 |
|
|
|
2208 |
|
|
if (!(DB->prop = openIndexFILEForRead(s))) |
2209 |
|
|
progerrno("Couldn't open the property file \"%s\": ", s); |
2210 |
|
|
|
2211 |
|
|
DB->cur_prop_file = s; |
2212 |
|
|
} |
2213 |
|
|
|
2214 |
|
|
|
2215 |
|
|
|
2216 |
|
|
#ifdef USE_BTREE |
2217 |
|
|
|
2218 |
|
|
|
2219 |
|
|
/*
 * Stores a file's word count in the handle's totwords_array.
 *
 *   sw         not used
 *   idx        array element to set
 *   wordcount  value to store
 *   db         opaque handle; used as a struct Handle_DBNative *
 *
 * Returns 0.
 */
int DB_WriteTotalWordsPerFile_Native(SWISH *sw, int idx, int wordcount, void *db)
{
    struct Handle_DBNative *handle = (struct Handle_DBNative *) db;

    ARRAY_Put(handle->totwords_array, idx, wordcount);
    return 0;
}
2227 |
|
|
|
2228 |
|
|
|
2229 |
|
|
/*
 * Fetches a file's word count from the handle's totwords_array.
 *
 *   sw     not used
 *   index  array element to fetch
 *   value  out: receives the fetched element
 *   db     opaque handle; used as a struct Handle_DBNative *
 *
 * Returns 0.
 */
int DB_ReadTotalWordsPerFile_Native(SWISH *sw, int index, int *value, void *db)
{
    struct Handle_DBNative *handle = (struct Handle_DBNative *) db;
    ARRAY  *totals = (ARRAY *) handle->totwords_array;

    *value = ARRAY_Get(totals, index);
    return 0;
}
2236 |
|
|
|
2237 |
|
|
|
2238 |
|
|
#endif |