1 |
/* |
2 |
** This program and library is free software; you can redistribute it and/or |
3 |
** modify it under the terms of the GNU (Library) General Public License |
4 |
** as published by the Free Software Foundation; either version 2 |
5 |
** of the License, or any later version. |
6 |
** |
7 |
** This program is distributed in the hope that it will be useful, |
8 |
** but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 |
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 |
** GNU (Library) General Public License for more details. |
11 |
** |
12 |
** You should have received a copy of the GNU (Library) General Public License |
13 |
** along with this program; if not, write to the Free Software |
14 |
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
15 |
**----------------------------------------------------------------- |
16 |
** |
17 |
** Virtual Array Code. |
18 |
** 11/2001 jmruiz - The intention of this routines is storing and reading |
19 |
** elemnts of arrays of long numbers avoiding the |
20 |
** allocation in memory of the total array. In other words, |
21 |
** if we need to read only 10 elements of the array, we |
22 |
** will must try to make the minimal I/O memory and disk |
23 |
** operations. |
24 |
** |
25 |
** To do that, the data is stored in aligned pages in disk |
26 |
** Also, a simple cache system is used to speed I/O file |
27 |
** operations. |
28 |
** |
29 |
** The virtual array is extensible. In other words, you can |
30 |
** add elements whenever you want |
31 |
** |
32 |
** Main routines: |
33 |
** |
34 |
** ARRAY *ARRAY_Create(FILE *fp) |
35 |
** Creates a virtual array. Returns the handle of the array |
36 |
** |
37 |
** ARRAY *ARRAY_Open(FILE *fp, unsigned long root_page) |
38 |
** Opens an existent Virtual Array. root_page is de value returned by |
39 |
** Array_Close. Returns de handle of the array. |
40 |
** |
41 |
** unsigned long ARRAY_Close(ARRAY *arr) |
42 |
** Closes and frees memory. arr is the value returned by ARRAY_Create or |
43 |
** ARRAY_Open. Returns the root page of the array. This value must be |
44 |
** |
45 |
** int ARRAY_Put(ARRAY *arr, int index, unsigned long value) |
46 |
** Writes the array element arr[index]=value to the virtual array |
47 |
** |
48 |
** unsigned long ARRAY_Get(ARRAY *arr, int index) |
49 |
** Reads the array element index. Returns de value arr[index] |
50 |
** |
51 |
*/ |
52 |
|
53 |
#include <stdio.h> |
54 |
#include <stdlib.h> |
55 |
#include <string.h> |
56 |
|
57 |
/* |
58 |
#define STANDALONE |
59 |
#define DEBUG |
60 |
*/ |
61 |
|
62 |
#ifdef STANDALONE |
63 |
|
64 |
/* Rourtines to make long integers portable */ |
65 |
unsigned long PACKLONG(unsigned long num) |
66 |
{ |
67 |
unsigned long _i = 0L; |
68 |
unsigned char *_s; |
69 |
|
70 |
if (num) |
71 |
{ |
72 |
_s = (unsigned char *) &_i; |
73 |
_s[0] = (unsigned char) ((num >> 24) & 0xFF); |
74 |
_s[1] = (unsigned char) ((num >> 16) & 0xFF); |
75 |
_s[2] = (unsigned char) ((num >> 8) & 0xFF); |
76 |
_s[3] = (unsigned char) (num & 0xFF); |
77 |
return _i; |
78 |
} |
79 |
return num; |
80 |
} |
81 |
|
82 |
|
83 |
unsigned long UNPACKLONG(unsigned long num) |
84 |
{ |
85 |
unsigned char *_s = (unsigned char *) # |
86 |
|
87 |
return (_s[0] << 24) + (_s[1] << 16) + (_s[2] << 8) + _s[3]; |
88 |
} |
89 |
|
90 |
|
91 |
static int num = 0; |
92 |
char *emalloc(unsigned int size) |
93 |
{ |
94 |
num++; |
95 |
return malloc(size); |
96 |
} |
97 |
|
98 |
void efree(void *p) |
99 |
{ |
100 |
num--; |
101 |
free(p); |
102 |
} |
103 |
|
104 |
|
105 |
#else |
106 |
|
107 |
#include "mem.h" |
108 |
|
109 |
unsigned long PACKLONG(unsigned long num); |
110 |
unsigned long UNPACKLONG(unsigned long num); |
111 |
unsigned char *compress3(int num, unsigned char *buffer); |
112 |
int uncompress2(unsigned char **buffer); |
113 |
|
114 |
#endif /* STANDALONE */ |
115 |
|
116 |
|
117 |
#include "array.h" |
118 |
|
119 |
|
120 |
/* A ARRAY page size */ |
121 |
#define ARRAY_PageSize 4096 |
122 |
|
123 |
#define SizeInt32 4 |
124 |
|
125 |
/* Round to ARRAY_PageSize */ |
126 |
#define ARRAY_RoundPageSize(n) (((n) + ARRAY_PageSize - 1) & (~(ARRAY_PageSize - 1))) |
127 |
|
128 |
#define ARRAY_PageHeaderSize (1 * SizeInt32) |
129 |
|
130 |
#define ARRAY_PageData(pg) ((pg)->data + ARRAY_PageHeaderSize) |
131 |
#define ARRAY_Data(pg,i) (ARRAY_PageData((pg)) + (i) * SizeInt32) |
132 |
|
133 |
#define ARRAY_SetNextPage(pg,num) ( *(int *)((pg)->data + 0 * SizeInt32) = PACKLONG(num)) |
134 |
|
135 |
#define ARRAY_GetNextPage(pg,num) ( (num) = UNPACKLONG(*(int *)((pg)->data + 0 * SizeInt32))) |
136 |
|
137 |
|
138 |
int ARRAY_WritePageToDisk(FILE *fp, ARRAY_Page *pg) |
139 |
{ |
140 |
ARRAY_SetNextPage(pg,pg->next); |
141 |
fseek(fp,pg->page_number * ARRAY_PageSize,SEEK_SET); |
142 |
return fwrite(pg->data,ARRAY_PageSize,1,fp); |
143 |
} |
144 |
|
145 |
int ARRAY_WritePage(ARRAY *b, ARRAY_Page *pg) |
146 |
{ |
147 |
int hash = pg->page_number % ARRAY_CACHE_SIZE; |
148 |
ARRAY_Page *tmp; |
149 |
pg->modified =1; |
150 |
if((tmp = b->cache[hash])) |
151 |
{ |
152 |
while(tmp) |
153 |
{ |
154 |
if(tmp->page_number == pg->page_number) |
155 |
{ |
156 |
return 0; |
157 |
} |
158 |
tmp = tmp->next_cache; |
159 |
} |
160 |
} |
161 |
pg->next_cache = b->cache[hash]; |
162 |
b->cache[hash] = pg; |
163 |
return 0; |
164 |
} |
165 |
|
166 |
int ARRAY_FlushCache(ARRAY *b) |
167 |
{ |
168 |
int i; |
169 |
ARRAY_Page *tmp, *next; |
170 |
for(i = 0; i < ARRAY_CACHE_SIZE; i++) |
171 |
{ |
172 |
if((tmp = b->cache[i])) |
173 |
{ |
174 |
while(tmp) |
175 |
{ |
176 |
#ifdef DEBUG |
177 |
if(tmp->in_use) |
178 |
{ |
179 |
printf("DEBUG Error in FlushCache: Page in use\n"); |
180 |
exit(0); |
181 |
} |
182 |
#endif |
183 |
next = tmp->next_cache; |
184 |
if(tmp->modified) |
185 |
{ |
186 |
ARRAY_WritePageToDisk(b->fp, tmp); |
187 |
tmp->modified = 0; |
188 |
} |
189 |
if(tmp != b->cache[i]) |
190 |
efree(tmp); |
191 |
|
192 |
tmp = next; |
193 |
} |
194 |
b->cache[i]->next_cache = NULL; |
195 |
} |
196 |
} |
197 |
return 0; |
198 |
} |
199 |
|
200 |
int ARRAY_CleanCache(ARRAY *b) |
201 |
{ |
202 |
int i; |
203 |
ARRAY_Page *tmp,*next; |
204 |
for(i = 0; i < ARRAY_CACHE_SIZE; i++) |
205 |
{ |
206 |
if((tmp = b->cache[i])) |
207 |
{ |
208 |
while(tmp) |
209 |
{ |
210 |
next = tmp->next_cache; |
211 |
efree(tmp); |
212 |
tmp = next; |
213 |
} |
214 |
b->cache[i] = NULL; |
215 |
} |
216 |
} |
217 |
return 0; |
218 |
} |
219 |
|
220 |
ARRAY_Page *ARRAY_ReadPageFromDisk(FILE *fp, unsigned long page_number) |
221 |
{ |
222 |
ARRAY_Page *pg = (ARRAY_Page *)emalloc(sizeof(ARRAY_Page) + ARRAY_PageSize); |
223 |
|
224 |
fseek(fp,page_number * ARRAY_PageSize,SEEK_SET); |
225 |
fread(pg->data,ARRAY_PageSize, 1, fp); |
226 |
|
227 |
ARRAY_GetNextPage(pg,pg->next); |
228 |
|
229 |
pg->page_number = page_number; |
230 |
pg->modified = 0; |
231 |
return pg; |
232 |
} |
233 |
|
234 |
ARRAY_Page *ARRAY_ReadPage(ARRAY *b, unsigned long page_number) |
235 |
{ |
236 |
int hash = page_number % ARRAY_CACHE_SIZE; |
237 |
ARRAY_Page *tmp; |
238 |
if((tmp = b->cache[hash])) |
239 |
{ |
240 |
while(tmp) |
241 |
{ |
242 |
if(tmp->page_number == page_number) |
243 |
{ |
244 |
return tmp; |
245 |
} |
246 |
tmp = tmp->next_cache; |
247 |
} |
248 |
} |
249 |
|
250 |
tmp = ARRAY_ReadPageFromDisk(b->fp, page_number); |
251 |
tmp->modified = 0; |
252 |
tmp->in_use = 1; |
253 |
tmp->next_cache = b->cache[hash]; |
254 |
b->cache[hash] = tmp; |
255 |
return tmp; |
256 |
} |
257 |
|
258 |
ARRAY_Page *ARRAY_NewPage(ARRAY *b) |
259 |
{ |
260 |
ARRAY_Page *pg; |
261 |
long offset; |
262 |
FILE *fp = b->fp; |
263 |
int hash; |
264 |
int size = ARRAY_PageSize; |
265 |
|
266 |
/* Get file pointer */ |
267 |
if(fseek(fp,0,SEEK_END) !=0) |
268 |
{ |
269 |
printf("mal\n"); |
270 |
} |
271 |
offset = ftell(fp); |
272 |
/* Round up file pointer */ |
273 |
offset = ARRAY_RoundPageSize(offset); |
274 |
|
275 |
/* Set new file pointer - data will be aligned */ |
276 |
if(fseek(fp,offset, SEEK_SET)!=0 || offset != ftell(fp)) |
277 |
{ |
278 |
printf("mal\n"); |
279 |
} |
280 |
|
281 |
pg = (ARRAY_Page *)emalloc(sizeof(ARRAY_Page) + size); |
282 |
memset(pg,0,sizeof(ARRAY_Page) + size); |
283 |
/* Reserve space in file */ |
284 |
if(fwrite(pg->data,1,size,fp)!=size || ((long)size + offset) != ftell(fp)) |
285 |
{ |
286 |
printf("mal\n"); |
287 |
} |
288 |
|
289 |
pg->next = 0; |
290 |
|
291 |
pg->page_number = offset/ARRAY_PageSize; |
292 |
|
293 |
/* add to cache */ |
294 |
pg->modified = 1; |
295 |
pg->in_use = 1; |
296 |
hash = pg->page_number % ARRAY_CACHE_SIZE; |
297 |
pg->next_cache = b->cache[hash]; |
298 |
b->cache[hash] = pg; |
299 |
return pg; |
300 |
} |
301 |
|
302 |
void ARRAY_FreePage(ARRAY *b, ARRAY_Page *pg) |
303 |
{ |
304 |
int hash = pg->page_number % ARRAY_CACHE_SIZE; |
305 |
ARRAY_Page *tmp; |
306 |
|
307 |
tmp = b->cache[hash]; |
308 |
|
309 |
#ifdef DEBUG |
310 |
if(!(tmp = b->cache[hash])) |
311 |
{ |
312 |
/* This should never happen!!!! */ |
313 |
printf("Error in FreePage\n"); |
314 |
exit(0); |
315 |
} |
316 |
#endif |
317 |
|
318 |
while(tmp) |
319 |
{ |
320 |
if (tmp->page_number != pg->page_number) |
321 |
tmp = tmp->next_cache; |
322 |
else |
323 |
{ |
324 |
tmp->in_use = 0; |
325 |
break; |
326 |
} |
327 |
} |
328 |
} |
329 |
|
330 |
ARRAY *ARRAY_New(FILE *fp, unsigned int size) |
331 |
{ |
332 |
ARRAY *b; |
333 |
int i; |
334 |
b = (ARRAY *) emalloc(sizeof(ARRAY)); |
335 |
b->page_size = size; |
336 |
b->fp = fp; |
337 |
for(i = 0; i < ARRAY_CACHE_SIZE; i++) |
338 |
b->cache[i] = NULL; |
339 |
|
340 |
return b; |
341 |
} |
342 |
|
343 |
ARRAY *ARRAY_Create(FILE *fp) |
344 |
{ |
345 |
ARRAY *b; |
346 |
ARRAY_Page *root; |
347 |
int size = ARRAY_PageSize; |
348 |
|
349 |
b = ARRAY_New(fp , size); |
350 |
root = ARRAY_NewPage(b); |
351 |
|
352 |
b->root_page = root->page_number; |
353 |
|
354 |
ARRAY_WritePage(b, root); |
355 |
ARRAY_FreePage(b, root); |
356 |
|
357 |
return b; |
358 |
} |
359 |
|
360 |
|
361 |
ARRAY *ARRAY_Open(FILE *fp, unsigned long root_page) |
362 |
{ |
363 |
ARRAY *b; |
364 |
int size = ARRAY_PageSize; |
365 |
|
366 |
b = ARRAY_New(fp , size); |
367 |
|
368 |
b->root_page = root_page; |
369 |
|
370 |
return b; |
371 |
} |
372 |
|
373 |
unsigned long ARRAY_Close(ARRAY *bt) |
374 |
{ |
375 |
unsigned long root_page = bt->root_page; |
376 |
ARRAY_FlushCache(bt); |
377 |
ARRAY_CleanCache(bt); |
378 |
efree(bt); |
379 |
return root_page; |
380 |
} |
381 |
|
382 |
|
383 |
int ARRAY_Put(ARRAY *b, int index, unsigned long value) |
384 |
{ |
385 |
unsigned long next_page; |
386 |
ARRAY_Page *root_page, *tmp = NULL, *prev; |
387 |
int i, hash, page_reads, page_index; |
388 |
|
389 |
page_reads = index / ((ARRAY_PageSize - ARRAY_PageHeaderSize) / SizeInt32); |
390 |
hash = page_reads % ((ARRAY_PageSize - ARRAY_PageHeaderSize) / SizeInt32); |
391 |
page_reads /= ((ARRAY_PageSize - ARRAY_PageHeaderSize) / SizeInt32); |
392 |
page_index = index % ((ARRAY_PageSize - ARRAY_PageHeaderSize) / SizeInt32); |
393 |
|
394 |
root_page = ARRAY_ReadPage(b, b->root_page); |
395 |
next_page = UNPACKLONG(*(unsigned long *)ARRAY_Data(root_page, hash)); |
396 |
|
397 |
prev = NULL; |
398 |
for(i = 0; i <= page_reads; i++) |
399 |
{ |
400 |
if(!next_page) |
401 |
{ |
402 |
tmp = ARRAY_NewPage(b); |
403 |
ARRAY_WritePage(b,tmp); |
404 |
if(!i) |
405 |
{ |
406 |
*(unsigned long *)ARRAY_Data(root_page,hash) = PACKLONG(tmp->page_number); |
407 |
ARRAY_WritePage(b,root_page); |
408 |
} |
409 |
else |
410 |
{ |
411 |
prev->next = tmp->page_number; |
412 |
ARRAY_WritePage(b,prev); |
413 |
} |
414 |
} |
415 |
else |
416 |
{ |
417 |
tmp = ARRAY_ReadPage(b, next_page); |
418 |
} |
419 |
if(prev) |
420 |
ARRAY_FreePage(b,prev); |
421 |
prev = tmp; |
422 |
next_page = tmp->next; |
423 |
} |
424 |
*(unsigned long *)ARRAY_Data(tmp,page_index) = PACKLONG(value); |
425 |
ARRAY_WritePage(b,tmp); |
426 |
ARRAY_FreePage(b,tmp); |
427 |
ARRAY_FreePage(b,root_page); |
428 |
|
429 |
return 0; |
430 |
} |
431 |
|
432 |
|
433 |
unsigned long ARRAY_Get(ARRAY *b, int index) |
434 |
{ |
435 |
unsigned long next_page, value; |
436 |
ARRAY_Page *root_page, *tmp; |
437 |
int i, hash, page_reads, page_index; |
438 |
|
439 |
page_reads = index / ((ARRAY_PageSize - ARRAY_PageHeaderSize) / SizeInt32); |
440 |
hash = page_reads % ((ARRAY_PageSize - ARRAY_PageHeaderSize) / SizeInt32); |
441 |
page_reads /= ((ARRAY_PageSize - ARRAY_PageHeaderSize) / SizeInt32); |
442 |
page_index = index % ((ARRAY_PageSize - ARRAY_PageHeaderSize) / SizeInt32); |
443 |
|
444 |
root_page = ARRAY_ReadPage(b, b->root_page); |
445 |
next_page = UNPACKLONG(*(unsigned long *)ARRAY_Data(root_page, hash)); |
446 |
|
447 |
tmp = NULL; |
448 |
for(i = 0; i <= page_reads; i++) |
449 |
{ |
450 |
if(tmp) |
451 |
ARRAY_FreePage(b, tmp); |
452 |
if(!next_page) |
453 |
{ |
454 |
ARRAY_FreePage(b,root_page); |
455 |
return 0L; |
456 |
} |
457 |
else |
458 |
{ |
459 |
tmp = ARRAY_ReadPage(b, next_page); |
460 |
} |
461 |
next_page = tmp->next; |
462 |
} |
463 |
value = UNPACKLONG(*(unsigned long *)ARRAY_Data(tmp,page_index)); |
464 |
ARRAY_FreePage(b,tmp); |
465 |
ARRAY_FreePage(b,root_page); |
466 |
|
467 |
return value; |
468 |
} |
469 |
|
470 |
|
471 |
|
472 |
#ifdef DEBUG |
473 |
|
474 |
#include <time.h> |
475 |
|
476 |
#define N_TEST 50000000 |
477 |
|
478 |
#define F_READ_BINARY "rb" |
479 |
#define F_WRITE_BINARY "wb" |
480 |
#define F_READWRITE_BINARY "rb+" |
481 |
|
482 |
#define F_READ_TEXT "r" |
483 |
#define F_WRITE_TEXT "w" |
484 |
#define F_READWRITE_TEXT "r+" |
485 |
|
486 |
int main() |
487 |
{ |
488 |
FILE *fp; |
489 |
ARRAY *bt; |
490 |
int i; |
491 |
static unsigned long nums[N_TEST]; |
492 |
unsigned long root_page; |
493 |
srand(time(NULL)); |
494 |
|
495 |
|
496 |
|
497 |
fp = fopen("kkkkk",F_WRITE_BINARY); |
498 |
fclose(fp); |
499 |
fp = fopen("kkkkk",F_READWRITE_BINARY); |
500 |
|
501 |
fwrite("aaa",1,3,fp); |
502 |
|
503 |
printf("\n\nIndexing\n\n"); |
504 |
|
505 |
bt = ARRAY_Create(fp); |
506 |
for(i=0;i<N_TEST;i++) |
507 |
{ |
508 |
nums[i] = rand(); |
509 |
// nums[i]=i; |
510 |
ARRAY_Put(bt,i,nums[i]); |
511 |
if(nums[i]!= ARRAY_Get(bt,i)) |
512 |
printf("\n\nmal %d\n\n",i); |
513 |
if(!(i%1000)) |
514 |
{ |
515 |
ARRAY_FlushCache(bt); |
516 |
printf("%d \r",i); |
517 |
} |
518 |
} |
519 |
|
520 |
root_page = ARRAY_Close(bt); |
521 |
fclose(fp); |
522 |
|
523 |
printf("\n\nUnfreed %d\n\n",num); |
524 |
printf("\n\nSearching\n\n"); |
525 |
|
526 |
fp = fopen("kkkkk",F_READ_BINARY); |
527 |
bt = ARRAY_Open(fp, root_page); |
528 |
|
529 |
for(i=0;i<N_TEST;i++) |
530 |
{ |
531 |
if(nums[i] != ARRAY_Get(bt,i)) |
532 |
printf("\n\nmal %d\n\n",i); |
533 |
if(!(i%1000)) |
534 |
printf("%d \r",i); |
535 |
} |
536 |
|
537 |
root_page = ARRAY_Close(bt); |
538 |
|
539 |
fclose(fp); |
540 |
printf("\n\nUnfreed %d\n\n",num); |
541 |
|
542 |
} |
543 |
|
544 |
#endif |