1 |
adcroft |
1.1 |
/* |
2 |
|
|
$Id: index.h,v 1.52 2002/08/29 13:45:39 jmruiz Exp $ |
3 |
|
|
** |
4 |
|
|
** This program and library is free software; you can redistribute it and/or |
5 |
|
|
** modify it under the terms of the GNU (Library) General Public License |
6 |
|
|
** as published by the Free Software Foundation; either version 2 |
7 |
|
|
** of the License, or any later version. |
8 |
|
|
** |
9 |
|
|
** This program is distributed in the hope that it will be useful, |
10 |
|
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 |
|
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 |
|
|
** GNU (Library) General Public License for more details. |
13 |
|
|
** |
14 |
|
|
** You should have received a copy of the GNU (Library) General Public License |
15 |
|
|
** along with this program; if not, write to the Free Software |
16 |
|
|
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
17 |
|
|
*/ |
18 |
|
|
|
19 |
|
|
#ifndef __HasSeenModule_Index |
20 |
|
|
#define __HasSeenModule_Index 1 |
21 |
|
|
|
22 |
|
|
/*
 * One node of a singly linked chain of (device, inode) pairs.
 * struct MOD_Index keeps an array of pointers to these nodes in
 * inode_hash[].
 * NOTE(review): presumably used to recognise files that were already
 * visited -- confirm against the code that fills inode_hash.
 */
struct dev_ino {
    dev_t           dev;    /* device number of the pair */
    ino_t           ino;    /* inode number of the pair */
    struct dev_ino *next;   /* following node in the chain */
};
28 |
|
|
|
29 |
|
|
/*
 * Per-file record of word positions tied to the IgnoreLimit option.
 * struct MOD_Index holds an array of pointers to these records in
 * IgnoreLimitPositionsArray.
 */
struct IgnoreLimitPositions {
    /* Number of entries stored for the file.
     * NOTE(review): whether this counts (metaID, position) pairs or
     * individual ints is not visible here -- confirm with the users. */
    int  n;

    /* Flat sequence: metaID1, position1, metaID2, position2, ... */
    int *pos;
};
34 |
|
|
|
35 |
|
|
/*
 * METAIDTABLE collects the metaIDs that are in scope at the moment
 * words are being indexed.
 */
typedef struct {
    int *array;       /* the metaIDs that have to be indexed */
    int  max;         /* maximum size of the table */
    int  num;         /* how many metaIDs the list currently holds */
    int  defaultID;   /* default metaID (the original author expects it to always be one) */
} METAIDTABLE;
45 |
|
|
|
46 |
|
|
|
47 |
|
|
/* |
48 |
|
|
-- module data |
49 |
|
|
*/ |
50 |
|
|
|
51 |
|
|
|
52 |
|
|
struct MOD_Index |
53 |
|
|
{ |
54 |
|
|
/* entry vars */ |
55 |
|
|
METAIDTABLE metaIDtable; |
56 |
|
|
ENTRYARRAY *entryArray; |
57 |
|
|
ENTRY *hashentries[VERYBIGHASHSIZE]; |
58 |
|
|
char hashentriesdirty[VERYBIGHASHSIZE]; /* just a 0/1 flag */ |
59 |
|
|
|
60 |
|
|
/* Compression Work buffer while compression locations in index ** proccess */ |
61 |
|
|
unsigned char *compression_buffer; |
62 |
|
|
int len_compression_buffer; |
63 |
|
|
|
64 |
|
|
unsigned char *worddata_buffer; /* Buffer to store worddata */ |
65 |
|
|
int len_worddata_buffer; /* Max size of the buffer */ |
66 |
|
|
int sz_worddata_buffer; /* Space being used in worddata_buffer */ |
67 |
|
|
|
68 |
|
|
/* File counter */ |
69 |
|
|
int filenum; |
70 |
|
|
|
71 |
|
|
/* index tmp (both FS and HTTP methods) */ |
72 |
|
|
char *tmpdir; |
73 |
|
|
|
74 |
|
|
/* Filenames of the swap files */ |
75 |
|
|
char *swap_location_name[MAX_LOC_SWAP_FILES]; /* Location info file */ |
76 |
|
|
|
77 |
|
|
/* handlers for both files */ |
78 |
|
|
FILE *fp_loc_write[MAX_LOC_SWAP_FILES]; /* Location (writing) */ |
79 |
|
|
FILE *fp_loc_read[MAX_LOC_SWAP_FILES]; /* Location (reading) */ |
80 |
|
|
|
81 |
|
|
struct dev_ino *inode_hash[BIGHASHSIZE]; |
82 |
|
|
|
83 |
|
|
/* Buffers used by indexstring */ |
84 |
|
|
int lenswishword; |
85 |
|
|
char *swishword; |
86 |
|
|
int lenword; |
87 |
|
|
char *word; |
88 |
|
|
|
89 |
|
|
/* Economic mode (-e) */ |
90 |
|
|
int swap_locdata; /* swap location data */ |
91 |
|
|
|
92 |
|
|
/* Pointer to swap functions */ |
93 |
|
|
long (*swap_tell) (FILE *); |
94 |
|
|
size_t(*swap_write) (const void *, size_t, size_t, FILE *); |
95 |
|
|
int (*swap_seek) (FILE *, long, int); |
96 |
|
|
size_t(*swap_read) (void *, size_t, size_t, FILE *); |
97 |
|
|
int (*swap_close) (FILE *); |
98 |
|
|
int (*swap_putc) (int, FILE *); |
99 |
|
|
int (*swap_getc) (FILE *); |
100 |
|
|
|
101 |
|
|
/* IgnoreLimit option values */ |
102 |
|
|
int plimit; |
103 |
|
|
int flimit; |
104 |
|
|
/* Number of words from IgnoreLimit */ |
105 |
|
|
int nIgnoreLimitWords; |
106 |
|
|
/* Positions from stopwords from IgnoreLimit */ |
107 |
|
|
struct IgnoreLimitPositions **IgnoreLimitPositionsArray; |
108 |
|
|
|
109 |
|
|
/* Index in blocks of chunk_size files */ |
110 |
|
|
int chunk_size; |
111 |
|
|
|
112 |
|
|
/* Variable to control the size of the zone used for store locations during chunk proccesing */ |
113 |
|
|
int optimalChunkLocZoneSize; |
114 |
|
|
|
115 |
|
|
/* variable to handle free memory space for locations inside currentChunkLocZone */ |
116 |
|
|
|
117 |
|
|
LOCATION *freeLocMemChain; |
118 |
|
|
|
119 |
|
|
MEM_ZONE *perDocTmpZone; |
120 |
|
|
MEM_ZONE *currentChunkLocZone; |
121 |
|
|
MEM_ZONE *totalLocZone; |
122 |
|
|
MEM_ZONE *entryZone; |
123 |
|
|
}; |
124 |
|
|
|
125 |
|
|
void initModule_Index(SWISH *); |
126 |
|
|
void freeModule_Index(SWISH *); |
127 |
|
|
int configModule_Index(SWISH *, StringList *); |
128 |
|
|
|
129 |
|
|
|
130 |
|
|
void do_index_file(SWISH * sw, FileProp * fprop); |
131 |
|
|
|
132 |
|
|
ENTRY *getentry(SWISH * , char *); |
133 |
|
|
void addentry(SWISH *, ENTRY *, int, int, int, int); |
134 |
|
|
|
135 |
|
|
void addCommonProperties(SWISH * sw, FileProp * fprop, FileRec * fi, char *title, char *summary, int start); |
136 |
|
|
|
137 |
|
|
|
138 |
|
|
int getfilecount(IndexFILE *); |
139 |
|
|
|
140 |
|
|
int getNumberOfIgnoreLimitWords(SWISH *); |
141 |
|
|
void getPositionsFromIgnoreLimitWords(SWISH * sw); |
142 |
|
|
|
143 |
|
|
char *ruleparse(SWISH *, char *); |
144 |
|
|
void stripIgnoreFirstChars(INDEXDATAHEADER *, char *); |
145 |
|
|
void stripIgnoreLastChars(INDEXDATAHEADER *, char *); |
146 |
|
|
|
147 |
|
|
/*
 * Character-class lookups against the tables stored in an index header.
 *
 *   header  pointer to a structure that has the members
 *           ignorefirstcharlookuptable, ignorelastcharlookuptable and
 *           bumpposcharslookuptable
 *   c       the character to look up; it may be any expression
 *
 * The argument is converted to unsigned char before it is used as an
 * index, so a plain char with the high bit set never yields a negative
 * subscript.  `c` is parenthesised so that the conversion applies to
 * the whole argument expression (e.g. `ch - 1`) rather than only to its
 * first operand, and the expansion as a whole is parenthesised so it
 * behaves as a single operand in any surrounding expression.  Each
 * macro evaluates both arguments exactly once and still expands to an
 * lvalue.
 */
#define isIgnoreFirstChar(header,c) ((header)->ignorefirstcharlookuptable[(int)((unsigned char)(c))])
#define isIgnoreLastChar(header,c) ((header)->ignorelastcharlookuptable[(int)((unsigned char)(c))])
#define isBumpPositionCounterChar(header,c) ((header)->bumpposcharslookuptable[(int)((unsigned char)(c))])
150 |
|
|
|
151 |
|
|
|
152 |
|
|
void computehashentry(ENTRY **, ENTRY *); |
153 |
|
|
|
154 |
|
|
void sort_words(SWISH *, IndexFILE *); |
155 |
|
|
void sortChunkLocations(SWISH *, IndexFILE *, ENTRY *); |
156 |
|
|
|
157 |
|
|
int indexstring(SWISH * sw, char *s, int filenum, int structure, int numMetaNames, int *metaID, int *position); |
158 |
|
|
|
159 |
|
|
void addsummarytofile(IndexFILE *, int, char *); |
160 |
|
|
|
161 |
|
|
void BuildSortedArrayOfWords(SWISH *, IndexFILE *); |
162 |
|
|
|
163 |
|
|
|
164 |
|
|
|
165 |
|
|
void PrintHeaderLookupTable(int ID, int table[], int table_size, FILE * fp); |
166 |
|
|
void coalesce_all_word_locations(SWISH * sw, IndexFILE * indexf); |
167 |
|
|
void coalesce_word_locations(SWISH * sw, IndexFILE * indexf, ENTRY * e); |
168 |
|
|
|
169 |
|
|
void adjustWordPositions(unsigned char *worddata, int *sz_worddata, int n_files, struct IgnoreLimitPositions **ilp); |
170 |
|
|
|
171 |
|
|
#endif |