1 |
/*
$Id: index.h,v 1.52 2002/08/29 13:45:39 jmruiz Exp $
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
18 |
|
19 |
#ifndef __HasSeenModule_Index |
20 |
#define __HasSeenModule_Index 1 |
21 |
|
22 |
/*
** One (device, inode) pair, linked to further pairs through `next`.
** struct MOD_Index keeps an array of pointers to these nodes (inode_hash).
*/
struct dev_ino
{
    dev_t           dev;    /* device id */
    ino_t           ino;    /* inode number on that device */
    struct dev_ino *next;   /* following node in the same list */
};
28 |
|
29 |
/*
** Positions recorded for one file on behalf of the IgnoreLimit option.
** `pos` is a flat int array laid out as metaID1, position1, metaID2,
** position2, ...
*/
struct IgnoreLimitPositions
{
    int  n;     /* Number of entries per file */
    int *pos;   /* Store metaID1,position1, metaID2,position2 ..... */
};
34 |
|
35 |
/*
** Table of the metaIDs that are currently in scope while words are being
** indexed.
*/
typedef struct
{
    int *array;       /* list of metaIDs that need to be indexed */
    int  max;         /* max size of table */
    int  num;         /* number in list */
    int  defaultID;   /* default metaID (should always be one, I suppose) */
}
METAIDTABLE;
45 |
|
46 |
|
47 |
/* |
48 |
-- module data |
49 |
*/ |
50 |
|
51 |
|
52 |
struct MOD_Index |
53 |
{ |
54 |
/* entry vars */ |
55 |
METAIDTABLE metaIDtable; |
56 |
ENTRYARRAY *entryArray; |
57 |
ENTRY *hashentries[VERYBIGHASHSIZE]; |
58 |
char hashentriesdirty[VERYBIGHASHSIZE]; /* just a 0/1 flag */ |
59 |
|
60 |
/* Compression Work buffer while compression locations in index ** proccess */ |
61 |
unsigned char *compression_buffer; |
62 |
int len_compression_buffer; |
63 |
|
64 |
unsigned char *worddata_buffer; /* Buffer to store worddata */ |
65 |
int len_worddata_buffer; /* Max size of the buffer */ |
66 |
int sz_worddata_buffer; /* Space being used in worddata_buffer */ |
67 |
|
68 |
/* File counter */ |
69 |
int filenum; |
70 |
|
71 |
/* index tmp (both FS and HTTP methods) */ |
72 |
char *tmpdir; |
73 |
|
74 |
/* Filenames of the swap files */ |
75 |
char *swap_location_name[MAX_LOC_SWAP_FILES]; /* Location info file */ |
76 |
|
77 |
/* handlers for both files */ |
78 |
FILE *fp_loc_write[MAX_LOC_SWAP_FILES]; /* Location (writing) */ |
79 |
FILE *fp_loc_read[MAX_LOC_SWAP_FILES]; /* Location (reading) */ |
80 |
|
81 |
struct dev_ino *inode_hash[BIGHASHSIZE]; |
82 |
|
83 |
/* Buffers used by indexstring */ |
84 |
int lenswishword; |
85 |
char *swishword; |
86 |
int lenword; |
87 |
char *word; |
88 |
|
89 |
/* Economic mode (-e) */ |
90 |
int swap_locdata; /* swap location data */ |
91 |
|
92 |
/* Pointer to swap functions */ |
93 |
long (*swap_tell) (FILE *); |
94 |
size_t(*swap_write) (const void *, size_t, size_t, FILE *); |
95 |
int (*swap_seek) (FILE *, long, int); |
96 |
size_t(*swap_read) (void *, size_t, size_t, FILE *); |
97 |
int (*swap_close) (FILE *); |
98 |
int (*swap_putc) (int, FILE *); |
99 |
int (*swap_getc) (FILE *); |
100 |
|
101 |
/* IgnoreLimit option values */ |
102 |
int plimit; |
103 |
int flimit; |
104 |
/* Number of words from IgnoreLimit */ |
105 |
int nIgnoreLimitWords; |
106 |
/* Positions from stopwords from IgnoreLimit */ |
107 |
struct IgnoreLimitPositions **IgnoreLimitPositionsArray; |
108 |
|
109 |
/* Index in blocks of chunk_size files */ |
110 |
int chunk_size; |
111 |
|
112 |
/* Variable to control the size of the zone used for store locations during chunk proccesing */ |
113 |
int optimalChunkLocZoneSize; |
114 |
|
115 |
/* variable to handle free memory space for locations inside currentChunkLocZone */ |
116 |
|
117 |
LOCATION *freeLocMemChain; |
118 |
|
119 |
MEM_ZONE *perDocTmpZone; |
120 |
MEM_ZONE *currentChunkLocZone; |
121 |
MEM_ZONE *totalLocZone; |
122 |
MEM_ZONE *entryZone; |
123 |
}; |
124 |
|
125 |
void initModule_Index(SWISH *); |
126 |
void freeModule_Index(SWISH *); |
127 |
int configModule_Index(SWISH *, StringList *); |
128 |
|
129 |
|
130 |
void do_index_file(SWISH * sw, FileProp * fprop); |
131 |
|
132 |
ENTRY *getentry(SWISH * , char *); |
133 |
void addentry(SWISH *, ENTRY *, int, int, int, int); |
134 |
|
135 |
void addCommonProperties(SWISH * sw, FileProp * fprop, FileRec * fi, char *title, char *summary, int start); |
136 |
|
137 |
|
138 |
int getfilecount(IndexFILE *); |
139 |
|
140 |
int getNumberOfIgnoreLimitWords(SWISH *); |
141 |
void getPositionsFromIgnoreLimitWords(SWISH * sw); |
142 |
|
143 |
char *ruleparse(SWISH *, char *); |
144 |
void stripIgnoreFirstChars(INDEXDATAHEADER *, char *); |
145 |
void stripIgnoreLastChars(INDEXDATAHEADER *, char *); |
146 |
|
147 |
/*
** Character-class lookups against the per-header tables.
**
** `header` is a pointer to a structure holding the lookup tables and `c` is
** the character to classify.  `c` is converted to unsigned char before it is
** used as an index, so the index is never negative even where plain char is
** signed.
**
** The argument is parenthesised inside the cast: without that, an argument
** such as `x + 200` would expand to `(unsigned char)x + 200`, casting only
** `x` and possibly indexing past the end of the table.  The whole expansion
** is parenthesised so the macros compose safely inside larger expressions.
** Each argument is evaluated exactly once.
*/
#define isIgnoreFirstChar(header,c) ((header)->ignorefirstcharlookuptable[(int)((unsigned char)(c))])
#define isIgnoreLastChar(header,c) ((header)->ignorelastcharlookuptable[(int)((unsigned char)(c))])
#define isBumpPositionCounterChar(header,c) ((header)->bumpposcharslookuptable[(int)((unsigned char)(c))])
150 |
|
151 |
|
152 |
void computehashentry(ENTRY **, ENTRY *); |
153 |
|
154 |
void sort_words(SWISH *, IndexFILE *); |
155 |
void sortChunkLocations(SWISH *, IndexFILE *, ENTRY *); |
156 |
|
157 |
int indexstring(SWISH * sw, char *s, int filenum, int structure, int numMetaNames, int *metaID, int *position); |
158 |
|
159 |
void addsummarytofile(IndexFILE *, int, char *); |
160 |
|
161 |
void BuildSortedArrayOfWords(SWISH *, IndexFILE *); |
162 |
|
163 |
|
164 |
|
165 |
void PrintHeaderLookupTable(int ID, int table[], int table_size, FILE * fp); |
166 |
void coalesce_all_word_locations(SWISH * sw, IndexFILE * indexf); |
167 |
void coalesce_word_locations(SWISH * sw, IndexFILE * indexf, ENTRY * e); |
168 |
|
169 |
void adjustWordPositions(unsigned char *worddata, int *sz_worddata, int n_files, struct IgnoreLimitPositions **ilp); |
170 |
|
171 |
#endif |