/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/config.h
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/src/config.h

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Importing web-site building process.

1 /*
2 ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
3 ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
4 **
5 ** This program and library is free software; you can redistribute it and/or
6 ** modify it under the terms of the GNU (Library) General Public License
7 ** as published by the Free Software Foundation; either version 2
8 ** of the License, or any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU (Library) General Public License for more details.
14 **
15 ** You should have received a copy of the GNU (Library) General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 **--------------------------------------------------------------------------
19 ** Config file edited by Roy Tennant 2/20/96
20 ** Config file edited by Giulia Hill 2/27/97 to increase length of
21 ** words that are indexed
22 ** Added IGNORELASTCHAR
23 ** G. Hill 3/12/97 ghill@library.berkeley.edu
24 **
25 ** Added OKNOMETA to allow no failing in case the META name is
26 ** not listed in the config.h
27 ** G. Hill 4/15/97 ghill@library.berkeley.edu
28 **
29 ** Added IGNOREFIRSTCHAR
30 ** G.Hill 10/16/97 ghill@library.berkeley.edu
31 **-----------------------------------------------------------------------
32 ** The following are user-definable options that you can change
33 ** to fine-tune SWISH's default options.
34 **
35 ** 2001-03-13 rasc moved search boolean words from swish.h
36 **
37 ** 2001-05-23 wsm added ranking weights
38 **
39 */
40
41
42 #ifdef __VMS
43 #define PROPFILE_EXTENSION "_prop"
44 #define WORDDATA_EXTENSION "_wdata"
45 #define PRESORTED_EXTENSION "_psort"
46 #define BTREE_EXTENSION "_btree"
47 #define ARRAY_EXTENSION "_array"
48 #else
49 #define PROPFILE_EXTENSION ".prop"
50 #define WORDDATA_EXTENSION ".wdata"
51 #define PRESORTED_EXTENSION ".psort"
52 #define BTREE_EXTENSION ".btree"
53 #define ARRAY_EXTENSION ".array"
54 #endif
55
56 /* MIN_PROP_COMPRESS_SIZE sets the size limit for which properties are compressed.
57 * Swish must be compiled with zlib for properties to be compressed.
58 */
59 #define MIN_PROP_COMPRESS_SIZE 100
60
61 /* This is the character used to replace UTF-8 characters that cannot be
62 * converted to 8859-1 Latin-1 character
63 */
64 #define ENCODE_ERROR_CHAR ' '
65
66 /* The file extension to use on the property file is defined by PROPFILE_EXTENSION above.
67 */
68
69 #define MAX_SORT_STRING_LEN 50
70
71 /* MAX_SORT_STRING_LEN defines the max string length to use
72 * for sorting properties. Should be long enough to sort ALL
73 * file paths or URLs. Useful if using StoreDescription to store
74 * a large amount of text.
75 */
76
77 #define USE_DOCPATH_AS_TITLE 1
78
79 /* If USE_DOCPATH_AS_TITLE is defined then documents that do not have
80 * a title defined (xml and txt, and HTML documents without a title)
81 * will display the document path as the title in results.
82 * Documents without a title will sort as a blank title, and not
83 * by the document path regardless of this setting. This is a change
84 * from versions previous to 2.2.
85 */
86
87 #ifdef __VMS
88 #define USE_TEMPFILE_EXTENSION "_temp"
89 #else
90 #define USE_TEMPFILE_EXTENSION ".temp"
91 #endif
92
93 /* If USE_TEMPFILE_EXTENSION is defined then swish will append the supplied
94 * extension onto the index files during indexing, and when indexing is
95 * complete will remove the extension by renaming the files.
96 * This has two important uses when an index file already exists (and is in use):
97 * 1) the old index can be used while indexing is running
98 * 2) a failure during indexing will not destroy the existing index
99 *
100 * Note: This is used instead of a normal temporary file because of possible limitations
101 * in renaming across file systems. Therefore, the temporary index files are
102 * stored in the same directory as the final index files.
103 */
104
105 #define TEMP_FILE_PREFIX "swtmp"
106
107 /* TEMP_FILE_PREFIX is prepended to all temporary files. Makes them
108 * easier to find.
109 */
110
111
112 #define ALLOW_HTTP_INDEXING_DATA_SOURCE 1
113 #define ALLOW_FILESYSTEM_INDEXING_DATA_SOURCE 1
114 #define ALLOW_EXTERNAL_PROGRAM_DATA_SOURCE 1
115
116 /* These symbols allow compile-time elimination of indexing
117 ** data sources. Any Data Source that is allowed by these
118 ** symbols can be selected for indexing from the command line.
119 ** Comment out any options you do not want to support, but
120 ** be sure to leave at least one option.
121 */
122
123 #define INDEXPERMS 0644
124
125 /* After SWISH generates an index file, it changes the permissions
126 ** of the file to this mode. Change to the mode you like
127 ** (note that it must be an octal number). If you don't want
128 ** permissions to be changed for you, comment out this line.
129 */
130
131 #define NO_PLIMIT 101
132
133 #define PLIMIT NO_PLIMIT
134 #define FLIMIT 10000
135
136 /* SWISH uses these parameters to automatically mark words as
137 ** being too common while indexing. For instance, if I defined PLIMIT
138 ** as 80 and FLIMIT as 256, SWISH would define a common word as
139 ** a word that occurs in over 80% of all indexed files and over
140 ** 256 files. Making these numbers lower will most likely make your
141 ** index files smaller. Making PLIMIT and FLIMIT small will also
142 ** ensure that searching consumes only so much CPU resources.
143 */
144
145 #define VERBOSE 1
146
147 /* You can define VERBOSE to be a number from 0 to 4. 0 is totally
148 ** silent operation. The default before swish 2.2 was 3
149 */
150
151 #define _AND_WORD "and"
152 #define _OR_WORD "or"
153 #define _NOT_WORD "not"
154
155 /*
156 ** these are the default boolean operator words used by swish search
157 */
158
159 #define DEFAULT_RULE AND_RULE
160
161 /* If a list of search words is specified without booleans,
162 ** SWISH will assume they are connected by a default rule.
163 ** This can be AND_RULE or OR_RULE.
164 */
165
166 #define TITLETOPLINES 12
167
168 /* This is how many lines deep SWISH will look into an HTML file to
169 ** attempt to find a <TITLE> tag. This has no effect when using the libxml2 parser.
170 */
171
172
173 #define MINWORDLIMIT 1
174
175 /* This is the minimum length of a word. Anything shorter will not
176 ** be indexed.
177 ** Do not change it here. Use MinWordLimit in config file
178 */
179
180 #define MAXWORDLIMIT 40
181
182 /* This is the maximum length of a word. Anything longer will not
183 ** be indexed.
184 ** Do not change it here. Use MaxWordLimit in config file
185 */
186
187 #define CONVERTHTMLENTITIES 1
188
189 /* If defined as 1, all entities in indexed
190 ** words will be converted to an ASCII equivalent. For instance,
191 ** with this feature you can index the word "resum&eacute;" or
192 ** "resum&#233;" and it will be indexed as the word "resume".
193 ** 2001-01 Do not change it here. Use ConvertHTMLEntities Yes/No in
194 ** config file
195 */
196
197 #define IGNOREALLV 0
198 #define IGNOREALLC 0
199 #define IGNOREALLN 0
200
201 /* If IGNOREALLV is 1, words containing all vowels won't be indexed.
202 ** If IGNOREALLC is 1, words containing all consonants won't be indexed.
203 ** If IGNOREALLN is 1, words containing all digits won't be indexed.
204 ** Define as 0 to allow words with consistent characters.
205 ** Vowels are defined as "aeiou", digits are "0123456789".
206 */
207
208 #define IGNOREROWV 60
209 #define IGNOREROWC 60
210 #define IGNOREROWN 60
211
212 /* IGNOREROWV is the maximum number of consecutive vowels a word can have.
213 ** IGNOREROWC is the maximum number of consecutive consonants a word can have.
214 ** IGNOREROWN is the maximum number of consecutive digits a word can have.
215 ** Vowels are defined as "aeiou", digits are "0123456789".
216 */
217
218 #define IGNORESAME 100
219
220 /* IGNORESAME is the maximum times a character can repeat in a word.
221 */
222 /* Dec 6, 2001 - Grabbed "letters" from /usr/local/share/aspell/iso8859-1.dat (http://aspell.sf.net) - moseley */
223 #define WORDCHARS "0123456789abcdefghijklmnopqrstuvwxyzªµºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
224
225 /*
226 #define WORDCHARS "abcdefghijklmnopqrstuvwxyzÁÂÃÈýÊËÌÐÝÞÍðÎÏÒÓÔÕØÙÛîèãõšœ€ßƒŠŒŽøŸ£ÜžíÀ0123456789"
227 */
228
229 /* WORDCHARS is a string of characters which SWISH permits to
230 ** be in words. Words are defined by these characters.
231 **
232 ** Also note that if you specify the backslash character (\) or
233 ** double quote (") you need to type a backslash before them to
234 ** make the compiler understand them.
235 */
236
237 #define BEGINCHARS "0123456789abcdefghijklmnopqrstuvwxyzªµºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
238
239 /* Of the characters that you decide can go into words, this is
240 ** a list of characters that words can begin with. It should be
241 ** a subset of (or equal to) WORDCHARS.
242 */
243
244 #define ENDCHARS "0123456789abcdefghijklmnopqrstuvwxyzªµºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
245
246 /* This is the same as BEGINCHARS, except you're testing for
247 ** valid characters at the ends of words.
248 */
249
250 #define IGNORELASTCHAR ""
251
252 /* Array that contains the chars that, even if considered valid in the middle of
253 ** a word, need to be disregarded when at the end. It is important to also
254 ** set the given chars in the ENDCHARS array, otherwise the word will not
255 ** be indexed because it is considered invalid.
256 ** If there are none, just leave the empty list "". Do not erase the line.
257 */
258
259 #define IGNOREFIRSTCHAR ""
260
261 /* Array that contains the chars that, even if considered valid in the middle of
262 ** a word, need to be disregarded when at the beginning. It is important to also
263 ** set the given chars in the BEGINCHARS array, otherwise the word will not
264 ** be indexed because it is considered invalid.
265 ** If there are none, just leave the empty list "". Do not erase the line.
266 */
267
268 #define IGNORE_STOPWORDS_IN_QUERY 1
269
270 /* Added JM 1/10/98. Setting this to 0 (the default when this option was added)
271 ** causes a stopword in an AND_RULE search to create an empty result. Setting it
272 ** to 1 (the value defined above) simply ignores the stopwords and does a search
273 ** on the remaining words.
274 */
274
275 #define INDEXTAGS 0
276
277 /* Normally, all data in tags in HTML files (except for words in
278 ** comments or meta tags) is ignored. If you want to index HTML files with the
279 ** text within tags and all, define this to be 1 and not 0.
280 ** NOTE: if you set it to 1 you will not be able to do context nor
281 ** metaNames searches, as tags are just plain text with no specific
282 ** meaning.
283 */
284
285 // #define BLANK_PROP_VALUE " *BLANK*"
286
287 /* This affects how blank properties are stored.
288 ** Normally, blank properties are treated as if they were not even contained in
289 ** the document. That is:
290 ** <meta name="author" content="">
291 ** is ignored, and no "author" property is stored for that document.
292 ** If BLANK_PROP_VALUE is set, then blank properties will be stored
293 ** but using the string provided as the property value.
294 ** If you use a leading space, then these properties will sort
295 ** before other properties (since leading whitespace is removed from
296 ** properties), and after documents that do not include the property.
297 */
298
299 #define RANK_TITLE 4
300 #define RANK_HEADER 3
301 #define RANK_META 3
302 #define RANK_COMMENTS 1
303 #define RANK_EMPHASIZED 0
304
305 /* These symbols affect the weights applied during ranking. Note that they are added
306 ** together and added to a base rank of 1.0 -- thus defining a rank with a value of
307 ** 2.0 really means it is ranked (1.0 + 2.0) times greater than normal.
308 ** A value of 0.0 applies no additional ranking boost. Note that RANK_COMMENTS only
309 ** applies if you are indexing comments. Be sure you understand how these interact
310 ** in getrank; don't just go changing these values!
311 */
312
313 #define SPIDERDIRECTORY "./"
314
315 #define SWAP_LOC_DEFAULT 0
316
317 /* 2001/08 jmruiz -- Default chunk size - Index will work with blocks of files. This number specifies when to coalesce locations to save memory */
318 #define INDEX_DEFAULT_CHUNK_SIZE 10000
319
/* 2001/08 jmruiz -- Default optimal zone size for temporary storage of locations.
 * (1 << 23) is 8 MB.
 * The replacement list is parenthesized so the macro keeps its value when it
 * is used inside a larger expression (e.g. X - 1, X / 2, X * n); without the
 * parentheses the shift binds more loosely than the arithmetic operators.
 */
#define INDEX_DEFAULT_OPTIMAL_CHUNK_ZONE_SIZE_FOR_LOCATIONS (1 << 23)
323
324 /* 2002/06 Number of swap loc files (-e) */
325 #define MAX_LOC_SWAP_FILES 377
326
327 /* 2001/08 jmruiz -- To avoid emalloc/erealloc in some routines some stack arrays have been added. This is their default size */
328 #define MAX_STACK_POSITIONS 1024
329
/* 2001/08 jmruiz -- Do not change this (it must be an unsigned number). */
/* This is the maximum size of a block of coalesced locations: (1 << 18), i.e. 256 KB.
 * The replacement list is parenthesized so that expressions such as
 * COALESCE_BUFFER_MAX_SIZE - 1 evaluate as intended; the constant itself
 * keeps the same type and value as before.
 */
#define COALESCE_BUFFER_MAX_SIZE (1 << 18)
333
334 /* 2001/08 jmruiz -- File System sort flag - 0 means that filenames
335 ** will not be indexed - 1 means that filenames will be indexed */
336 #define SORT_FILENAMES 0
337
338 /* 2001/10 jmruiz -- Added BTREE schema to store words */
339
340 //#define USE_BTREE
341
342
343 /* 09/00 Jose Ruiz. When set to 1, part of the info is swapped to disk
344 ** to save memory in the indexing process (this note appears to describe
345 ** SWAP_LOC_DEFAULT above -- confirm). Do not change it. You can activate
346 ** this option through the command line (option -e).
347 */
348
349 /* Set this to 1 if you are compiling under Win32
350 define _WIN32 1
351 */
352
353 /* --- BEGIN PORTING-RELATED SYMBOLS --- */
354
355 #ifdef _WIN32
356 #define NO_SYMBOLIC_FILE_LINKS /* Win32 has no symbolic links */
357 #endif
358
359 #ifdef __VMS
360 #define NO_SYMBOLIC_FILE_LINKS /* VMS has no symbolic links */
361 #endif
362
363 #ifdef _WIN32
364 #undef INDEXPERMS /* Win32 version doesn't use chmod() */
365 #endif
366
367 #ifdef _WIN32
368 typedef int pid_t; /* process ID */
369 #endif
370
371 //#ifdef _WIN32
372 //#define TMPDIR "c:\\windows\\temp"
373 //#elif defined(__VMS)
374 //#define TMPDIR "sys$scratch:"
375 //#else
376 //#define TMPDIR "/var/tmp"
377 //#endif
378
379
380 /* Default Delimiter of phrase search */
381 #define PHRASE_DELIMITER_CHAR '"'
382
383
384 /*
385 * Binary files must be opened with the "b" option under Win32, so all
386 * fopen() calls to index files have to go through these routines to
387 * keep the code portable.
388 * Note: text files should be opened normally, without the "b" option,
389 * otherwise end-of-line processing is not done correctly (on Win32).
390 */
391 #define F_READ_BINARY "rb"
392 #define F_WRITE_BINARY "wb"
393 #define F_READWRITE_BINARY "rb+"
394
395 #define F_READ_TEXT "r"
396 #define F_WRITE_TEXT "w"
397 #define F_READWRITE_TEXT "r+"
398
399
400
401 /* #define NEXTSTEP */
402
403 /* You may need to define this if compiling on a NeXTstep machine.
404 */
405
406 /* --- END PORTING-RELATED SYMBOLS --- */
407

  ViewVC Help
Powered by ViewVC 1.1.22