/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/config.h
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/src/config.h

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1 - (hide annotations) (download)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch point for: Import, MAIN
File MIME type: text/plain
Initial revision

1 adcroft 1.1 /*
2     ** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
3     ** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
4     **
5     ** This program and library is free software; you can redistribute it and/or
6     ** modify it under the terms of the GNU (Library) General Public License
7     ** as published by the Free Software Foundation; either version 2
8     ** of the License, or any later version.
9     **
10     ** This program is distributed in the hope that it will be useful,
11     ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12     ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     ** GNU (Library) General Public License for more details.
14     **
15     ** You should have received a copy of the GNU (Library) General Public License
16     ** along with this program; if not, write to the Free Software
17     ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18     **--------------------------------------------------------------------------
19     ** Config file edited by Roy Tennant 2/20/96
20     ** Config file edited by Giulia Hill 2/27/97 to increase length of
21     ** words that are indexed
22     ** Added IGNORELASTCHAR
23     ** G. Hill 3/12/97 ghill@library.berkeley.edu
24     **
25     ** Added OKNOMETA to allow no failing in case the META name is
26     ** not listed in the config.h
27     ** G. Hill 4/15/97 ghill@library.berkeley.edu
28     **
29     ** Added IGNOREFIRSTCHAR
30     ** G.Hill 10/16/97 ghill@library.berkeley.edu
31     **-----------------------------------------------------------------------
32     ** The following are user-definable options that you can change
33     ** to fine-tune SWISH's default options.
34     **
35     ** 2001-03-13 rasc moved search boolean words from swish.h
36     **
37     ** 2001-05-23 wsm added ranking weights
38     **
39     */
40    
41    
42     #ifdef __VMS /* VMS: these file extensions begin with '_' rather than '.' */
43     #define PROPFILE_EXTENSION "_prop"
44     #define WORDDATA_EXTENSION "_wdata"
45     #define PRESORTED_EXTENSION "_psort"
46     #define BTREE_EXTENSION "_btree"
47     #define ARRAY_EXTENSION "_array"
48     #else /* every other platform uses a '.' separator */
49     #define PROPFILE_EXTENSION ".prop"
50     #define WORDDATA_EXTENSION ".wdata"
51     #define PRESORTED_EXTENSION ".psort"
52     #define BTREE_EXTENSION ".btree"
53     #define ARRAY_EXTENSION ".array"
54     #endif /* __VMS */
55    
56     /* MIN_PROP_COMPRESS_SIZE sets the limit for which properties are compressed.
57     * Swish must be compiled with zlib for this to apply.
58     */
59     #define MIN_PROP_COMPRESS_SIZE 100
60    
61     /* This is the character used to replace UTF-8 characters that cannot be
62     * converted to an ISO 8859-1 (Latin-1) character.
63     */
64     #define ENCODE_ERROR_CHAR ' '
65    
66     /* The file extension to use on the property file is PROPFILE_EXTENSION, defined above.
67     */
68    
69     #define MAX_SORT_STRING_LEN 50
70    
71     /* MAX_SORT_STRING_LEN defines the max string length to use
72     * for sorting properties. Should be long enough to sort ALL
73     * file paths or URLs. Useful if using StoreDescription to store
74     * a large amount of text.
75     */
76    
77     #define USE_DOCPATH_AS_TITLE 1
78    
79     /* If USE_DOCPATH_AS_TITLE is defined then documents that do not have
80     * a title defined (xml and txt, and HTML documents without a title)
81     * will display the document path as the title in results.
82     * Documents without a title will sort as a blank title, and not
83     * by the document path regardless of this setting. This is a change
84     * from versions previous to 2.2.
85     */
86    
87     #ifdef __VMS
88     #define USE_TEMPFILE_EXTENSION "_temp"
89     #else
90     #define USE_TEMPFILE_EXTENSION ".temp"
91     #endif
92    
93     /* If USE_TEMPFILE_EXTENSION is defined then swish will append the supplied
94     * extension onto the index files during indexing, and when indexing is
95     * complete will remove the extension by renaming the files.
96     * This has two important uses when an index file already exists (and is in use):
97     * 1) the old index can be used while indexing is running
98     * 2) a failure during indexing will not destroy the existing index
99     *
100     * Note: This is used instead of a normal temporary file because of possible limitations
101     * in renaming across file systems. Therefore, the temporary index files are
102     * stored in the same directory as the final index files.
103     */
104    
105     #define TEMP_FILE_PREFIX "swtmp"
106    
107     /* TEMP_FILE_PREFIX is prepended to all temporary files. Makes them
108     * easier to find.
109     */
110    
111    
112     #define ALLOW_HTTP_INDEXING_DATA_SOURCE 1
113     #define ALLOW_FILESYSTEM_INDEXING_DATA_SOURCE 1
114     #define ALLOW_EXTERNAL_PROGRAM_DATA_SOURCE 1
115    
116     /* These symbols allow compile-time elimination of indexing
117     ** data sources. Any Data Source that is allowed by these
118     ** symbols can be selected for indexing from the command line.
119     ** Comment out any options you do not want to support, but
120     ** be sure to leave at least one option.
121     */
122    
123     #define INDEXPERMS 0644
124    
125     /* After SWISH generates an index file, it changes the permissions
126     ** of the file to this mode. Change to the mode you like
127     ** (note that it must be an octal number). If you don't want
128     ** permissions to be changed for you, comment out this line.
129     */
130    
131     #define NO_PLIMIT 101 /* NOTE(review): above 100 percent, so presumably means "no percentage limit" -- confirm */
132    
133     #define PLIMIT NO_PLIMIT
134     #define FLIMIT 10000
135    
136     /* SWISH uses these parameters to automatically mark words as
137     ** being too common while indexing. For instance, if PLIMIT were defined
138     ** as 80 and FLIMIT as 256, SWISH would define a common word as
139     ** a word that occurs in over 80% of all indexed files and over
140     ** 256 files. Making these numbers lower will most likely make your
141     ** index files smaller. Making PLIMIT and FLIMIT small will also
142     ** ensure that searching consumes only so much CPU resources.
143     */
144    
145     #define VERBOSE 1
146    
147     /* You can define VERBOSE to be a number from 0 to 4. 0 is totally
148     ** silent operation. The default before swish 2.2 was 3
149     */
150    
151     #define _AND_WORD "and"
152     #define _OR_WORD "or"
153     #define _NOT_WORD "not"
154    
155     /*
156     ** these are the default boolean operator words used by swish search
157     */
158    
159     #define DEFAULT_RULE AND_RULE
160    
161     /* If a list of search words is specified without booleans,
162     ** SWISH will assume they are connected by a default rule.
163     ** This can be AND_RULE or OR_RULE.
164     */
165    
166     #define TITLETOPLINES 12
167    
168     /* This is how many lines deep SWISH will look into an HTML file to
169     ** attempt to find a <TITLE> tag. This has no effect when using the libxml2 parser.
170     */
171    
172    
173     #define MINWORDLIMIT 1
174    
175     /* This is the minimum length of a word. Anything shorter will not
176     ** be indexed.
177     ** Do not change it here. Use MinWordLimit in config file
178     */
179    
180     #define MAXWORDLIMIT 40
181    
182     /* This is the maximum length of a word. Anything longer will not
183     ** be indexed.
184     ** Do not change it here. Use MaxWordLimit in config file
185     */
186    
187     #define CONVERTHTMLENTITIES 1
188    
189     /* If defined as 1, all entities in indexed
190     ** words will be converted to an ASCII equivalent. For instance,
191     ** with this feature you can index the word "resum&eacute;" or
192     ** "resum&#233;" and it will be indexed as the word "resume".
193     ** 2001-01 Do not change it here. Use ConvertHTMLEntities Yes/No in
194     ** the config file.
195     */
196    
197     #define IGNOREALLV 0
198     #define IGNOREALLC 0
199     #define IGNOREALLN 0
200    
201     /* If IGNOREALLV is 1, words containing all vowels won't be indexed.
202     ** If IGNOREALLC is 1, words containing all consonants won't be indexed.
203     ** If IGNOREALLN is 1, words containing all digits won't be indexed.
204     ** Define as 0 to allow words with consistent characters.
205     ** Vowels are defined as "aeiou", digits are "0123456789".
206     */
207    
208     #define IGNOREROWV 60
209     #define IGNOREROWC 60
210     #define IGNOREROWN 60
211    
212     /* IGNOREROWV is the maximum number of consecutive vowels a word can have.
213     ** IGNOREROWC is the maximum number of consecutive consonants a word can have.
214     ** IGNOREROWN is the maximum number of consecutive digits a word can have.
215     ** Vowels are defined as "aeiou", digits are "0123456789".
216     */
217    
218     #define IGNORESAME 100
219    
220     /* IGNORESAME is the maximum times a character can repeat in a word.
221     */
222     /* Dec 6, 2001 - Grabbed "letters" from /usr/local/share/aspell/iso8859-1.dat (http://aspell.sf.net) - moseley */
223     #define WORDCHARS "0123456789abcdefghijklmnopqrstuvwxyzªµºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
224    
225     /*
226     #define WORDCHARS "abcdefghijklmnopqrstuvwxyzÁÂÃÈýÊËÌÐÝÞÍðÎÏÒÓÔÕØÙÛîèãõšœ€ßƒŠŒŽøŸ£ÜžíÀ0123456789"
227     */
228    
229     /* WORDCHARS is a string of characters which SWISH permits to
230     ** be in words. Words are defined by these characters.
231     **
232     ** Also note that if you specify the backslash character (\) or
233     ** double quote (") you need to type a backslash before them to
234     ** make the compiler understand them.
235     */
236    
237     #define BEGINCHARS "0123456789abcdefghijklmnopqrstuvwxyzªµºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
238    
239     /* Of the characters that you decide can go into words, this is
240     ** a list of characters that words can begin with. It should be
241     ** a subset of (or equal to) WORDCHARS.
242     */
243    
244     #define ENDCHARS "0123456789abcdefghijklmnopqrstuvwxyzªµºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
245    
246     /* This is the same as BEGINCHARS, except you're testing for
247     ** valid characters at the ends of words.
248     */
249    
250     #define IGNORELASTCHAR ""
251    
252     /* String of characters that, although valid in the middle of a word,
253     ** need to be disregarded when at the end. It is important to also
254     ** include the given characters in ENDCHARS, otherwise the word will not
255     ** be indexed because it is considered invalid.
256     ** If there are none, just leave the empty string "". Do not erase the line.
257     */
258    
259     #define IGNOREFIRSTCHAR ""
260    
261     /* String of characters that, although valid in the middle of a word,
262     ** need to be disregarded when at the beginning. It is important to also
263     ** include the given characters in BEGINCHARS, otherwise the word will not
264     ** be indexed because it is considered invalid.
265     ** If there are none, just leave the empty string "". Do not erase the line.
266     */
267    
268     #define IGNORE_STOPWORDS_IN_QUERY 1
269    
270     /* Added JM 1/10/98. Setting this to 0 causes a stopword in
271     ** an AND_RULE search to create an empty result. Setting it to 1 (the value
272     ** defined above) simply ignores the stopwords and searches the remaining words.
273     */
274    
275     #define INDEXTAGS 0
276    
277     /* Normally, all data in tags in HTML files (except for words in
278     ** comments or meta tags) is ignored. If you want to index HTML files with the
279     ** text within tags and all, define this to be 1 and not 0.
280     ** NOTE: if you set it to 1 you will not be able to do context nor
281     ** metaNames searches, as tags are just plain text with no specific
282     ** meaning.
283     */
284    
285     // #define BLANK_PROP_VALUE " *BLANK*"
286    
287     /* This affects how blank properties are stored.
288     ** Normally, blank properties are treated as if they were not even contained in
289     ** the document. That is:
290     ** <meta name="author" content="">
291     ** is ignored, and no "author" property is stored for that document.
292     ** If BLANK_PROP_VALUE is set, then blank properties will be stored
293     ** but using the string provided as the property value.
294     ** If you use a leading space, then these properties will sort
295     ** before other properties (since leading whitespace is removed from
296     ** properties), and after documents that do not include the property
297     */
298    
299     #define RANK_TITLE 4
300     #define RANK_HEADER 3
301     #define RANK_META 3
302     #define RANK_COMMENTS 1
303     #define RANK_EMPHASIZED 0
304    
305     /* These symbols affect the weights applied during ranking. Note that they are added
306     ** together and added to a base rank of 1.0 -- thus defining a rank with a value of
307     ** 2.0 really means it is ranked (1.0 + 2.0) times greater than normal.
308     ** A value of 0.0 applies no additional ranking boost. Note that RANK_COMMENTS only
309     ** applies if you are indexing comments. Be sure you understand how these interact
310     ** in getrank; don't just go changing these values!
311     */
312    
313     #define SPIDERDIRECTORY "./"
314    
315     #define SWAP_LOC_DEFAULT 0
316    
317     /* 2001/08 jmruiz -- Default chunk size - Index will work with blocks of files. This number specifies when to coalesce locations to save memory */
318     #define INDEX_DEFAULT_CHUNK_SIZE 10000
319    
320     /* 2001/08 jmruiz -- Default optimal zone size for temporary storage of locations */
321     /* 1<<23 is 8 MB; parenthesized so the macro expands safely inside larger expressions */
322     #define INDEX_DEFAULT_OPTIMAL_CHUNK_ZONE_SIZE_FOR_LOCATIONS (1<<23)
323    
324     /* 2002/06 Number of swap loc files (-e) */
325     #define MAX_LOC_SWAP_FILES 377
326    
327     /* 2001/08 jmruiz -- To avoid emalloc/erealloc in some routines some stack arrays have been added. This is their default size */
328     #define MAX_STACK_POSITIONS 1024
329    
330     /* 2001/08 jmruiz -- Do not change this (it must be an unsigned number) */
331     /* This is the maximum size of a block of coalesced locations; parenthesized so the macro expands safely inside larger expressions */
332     #define COALESCE_BUFFER_MAX_SIZE (1<<18) /* (256 KB) */
333    
334     /* 2001/08 jmruiz -- File System sort flag - 0 means that filenames
335     ** will not be indexed - 1 means that filenames will be indexed */
336     #define SORT_FILENAMES 0
337    
338     /* 2001/10 jmruiz -- Added BTREE schema to store words */
339    
340     //#define USE_BTREE
341    
342    
343     /* 09/00 Jose Ruiz. When set to 1 part of the info is swapped to disk
344     ** to save memory in the index process
345     ** Do not change it. You can activate this option through the command
346     ** line (option -e)
347     */
348    
349     /* Set this to 1 if you are compiling under Win32
350     define _WIN32 1
351     */
352    
353     /* --- BEGIN PORTING-RELATED SYMBOLS --- */
354    
355     #ifdef _WIN32
356     #define NO_SYMBOLIC_FILE_LINKS /* Win32 has no symbolic links */
357     #endif /* _WIN32 */
358    
359     #ifdef __VMS
360     #define NO_SYMBOLIC_FILE_LINKS /* VMS has no symbolic links */
361     #endif /* __VMS */
362    
363     #ifdef _WIN32
364     #undef INDEXPERMS /* Win32 version doesn't use chmod() */
365     #endif /* _WIN32 */
366    
367     #ifdef _WIN32
368     typedef int pid_t; /* process ID */
369     #endif /* _WIN32 */
370    
371     //#ifdef _WIN32
372     //#define TMPDIR "c:\\windows\\temp"
373     //#elif defined(__VMS)
374     //#define TMPDIR "sys$scratch:"
375     //#else
376     //#define TMPDIR "/var/tmp"
377     //#endif
378    
379    
380     /* Default Delimiter of phrase search */
381     #define PHRASE_DELIMITER_CHAR '"'
382    
383    
384     /*
385     * Binary files must be opened with the "b" option under Win32, so all
386     * fopen() calls to index files have to use these mode strings to
387     * keep the code portable.
388     * Note: text files should be opened normally, without the "b" option,
389     * otherwise end-of-line processing is not done correctly (on Win32).
390     */
391     #define F_READ_BINARY "rb"
392     #define F_WRITE_BINARY "wb"
393     #define F_READWRITE_BINARY "rb+"
394    
395     #define F_READ_TEXT "r"
396     #define F_WRITE_TEXT "w"
397     #define F_READWRITE_TEXT "r+"
398    
399    
400    
401     /* #define NEXTSTEP */
402    
403     /* You may need to define this if compiling on a NeXTstep machine.
404     */
405    
406     /* --- END PORTING-RELATED SYMBOLS --- */
407    

  ViewVC Help
Powered by ViewVC 1.1.22