/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**--------------------------------------------------------------------------
** Config file edited by Roy Tennant 2/20/96
** Config file edited by Giulia Hill 2/27/97 to increase length of
** words that are indexed
** Added IGNORELASTCHAR
** G. Hill 3/12/97 ghill@library.berkeley.edu
**
** Added OKNOMETA to allow no failing in case the META name is
** not listed in the config.h
** G. Hill 4/15/97 ghill@library.berkeley.edu
**
** Added IGNOREFIRSTCHAR
** G.Hill 10/16/97 ghill@library.berkeley.edu
**-----------------------------------------------------------------------
** The following are user-definable options that you can change
** to fine-tune SWISH's default options.
**
** 2001-03-13 rasc moved search boolean words from swish.h
**
** 2001-05-23 wsm added ranking weights
**
*/
/* File-name extensions for the property, word-data, presorted, btree and
 * array files. VMS builds use an underscore as the separator; every other
 * platform uses a dot.
 */
#if defined(__VMS)
#  define PROPFILE_EXTENSION  "_prop"
#  define WORDDATA_EXTENSION  "_wdata"
#  define PRESORTED_EXTENSION "_psort"
#  define BTREE_EXTENSION     "_btree"
#  define ARRAY_EXTENSION     "_array"
#else
#  define PROPFILE_EXTENSION  ".prop"
#  define WORDDATA_EXTENSION  ".wdata"
#  define PRESORTED_EXTENSION ".psort"
#  define BTREE_EXTENSION     ".btree"
#  define ARRAY_EXTENSION     ".array"
#endif /* __VMS */
/* MIN_PROP_COMPRESS_SIZE sets the size limit that decides which properties
* are compressed. Property compression requires swish to be compiled with
* zlib.
* NOTE(review): presumably properties shorter than this limit are stored
* uncompressed -- confirm against the property-writing code.
*/
#define MIN_PROP_COMPRESS_SIZE 100
/* ENCODE_ERROR_CHAR is the character used to replace UTF-8 characters that
* cannot be converted to an 8859-1 (Latin-1) character.
*/
#define ENCODE_ERROR_CHAR ' '
/* The file extension used on the property file is PROPFILE_EXTENSION.
* (This remark does not describe the define that follows it.)
*/
#define MAX_SORT_STRING_LEN 50
/* MAX_SORT_STRING_LEN defines the maximum string length to use
* when sorting properties. It should be long enough to sort ALL
* file paths or URLs. Useful if using StoreDescription to store
* a large amount of text.
*/
#define USE_DOCPATH_AS_TITLE 1
/* If USE_DOCPATH_AS_TITLE is defined then documents that do not have
* a title defined (xml and txt, and HTML documents without a title)
* will display the document path as the title in results.
* Documents without a title will sort as a blank title, and not
* by the document path, regardless of this setting. This is a change
* from versions previous to 2.2.
* NOTE(review): the description speaks of the symbol being "defined", so
* presumably it must be commented out (not set to 0) to disable it -- confirm.
*/
/* USE_TEMPFILE_EXTENSION: when defined, swish appends this extension to the
 * index files while indexing, and removes it again by renaming the files once
 * indexing is complete.
 * This has two important uses when an index file already exists (and is in use):
 *   1) the old index can be used while indexing is running
 *   2) a failure during indexing will not destroy the existing index
 *
 * Note: this is used instead of a normal temporary file because of possible
 * limitations in renaming across file systems. Therefore, the temporary index
 * files are stored in the same directory as the final index files.
 */
#if defined(__VMS)
#  define USE_TEMPFILE_EXTENSION "_temp"
#else
#  define USE_TEMPFILE_EXTENSION ".temp"
#endif /* __VMS */
#define TEMP_FILE_PREFIX "swtmp"
/* TEMP_FILE_PREFIX is prepended to the names of all temporary files,
* which makes them easier to find.
*/
#define ALLOW_HTTP_INDEXING_DATA_SOURCE 1
#define ALLOW_FILESYSTEM_INDEXING_DATA_SOURCE 1
#define ALLOW_EXTERNAL_PROGRAM_DATA_SOURCE 1
/* These symbols allow compile-time elimination of indexing
** data sources (HTTP, file system, external program). Any data
** source that is allowed by these symbols can be selected for
** indexing from the command line.
** Comment out any options you do not want to support, but
** be sure to leave at least one option.
*/
#define INDEXPERMS 0644
/* After SWISH generates an index file, it changes the permissions
** of the file to this mode. Change to the mode you like
** (note that it must be an octal number, hence the leading 0).
** If you don't want permissions to be changed for you, comment
** out this line.
** This symbol is #undef'd again for Win32 builds in the porting
** section of this file.
*/
#define NO_PLIMIT 101
#define PLIMIT NO_PLIMIT
#define FLIMIT 10000
/* SWISH uses these parameters to automatically mark words as
** being too common while indexing. For instance, if I defined PLIMIT
** as 80 and FLIMIT as 256, SWISH would define a common word as
** a word that occurs in over 80% of all indexed files and over
** 256 files. Making these numbers lower will most likely make your
** index files smaller. Making PLIMIT and FLIMIT small will also
** ensure that searching consumes only so much CPU resources.
**
** PLIMIT defaults to NO_PLIMIT (101), a value above any possible
** percentage.
** NOTE(review): presumably this disables the common-word check by
** default -- confirm against the indexing code.
*/
#define VERBOSE 1
/* You can define VERBOSE to be a number from 0 to 4. 0 is totally
** silent operation. The default before swish 2.2 was 3.
*/
#define _AND_WORD "and"
#define _OR_WORD "or"
#define _NOT_WORD "not"
/*
** These are the default boolean operator words used by swish search.
**
** NOTE(review): identifiers that begin with an underscore followed by an
** uppercase letter are reserved for the implementation by the C standard.
** The names are kept as they are because other files refer to them.
*/
#define DEFAULT_RULE AND_RULE
/* If a list of search words is specified without booleans,
** SWISH will assume they are connected by a default rule.
** This can be AND_RULE or OR_RULE (neither is defined in this
** part of the file).
*/
#define TITLETOPLINES 12
/* This is how many lines deep SWISH will look into an HTML file to
** attempt to find a <title> tag. This has no effect when using the
** libxml2 parser.
*/
#define MINWORDLIMIT 1
/* This is the minimum length of a word. Anything shorter will not
** be indexed.
** Do not change it here. Use MinWordLimit in the config file.
*/
#define MAXWORDLIMIT 40
/* This is the maximum length of a word. Anything longer will not
** be indexed.
** Do not change it here. Use MaxWordLimit in the config file.
*/
#define CONVERTHTMLENTITIES 1
/* If defined as 1, all entities in indexed
** words will be converted to an ASCII equivalent. For instance,
** with this feature you can index the word "resum&eacute;" or
** "resum&#233;" and it will be indexed as the word "resume".
** 2001-01 Do not change it here. Use ConvertHTMLEntities Yes/No in
** the config file.
*/
#define IGNOREALLV 0
#define IGNOREALLC 0
#define IGNOREALLN 0
/* If IGNOREALLV is 1, words consisting only of vowels won't be indexed.
** If IGNOREALLC is 1, words consisting only of consonants won't be indexed.
** If IGNOREALLN is 1, words consisting only of digits won't be indexed.
** Define as 0 to allow words with consistent characters.
** Vowels are defined as "aeiou", digits are "0123456789".
*/
#define IGNOREROWV 60
#define IGNOREROWC 60
#define IGNOREROWN 60
/* IGNOREROWV is the maximum number of consecutive vowels a word can have.
** IGNOREROWC is the maximum number of consecutive consonants a word can have.
** IGNOREROWN is the maximum number of consecutive digits a word can have.
** Vowels are defined as "aeiou", digits are "0123456789".
*/
#define IGNORESAME 100
/* IGNORESAME is the maximum number of times a character can repeat in a word.
*/
/* Dec 6, 2001 - Grabbed "letters" from /usr/local/share/aspell/iso8859-1.dat (http://aspell.sf.net) - moseley */
#define WORDCHARS "0123456789abcdefghijklmnopqrstuvwxyz"
/*
#define WORDCHARS "abcdefghijklmnopqrstuvwxyz߃ܞ0123456789"
*/
/* NOTE(review): the disabled alternative above is meant to add the
** ISO-8859-1 letters taken from aspell, but its non-ASCII characters
** appear to have been damaged by a character-set conversion. Restore
** them from iso8859-1.dat before enabling it.
*/
/* WORDCHARS is a string of characters which SWISH permits to
** be in words. Words are defined by these characters.
**
** Also note that if you specify the backslash character (\) or
** double quote (") you need to type a backslash before them to
** make the compiler understand them.
*/
#define BEGINCHARS "0123456789abcdefghijklmnopqrstuvwxyz"
/* Of the characters that you decide can go into words, this is
** the list of characters that words can begin with. It should be
** a subset of (or equal to) WORDCHARS.
*/
#define ENDCHARS "0123456789abcdefghijklmnopqrstuvwxyz"
/* This is the same as BEGINCHARS, except that it lists the
** characters that are valid at the ends of words.
*/
#define IGNORELASTCHAR ""
/* String of characters that, although considered valid in the middle of
** a word, need to be disregarded when they occur at the end. It is
** important to also list these characters in ENDCHARS, otherwise the word
** will not be indexed because it is considered invalid.
** If there are none, just leave the empty string "". Do not erase the line.
*/
#define IGNOREFIRSTCHAR ""
/* String of characters that, although considered valid in the middle of
** a word, need to be disregarded when they occur at the beginning. It is
** important to also list these characters in BEGINCHARS, otherwise the word
** will not be indexed because it is considered invalid.
** If there are none, just leave the empty string "". Do not erase the line.
*/
#define IGNORE_STOPWORDS_IN_QUERY 1
/* Added JM 1/10/98. Setting this to 0 causes a stopword in
** an AND_RULE search to create an empty result. Setting it to 1 (the
** value defined above) simply ignores the stopwords and does a search
** on the remaining words.
*/
#define INDEXTAGS 0
/* Normally, all data in tags in HTML files (except for words in
** comments or meta tags) is ignored. If you want to index HTML files with the
** text within tags and all, define this to be 1 and not 0.
** NOTE: if you set it to 1 you will not be able to do context or
** metaNames searches, as tags are then just plain text with no specific
** meaning.
*/
// #define BLANK_PROP_VALUE " *BLANK*"
/* This affects how blank properties are stored.
** Normally, blank properties are treated as if they were not even contained in
** the document. That is:
**     <meta name="author" content="">
** is ignored, and no "author" property is stored for that document.
** If BLANK_PROP_VALUE is set, then blank properties will be stored
** but using the string provided as the property value.
** If you use a leading space, then these properties will sort
** before other properties (since leading whitespace is removed from
** properties), and after documents that do not include the property.
*/
#define RANK_TITLE 4
#define RANK_HEADER 3
#define RANK_META 3
#define RANK_COMMENTS 1
#define RANK_EMPHASIZED 0
/* These symbols affect the weights applied during ranking. Note that they are added
** together and added to a base rank of 1.0 -- thus defining a rank with a value of
** 2.0 really means it is ranked (1.0 + 2.0) times greater than normal.
** A value of 0.0 applies no additional ranking boost. Note that RANK_COMMENTS only
** applies if you are indexing comments. Be sure you understand how these interact
** in getrank; don't just go changing these values!
*/
/* NOTE(review): SPIDERDIRECTORY is a directory path that defaults to the
** current directory; presumably it is where the HTTP spider helper is
** looked for -- confirm against the HTTP indexing code.
*/
#define SPIDERDIRECTORY "./"
/* NOTE(review): SWAP_LOC_DEFAULT is presumably the default for swapping
** location data to disk (command-line option -e); 0 would mean the
** feature is off by default -- confirm against the option parsing code.
*/
#define SWAP_LOC_DEFAULT 0
/* 2001/08 jmruiz -- Default chunk size - the indexer works with blocks of files.
** This number specifies when to coalesce locations to save memory. */
#define INDEX_DEFAULT_CHUNK_SIZE 10000
/* 2001/08 jmruiz -- Default optimal zone size for temporary storage of locations.
** 1<<23 is 8 MB.
**
** The expansion is parenthesized so the macro can be used safely inside a
** larger expression. Without the parentheses, "SIZE + 1" would expand to
** "1<<23 + 1", which parses as 1<<24 because << binds more loosely than
** the arithmetic operators.
*/
#define INDEX_DEFAULT_OPTIMAL_CHUNK_ZONE_SIZE_FOR_LOCATIONS (1<<23)
/* 2002/06 Number of swap location files (used with command-line option -e) */
#define MAX_LOC_SWAP_FILES 377
/* 2001/08 jmruiz -- To avoid emalloc/erealloc in some routines, some stack
** arrays have been added. This is their default size (number of elements). */
#define MAX_STACK_POSITIONS 1024
/* 2001/08 jmruiz -- Do not change this (it must be an unsigned number) */
/* This is the maximum size of a block of coalesced locations.
** The expansion is parenthesized so the macro can be used safely inside a
** larger expression. Without the parentheses, "SIZE - 1" would expand to
** "1<<18 - 1", which parses as 1<<17 because << binds more loosely than
** the arithmetic operators.
*/
#define COALESCE_BUFFER_MAX_SIZE (1<<18) /* (256 KB) */
/* 2001/08 jmruiz -- File System sort flag - 0 means that filenames
** will not be indexed - 1 means that filenames will be indexed.
** NOTE(review): the macro name and "sort flag" suggest that "sorted" is
** meant rather than "indexed" -- confirm against the file-system
** indexing code. */
#define SORT_FILENAMES 0
/* 2001/10 jmruiz -- Added BTREE schema to store words */
//#define USE_BTREE
/* 09/00 Jose Ruiz. When set to 1, part of the info is swapped to disk
** to save memory in the index process.
** Do not change it. You can activate this option through the command
** line (option -e).
** (This note appears to describe SWAP_LOC_DEFAULT.)
*/
/* Set this to 1 if you are compiling under Win32
define _WIN32 1
*/
/* --- BEGIN PORTING-RELATED SYMBOLS --- */
/* Neither Win32 nor VMS has symbolic links. */
#if defined(_WIN32) || defined(__VMS)
#  define NO_SYMBOLIC_FILE_LINKS
#endif

/* Win32-only adjustments. */
#if defined(_WIN32)
#  undef INDEXPERMS        /* Win32 version doesn't use chmod() */
typedef int pid_t;         /* process ID */
#endif
//#ifdef _WIN32
//#define TMPDIR "c:\\windows\\temp"
//#elif defined(__VMS)
//#define TMPDIR "sys$scratch:"
//#else
//#define TMPDIR "/var/tmp"
//#endif
/* Default delimiter character for phrase searches (the double quote). */
#define PHRASE_DELIMITER_CHAR '"'
/*
* fopen() mode strings.
* Binary files must be opened with the "b" option under Win32, so all
* fopen() calls on index files should use the *_BINARY modes below to
* keep the code portable.
* Note: text files should be opened normally, without the "b" option,
* otherwise end-of-line processing is not done correctly (on Win32).
* The READWRITE modes ("rb+" / "r+") open a file for update without
* truncating it.
*/
#define F_READ_BINARY "rb"
#define F_WRITE_BINARY "wb"
#define F_READWRITE_BINARY "rb+"
#define F_READ_TEXT "r"
#define F_WRITE_TEXT "w"
#define F_READWRITE_TEXT "r+"
/* #define NEXTSTEP */
/* You may need to define this if compiling on a NeXTstep machine.
*/
/* --- END PORTING-RELATED SYMBOLS --- */