/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/perl/SWISHE.pm
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/perl/SWISHE.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (hide annotations) (download) (vendor branch)
Fri Sep 20 19:47:30 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
Importing web-site building process.

1 adcroft 1.1 package SWISHE;
2    
3     require Exporter;    # provides import(), which is driven by @EXPORT below
4     require DynaLoader;  # provides bootstrap(), called below
5    
6     @ISA = qw(Exporter DynaLoader);  # inherit from both so import() and bootstrap() resolve on SWISHE
7    
8     # Default export list: these names are exported to the caller by default (probably shouldn't export everything)
9     @EXPORT = qw(
10     SwishOpen
11     SwishSearch
12     SwishClose
13     SwishNext
14     SwishSeek
15     SwishError
16     SwishHeaderParameter
17     SwishStopWords
18     SwishWords
19     SwishStem
20     SwishErrorString
21     SwishHeaders
22     SetLimitParameter
23     ClearLimitParameter
24     );
25    
26     $VERSION = '0.02';
27    
28     bootstrap SWISHE $VERSION;  # indirect-object form of SWISHE->bootstrap($VERSION); bootstrap comes from DynaLoader via @ISA
29    
30     # Preloaded methods go here (none are defined in this file).
31    
32     # Autoload methods go after __END__, and are processed by the autosplit program.
33    
34     1;  # a module loaded with use/require must end with a true value
35     __END__
36    
37     =head1 NAME
38    
39     SWISH-PERL - Perl Interface to the SWISH-E Library
40    
41     =head1 SYNOPSIS
42    
43     use SWISHE;
44    
45     my $indexfilename1 = '/path/to/index1.swish';
46     my $indexfilename2 = '/path/to/index2.swish';
47    
48     # To search for several indexes just put them together
49     $indexfiles = "$indexfilename1 $indexfilename2";
50    
51     my $handle = SwishOpen( $indexfiles )
52     or die "Failed to open '$indexfiles'";
53    
54     # Get a few headers from the index files
55     my @headers = qw/WordCharacters BeginCharacters EndCharacters/;
56     for ( @headers ) {
57     my @h = SwishHeaderParameter( $handle, $_ );
58     print "$_ for index 0 is $h[0]\n",
59     "$_ for index 1 is $h[1]\n\n";
60     }
61    
62    
63     # Now search
64     @standard = ('Rank', 'File Name', 'Title', 'Document Size');
65     @props = qw/prop1 prop2 prop3/;
66    
67     $props = join ' ', @props;
68     $sort = 'prop1 asc prop2 desc';
69     $query = 'meta1=metatest1';
70    
71     my $num_results = SwishSearch($handle, $query, 1, $props, $sort);
72    
73     if ( $num_results <= 0 ) {
74     print ($num_results ? SwishErrorString( $handle ) : 'No Results');
75    
76     my $error = SwishError( $handle );
77     print "\nError number: $error\n" if $error;
78    
79     return; # or next.
80     }
81    
82     my %results; # place to store the return values by name
83    
84     while( @results{ @standard, @props } = SwishNext( $handle )) {
85     print "\n";
86     printf("%20s -> '%s'\n", $_, $results{$_}) for @standard, @props;
87     }
88    
89     # No more queries on these indexes
90     SwishClose( $handle );
91    
92    
93     =head1 ABSTRACT
94    
95     Swish-e version 2.1.x creates an archive library of the internal Swish-e C functions.
96     This perl module provides access to those functions by embedding the Swish-e code in
97     your application. The benefits are faster searches (no need to fork/execute an external program)
98     and the avoidance of commonly used unsafe system calls.
99    
100     This module provides direct access to the Swish-e C library functions. For a higher level, object
101     oriented interface to SWISH visit http://search.cpan.org/search?mode=module&query=SWISH
102    
103     =head2 Warnings and Gotchas
104    
105     The Swish-e library was created from the executable program (instead of the other way around).
106     The Swish-e executable program handles errors by printing them to STDOUT and then aborting.
107     Not exactly desired behavior from a library.
108     This means you should carefully screen your input data before calling the library.
109    
110     Another minor issue is that the Swish-e library currently includes all code -- both searching and
111     indexing code. Your program will be larger than needed due to this, but hopefully your OS will
112     be smart and share this code across processes and the impact will be minimal.
113    
114     The library is often the last code to get checked during Swish-e development. Test
115     carefully, and watch for memory leaks if running in a persistent environment.
116    
117    
118     =head1 INSTALLATION
119    
120     Before you can build the perl module you must build and install SWISH-E. Please read the
121     B<INSTALL> documentation included in the SWISHE distribution package.
122    
123     perldoc INSTALL
124    
125     After building the SWISHE executable and successfully running make test, you will need to install
126     the SWISHE archive library. This is done while in the top-level directory of the SWISHE distribution.
127    
128     % su root
129     % make install-lib
130     % exit
131    
132     This will install the archive library (F<libswish-e.a>) into /usr/local/lib by default.
133    
134     Next, build the perl module.
135    
136     % cd perl
137     % perl Makefile.PL
138     % make
139     % make test
140     % su root
141     % make install
142     % exit
143    
144     If you do not have root access you can instead use
145    
146     % perl Makefile.PL PREFIX=/path/to/my/local/perl/library
147    
148     And then in your perl script:
149    
150     use lib '/path/to/my/local/perl/library';
151    
152    
153     To test it you can run the test.pl script. Type "./test.pl" at your command prompt.
154     This perl script uses the index file built by the "make test" command used during the build
155     of SWISHE as described in the B<INSTALL> document.
156    
157     B<NOTE> Currently swish will exit the running program on some fatal errors. In general
158     this should not happen, but it is something to think about if running under mod_perl
159     as an error will kill off the web server process. Apache, for example, should recover
160     by starting up a new child process. But, it's not a very graceful way to handle errors.
161    
162     =head1 FUNCTION DESCRIPTIONS
163    
164     The following describes the perl interface to the SWISHE C library.
165    
166     =over 4
167    
168     =item B<$handle = SwishOpen( $IndexFiles );>
169    
170     Opens one or more index files and returns a handle.
171    
172     Examples:
173    
174     $handle = SwishOpen( 'index_file.idx' );
175    
176     # open two indexes
177     $handle = SwishOpen( 'index1.idx index2.idx' );
178    
179     Returns undefined on an error, but the only errors are typically fatal, so
180     will most likely exit the running program.
181    
182     If running under a persistent framework (such as mod_perl) you may run many queries
183     against the $handle. This results in much faster searches. For example, on searching
184     an index with 24,000 files and returning (the same) 400 results, opening and closing the index
185     for each search resulted in about 15 queries per second. Leaving the index open resulted
186     in about 150 queries per second.
187    
188    
189     =item B<SwishClose( $handle );>
190    
191     Closes the handle returned by SwishOpen.
192     Closes all the opened files and frees the used memory.
193    
194     =item B<$num_results = SwishSearch($handle, $search, $structure, $properties, $sortspec);>
195    
196     Returns the number of hits, zero for no results, or a negative number.
197     If negative SwishErrorString( $handle ) will return the error message.
198    
199     The values passed are:
200    
201     =over 2
202    
203     =item *
204    
205     $handle is the handle returned by SwishOpen
206    
207     =item *
208    
209     $search is the search string.
210    
211     Examples:
212     my $query = 'title="this is a phrase"';
213     my $query = '(title="this phrase") or (description=(these words))';
214    
215     =item *
216    
217     $structure is an integer value only applicable for an html search. It defines
218     where in an html search to look.
219     It can be IN_FILE, IN_TITLE, IN_HEAD, IN_BODY, IN_COMMENTS, IN_HEADER or IN_EMPHASIZED or
220     or'ed combinations of them (e.g.: IN_HEAD | IN_BODY).
221     Use IN_FILE (a value of 1) if your documents are not html.
222     The numerical values for these constants are in src/swish.h
223    
224     You can define these in your code with:
225    
226     # Set bits
227     use constant IN_FILE => 1;
228     use constant IN_TITLE => 2;
229     use constant IN_HEAD => 4;
230    
231     Not many people use the structure feature.
232    
233     =item *
234    
235     $properties is a string with the properties to be returned separated by spaces. Properties must
236     be defined during indexing. See B<README-SWISHE> for more information.
237    
238     Example:
239    
240     my $properties = 'subject description';
241    
242     You may also use the swish internal properties:
243    
244     my $properties = 'subject description swishrank swishlastmodified';
245    
246    
247     =item *
248    
249     $sortspec is the sort spec if different from relevance.
250    
251     Examples:
252     my $sortspec = ''; # sort by relevance
253    
254     # sort first in ascending order by title,
255     # then by other fields in descending order
256     my $sortspec = 'title asc category desc category desc';
257    
258     =back
259    
260     =item B<SetLimitParameter( $handle, $property, $low, $high )>
261    
262     This experimental feature allows limiting results to a range of values
263     for a given property. For example, to limit Titles:
264    
265     SetLimitParameter( $handle, 'swishtitle', 'a', 'zzzzzzzzzz' );
266    
267     Limits titles to the range specified.
268    
269     Note, if you do not call SwishOpen() and SwishClose() for every search
270     you must clear the limit selection with C<ClearLimitParameter>.
271    
272    
273     =item B<ClearLimitParameter( $handle )>
274    
275     This function must be called before calling SetLimitParameter() again while
276     making repeated queries on an open index.
277    
278     Once a limit is set on an open index, that limit stays in effect. If you want to
279     change the limit, or just remove the limit you must call ClearLimitParameter().
280     For example:
281    
282     $handle = SwishOpen( $index );
283     while ( $query = next_query() ) {
284     ClearLimitParameter( $handle );
285    
286     my @limits = $query->limits;
287     SetLimitParameter( $handle, @limits ) if @limits;
288    
289     SwishSearch($handle, $query->search, 1 );
290    
291     ...
292     }
293    
294     SwishClose( $handle );
295    
296     There's no harm in calling ClearLimitParameter() if a limit is not going to be used.
297    
298     This behavior may change in the future.
299    
300    
301    
302     =item B<SwishNext( $handle )>
303    
304     ($rank, $filename, $title, $size, @properties) = SwishNext( $handle );
305    
306     This function returns the next hit of a search. Must be executed after SwishSearch to read the results.
307    
308     =over 2
309    
310     =item *
311    
312     $rank - An integer from 1 to 1000 indicating the relevance of the result
313    
314     =item *
315    
316     $filename - The source filename
317    
318     =item *
319    
320     $title - The title as indexed (as found in the HTML E<lt>TITLEE<gt> section)
321    
322     =item *
323    
324     $size - The length of the source document
325    
326     =item *
327    
328     @properties - The list of properties returned for this result.
329    
330     =back
331    
332     See the SYNOPSIS above for an example.
333    
334    
335     =item B<$rc=SwishSeek($handle, $num);>
336    
337     Repositions the pointer in the result list to the element indicated by $num.
338     It is useful when you want to read only the results starting at $num (e.g. for showing
339     results one page at a time).
340    
341     =item B<$error_number=SwishError($handle);>
342    
343     Returns the last error if any (always a negative value).
344     If there is not an error it will return 0.
345    
346     =item B<$error_string=SwishErrorString( $handle );>
347    
348     Returns the error string for the last error on $handle.
349    
350     print 'Error: ', SwishErrorString( $handle ), "\n";
351    
352     =item B<@ParameterArray=SwishHeaderParameter($handle,$HeaderParameterName);>
353    
354     This function is useful to access the header data of the index files.
355     Returns the contents of the requested header parameter of all index files
356     opened by SwishOpen in an array.
357    
358     Example:
359    
360     @wordchars = SwishHeaderParameter( $handle, 'WordCharacters' );
361     print "WordCharacters for index 0 = $wordchars[0]\n";
362     print "WordCharacters for index 1 = $wordchars[1]\n";
363    
364    
365     Valid values for HeaderParameterName are:
366    
367     WordCharacters
368     BeginCharacters
369     EndCharacters
370     IgnoreFirstChar
371     IgnoreLastChar
372     Indexed on
373     Description
374     IndexPointer
375     IndexAdmin
376     Stemming
377     Soundex
378    
379     Note that this list may be incomplete. Check the source code or the swish-e
380     discussion list for more info.
381    
382     =item B<@stopwords = SwishStopWords( $handle, $indexfilename );>
383    
384     Returns an array containing all the stopwords stored in the index file named by $indexfilename
385     where $indexfilename must match one of the names used in SwishOpen.
386    
387     Example:
388     @stopwords = SwishStopWords( $handle, $indexfilename );
389     print 'Stopwords: ',
390     join(', ', @stopwords),
391     "\n";
392    
393     =item B<@keywords = SwishWords( $handle, $indexfilename, $c);>
394    
395     Returns an array containing all the keywords stored in the index file pointed by
396     $indexfilename ($indexfilename must match one of the names used in SwishOpen)
397     and starting with the character $c.
398    
399     Example:
400     my $letter = 't';
401     @keywords = SwishWords( $handle, $indexfilename, $letter);
402    
403     print "List of keywords that start with the letter '$letter':\n",
404     join("\n", @keywords),
405     "\n";
406    
407     =item B<$stemword=SwishStem( $word );>
408    
409     Returns the stemmed word preserving the original one.
410    
411     Example:
412     my $stemword = SwishStem( 'parking' );
413     print $stemword; # prints park
414    
415     =back
416    
417     =head1 SUPPORT
418    
419     Questions about this module and SWISHE should be posted to the SWISHE mailing list.
420     See http://swish-e.org
421    
422    
423     =head1 AUTHOR
424    
425     Jose Ruiz -- jmruiz@boe.es (Documentation by Bill Moseley)
426    
427    
428     =head1 SEE ALSO
429    
430     http://swish-e.org
431    
432     SWISH, SWISH::Library at your local CPAN site.
433    
434    
435     =head1 Document Info
436    
437     $Id: SWISHE.pm,v 1.9 2002/08/22 22:58:38 whmoseley Exp $
438    
439    
440    
441    

  ViewVC Help
Powered by ViewVC 1.1.22