/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/pod/SWISH-PERL.pod
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/pod/SWISH-PERL.pod

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1 - (hide annotations) (download)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch point for: Import, MAIN
Initial revision

1 adcroft 1.1 =head1 NAME
2    
3     SWISH-PERL - Perl Interface to the Swish-e Library
4    
5     =head1 SYNOPSIS
6    
7     use SWISHE;
8    
9     my $indexfilename1 = '/path/to/index1.swish';
10     my $indexfilename2 = '/path/to/index2.swish';
11    
12     # To search for several indexes just put them together
13     $indexfiles = "$indexfilename1 $indexfilename2";
14    
15     my $handle = SwishOpen( $indexfiles )
16     or die "Failed to open '$indexfiles'";
17    
18     # Get a few headers from the index files
19     my @headers = qw/WordCharacters BeginCharacters EndCharacters/;
20     for ( @headers ) {
21     my @h = SwishHeaderParameter( $handle, $_ );
22     print "$_ for index 0 is $h[0]\n",
23     "$_ for index 1 is $h[1]\n\n";
24     }
25    
26    
27     # Now search
28     @standard = ('Rank', 'File Name', 'Title', 'Document Size');
29     @props = qw/prop1 prop2 prop3/;
30    
31     $props = join ' ', @props;
32     $sort = 'prop1 asc prop2 desc';
33     $query = 'meta1=metatest1';
34    
35     my $num_results = SwishSearch($handle, $query, 1, $props, $sort);
36    
37     unless ( $num_results ) {
38     print "No Results\n";
39    
40     my $error = SwishError( $handle );
41     print "Error number: $error\n" if $error;
42    
43     return; # or next.
44     }
45    
46     my %results; # place to store the return values by name
47    
48     while( @results{ @standard, @props } = SwishNext( $handle )) {
49     print "\n";
50     printf("%20s -> '%s'\n", $_, $results{$_}) for @standard, @props;
51     }
52    
53     # No more queries on these indexes
54     SwishClose( $handle );
55    
56     =head1 ABSTRACT
57    
58     SWISHE version 2.1.x creates an archive library of the internal SWISHE
59     C functions. This perl module provides access to those functions by
60     embedding the SWISHE search code in your application. The benefits are
61     faster searches (no need to fork/execute an external program) and avoidance of
62     commonly used unsafe system calls.
63    
64     This module provides direct access to the SWISHE C library functions.
65     For a higher level, object oriented interface to SWISH visit
66     http://search.cpan.org/search?mode=module&query=SWISH
67    
68    
69     =head1 INSTALLATION
70    
71     Before you can build the perl module you must build and install Swish-e.
72     Please read the B<INSTALL> documentation included in the SWISHE
73     distribution package.
74    
75     perldoc INSTALL
76    
77     After building the SWISHE executable and successfully running make test,
78     you will need to install the SWISHE archive library. This is done while
79     in the top-level directory of the SWISHE distribution.
80    
81     % su root
82     % make install-lib
83     % exit
84    
85     This will install the archive library (F<libswish-e.a>) into
86     /usr/local/lib by default.
87    
88     Next, build the perl module.
89    
90     % cd perl
91     % perl Makefile.PL
92     % make
93     % make test
94     % su root
95     % make install
96     % exit
97    
98     If you do not have root access you can instead use
99    
100     % perl Makefile.PL PREFIX=/path/to/my/local/perl/library
101    
102     And then in your perl script:
103    
104     use lib '/path/to/my/local/perl/library';
105    
106    
107     To test it you can run the test.pl script. Type "./test.pl" at your
108     command prompt. This perl script uses the index file built by the
109     "make test" command used during the build of SWISHE as described in the
110     B<INSTALL> document.
111    
112     B<NOTE:> Currently Swish-e will exit the running program on some fatal errors.
113     In general this should not happen, but it is something to think about if
114     running under mod_perl as an error will kill off the web server process.
115     Apache, for example, should recover by starting up a new child process.
116     But, it's not a very graceful way to handle errors.
117    
118     =head1 FUNCTION DESCRIPTIONS
119    
120     The following describes the perl interface to the SWISHE C library.
121    
122     =over 4
123    
124     =item B<$handle = SwishOpen( $IndexFiles );>
125    
126     Opens one or more index files and returns a handle.
127    
128     Examples:
129    
130     $handle = SwishOpen( 'index_file.idx' );
131    
132     # open two indexes
133     $handle = SwishOpen( 'index1.idx index2.idx' );
134    
135     Returns undefined on an error, but the only errors are typically fatal, so
136     will most likely exit the running program.
137    
138    
139     =item B<SwishClose( $handle );>
140    
141     Closes the handle returned by SwishOpen.
142     Closes all the opened files and frees the used memory.
143    
144     =item B<$num_results = SwishSearch($handle, $search, $structure, $properties, $sortspec);>
145    
146     Returns the number of hits, zero for no results, or a negative number.
147     If zero, SwishError( $handle ) will return the error code, which can be passed
148     to SwishErrorString() to fetch an error string.
149    
150     The values passed are:
151    
152     =over 2
153    
154     =item *
155    
156     $handle is the handle returned by SwishOpen
157    
158     =item *
159    
160     $search is the search string.
161    
162     Examples:
163     my $query = 'title="this is a phrase"';
164     my $query = '(title="this phrase") or (description=(these words))';
165    
166     =item *
167    
168     $structure is an integer value only applicable for an html search.
169     It defines where in an html search to look. It can be IN_FILE, IN_TITLE,
170     IN_HEAD, IN_BODY, IN_COMMENTS, IN_HEADER or IN_EMPHASIZED or or'ed
171     combinations of them (e.g.: IN_HEAD | IN_BODY). Use IN_FILE (a value
172     of 1) if your documents are not html. The numerical values for these
173     constants are in src/swish.h
174    
175     You can define these in your code with:
176    
177     # Set bits
178     use constant IN_FILE => 1;
179     use constant IN_TITLE => 2;
180     use constant IN_HEAD => 4;
181    
182     Not many people use the structure feature.
183    
184     =item *
185    
186     $properties is a string with the properties to be returned
187     separated by spaces. Properties must be defined during indexing.
188     See L<SWISH-CONFIG|SWISH-CONFIG> for more information.
189    
190     Example:
191    
192     my $properties = 'subject description';
193    
194     You may also use the swish internal properties:
195    
196     my $properties = 'subject description swishrank swishlastmodified';
197    
198    
199     =item *
200    
201     $sortspec is the sort spec if different from relevance.
202    
203     Examples:
204     my $sortspec = ''; # sort by relevance
205    
206     # sort first in ascending order by title,
207     # then by other fields in descending order
208     my $sortspec = 'title asc category desc category desc';
209    
210     =back
211    
212     =item B<SwishNext( $handle )>
213    
214     ($rank, $filename, $title, $size, @properties) = SwishNext( $handle );
215    
216     This function returns the next hit of a search. Must be executed after
217     SwishSearch to read the results.
218    
219     =over 2
220    
221     =item *
222    
223     $rank - An integer from 1 to 1000 indicating the relevance of the result
224    
225     =item *
226    
227     $filename - The source filename
228    
229     =item *
230    
231     $title - The title as indexed (as found in the HTML E<lt>TITLEE<gt> section)
232    
233     =item *
234    
235     $size - The length of the source document
236    
237     =item *
238    
239     @properties - The list of properties returned for this result.
240    
241     =back
242    
243     See the SYNOPSIS above for an example.
244    
245    
246     =item B<$rc=SwishSeek($handle, $num);>
247    
248     Repositions the pointer in the result list to the element pointed to by $num.
249     It is useful when you want to read only the results starting at $num
250     (e.g. for showing results one page at a time).
251    
252     =item B<$error_number=SwishError($handle);>
253    
254     Returns the last error if any (always a negative value).
255     If there is not an error it will return 0.
256    
257     =item B<$error_string=SwishErrorString( $error_number );>
258    
259     Returns the error string for the number supplied.
260    
261     print 'Error: ', SwishErrorString( SwishError($handle) ), "\n";
262    
263     =item B<@ParameterArray=SwishHeaderParameter($handle,$HeaderParameterName);>
264    
265     This function is useful to access the header data of the index files.
266     Returns the contents of the requested header parameter of all index
267     files opened by SwishOpen in an array.
268    
269     Example:
270    
271     @wordchars = SwishHeaderParameter( $handle, 'WordCharacters' );
272     print "WordCharacters for index 0 = $wordchars[0]\n";
273     print "WordCharacters for index 1 = $wordchars[1]\n";
274    
275    
276     Valid values for HeaderParameterName are:
277    
278     WordCharacters
279     BeginCharacters
280     EndCharacters
281     IgnoreFirstChar
282     IgnoreLastChar
283     Indexed on
284     Description
285     IndexPointer
286     IndexAdmin
287     Stemming
288     Soundex
289    
290     Note that this list may be incomplete. Check the source code or the
291     swish-e discussion list for more info.
292    
293     =item B<@stopwords = SwishStopWords( $handle, $indexfilename );>
294    
295     Returns an array containing all the stopwords stored in the index file
296     pointed to by $indexfilename where $indexfilename must match one of the
297     names used in SwishOpen.
298    
299     Example:
300     @stopwords = SwishStopWords( $handle, $indexfilename );
301     print 'Stopwords: ',
302     join(', ', @stopwords),
303     "\n";
304    
305     =item B<@keywords = SwishWords( $handle, $indexfilename, $c);>
306    
307     Returns an array containing all the keywords stored in the index file
308     pointed to by $indexfilename ($indexfilename must match one of the names
309     used in SwishOpen) and starting with the character $c.
310    
311     Example:
312     my $letter = 't';
313     @keywords = SwishWords( $handle, $indexfilename, $letter);
314    
315     print "List of keywords that start with the letter '$letter':\n",
316     join("\n", @keywords),
317     "\n";
318    
319     =item B<$stemword=SwishStem( $word );>
320    
321     Returns the stemmed word preserving the original one.
322    
323     Example:
324     my $stemword = SwishStem( 'parking' );
325     print $stemword; # prints park
326    
327     =back
328    
329     =head1 SUPPORT
330    
331     Questions about this module and Swish-e should be posted to the Swish-e
332     mailing list. See http://swish-e.org
333    
334    
335     =head1 AUTHOR
336    
337     Jose Ruiz -- jmruiz@boe.es (Documentation by Bill Moseley)
338    
339    
340     =head1 SEE ALSO
341    
342     http://swish-e.org
343    
344     SWISH, SWISH::Library at your local CPAN site.
345    
346    
347     =head1 Document Info
348    
349     $Id: SWISH-PERL.pod,v 1.4 2002/04/15 02:34:43 whmoseley Exp $
350    
351     .

  ViewVC Help
Powered by ViewVC 1.1.22