#!/usr/local/bin/perl -w

use strict;

=pod

Test script for the SWISHE library

Please see perldoc README-PERL for more information

$Id: test.pl,v 1.13 2002/08/22 22:58:38 whmoseley Exp $

=cut

# Import symbols from the SWISHE.pm module
use SWISHE;


# In this test we will use the same index twice
# The results will seem odd since normally you would use
# two different index files, but it demonstrates
# how to process two index files.

my $indexfilename1 = '../tests/test.index';
my $indexfilename2 = $indexfilename1;

# Bail out early with a hint if the test index has not been built yet.
die "Index file '$indexfilename1' not found!  Did you run make test from the top level directory?\n"
    unless -e $indexfilename1;

my $indexfiles = $indexfilename1;


# To search for several indexes just put them together
# Uncomment to test/demonstrate the use of two index files

#my $indexfiles = "$indexfilename1 $indexfilename2";


# First, open the index files.
# This reads in headers and prepares the index for searching
# You can run more than one query once the index is opened.
# $handle is passed to every SWISHE call made later in this script.

my $handle = SwishOpen( $indexfiles )
    or die "Failed to SwishOpen '$indexfiles'";

# Get a few headers from the index files for display.
# SwishHeaderParameter is called in list context and every element it
# returns is printed against its position in the list (presumably one
# value per open index file -- confirm against the SWISHE documentation).

my @headers = ( qw/WordCharacters BeginCharacters EndCharacters IndexedOn FileCount FuzzyIndexingMode/ );

for my $header ( @headers ) {
    print_header("Header '$header'");

    my @h = SwishHeaderParameter( $handle, $header );

    print "$header for index $_ is $h[$_]\n" for 0..$#h;
}


# Now, let's run a few queries...

# Define a few searches.
#
# Each entry is a hash with these keys:
#   title   - label printed in the banner before the query is run
#   query   - the search string handed to SwishSearch
#   props   - whitespace separated property names to display per result
#   sort    - sort specification handed to SwishSearch ('' here)
#   context - context value handed to SwishSearch
#   limit   - optional array ref; when present its elements are passed
#             to SetLimitParameter before the search is run

my @searches = (
    {
        title   => 'Normal search',
        query   => 'test',
        props   => '',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'MetaTag search 1',
        query   => 'meta1=metatest1',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'MetaTag search 2',
        query   => 'meta2=metatest2',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'XML Search',
        query   => 'meta3=metatest3',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'Phrase Search',
        query   => '"three little pigs"',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'Advanced search',
        query   => 'test or meta1=m* or meta2=m* or meta3=m*',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    # NOTE(review): identical to the previous entry -- presumably kept on
    # purpose so the same query is run twice on one handle; confirm.
    {
        title   => 'Advanced search',
        query   => 'test or meta1=m* or meta2=m* or meta3=m*',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'Limit to title',
        query   => 'test or meta1=m* or meta2=m* or meta3=m*',
        props   => 'meta1 meta2 meta3 swishrank swishdocpath swishlastmodified',
        sort    => '',
        context => 1,   # Search the entire file
        limit   => [ 'swishtitle', '<=', 'If you are seeing this, the test' ],
    },
    {
        title   => 'Limit to title - second test with same query',
        query   => 'test or meta1=m* or meta2=m* or meta3=m*',
        props   => 'meta1 meta2 meta3 swishrank swishdocpath swishlastmodified',
        sort    => '',
        context => 1,   # Search the entire file
        limit   => [ 'swishtitle', '<=', 'If you are seeing this, the test' ],
    },
);

# Need an array in perl to deliver the above hash contents to swish in
# the correct order.  It is used as a hash slice on each search entry
# when calling SwishSearch.
my @settings = qw/query context props sort/;


print_header("*** Now searching ****");
print "Note that some META names have embedded newlines.\n";


# Use an array for a hash slice when reading results.  See SwishNext below.
# The leading values returned by SwishNext are stored under these names,
# in this order.
my @labels = qw/
    rank
    file_name
    title
    content_length
/;


# Run every search in @searches against the open index handle and print
# the number of hits followed by the standard fields and any requested
# properties of each result.
for my $search ( @searches ) {
    print_header( "$search->{title} - Query: '$search->{query}'" );


    # Since we *might* use SetLimitParameter, make sure it's reset first
    ClearLimitParameter( $handle );

    if ( $search->{limit} ) {
        print "limiting to @{$search->{limit}}\n\n";
        SetLimitParameter( $handle, @{ $search->{limit} } );
    }

    # Here's the actual query.  The hash slice hands the entry's values
    # to SwishSearch in the order given by @settings.

    my $num_results = SwishSearch( $handle, @{$search}{ @settings } );

    print "# Number of results = $num_results\n\n";

    # Zero means no hits; a negative count is treated as an error code.
    if ( $num_results <= 0 ) {
        print( $num_results ? SwishErrorString( $num_results ) : 'No Results' );

        my $error = SwishError( $handle );
        print "\nError number: $error\n" if $error;

        next;
    }

    my %result;
    my @properties = split /\s+/, $search->{props};
    my %props;

    # The list assignment is evaluated in scalar context, so the loop ends
    # as soon as SwishNext returns an empty list.
    while ( ( @result{ @labels }, @props{ @properties } ) = SwishNext( $handle ) ) {

        for my $label ( @labels ) {
            printf( " %20s -> '%s'\n", $label, $result{$label} );
        }

        for my $property ( @properties ) {
            # Only a missing or empty value is shown as <blank>; a
            # legitimate value of 0 must be printed as is.
            my $value = $props{$property};
            $value = '<blank>' unless defined $value && length $value;

            printf( " %20s:(%-20s) -> '%s'\n", 'Property', $property, $value );
        }

        print "-----------\n";
    }
}

print_header('Other Functions');


# Now, demonstrate the use of SwishStem to find the stem of a word.

my @stemwords = qw/parking libaries library librarians money monies running runs is/;
print "\nStemming:\n";
for my $word ( @stemwords ) {
    # A false return from SwishStem is reported instead of a stem.
    my $stem = SwishStem( $word ) || 'returned undefined: Word not stemmed for some reason';
    print " '$word' => '" . $stem . "'\n";
}
print "\n";



# Grab the stop words from the header

my @stopwords = SwishStopWords( $handle, $indexfilename1 );
print 'Stopwords: ',
      ( @stopwords ? join(', ', @stopwords) : '** None **' ),
      "\n\n";


# Let's see what words in the index begin with the letter "t".

my $letter = 't';
my @keywords = SwishWords( $handle, $indexfilename1, $letter);

print "List of keywords that start with the letter '$letter':\n",
      join("\n", @keywords),
      "\n\n";



# Free the memory.

SwishClose( $handle );

# If swish was built with memory debugging this will dump extra info
SWISHE::MemSummary();


# Print a banner: a blank line, then the given text framed above and
# below by a rule of dashes exactly as long as the text.
#
#   $_[0] - text to display; an undefined value is treated as ''
#
# Output goes to the currently selected output handle.  The sub returns
# whatever print returns.
sub print_header {
    my ($text) = @_;
    $text = '' unless defined $text;    # avoid warnings under -w

    my $rule = '-' x length $text;

    print "\n", $rule, "\n",
          $text,
          "\n", $rule, "\n";
}
