1 |
adcroft |
1.1 |
#!/usr/local/bin/perl -w |
2 |
|
|
|
3 |
|
|
use strict; |
4 |
|
|
|
5 |
|
|
=pod

Test script for the SWISHE library.

Please see perldoc README-PERL for more information.

$Id: test.pl,v 1.13 2002/08/22 22:58:38 whmoseley Exp $

=cut
11 |
|
|
|
12 |
|
|
# Import symbols from the SWISHE.pm module |
13 |
|
|
use SWISHE; |
14 |
|
|
|
15 |
|
|
|
16 |
|
|
# In this test we will use the same index twice.
# The results will seem odd, since normally you would use
# two different index files, but it demonstrates
# how to process two index files.

my $indexfilename1 = '../tests/test.index';
my $indexfilename2 = $indexfilename1;

# Stop right away, with a hint, when the test index is missing.
die "Index file '$indexfilename1' not found! Did you run make test from the top level directory?\n"
    unless -e $indexfilename1;

my $indexfiles = $indexfilename1;

# To search several indexes, put their names together in one
# space-separated string.
# Uncomment the line below to test/demonstrate the use of two index files.
# It must then replace the single-index assignment above, otherwise
# $indexfiles is declared twice in the same scope.

#my $indexfiles = "$indexfilename1 $indexfilename2";
34 |
|
|
|
35 |
|
|
|
36 |
|
|
# First, open the index files.
# This reads in headers and prepares the index for searching.
# You can run more than one query once the index is opened.

my $handle = SwishOpen( $indexfiles )
    or die "Failed to SwishOpen '$indexfiles'";

# Get a few headers from the index files for display.

my @headers = qw/WordCharacters BeginCharacters EndCharacters IndexedOn FileCount FuzzyIndexingMode/;

for my $header ( @headers ) {
    print_header("Header '$header'");

    # The call is made in list context; each returned value is printed
    # against its position in that list, labelled as the index number.
    my @h = SwishHeaderParameter( $handle, $header );

    print "$header for index $_ is $h[$_]\n" for 0 .. $#h;
}
55 |
|
|
|
56 |
|
|
|
57 |
|
|
# Now, let's run a few queries...

# Define a few searches.
#
# Each entry is a hash ref with:
#   title   - label printed before the query is run
#   query   - the search string
#   props   - whitespace-separated property names to fetch with each result
#   sort    - sort specification ('' in every entry here)
#   context - 1 in every entry here (search the entire file)
#   limit   - optional array ref; when present the search loop passes its
#             elements to SetLimitParameter after the handle

my @searches = (
    {
        title   => 'Normal search',
        query   => 'test',
        props   => '',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'MetaTag search 1',
        query   => 'meta1=metatest1',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'MetaTag search 2',
        query   => 'meta2=metatest2',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'XML Search',
        query   => 'meta3=metatest3',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'Phrase Search',
        query   => '"three little pigs"',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'Advanced search',
        query   => 'test or meta1=m* or meta2=m* or meta3=m*',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    # NOTE(review): identical to the previous entry - presumably a
    # deliberate repeat of the same query; confirm before removing.
    {
        title   => 'Advanced search',
        query   => 'test or meta1=m* or meta2=m* or meta3=m*',
        props   => 'meta1 meta2 meta3',
        sort    => '',
        context => 1,   # Search the entire file
    },
    {
        title   => 'Limit to title',
        query   => 'test or meta1=m* or meta2=m* or meta3=m*',
        props   => 'meta1 meta2 meta3 swishrank swishdocpath swishlastmodified',
        sort    => '',
        context => 1,   # Search the entire file
        limit   => [ 'swishtitle', '<=', 'If you are seeing this, the test' ],
    },
    {
        title   => 'Limit to title - second test with same query',
        query   => 'test or meta1=m* or meta2=m* or meta3=m*',
        props   => 'meta1 meta2 meta3 swishrank swishdocpath swishlastmodified',
        sort    => '',
        context => 1,   # Search the entire file
        limit   => [ 'swishtitle', '<=', 'If you are seeing this, the test' ],
    },
);

# Need an array in perl to deliver the above hash contents to swish in
# the correct order (it is used as a hash slice on each search entry).
my @settings = qw/query context props sort/;
134 |
|
|
|
135 |
|
|
|
136 |
|
|
|
137 |
|
|
print_header("*** Now searching ****");
print "Note that some META names have embedded newlines.\n";

# Use an array for a hash slice when reading results.  See SwishNext below.
my @labels = qw/
    rank
    file_name
    title
    content_length
/;

# Run every search in @searches and print each result with its properties.
for my $search ( @searches ) {
    print_header( "$search->{title} - Query: '$search->{query}'" );

    # Since we *might* use SetLimitParameter, make sure it's reset first
    ClearLimitParameter( $handle );

    if ( $search->{limit} ) {
        print "limiting to @{$search->{limit}}\n\n";
        SetLimitParameter( $handle, @{ $search->{limit} } );
    }

    # Here's the actual query.  The hash slice hands the search settings
    # to SwishSearch in the order given by @settings.
    my $num_results = SwishSearch( $handle, @{$search}{ @settings } );

    print "# Number of results = $num_results\n\n";

    # Zero means nothing matched; a negative count is passed to
    # SwishErrorString to obtain a message.
    if ( $num_results <= 0 ) {
        print( $num_results ? SwishErrorString( $num_results ) : 'No Results' );

        my $error = SwishError( $handle );
        print "\nError number: $error\n" if $error;

        next;
    }

    my %result;
    my @properties = split /\s+/, $search->{props};
    my %props;

    # The first values returned by SwishNext fill the @labels slice and the
    # remaining ones fill the requested properties.  The list assignment
    # evaluates to the number of values returned, so the loop ends when
    # SwishNext returns an empty list.
    while ( ( @result{ @labels }, @props{ @properties } ) = SwishNext( $handle ) ) {

        for my $label ( @labels ) {
            printf( " %20s -> '%s'\n", $label, $result{$label} );
        }

        for my $property ( @properties ) {
            # Only a missing or empty value is shown as blank; a plain
            # truth test would also hide a legitimate value of "0".
            my $value = $props{$property};
            $value = '<blank>' unless defined $value && length $value;

            printf( " %20s:(%-20s) -> '%s'\n", 'Property', $property, $value );
        }

        print "-----------\n";
    }
}
192 |
|
|
|
193 |
|
|
print_header('Other Functions');

# Now, demonstrate the use of SwishStem to find the stem of a word.

my @stemwords = qw/parking libaries library librarians money monies running runs is/;
print "\nStemming:\n";
for my $word ( @stemwords ) {
    # A false return from SwishStem is reported instead of a stem.
    my $stem = SwishStem( $word )
        || 'returned undefined: Word not stemmed for some reason';
    print " '$word' => '$stem'\n";
}
print "\n";

# Grab the stop words from the header

my @stopwords = SwishStopWords( $handle, $indexfilename1 );
print 'Stopwords: ',
    ( @stopwords ? join(', ', @stopwords) : '** None **' ),
    "\n\n";

# Let's see what words in the index begin with the letter "t".

my $letter = 't';
my @keywords = SwishWords( $handle, $indexfilename1, $letter );

print "List of keywords that start with the letter '$letter':\n",
    join("\n", @keywords),
    "\n\n";

# Free the memory.

SwishClose( $handle );

# If swish was built with memory debugging this will dump extra info
SWISHE::MemSummary();
230 |
|
|
|
231 |
|
|
|
232 |
|
|
# print_header( $text )
#
# Prints a banner to the currently selected output handle: a blank line,
# a row of dashes as long as $text, $text itself, and a second row of
# dashes, each followed by a newline.  The return value is that of the
# final print.
sub print_header {
    my ($text) = @_;

    # Build the dash rule once; it is printed above and below the text.
    my $rule = '-' x length $text;

    print "\n", $rule, "\n",
          $text,
          "\n", $rule, "\n";
}
237 |
|
|
|
238 |
|
|
|