/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/example/modules/DefaultHighlight.pm
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/example/modules/DefaultHighlight.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:30 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
Importing web-site building process.

1 #=======================================================================
2 # Default Highlighting Code
3 #
4 # $Id: DefaultHighlight.pm,v 1.1 2001/12/06 07:32:11 whmoseley Exp $
5 #=======================================================================
6 package DefaultHighlight;
7 use strict;
8
9 sub new {
10 my ( $class, $results, $metaname ) = @_;
11
12
13 my $self = bless {
14 results => $results, # just in case we need a method
15 settings=> $results->config('highlight'),
16 metaname=> $metaname,
17 }, $class;
18
19 # parse out the query into words
20 my $query = $results->extract_query_match;
21
22
23 # Do words exist for this layer (all text at this time) and metaname?
24 # This is a reference to an array of phrases and words
25
26 $self->{description_prop} = $results->config('description_prop') || '';
27
28 if ( $results->header('stemming applied') =~ /^(?:1|yes)$/i ) {
29 eval { require SWISH::Stemmer };
30 if ( $@ ) {
31 $results->errstr('Stemmed index needs Stemmer.pm to highlight: ' . $@);
32 } else {
33 $self->{stemmer_function} = \&SWISH::Stemmer::SwishStem;
34 }
35 }
36
37
38 if ( $query && exists $query->{text}{$metaname} ) {
39 $self->{query} = $query->{text}{$metaname};
40
41 $self->set_match_regexp;
42 }
43
44 return $self;
45 }
46
47 sub highlight {
48 my ( $self, $properties ) = @_;
49
50
51 return unless $self->{query};
52
53 my $phrase_array = $self->{query};
54
55 my $settings = $self->{settings};
56 my $metaname = $self->{metaname};
57
58 # Do we care about this meta?
59 return unless exists $settings->{meta_to_prop_map}{$metaname};
60
61 # Get the related properties
62 my @props = @{ $settings->{meta_to_prop_map}{$metaname} };
63
64 my %checked;
65
66 for ( @props ) {
67 if ( $properties->{$_} ) {
68 $checked{$_}++;
69 $self->highlight_text( \$properties->{$_}, $phrase_array );
70 }
71 }
72
73
74 # Truncate the description, if not processed.
75 my $description = $self->{description_prop};
76 if ( $description && !$checked{ $description } && $properties->{$description} ) {
77 my $max_words = $settings->{max_words} || 100;
78 my @words = split /\s+/, $properties->{$description};
79 if ( @words > $max_words ) {
80 $properties->{$description} = join ' ', @words[0..$max_words], '<b>...</b>';
81 }
82 }
83
84 }
85
86
87
88 #==========================================================================
89 #
90
91 sub highlight_text {
92
93 my ( $self, $text_ref, $phrase_array ) = @_;
94
95 my $wc_regexp = $self->{wc_regexp};
96 my $extract_regexp = $self->{extract_regexp};
97 my $match_regexp = $self->{match_regexp};
98
99
100 my $last = 0;
101
102 my $settings = $self->{settings};
103
104 my $Show_Words = $settings->{show_words} || 10;
105 my $Occurrences = $settings->{occurrences} || 5;
106 my $Max_Words = $settings->{max_words} || 100;
107 my $On = $settings->{highlight_on} || '<b>';
108 my $Off = $settings->{highlight_off} || '</b>';
109
110
111 my $stemmer_function = $self->{stemmer_function};
112
113
114 # Should really call unescapeHTML(), but then would need to escape <b> from escaping.
115 my @words = split /$wc_regexp/, $$text_ref;
116
117
118 return 'No Content saved: Check StoreDescription setting' unless @words;
119
120 my @flags;
121 $flags[$#words] = 0; # Extend array.
122
123 my $occurrences = $Occurrences ;
124
125
126 my $pos = 0;
127
128 while ( $Show_Words && $pos <= $#words ) {
129
130 # Check if the word is a swish word (ignoring begin and end chars)
131 if ( $words[$pos] =~ /$extract_regexp/ ) {
132
133
134 my ( $begin, $word, $end ) = ( $1, $2, $3 );
135
136 my $test = $stemmer_function
137 ? $stemmer_function->($word)
138 : lc $word;
139
140 $test ||= lc $word;
141
142 # Not check if word matches
143 if ( $test =~ /$match_regexp/ ) {
144
145 $words[$pos] = "$begin$On$word$Off$end";
146
147
148 my $start = $pos - ($Show_Words-1)* 2;
149 my $end = $pos + ($Show_Words-1)* 2;
150 if ( $start < 0 ) {
151 $end = $end - $start;
152 $start = 0;
153 }
154
155 $end = $#words if $end > $#words;
156
157 $flags[$_]++ for $start .. $end;
158
159
160 # All done, and mark where to stop looking
161 if ( $occurrences-- <= 0 ) {
162 $last = $end;
163 last;
164 }
165 }
166 }
167
168 $pos += 2; # Skip to next wordchar word
169 }
170
171
172 my $dotdotdot = ' <b>...</b> ';
173
174
175 my @output;
176
177 my $printing;
178 my $first = 1;
179 my $some_printed;
180
181 if ( $Show_Words && @words > 50 ) { # don't limit context if a small number of words
182 for my $i ( 0 ..$#words ) {
183
184
185 if ( $last && $i >= $last && $i < $#words ) {
186 push @output, $dotdotdot;
187 last;
188 }
189
190 if ( $flags[$i] ) {
191
192 push @output, $dotdotdot if !$printing++ && !$first;
193 push @output, $words[$i];
194 $some_printed++;
195
196 } else {
197 $printing = 0;
198 }
199
200 $first = 0;
201
202
203 }
204 }
205
206 if ( !$some_printed ) {
207 for my $i ( 0 .. $Max_Words ) {
208 if ( $i > $#words ) {
209 $printing++;
210 last;
211 }
212 push @output, $words[$i];
213 }
214 }
215
216
217
218 push @output, $dotdotdot if !$printing;
219
220 $$text_ref = join '', @output;
221
222
223 }
224
225 #============================================
226 # Returns compiled regular expressions for matching
227 #
228
229 sub set_match_regexp {
230 my $self = shift;
231
232 my $results = $self->{results};
233
234
235 my $wc = $results->header('wordcharacters');
236 my $ignoref = $results->header('ignorefirstchar');
237 my $ignorel = $results->header('ignorelastchar');
238
239
240
241 my $query = join ' ', map { join ' ', @$_} @{$self->{query}}; # join everything together!
242
243
244 $wc = quotemeta $wc;
245
246
247 my $match_string =
248 join '|',
249 map { substr( $_, -1, 1 ) eq '*'
250 ? quotemeta( substr( $_, 0, -1) ) . "[$wc]*?"
251 : quotemeta
252 }
253 grep { ! /^(and|or|not|["()=])$/oi } # left over code
254 split /\s+/, $query;
255
256
257
258 return unless $match_string;
259
260 for ( $ignoref, $ignorel ) {
261 if ( $_ ) {
262 $_ = quotemeta;
263 $_ = "([$_]*)";
264 } else {
265 $_ = '()';
266 }
267 }
268
269
270 # Yuck!
271 $wc .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; # Warning: dependent on tolower used while indexing
272
273
274 # Now, wait a minute. Look at this more, as I'd hope that making a
275 # qr// go out of scope would release the compiled pattern.
276
277 if ( $ENV{MOD_PERL} ) {
278 $self->{wc_regexp} = qr/([^$wc]+)/; # regexp for splitting into swish-words
279 $self->{extract_regexp} = qr/^$ignoref([$wc]+?)$ignorel$/i; # regexp for extracting out the words to compare
280 $self->{match_regexp} = qr/^(?:$match_string)$/; # regexp for comparing extracted words to query
281
282 } else {
283 $self->{wc_regexp} = qr/([^$wc]+)/o; # regexp for splitting into swish-words
284 $self->{extract_regexp} = qr/^$ignoref([$wc]+?)$ignorel$/oi; # regexp for extracting out the words to compare
285 $self->{match_regexp} = qr/^(?:$match_string)$/o; # regexp for comparing extracted words to query
286 }
287 }
288
289
290 1;
291

  ViewVC Help
Powered by ViewVC 1.1.22