/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/example/modules/DefaultHighlight.pm
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/example/modules/DefaultHighlight.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (hide annotations) (download) (vendor branch)
Fri Sep 20 19:47:30 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
Importing web-site building process.

1 adcroft 1.1 #=======================================================================
2     # Default Highlighting Code
3     #
4     # $Id: DefaultHighlight.pm,v 1.1 2001/12/06 07:32:11 whmoseley Exp $
5     #=======================================================================
6     package DefaultHighlight;
7     use strict;
8    
9     sub new {
10     my ( $class, $results, $metaname ) = @_;
11    
12    
13     my $self = bless {
14     results => $results, # just in case we need a method
15     settings=> $results->config('highlight'),
16     metaname=> $metaname,
17     }, $class;
18    
19     # parse out the query into words
20     my $query = $results->extract_query_match;
21    
22    
23     # Do words exist for this layer (all text at this time) and metaname?
24     # This is a reference to an array of phrases and words
25    
26     $self->{description_prop} = $results->config('description_prop') || '';
27    
28     if ( $results->header('stemming applied') =~ /^(?:1|yes)$/i ) {
29     eval { require SWISH::Stemmer };
30     if ( $@ ) {
31     $results->errstr('Stemmed index needs Stemmer.pm to highlight: ' . $@);
32     } else {
33     $self->{stemmer_function} = \&SWISH::Stemmer::SwishStem;
34     }
35     }
36    
37    
38     if ( $query && exists $query->{text}{$metaname} ) {
39     $self->{query} = $query->{text}{$metaname};
40    
41     $self->set_match_regexp;
42     }
43    
44     return $self;
45     }
46    
47     sub highlight {
48     my ( $self, $properties ) = @_;
49    
50    
51     return unless $self->{query};
52    
53     my $phrase_array = $self->{query};
54    
55     my $settings = $self->{settings};
56     my $metaname = $self->{metaname};
57    
58     # Do we care about this meta?
59     return unless exists $settings->{meta_to_prop_map}{$metaname};
60    
61     # Get the related properties
62     my @props = @{ $settings->{meta_to_prop_map}{$metaname} };
63    
64     my %checked;
65    
66     for ( @props ) {
67     if ( $properties->{$_} ) {
68     $checked{$_}++;
69     $self->highlight_text( \$properties->{$_}, $phrase_array );
70     }
71     }
72    
73    
74     # Truncate the description, if not processed.
75     my $description = $self->{description_prop};
76     if ( $description && !$checked{ $description } && $properties->{$description} ) {
77     my $max_words = $settings->{max_words} || 100;
78     my @words = split /\s+/, $properties->{$description};
79     if ( @words > $max_words ) {
80     $properties->{$description} = join ' ', @words[0..$max_words], '<b>...</b>';
81     }
82     }
83    
84     }
85    
86    
87    
88     #==========================================================================
89     #
90    
91     sub highlight_text {
92    
93     my ( $self, $text_ref, $phrase_array ) = @_;
94    
95     my $wc_regexp = $self->{wc_regexp};
96     my $extract_regexp = $self->{extract_regexp};
97     my $match_regexp = $self->{match_regexp};
98    
99    
100     my $last = 0;
101    
102     my $settings = $self->{settings};
103    
104     my $Show_Words = $settings->{show_words} || 10;
105     my $Occurrences = $settings->{occurrences} || 5;
106     my $Max_Words = $settings->{max_words} || 100;
107     my $On = $settings->{highlight_on} || '<b>';
108     my $Off = $settings->{highlight_off} || '</b>';
109    
110    
111     my $stemmer_function = $self->{stemmer_function};
112    
113    
114     # Should really call unescapeHTML(), but then would need to escape <b> from escaping.
115     my @words = split /$wc_regexp/, $$text_ref;
116    
117    
118     return 'No Content saved: Check StoreDescription setting' unless @words;
119    
120     my @flags;
121     $flags[$#words] = 0; # Extend array.
122    
123     my $occurrences = $Occurrences ;
124    
125    
126     my $pos = 0;
127    
128     while ( $Show_Words && $pos <= $#words ) {
129    
130     # Check if the word is a swish word (ignoring begin and end chars)
131     if ( $words[$pos] =~ /$extract_regexp/ ) {
132    
133    
134     my ( $begin, $word, $end ) = ( $1, $2, $3 );
135    
136     my $test = $stemmer_function
137     ? $stemmer_function->($word)
138     : lc $word;
139    
140     $test ||= lc $word;
141    
142     # Not check if word matches
143     if ( $test =~ /$match_regexp/ ) {
144    
145     $words[$pos] = "$begin$On$word$Off$end";
146    
147    
148     my $start = $pos - ($Show_Words-1)* 2;
149     my $end = $pos + ($Show_Words-1)* 2;
150     if ( $start < 0 ) {
151     $end = $end - $start;
152     $start = 0;
153     }
154    
155     $end = $#words if $end > $#words;
156    
157     $flags[$_]++ for $start .. $end;
158    
159    
160     # All done, and mark where to stop looking
161     if ( $occurrences-- <= 0 ) {
162     $last = $end;
163     last;
164     }
165     }
166     }
167    
168     $pos += 2; # Skip to next wordchar word
169     }
170    
171    
172     my $dotdotdot = ' <b>...</b> ';
173    
174    
175     my @output;
176    
177     my $printing;
178     my $first = 1;
179     my $some_printed;
180    
181     if ( $Show_Words && @words > 50 ) { # don't limit context if a small number of words
182     for my $i ( 0 ..$#words ) {
183    
184    
185     if ( $last && $i >= $last && $i < $#words ) {
186     push @output, $dotdotdot;
187     last;
188     }
189    
190     if ( $flags[$i] ) {
191    
192     push @output, $dotdotdot if !$printing++ && !$first;
193     push @output, $words[$i];
194     $some_printed++;
195    
196     } else {
197     $printing = 0;
198     }
199    
200     $first = 0;
201    
202    
203     }
204     }
205    
206     if ( !$some_printed ) {
207     for my $i ( 0 .. $Max_Words ) {
208     if ( $i > $#words ) {
209     $printing++;
210     last;
211     }
212     push @output, $words[$i];
213     }
214     }
215    
216    
217    
218     push @output, $dotdotdot if !$printing;
219    
220     $$text_ref = join '', @output;
221    
222    
223     }
224    
225     #============================================
226     # Returns compiled regular expressions for matching
227     #
228    
229     sub set_match_regexp {
230     my $self = shift;
231    
232     my $results = $self->{results};
233    
234    
235     my $wc = $results->header('wordcharacters');
236     my $ignoref = $results->header('ignorefirstchar');
237     my $ignorel = $results->header('ignorelastchar');
238    
239    
240    
241     my $query = join ' ', map { join ' ', @$_} @{$self->{query}}; # join everything together!
242    
243    
244     $wc = quotemeta $wc;
245    
246    
247     my $match_string =
248     join '|',
249     map { substr( $_, -1, 1 ) eq '*'
250     ? quotemeta( substr( $_, 0, -1) ) . "[$wc]*?"
251     : quotemeta
252     }
253     grep { ! /^(and|or|not|["()=])$/oi } # left over code
254     split /\s+/, $query;
255    
256    
257    
258     return unless $match_string;
259    
260     for ( $ignoref, $ignorel ) {
261     if ( $_ ) {
262     $_ = quotemeta;
263     $_ = "([$_]*)";
264     } else {
265     $_ = '()';
266     }
267     }
268    
269    
270     # Yuck!
271     $wc .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; # Warning: dependent on tolower used while indexing
272    
273    
274     # Now, wait a minute. Look at this more, as I'd hope that making a
275     # qr// go out of scope would release the compiled pattern.
276    
277     if ( $ENV{MOD_PERL} ) {
278     $self->{wc_regexp} = qr/([^$wc]+)/; # regexp for splitting into swish-words
279     $self->{extract_regexp} = qr/^$ignoref([$wc]+?)$ignorel$/i; # regexp for extracting out the words to compare
280     $self->{match_regexp} = qr/^(?:$match_string)$/; # regexp for comparing extracted words to query
281    
282     } else {
283     $self->{wc_regexp} = qr/([^$wc]+)/o; # regexp for splitting into swish-words
284     $self->{extract_regexp} = qr/^$ignoref([$wc]+?)$ignorel$/oi; # regexp for extracting out the words to compare
285     $self->{match_regexp} = qr/^(?:$match_string)$/o; # regexp for comparing extracted words to query
286     }
287     }
288    
289    
290     1;
291    

  ViewVC Help
Powered by ViewVC 1.1.22