#=======================================================================
# Default Highlighting Code
#
# $Id: DefaultHighlight.pm,v 1.1.1.1 2002/09/20 19:47:30 adcroft Exp $
#=======================================================================
package DefaultHighlight;
use strict;
sub new {
my ( $class, $results, $metaname ) = @_;
my $self = bless {
results => $results, # just in case we need a method
settings=> $results->config('highlight'),
metaname=> $metaname,
}, $class;
# parse out the query into words
my $query = $results->extract_query_match;
# Do words exist for this layer (all text at this time) and metaname?
# This is a reference to an array of phrases and words
$self->{description_prop} = $results->config('description_prop') || '';
if ( $results->header('stemming applied') =~ /^(?:1|yes)$/i ) {
eval { require SWISH::Stemmer };
if ( $@ ) {
$results->errstr('Stemmed index needs Stemmer.pm to highlight: ' . $@);
} else {
$self->{stemmer_function} = \&SWISH::Stemmer::SwishStem;
}
}
if ( $query && exists $query->{text}{$metaname} ) {
$self->{query} = $query->{text}{$metaname};
$self->set_match_regexp;
}
return $self;
}
sub highlight {
my ( $self, $properties ) = @_;
return unless $self->{query};
my $phrase_array = $self->{query};
my $settings = $self->{settings};
my $metaname = $self->{metaname};
# Do we care about this meta?
return unless exists $settings->{meta_to_prop_map}{$metaname};
# Get the related properties
my @props = @{ $settings->{meta_to_prop_map}{$metaname} };
my %checked;
for ( @props ) {
if ( $properties->{$_} ) {
$checked{$_}++;
$self->highlight_text( \$properties->{$_}, $phrase_array );
}
}
# Truncate the description, if not processed.
my $description = $self->{description_prop};
if ( $description && !$checked{ $description } && $properties->{$description} ) {
my $max_words = $settings->{max_words} || 100;
my @words = split /\s+/, $properties->{$description};
if ( @words > $max_words ) {
$properties->{$description} = join ' ', @words[0..$max_words], '...';
}
}
}
#==========================================================================
#
sub highlight_text {
my ( $self, $text_ref, $phrase_array ) = @_;
my $wc_regexp = $self->{wc_regexp};
my $extract_regexp = $self->{extract_regexp};
my $match_regexp = $self->{match_regexp};
my $last = 0;
my $settings = $self->{settings};
my $Show_Words = $settings->{show_words} || 10;
my $Occurrences = $settings->{occurrences} || 5;
my $Max_Words = $settings->{max_words} || 100;
my $On = $settings->{highlight_on} || '';
my $Off = $settings->{highlight_off} || '';
my $stemmer_function = $self->{stemmer_function};
# Should really call unescapeHTML(), but then would need to escape from escaping.
my @words = split /$wc_regexp/, $$text_ref;
return 'No Content saved: Check StoreDescription setting' unless @words;
my @flags;
$flags[$#words] = 0; # Extend array.
my $occurrences = $Occurrences ;
my $pos = 0;
while ( $Show_Words && $pos <= $#words ) {
# Check if the word is a swish word (ignoring begin and end chars)
if ( $words[$pos] =~ /$extract_regexp/ ) {
my ( $begin, $word, $end ) = ( $1, $2, $3 );
my $test = $stemmer_function
? $stemmer_function->($word)
: lc $word;
$test ||= lc $word;
# Not check if word matches
if ( $test =~ /$match_regexp/ ) {
$words[$pos] = "$begin$On$word$Off$end";
my $start = $pos - ($Show_Words-1)* 2;
my $end = $pos + ($Show_Words-1)* 2;
if ( $start < 0 ) {
$end = $end - $start;
$start = 0;
}
$end = $#words if $end > $#words;
$flags[$_]++ for $start .. $end;
# All done, and mark where to stop looking
if ( $occurrences-- <= 0 ) {
$last = $end;
last;
}
}
}
$pos += 2; # Skip to next wordchar word
}
my $dotdotdot = ' ... ';
my @output;
my $printing;
my $first = 1;
my $some_printed;
if ( $Show_Words && @words > 50 ) { # don't limit context if a small number of words
for my $i ( 0 ..$#words ) {
if ( $last && $i >= $last && $i < $#words ) {
push @output, $dotdotdot;
last;
}
if ( $flags[$i] ) {
push @output, $dotdotdot if !$printing++ && !$first;
push @output, $words[$i];
$some_printed++;
} else {
$printing = 0;
}
$first = 0;
}
}
if ( !$some_printed ) {
for my $i ( 0 .. $Max_Words ) {
if ( $i > $#words ) {
$printing++;
last;
}
push @output, $words[$i];
}
}
push @output, $dotdotdot if !$printing;
$$text_ref = join '', @output;
}
#============================================
# Returns compiled regular expressions for matching
#
sub set_match_regexp {
my $self = shift;
my $results = $self->{results};
my $wc = $results->header('wordcharacters');
my $ignoref = $results->header('ignorefirstchar');
my $ignorel = $results->header('ignorelastchar');
my $query = join ' ', map { join ' ', @$_} @{$self->{query}}; # join everything together!
$wc = quotemeta $wc;
my $match_string =
join '|',
map { substr( $_, -1, 1 ) eq '*'
? quotemeta( substr( $_, 0, -1) ) . "[$wc]*?"
: quotemeta
}
grep { ! /^(and|or|not|["()=])$/oi } # left over code
split /\s+/, $query;
return unless $match_string;
for ( $ignoref, $ignorel ) {
if ( $_ ) {
$_ = quotemeta;
$_ = "([$_]*)";
} else {
$_ = '()';
}
}
# Yuck!
$wc .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; # Warning: dependent on tolower used while indexing
# Now, wait a minute. Look at this more, as I'd hope that making a
# qr// go out of scope would release the compiled pattern.
if ( $ENV{MOD_PERL} ) {
$self->{wc_regexp} = qr/([^$wc]+)/; # regexp for splitting into swish-words
$self->{extract_regexp} = qr/^$ignoref([$wc]+?)$ignorel$/i; # regexp for extracting out the words to compare
$self->{match_regexp} = qr/^(?:$match_string)$/; # regexp for comparing extracted words to query
} else {
$self->{wc_regexp} = qr/([^$wc]+)/o; # regexp for splitting into swish-words
$self->{extract_regexp} = qr/^$ignoref([$wc]+?)$ignorel$/oi; # regexp for extracting out the words to compare
$self->{match_regexp} = qr/^(?:$match_string)$/o; # regexp for comparing extracted words to query
}
}
1;