#!/usr/local/bin/perl -w use strict; =pod This is an example program for use with swish-e's -S prog indexing method. This will scan and index a hypermail (http://hypermail.org) mailing list archive. You might use a config file such as: IndexDir ./index_hypermail.pl SwishProgParameters /usr/local/hypermail/foo MetaNames swishtitle name email PropertyNames name email PropertyNamesDate sent IndexContents HTML2 .html StoreDescription HTML2
100000 UndefinedMetaTags ignore The above expects this file (index_hypermail.pl) to be in the current directory, and expects the hypermail files to be in the directory /usr/local/hypermail/foo. Index with the command: ./swish-e -c swish.conf -S prog See perldoc examples/swish.cgi for how to search this index. Here's a possible config file for use with swish.cgi: >cat .swishcgi.conf return { title => "Search the Foo List Archive", swish_binary => '../swish-e', display_props => [qw/ name email sent /], sorts => [qw/swishrank swishtitle email sent/], secondary_sort => [qw/sent desc/], metanames => [qw/swishdefault swishtitle name email/], name_labels => { swishrank => 'Rank', swishtitle => 'Subject Only', name => "Poster's Name", email => "Poster's Email", sent => 'Message Date', swishdefault => 'Subject & Body', }, highlight => { package => 'PhraseHighlight', show_words => 10, # Number of swish words to show around highlighted word max_words => 100, # If no words are found to highlight then show this many words occurrences => 6, # Limit number of occurrences of highlighted words highlight_on => '', highlight_off => '', meta_to_prop_map => { # this maps search metatags to display properties swishdefault => [ qw/swishtitle swishdescription/ ], swishtitle => [ qw/swishtitle/ ], email => [ qw/email/ ], name => [ qw/name/ ], swishdocpath => [ qw/swishdocpath/ ], }, }, date_ranges => { property_name => 'sent', # property name to limit by time_periods => [ 'All', 'Today', 'Yesterday', 'This Week', 'Last Week', 'Last 90 Days', 'This Month', 'Last Month', ], line_break => 0, default => 'All', date_range => 1, }, }; =cut use File::Find; # for recursing a directory tree use Date::Parse; # Recurse the directory(s) passed in on the command line find( { wanted => \&wanted, no_chdir => 1, }, @ARGV ); sub wanted { return if -d; return unless m!(^|/)\d+\.html$!; my $mtime = (stat $File::Find::name )[9]; my $html = format_message($File::Find::name ); return unless $html; my 
$size = length $html; my $name = $File::Find::name; $name =~ s[^./][]; print <