/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/prog-bin/index_hypermail.pl
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/prog-bin/index_hypermail.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1 - (hide annotations) (download)
Fri Sep 20 19:47:30 2002 UTC (22 years, 10 months ago) by adcroft
Branch point for: Import, MAIN
File MIME type: text/plain
Initial revision

1 adcroft 1.1 #!/usr/local/bin/perl -w
2     use strict;
3    
4     =pod
5    
6     This is an example program for use with swish-e's -S prog indexing method.
7    
8     This will scan and index a hypermail (http://hypermail.org) mailing list archive.
9    
10     You might use a config file such as:
11    
12     IndexDir ./index_hypermail.pl
13     SwishProgParameters /usr/local/hypermail/foo
14    
15     MetaNames swishtitle name email
16     PropertyNames name email
17     PropertyNamesDate sent
18     IndexContents HTML2 .html
19     StoreDescription HTML2 <body> 100000
20     UndefinedMetaTags ignore
21    
22     The above expects this file (index_hypermail.pl) to be in the current diretory,
23     and expects the hypermail files to be in the directory /usr/local/hypermail/foo.
24    
25     Index with the command:
26    
27     ./swish-e -c swish.conf -S prog
28    
29     See perldoc examples/swish.cgi for how to search this index. Here's a possible
30     config file for use with swish.cgi:
31    
32     >cat .swishcgi.conf
33    
34     return {
35     title => "Search the Foo List Archive",
36     swish_binary => '../swish-e',
37     display_props => [qw/ name email sent /],
38     sorts => [qw/swishrank swishtitle email sent/],
39     secondary_sort => [qw/sent desc/],
40     metanames => [qw/swishdefault swishtitle name email/],
41     name_labels => {
42     swishrank => 'Rank',
43     swishtitle => 'Subject Only',
44     name => "Poster's Name",
45     email => "Poster's Email",
46     sent => 'Message Date',
47     swishdefault => 'Subject & Body',
48     },
49    
50     highlight => {
51     package => 'PhraseHighlight',
52     show_words => 10, # Number of swish words words to show around highlighted word
53     max_words => 100, # If no words are found to highlighted then show this many words
54     occurrences => 6, # Limit number of occurrences of highlighted words
55     highlight_on => '<font style="background:#FFFF99">',
56     highlight_off => '</font>',
57     meta_to_prop_map => { # this maps search metatags to display properties
58     swishdefault => [ qw/swishtitle swishdescription/ ],
59     swishtitle => [ qw/swishtitle/ ],
60     email => [ qw/email/ ],
61     name => [ qw/name/ ],
62     swishdocpath => [ qw/swishdocpath/ ],
63     },
64     },
65     date_ranges => {
66     property_name => 'sent', # property name to limit by
67     time_periods => [
68     'All',
69     'Today',
70     'Yesterday',
71     'This Week',
72     'Last Week',
73     'Last 90 Days',
74     'This Month',
75     'Last Month',
76     ],
77    
78     line_break => 0,
79     default => 'All',
80     date_range => 1,
81     },
82     };
83    
84    
85    
86     =cut
87    
88    
89    
90     use File::Find; # for recursing a directory tree
91     use Date::Parse;
92    
93     # Recurse the directory(s) passed in on the command line
94    
95     find( { wanted => \&wanted, no_chdir => 1, }, @ARGV );
96    
97    
98     sub wanted {
99     return if -d;
100     return unless m!(^|/)\d+\.html$!;
101    
102     my $mtime = (stat $File::Find::name )[9];
103    
104     my $html = format_message($File::Find::name );
105     return unless $html;
106    
107     my $size = length $html;
108    
109     my $name = $File::Find::name;
110     $name =~ s[^./][];
111    
112     print <<EOF;
113     Content-Length: $size
114     Last-Mtime: $mtime
115     Path-Name: $name
116    
117     EOF
118    
119     print $html;
120     }
121    
122    
123    
124     sub format_message {
125     my $file = shift;
126     local $_;
127    
128     unless ( open FH, "<$file" ) {
129     warn "Failed to open '$file'. Error: $!";
130     return;
131     }
132    
133     my %fields;
134    
135     while (<FH>) {
136     if ( my( $tag, $content) = /<!-- ([^=]+)="(.+)" -->$/ ) {
137     last if $tag eq 'body';
138    
139     unless ( $content ) {
140     warn "File '$file' tag '$tag' empty content\n";
141     next;
142     }
143    
144     if ( $tag eq 'sent' ) {
145     my $date = str2time $content;
146     unless ( defined $date ) {
147     warn "File '$file' tag '$tag' failed to parse '$content'\n";
148     next;
149     }
150     $content = $date;
151     }
152    
153     if ( $tag eq 'received' ) {
154     my $date = str2time $content;
155     unless ( defined $date ) {
156     warn "File '$file' tag '$tag' failed to parse '$content'\n";
157     next;
158     }
159     $content = $date;
160     }
161    
162    
163     if ( $tag eq 'subject' ) {
164     $content =~ s/\Q[SWISH-E]\E//;
165     if ( $content =~ s/\s*Re:\s*//i ) {
166     $content .= ' (Re)';
167     }
168     }
169    
170     $fields{$tag} = $content;
171     }
172     }
173    
174     # Let's now use received header, if possible
175     $fields{sent} = $fields{received} || $fields{sent};
176    
177     my $body = '';
178    
179     while ( <FH> ) {
180     last if /<!-- body="end" -->/ || /^-- $/ || /^--$/ || /^(_|-){40,}\s*$/;
181    
182     $body .= $_;
183     }
184    
185     return join "\n",
186     '<html>',
187     '<head>',
188     '<title>',
189     ($fields{subject} || '' ),
190     '</title>',
191     map( { qq[<meta name="$_" content="$fields{$_}">] } keys %fields ),
192     '</head><body>',
193     $body,
194     '</body>',
195     '</html>',
196     '';
197     }
198    

  ViewVC Help
Powered by ViewVC 1.1.22