/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/prog-bin/index_hypermail.pl
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/prog-bin/index_hypermail.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:30 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Importing web-site building process.

1 #!/usr/local/bin/perl -w
2 use strict;
3
4 =pod
5
6 This is an example program for use with swish-e's -S prog indexing method.
7
8 This will scan and index a hypermail (http://hypermail.org) mailing list archive.
9
10 You might use a config file such as:
11
12 IndexDir ./index_hypermail.pl
13 SwishProgParameters /usr/local/hypermail/foo
14
15 MetaNames swishtitle name email
16 PropertyNames name email
17 PropertyNamesDate sent
18 IndexContents HTML2 .html
19 StoreDescription HTML2 <body> 100000
20 UndefinedMetaTags ignore
21
22 The above expects this file (index_hypermail.pl) to be in the current diretory,
23 and expects the hypermail files to be in the directory /usr/local/hypermail/foo.
24
25 Index with the command:
26
27 ./swish-e -c swish.conf -S prog
28
29 See perldoc examples/swish.cgi for how to search this index. Here's a possible
30 config file for use with swish.cgi:
31
32 >cat .swishcgi.conf
33
34 return {
35 title => "Search the Foo List Archive",
36 swish_binary => '../swish-e',
37 display_props => [qw/ name email sent /],
38 sorts => [qw/swishrank swishtitle email sent/],
39 secondary_sort => [qw/sent desc/],
40 metanames => [qw/swishdefault swishtitle name email/],
41 name_labels => {
42 swishrank => 'Rank',
43 swishtitle => 'Subject Only',
44 name => "Poster's Name",
45 email => "Poster's Email",
46 sent => 'Message Date',
47 swishdefault => 'Subject & Body',
48 },
49
50 highlight => {
51 package => 'PhraseHighlight',
52 show_words => 10, # Number of swish words words to show around highlighted word
53 max_words => 100, # If no words are found to highlighted then show this many words
54 occurrences => 6, # Limit number of occurrences of highlighted words
55 highlight_on => '<font style="background:#FFFF99">',
56 highlight_off => '</font>',
57 meta_to_prop_map => { # this maps search metatags to display properties
58 swishdefault => [ qw/swishtitle swishdescription/ ],
59 swishtitle => [ qw/swishtitle/ ],
60 email => [ qw/email/ ],
61 name => [ qw/name/ ],
62 swishdocpath => [ qw/swishdocpath/ ],
63 },
64 },
65 date_ranges => {
66 property_name => 'sent', # property name to limit by
67 time_periods => [
68 'All',
69 'Today',
70 'Yesterday',
71 'This Week',
72 'Last Week',
73 'Last 90 Days',
74 'This Month',
75 'Last Month',
76 ],
77
78 line_break => 0,
79 default => 'All',
80 date_range => 1,
81 },
82 };
83
84
85
86 =cut
87
88
89
90 use File::Find; # for recursing a directory tree
91 use Date::Parse;
92
93 # Recurse the directory(s) passed in on the command line
94
95 find( { wanted => \&wanted, no_chdir => 1, }, @ARGV );
96
97
98 sub wanted {
99 return if -d;
100 return unless m!(^|/)\d+\.html$!;
101
102 my $mtime = (stat $File::Find::name )[9];
103
104 my $html = format_message($File::Find::name );
105 return unless $html;
106
107 my $size = length $html;
108
109 my $name = $File::Find::name;
110 $name =~ s[^./][];
111
112 print <<EOF;
113 Content-Length: $size
114 Last-Mtime: $mtime
115 Path-Name: $name
116
117 EOF
118
119 print $html;
120 }
121
122
123
124 sub format_message {
125 my $file = shift;
126 local $_;
127
128 unless ( open FH, "<$file" ) {
129 warn "Failed to open '$file'. Error: $!";
130 return;
131 }
132
133 my %fields;
134
135 while (<FH>) {
136 if ( my( $tag, $content) = /<!-- ([^=]+)="(.+)" -->$/ ) {
137 last if $tag eq 'body';
138
139 unless ( $content ) {
140 warn "File '$file' tag '$tag' empty content\n";
141 next;
142 }
143
144 if ( $tag eq 'sent' ) {
145 my $date = str2time $content;
146 unless ( defined $date ) {
147 warn "File '$file' tag '$tag' failed to parse '$content'\n";
148 next;
149 }
150 $content = $date;
151 }
152
153 if ( $tag eq 'received' ) {
154 my $date = str2time $content;
155 unless ( defined $date ) {
156 warn "File '$file' tag '$tag' failed to parse '$content'\n";
157 next;
158 }
159 $content = $date;
160 }
161
162
163 if ( $tag eq 'subject' ) {
164 $content =~ s/\Q[SWISH-E]\E//;
165 if ( $content =~ s/\s*Re:\s*//i ) {
166 $content .= ' (Re)';
167 }
168 }
169
170 $fields{$tag} = $content;
171 }
172 }
173
174 # Let's now use received header, if possible
175 $fields{sent} = $fields{received} || $fields{sent};
176
177 my $body = '';
178
179 while ( <FH> ) {
180 last if /<!-- body="end" -->/ || /^-- $/ || /^--$/ || /^(_|-){40,}\s*$/;
181
182 $body .= $_;
183 }
184
185 return join "\n",
186 '<html>',
187 '<head>',
188 '<title>',
189 ($fields{subject} || '' ),
190 '</title>',
191 map( { qq[<meta name="$_" content="$fields{$_}">] } keys %fields ),
192 '</head><body>',
193 $body,
194 '</body>',
195 '</html>',
196 '';
197 }
198

  ViewVC Help
Powered by ViewVC 1.1.22