/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/prog-bin/doc2txt.pm
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/prog-bin/doc2txt.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:30 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
Importing web-site building process.

1 package doc2txt;
2 use strict;
3
4 =pod
5
6 =head1 NAME
7
8 doc2txt - swish-e sample module to convert MS Word docs to text
9
10 =head1 SYNOPSIS
11
12 use doc2txt;
13 my $doc_record_ref = doc2txt( $doc_file_name );
14
15 # or by passing content in a scalar reference
16 my $doc_text_ref = doc2txt( \$doc_content );
17
18
19
20
21 =head1 DESCRIPTION
22
23 Sample module for use with other swish-e 'prog' document source programs.
24
25 Pass either a file name, or a scalar reference.
26
27 The difference is that when you pass a reference to a scalar
28 only the content is returned. When you pass a file name
29 an entire record is returned ready to be fed to swish -- this
30 includes the headers required by swish for indexing.
31
32
33 =head1 REQUIREMENTS
34
35 Uses the catdoc program. http://www.fe.msk.ru/~vitus/catdoc/
36
37 You may need to adjust the parameters used to call catdoc.
38
39 You will also need the module File::Temp available from CPAN if passing content
40 to this module (instead of a file name). I'm not thrilled about how that
41 currently works...
42
43
44 =head1 AUTHOR
45
46 Bill Moseley
47
48 =cut
49
50 use Symbol;
51
52
53 use vars qw(
54 @ISA
55 @EXPORT
56 $VERSION
57 );
58
# $Id: doc2txt.pm,v 1.2 2002/05/27 06:35:32 whmoseley Exp $

# Turn the CVS revision keyword ("major.minor") into a version string of
# the form "major.0minor"; the match runs in list context and feeds its two
# captures straight into sprintf.
$VERSION = sprintf( '%d.%02d', ( q$Revision: 1.2 $ =~ /: (\d+)\.(\d+)/ ) );

# Export doc2txt() into the caller's namespace by default.
require Exporter;
@ISA    = ('Exporter');
@EXPORT = ('doc2txt');

# Document-info tag names.  NOTE(review): neither sub visible in this file
# refers to this list -- confirm whether it is still needed.
my @InfoTags = (
    'Title',   'Subject',  'Author',  'CreationDate',
    'Creator', 'Producer', 'ModDate', 'Keywords',
);

# Command line used to invoke catdoc.
# Rainer uses: catdoc -s8859-1 -d8859-1
my $catdoc = 'catdoc -a';
69
70
# doc2txt( $file_name )  or  doc2txt( \$content )
#
# Converts an MS Word document to text by running the configured catdoc
# command on it.
#
# Argument:
#   A file name, or a reference to a scalar holding the document content.
#   Content passed by reference is first written to a temporary file via
#   create_temp_file().
#
# Returns a reference to a scalar:
#   - for a scalar reference argument: just the text produced by catdoc;
#   - for a file name argument: a record made of the Content-Length,
#     Last-Mtime and Path-Name headers, a blank line, and then the text.
#
# If catdoc cannot be started a warning is issued and the text is empty.
sub doc2txt {
    my $file_or_content = shift;

    my $from_content = ref $file_or_content;

    my $file = $from_content
        ? create_temp_file( $file_or_content )
        : $file_or_content;

    # Build the command as a list so the file name never passes through a
    # shell: names containing spaces, quotes or other shell metacharacters
    # are handed to catdoc verbatim.  $catdoc is split on whitespace, so its
    # options must not themselves contain embedded spaces.
    my @command = ( split( ' ', $catdoc ), $file );

    my $content = '';
    if ( open( my $pipe, '-|', @command ) ) {
        local $/;                       # slurp all of catdoc's output
        my $output = <$pipe>;
        $content = $output if defined $output;
        close $pipe;
    }
    else {
        warn "doc2txt: failed to run '@command': $!\n";
    }

    if ($from_content) {

        # The temporary file is no longer needed once catdoc has read it;
        # remove it now rather than letting files pile up until exit.
        unlink $file;
        return \$content;
    }

    # otherwise build the headers

    my $mtime = ( stat $file )[9];      # element 9 of stat() is the mtime
    my $size  = length $content;

    my $ret = "Content-Length: $size\n"
            . "Last-Mtime: $mtime\n"
            . "Path-Name: $file\n"
            . "\n"
            . $content;

    return \$ret;
}
103
104
# create_temp_file( \$content )
#
# Writes the content referenced by the argument to a new temporary file and
# returns that file's name.  The file is created with File::Temp (requested
# with UNLINK => 1), which is the portable way to do this; the alternative
# would be to just create a file in the local directory.
#
# Dies if the content cannot be written or the file cannot be closed.
sub create_temp_file {
    my $scalar_ref = shift;

    require File::Temp;

    my ( $fh, $file_name ) = File::Temp::tempfile( UNLINK => 1 );

    # The content is a binary document; make sure no newline translation
    # or other I/O layer alters the bytes on their way to disk.
    binmode $fh;

    print {$fh} $$scalar_ref
        or die "Failed to write to '$file_name': $!";

    # Buffered write errors only show up at close, so check it.
    close $fh or die "Failed to close '$file_name' $!";

    return $file_name;
}
122
123

  ViewVC Help
Powered by ViewVC 1.1.22