/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/prog-bin/doc2txt.pm
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/prog-bin/doc2txt.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (hide annotations) (download) (vendor branch)
Fri Sep 20 19:47:30 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
Importing web-site building process.

1 adcroft 1.1 package doc2txt;
2     use strict;
3    
4     =pod
5    
6     =head1 NAME
7    
8     doc2txt - swish-e sample module to convert MS Word docs to text
9    
10     =head1 SYNOPSIS
11    
12     use doc2txt;
13     my $doc_record_ref = doc2txt( $doc_file_name );
14    
15     # or by passing content in a scalar reference
16     my $doc_text_ref = doc2txt( \$doc_content );
17    
18    
19    
20    
21     =head1 DESCRIPTION
22    
23     Sample module for use with other swish-e 'prog' document source programs.
24    
25     Pass either a file name, or a scalar reference.
26    
27     The difference is that when you pass a reference to a scalar
28     only the content is returned. When you pass a file name
29     an entire record is returned ready to be fed to swish -- this
30     includes the headers required by swish for indexing.
31    
32    
33     =head1 REQUIREMENTS
34    
35     Uses the catdoc program. http://www.fe.msk.ru/~vitus/catdoc/
36    
37     You may need to adjust the parameters used to call catdoc.
38    
39     You will also need the module File::Temp available from CPAN if passing content
40     to this module (instead of a file name). I'm not thrilled about how that
41     currently works...
42    
43    
44     =head1 AUTHOR
45    
46     Bill Moseley
47    
48     =cut
49    
50     use Symbol;
51    
52    
# Package globals consumed by Exporter (@ISA, @EXPORT) and by version checks
# ($VERSION).  Declared with "our" rather than the obsolete "use vars".
our ( @ISA, @EXPORT, $VERSION );

# $Id: doc2txt.pm,v 1.2 2002/05/27 06:35:32 whmoseley Exp $
# Build "major.minor" (minor zero-padded to two digits) from the CVS
# Revision keyword, e.g. "Revision: 1.2" becomes "1.02".
$VERSION = sprintf '%d.%02d', q$Revision: 1.2 $ =~ /: (\d+)\.(\d+)/;

# doc2txt() is exported into the caller's namespace by default.
require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw(doc2txt);

# NOTE(review): @InfoTags is not referenced anywhere else in this file;
# presumably a leftover from a sibling sample module -- confirm before removing.
my @InfoTags = qw/Title Subject Author CreationDate Creator Producer ModDate Keywords/;

# Command line used to invoke catdoc; doc2txt() appends the document's file
# name to it.  Adjust as needed (Rainer uses: catdoc -s8859-1 -d8859-1).
#
# This file has no trailing "1;".  This assignment is the last statement
# executed when the module is loaded, so its (true) value is what lets
# "require"/"use" succeed -- keep it last and non-empty, or add an explicit
# "1;" at the end of the file.
my $catdoc = 'catdoc -a';
69    
70    
# doc2txt( $file_name )  or  doc2txt( \$content )
#
# Converts an MS Word document to plain text by running the external catdoc
# program (command line taken from the file-level $catdoc setting).
#
# Parameters:
#   $file_or_content - either the path of a document file, or a reference to
#                      a scalar holding the raw document content.
#
# Returns:
#   * given a scalar reference: a reference to the extracted text only;
#   * given a file name: a reference to a complete swish-e record, i.e. the
#     Content-Length, Last-Mtime and Path-Name headers, a blank line, and
#     then the extracted text.
#
# Conversion is best effort: if catdoc cannot be started, or exits with a
# failure status, a warning is printed and whatever text was produced
# (possibly none) is returned.  create_temp_file() may die when content is
# passed by reference and the temporary file cannot be written.
sub doc2txt {
    my $file_or_content = shift;

    my $from_content = ref $file_or_content;

    my $file = $from_content
        ? create_temp_file( $file_or_content )
        : $file_or_content;

    # Use a list-form pipe instead of backticks: the file name is passed to
    # catdoc as one argument and is never parsed by a shell, so names with
    # spaces, quotes or shell metacharacters are safe.
    my @command = ( split( ' ', $catdoc ), $file );

    my $content = '';
    if ( open my $pipe, '-|', @command ) {
        local $/;    # slurp all of catdoc's output at once
        $content = <$pipe>;
        $content = '' unless defined $content;
        close $pipe
            or warn "'$command[0]' failed for '$file' (exit status $?)\n";
    }
    else {
        warn "Failed to run '$command[0]' for '$file': $!\n";
    }

    if ( $from_content ) {
        # create_temp_file() asks File::Temp to unlink only at program exit;
        # remove the file now so long indexing runs do not pile up temp files.
        unlink $file;
        return \$content;
    }

    # otherwise build the headers

    my $mtime = ( stat $file )[9];

    my $size = length $content;

    my $ret = <<EOF;
Content-Length: $size
Last-Mtime: $mtime
Path-Name: $file

EOF

    $ret .= $content;

    return \$ret;
}
103    
104    
# create_temp_file( \$content )
#
# Writes the referenced scalar to a newly created temporary file and returns
# that file's name, so the content can be handed to an external program that
# expects a path.  Using File::Temp is the portable way to do this; the
# alternative would be to create a file in the local directory.
#
# File::Temp is loaded on demand, so it is only needed when content (rather
# than a file name) is being converted.  The file is created with
# UNLINK => 1.
#
# Dies if the content cannot be written or the file cannot be closed.
sub create_temp_file {
    my ($content_ref) = @_;

    require File::Temp;

    my ( $handle, $path ) = File::Temp::tempfile( UNLINK => 1 );

    print {$handle} ${$content_ref}
        or die $!;

    # Buffered write errors only surface at close, so check it explicitly.
    unless ( close $handle ) {
        die "Failed to close '$path' $!";
    }

    return $path;
}
122    
123    

  ViewVC Help
Powered by ViewVC 1.1.22