/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/doc/Pod/HtmlPsPdf/Html.pm
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/doc/Pod/HtmlPsPdf/Html.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (hide annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
Importing web-site building process.

1 adcroft 1.1 package Pod::HtmlPsPdf::Html;
2    
3     use Pod::Functions;
4     use Getopt::Long; # package for handling command-line parameters
5     require Exporter;
6     use vars qw($VERSION);
7     $VERSION = 1.01;
8     @ISA = Exporter;
9     @EXPORT = qw(pod2html htmlify);
10     use Cwd;
11    
12     use Carp;
13    
14     use strict;
15    
16     use Config;
17    
18     =head1 NAME
19    
20     Pod::Html - module to convert pod files to HTML
21    
22     =head1 SYNOPSIS
23    
24     use Pod::Html;
25     pod2html([options]);
26    
27     =head1 DESCRIPTION
28    
29     Converts files from pod format (see L<perlpod>) to HTML format. It
30     can automatically generate indexes and cross-references, and it keeps
31     a cache of things it knows how to cross-reference.
32    
33     =head1 ARGUMENTS
34    
35     Pod::Html takes the following arguments:
36    
37     =over 4
38    
39     =item help
40    
41     --help
42    
43     Displays the usage message.
44    
45     =item htmlroot
46    
47     --htmlroot=name
48    
49     Sets the base URL for the HTML files. When cross-references are made,
50     the HTML root is prepended to the URL.
51    
52     =item infile
53    
54     --infile=name
55    
56     Specify the pod file to convert. Input is taken from STDIN if no
57     infile is specified.
58    
59     =item outfile
60    
61     --outfile=name
62    
63     Specify the HTML file to create. Output goes to STDOUT if no outfile
64     is specified.
65    
66     =item podroot
67    
68     --podroot=name
69    
70     Specify the base directory for finding library pods.
71    
72     =item podpath
73    
74     --podpath=name:...:name
75    
76     Specify which subdirectories of the podroot contain pod files whose
77     HTML converted forms can be linked-to in cross-references.
78    
79     =item libpods
80    
81     --libpods=name:...:name
82    
83     List of page names (eg, "perlfunc") which contain linkable C<=item>s.
84    
85     =item netscape
86    
87     --netscape
88    
89     Use Netscape HTML directives when applicable.
90    
91     =item nonetscape
92    
93     --nonetscape
94    
95     Do not use Netscape HTML directives (default).
96    
97     =item index
98    
99     --index
100    
101     Generate an index at the top of the HTML file (default behaviour).
102    
103     =item noindex
104    
105     --noindex
106    
107     Do not generate an index at the top of the HTML file.
108    
109    
110     =item recurse
111    
112     --recurse
113    
114     Recurse into subdirectories specified in podpath (default behaviour).
115    
116     =item norecurse
117    
118     --norecurse
119    
120     Do not recurse into subdirectories specified in podpath.
121    
122     =item title
123    
124     --title=title
125    
126     Specify the title of the resulting HTML file.
127    
128     =item verbose
129    
130     --verbose
131    
132     Display progress messages.
133    
134     =back
135    
136     =head1 EXAMPLE
137    
138     pod2html("pod2html",
139     "--podpath=lib:ext:pod:vms",
140     "--podroot=/usr/src/perl",
141     "--htmlroot=/perl/nmanual",
142     "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
143     "--recurse",
144     "--infile=foo.pod",
145     "--outfile=/perl/nmanual/foo.html");
146    
147     =head1 AUTHOR
148    
149     Originally written by Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
150    
151     Modified by Stas Bekman E<lt>stas@stason.orgE<gt>.
152    
153     =head1 BUGS
154    
155     Has trouble with C<> etc in = commands.
156    
157     =head1 SEE ALSO
158    
159     L<perlpod>
160    
161     =head1 COPYRIGHT
162    
163     This program is distributed under the Artistic License.
164    
165     =cut
166    
167     use vars qw($OUT);
168    
169     #my $HR = qq{<P><B><FONT SIZE=-1><A HREF="#toc">[TOC]</A></FONT></B></P><HR>};
170     my $HR = qq{<HR>};
171    
172    
173     my $dircache = "pod2html-dircache";
174     my $itemcache = "pod2html-itemcache";
175    
176     my @begin_stack = (); # begin/end stack
177    
178     my @libpods = (); # files to search for links from C<> directives
179     my $htmlroot = "/"; # http-server base directory from which all
180     # relative paths in $podpath stem.
181     my $htmlfile = ""; # write to stdout by default
182     my $podfile = ""; # read from stdin by default
183     my @podpath = (); # list of directories containing library pods.
184     my $podroot = "."; # filesystem base directory from which all
185     # relative paths in $podpath stem.
186     my $recurse = 1; # recurse on subdirectories in $podpath.
187     my $verbose = 0; # not verbose by default
188     my $doindex = 1; # non-zero if we should generate an index
189     my $listlevel = 0; # current list depth
190     my @listitem = (); # stack of HTML commands to use when a =item is
191     # encountered. the top of the stack is the
192     # current list.
193     my @listdata = (); # similar to @listitem, but for the text after
194     # an =item
195     my @listend = (); # similar to @listitem, but the text to use to
196     # end the list.
197     my $ignore = 1; # whether or not to format text. we don't
198     # format text until we hit our first pod
199     # directive.
200    
201     my %items_named = (); # for the multiples of the same item in perlfunc
202     my @items_seen = ();
203     my $netscape = 0; # whether or not to use netscape directives.
204     my $title; # title to give the pod(s)
205     my $top = 1; # true if we are at the top of the doc. used
206     # to prevent the first <HR> directive.
207     my $paragraph; # which paragraph we're processing (used
208     # for error messages)
209     my %pages = (); # associative array used to find the location
210     # of pages referenced by L<> links.
211     my %sections = (); # sections within this page
212     my %items = (); # associative array used to find the location
213     # of =item directives referenced by C<> links
214     my $Is83; # is dos with short filenames (8.3)
215    
216    
217     sub init_globals {
218    
219     $OUT = '';
220    
221     $dircache = "pod2html-dircache";
222     $itemcache = "pod2html-itemcache";
223    
224     @begin_stack = (); # begin/end stack
225    
226     @libpods = (); # files to search for links from C<> directives
227     $htmlroot = "/"; # http-server base directory from which all
228     # relative paths in $podpath stem.
229     $htmlfile = ""; # write to stdout by default
230     $podfile = ""; # read from stdin by default
231     @podpath = (); # list of directories containing library pods.
232     $podroot = "."; # filesystem base directory from which all
233     # relative paths in $podpath stem.
234     $recurse = 1; # recurse on subdirectories in $podpath.
235     $verbose = 0; # not verbose by default
236     $doindex = 1; # non-zero if we should generate an index
237     $listlevel = 0; # current list depth
238     @listitem = (); # stack of HTML commands to use when a =item is
239     # encountered. the top of the stack is the
240     # current list.
241     @listdata = (); # similar to @listitem, but for the text after
242     # an =item
243     @listend = (); # similar to @listitem, but the text to use to
244     # end the list.
245     $ignore = 1; # whether or not to format text. we don't
246     # format text until we hit our first pod
247     # directive.
248    
249     @items_seen = ();
250     %items_named = ();
251     $netscape = 0; # whether or not to use netscape directives.
252     $title = ''; # title to give the pod(s)
253     $top = 1; # true if we are at the top of the doc. used
254     # to prevent the first <HR> directive.
255     $paragraph = ''; # which paragraph we're processing (used
256     # for error messages)
257     %sections = (); # sections within this page
258    
259     # These are not reinitialised here but are kept as a cache.
260     # See get_cache and related cache management code.
261     #%pages = (); # associative array used to find the location
262     # of pages referenced by L<> links.
263     #%items = (); # associative array used to find the location
264     # of =item directives referenced by C<> links
265     $Is83=$^O eq 'dos';
266     }
267    
268     use vars qw($r_valid_anchors $r_links_to_check $curr_base);
269    
270     # $html = pod2html(@pod_code)
271     #
272     sub pod2html {
273    
274     init_globals();
275    
276    
277    
278     @podpath = @{+shift};
279     $podroot = shift;
280     $htmlroot = shift;
281     $verbose = shift;
282     my $r_html_data = shift;
283     my $rh_main_toc = shift; # a list of TableOfContents
284     my $curr_page = shift || '';
285     $podfile = $curr_page || '';
286     my $curr_page_index = shift || '';
287     $r_valid_anchors = shift || {};
288     $r_links_to_check = shift || {};
289    
290     $curr_base = $curr_page;
291     $curr_base =~ s/\.html//;
292    
293     my @poddata = @$r_html_data;
294    
295     local($/);
296     local $_;
297    
298    
299     # # Strip escapes "\'" and '\"' from words like "it\'s"
300     # map {s/\\(['"])/$1/go} @poddata;
301    
302     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
303    
304     # cache of %pages and %items from last time we ran
305    
306     #undef $opt_help if defined $opt_help;
307    
308     # parse the command-line parameters
309     #parse_command_line();
310    
311    
312     #$htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
313    
314     # read the pod a paragraph at a time
315     warn "Scanning for sections in input file(s)\n" if $verbose;
316    
317     # must be performed before the index creation, so the first
318     # header will not enter into the index but serve only as a header
319    
320     # put a title in the HTML file
321     $title = '';
322     my $title_sec = '';
323    
324     # find the beginning of the pod
325     for (my $i = 0; $i < @poddata; $i++) {
326     $title_sec = shift @poddata;
327     last if $title_sec =~ s/^=head1\s*(NAME)?//;
328     }
329    
330     # grab the first section
331     while (1) {
332     last unless $poddata[0];
333     last if $poddata[0] =~ /^=head/; # stop on the next section beginning
334     $title_sec .= shift @poddata; # otherwise grab the title data
335     }
336     # remove any excessive spaces, new lines
337     $title_sec =~ s/\n/ /gs;
338     $title_sec =~ s/\s+/ /g;
339    
340     # save away a clean header
341     $title = $title_sec;
342    
343     # TITLE_SEARCH: {
344     # for (my $i = 0; $i < @poddata; $i++) {
345     # if ($poddata[$i] =~ /^=head1\s*(.*)?/) {
346     # # remove the title so it wouldn't show up among the
347     # # section names!
348     # shift @poddata;
349     # my $text = $1
350     # next if $
351     # $title = $1, last TITLE_SEARCH if $1;
352     # }
353     # }
354     # }
355    
356     # scan the pod for =head[1-6] directives and build an index
357     my $index = scan_headings(\%sections, @poddata);
358    
359    
360     unless($index) {
361     warn "No pod in $podfile\n" if $verbose;
362     return;
363     }
364    
365     # TITLE_SEARCH: {
366     # for (my $i = 0; $i < @poddata; $i++) {
367     # if ($poddata[$i] =~ /^title:\s*(.*)/) {
368     # $title = $1, last TITLE_SEARCH;
369     # }
370     # }
371     # }
372    
373     if (!$title and $podfile =~ /\.pod$/) {
374     # probably a split pod so take first =head[12] as title
375     for (my $i = 0; $i < @poddata; $i++) {
376     last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
377     }
378     warn "adopted '$title' as title for $podfile\n"
379     if $verbose and $title;
380     }
381     if ($title) {
382     $title =~ s/\s*\(.*\)//;
383     } else {
384     warn "$0: no title for $podfile";
385     $podfile =~ /^(.*)(\.[^.\/]+)?$/;
386     $title = ($podfile eq "-" ? 'No Title' : $1);
387     warn "using $title" if $verbose;
388     }
389    
390     # full pages index including the page's name and title
391     my $full_index = '';
392     $full_index = qq{<LI><A HREF="$curr_page"><B><FONT SIZE=+1>$title</FONT></B></A></LI>};
393     $full_index .= "<P>";
394     $full_index .= $index;
395     $full_index =~ s/\#/$curr_page\#/gs;
396     $full_index .= "<P>";
397    
398     # add a valid tag to a list of tags - since we might have
399     # link like warnings# which points to the page itself
400     $r_valid_anchors->{$curr_base."/"} = $title;
401    
402     # start the HTML
403     add_header($title);
404    
405     $OUT .= $HR;
406    
407     # load/reload/validate/cache %pages and %items
408     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
409    
410     # scan the pod for =item directives
411     scan_items("", \%items, @poddata);
412    
413     # put an index at the top of the file. note, if $doindex is 0 we
414     # still generate an index, but surround it with an html comment.
415     # that way some other program can extract it if desired.
416     $index =~ s/--+/-/g;
417     $OUT .= "<!-- INDEX BEGIN -->\n";
418     $OUT .= qq{<A NAME="toc"></A>\n};
419     $OUT .= qq{<P><B>Table of Contents:</B></P>\n};
420     $OUT .= "<!--\n" unless $doindex;
421     $OUT .= $index;
422     $OUT .= "-->\n" unless $doindex;
423     $OUT .= "<!-- INDEX END -->\n\n";
424     # $OUT .= "$HR\n" if $doindex;
425    
426     # now convert this file
427     warn "Converting input file\n" if $verbose;
428     foreach my $i (0..$#poddata) {
429     $_ = $poddata[$i];
430     $paragraph = $i+1;
431     if (/^(=.*)/s) { # is it a pod directive?
432     $ignore = 0;
433     $_ = $1;
434     if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
435     process_begin($1, $2);
436     } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
437     process_end($1, $2);
438     } elsif (/^=cut/) { # =cut
439     process_cut();
440     } elsif (/^=pod/) { # =pod
441     process_pod();
442     } else {
443     next if @begin_stack && $begin_stack[-1] ne 'html';
444    
445     if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
446     process_head($1, $2);
447     } elsif (/^=item\s*(.*\S)/sm) { # =item text
448     process_item($1);
449     } elsif (/^=over\s*(.*)/) { # =over N
450     process_over();
451     } elsif (/^=back/) { # =back
452     process_back();
453     } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
454     process_for($1,$2);
455     } elsif (/^=example\s+(\S+)\s+(.*)/si) {# =example
456     process_example($1,$2);
457     } elsif (/^=figure\s+(\S+)\s+(.*)/si) { # =figure
458     process_figure($1,$2);
459     } elsif (/^=code\s+(\S+)\s+(.*)/si) { # =code
460     process_code($1,$2);
461     } else {
462     /^=(\S*)\s*/;
463     warn "$0: $podfile: unknown pod directive '$1' in "
464     . "paragraph $paragraph. ignoring.\n";
465     }
466     }
467     $top = 0;
468     }
469     else {
470     next if $ignore;
471     next if @begin_stack && $begin_stack[-1] ne 'html';
472     my $text = $_;
473     process_text(\$text, 1);
474     $OUT .= "<P>\n$text";
475     }
476     }
477    
478     # finish off any pending directives
479     finish_list();
480    
481     # add the last <HR>
482     $OUT .= $HR;
483    
484     # add the tail
485     add_tail();
486    
487     $rh_main_toc->{$curr_page} = $full_index;
488    
489     @$r_html_data = $OUT;
490     # return $OUT;
491    
492     warn "Finished\n" if $verbose;
493     }
494    
495    
496     ##################
497     sub add_header{
498     my $title = shift || '';
499    
500     $OUT .= qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
501     <HTML>
502     <HEAD>
503     <TITLE>$title</TITLE>
504     <BODY>
505     <H1 ALIGN=CENTER>$title</H1>
506     }
507    
508    
509     } # end of add_header
510    
511    
512     sub add_tail{
513    
514     $OUT .= qq{
515    
516     </BODY>
517     </HTML>
518     };
519    
520     } # end of add_tail
521    
522     ##############################################################################
523    
524     my $usage; # see below
525     sub usage {
526     my $podfile = shift;
527     warn "$0: $podfile: @_\n" if @_;
528     die $usage;
529     }
530    
531     $usage =<<END_OF_USAGE;
532     Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
533     --podpath=<name>:...:<name> --podroot=<name>
534     --libpods=<name>:...:<name> --recurse --verbose --index
535     --netscape --norecurse --noindex
536    
537     --flush - flushes the item and directory caches.
538     --help - prints this message.
539     --htmlroot - http-server base directory from which all relative paths
540     in podpath stem (default is /).
541     --index - generate an index at the top of the resulting html
542     (default).
543     --infile - filename for the pod to convert (input taken from stdin
544     by default).
545     --libpods - colon-separated list of pages to search for =item pod
546     directives in as targets of C<> and implicit links (empty
547     by default). note, these are not filenames, but rather
548     page names like those that appear in L<> links.
549     --netscape - will use netscape html directives when applicable.
550     --nonetscape - will not use netscape directives (default).
551     --outfile - filename for the resulting html file (output sent to
552     stdout by default).
553     --podpath - colon-separated list of directories containing library
554     pods. empty by default.
555     --podroot - filesystem base directory from which all relative paths
556     in podpath stem (default is .).
557     --noindex - don't generate an index at the top of the resulting html.
558     --norecurse - don't recurse on those subdirectories listed in podpath.
559     --recurse - recurse on those subdirectories listed in podpath
560     (default behavior).
561     --title - title that will appear in resulting html file.
562     --verbose - self-explanatory
563    
564     END_OF_USAGE
565    
566     sub parse_command_line {
567     my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
568     my $result = GetOptions(
569     'flush' => \$opt_flush,
570     'help' => \$opt_help,
571     'htmlroot=s' => \$opt_htmlroot,
572     'index!' => \$opt_index,
573     'infile=s' => \$opt_infile,
574     'libpods=s' => \$opt_libpods,
575     'netscape!' => \$opt_netscape,
576     'outfile=s' => \$opt_outfile,
577     'podpath=s' => \$opt_podpath,
578     'podroot=s' => \$opt_podroot,
579     'norecurse' => \$opt_norecurse,
580     'recurse!' => \$opt_recurse,
581     'title=s' => \$opt_title,
582     'verbose' => \$opt_verbose,
583     );
584     usage("-", "invalid parameters") if not $result;
585    
586     usage("-") if defined $opt_help; # see if the user asked for help
587     $opt_help = ""; # just to make -w shut-up.
588    
589     $podfile = $opt_infile if defined $opt_infile;
590     $htmlfile = $opt_outfile if defined $opt_outfile;
591    
592     @podpath = split(":", $opt_podpath) if defined $opt_podpath;
593     @libpods = split(":", $opt_libpods) if defined $opt_libpods;
594    
595     warn "Flushing item and directory caches\n"
596     if $opt_verbose && defined $opt_flush;
597     unlink($dircache, $itemcache) if defined $opt_flush;
598    
599     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
600     $podroot = $opt_podroot if defined $opt_podroot;
601    
602     $doindex = $opt_index if defined $opt_index;
603     $recurse = $opt_recurse if defined $opt_recurse;
604     $title = $opt_title if defined $opt_title;
605     $verbose = defined $opt_verbose ? 1 : 0;
606     $netscape = $opt_netscape if defined $opt_netscape;
607     }
608    
609    
610     my $saved_cache_key;
611    
612     sub get_cache {
613     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
614     my @cache_key_args = @_;
615    
616     # A first-level cache:
617     # Don't bother reading the cache files if they still apply
618     # and haven't changed since we last read them.
619    
620     my $this_cache_key = cache_key(@cache_key_args);
621    
622     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
623    
624     # load the cache of %pages and %items if possible. $tests will be
625     # non-zero if successful.
626     my $tests = 0;
627     if (-f $dircache && -f $itemcache) {
628     warn "scanning for item cache\n" if $verbose;
629     $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
630     }
631    
632     # if we didn't succeed in loading the cache then we must (re)build
633     # %pages and %items.
634     if (!$tests) {
635     warn "scanning directories in pod-path\n" if $verbose;
636     scan_podpath($podroot, $recurse, 0);
637     }
638     $saved_cache_key = cache_key(@cache_key_args);
639     }
640    
641     sub cache_key {
642     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
643     return join('!', $dircache, $itemcache, $recurse,
644     @$podpath, $podroot, stat($dircache), stat($itemcache));
645     }
646    
647     #
648     # load_cache - tries to find if the caches stored in $dircache and $itemcache
649     # are valid caches of %pages and %items. if they are valid then it loads
650     # them and returns a non-zero value.
651     #
652    
653     sub load_cache {
654     my($dircache, $itemcache, $podpath, $podroot) = @_;
655     my($tests);
656     local $_;
657    
658     $tests = 0;
659    
660     open(CACHE, "<$itemcache") ||
661     die "$0: error opening $itemcache for reading: $!\n";
662     $/ = "\n";
663    
664     # is it the same podpath?
665     $_ = <CACHE>;
666     chomp($_);
667     $tests++ if (join(":", @$podpath) eq $_);
668    
669     # is it the same podroot?
670     $_ = <CACHE>;
671     chomp($_);
672     $tests++ if ($podroot eq $_);
673    
674     # load the cache if its good
675     if ($tests != 2) {
676     close(CACHE);
677     return 0;
678     }
679    
680     warn "loading item cache\n" if $verbose;
681     while (<CACHE>) {
682     /(.*?) (.*)$/;
683     $items{$1} = $2;
684     }
685     close(CACHE);
686    
687     warn "scanning for directory cache\n" if $verbose;
688     open(CACHE, "<$dircache") ||
689     die "$0: error opening $dircache for reading: $!\n";
690     $/ = "\n";
691     $tests = 0;
692    
693     # is it the same podpath?
694     $_ = <CACHE>;
695     chomp($_);
696     $tests++ if (join(":", @$podpath) eq $_);
697    
698     # is it the same podroot?
699     $_ = <CACHE>;
700     chomp($_);
701     $tests++ if ($podroot eq $_);
702    
703     # load the cache if its good
704     if ($tests != 2) {
705     close(CACHE);
706     return 0;
707     }
708    
709     warn "loading directory cache\n" if $verbose;
710     while (<CACHE>) {
711     /(.*?) (.*)$/;
712     $pages{$1} = $2;
713     }
714    
715     close(CACHE);
716    
717     return 1;
718     }
719    
720     #
721     # scan_podpath - scans the directories specified in @podpath for directories,
722     # .pod files, and .pm files. it also scans the pod files specified in
723     # @libpods for =item directives.
724     #
725     sub scan_podpath {
726     my($podroot, $recurse, $append) = @_;
727     my($pwd, $dir);
728     my($libpod, $dirname, $pod, @files, @poddata);
729    
730     unless($append) {
731     %items = ();
732     %pages = ();
733     }
734    
735     # scan each directory listed in @podpath
736     $pwd = getcwd();
737     chdir($podroot)
738     || die "$0: error changing to directory $podroot: $!\n";
739     foreach $dir (@podpath) {
740     scan_dir($dir, $recurse);
741     }
742    
743     # scan the pods listed in @libpods for =item directives
744     foreach $libpod (@libpods) {
745     # if the page isn't defined then we won't know where to find it
746     # on the system.
747     next unless defined $pages{$libpod} && $pages{$libpod};
748    
749     # if there is a directory then use the .pod and .pm files within it.
750     if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
751     # find all the .pod and .pm files within the directory
752     $dirname = $1;
753     opendir(DIR, $dirname) ||
754     die "$0: error opening directory $dirname: $!\n";
755     @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
756     closedir(DIR);
757    
758     # scan each .pod and .pm file for =item directives
759     foreach $pod (@files) {
760     open(POD, "<$dirname/$pod") ||
761     die "$0: error opening $dirname/$pod for input: $!\n";
762     @poddata = <POD>;
763     close(POD);
764    
765     scan_items("$dirname/$pod", @poddata);
766     }
767    
768     # use the names of files as =item directives too.
769     foreach $pod (@files) {
770     $pod =~ /^(.*)(\.pod|\.pm)$/;
771     $items{$1} = "$dirname/$1.html" if $1;
772     }
773     } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
774     $pages{$libpod} =~ /([^:]*\.pm):/) {
775     # scan the .pod or .pm file for =item directives
776     $pod = $1;
777     open(POD, "<$pod") ||
778     die "$0: error opening $pod for input: $!\n";
779     @poddata = <POD>;
780     close(POD);
781    
782     scan_items("$pod", @poddata);
783     } else {
784     warn "$0: shouldn't be here (line ".__LINE__."\n";
785     }
786     }
787     @poddata = (); # clean-up a bit
788    
789     chdir($pwd)
790     || die "$0: error changing to directory $pwd: $!\n";
791    
792     # cache the item list for later use
793     warn "caching items for later use\n" if $verbose;
794     open(CACHE, ">$itemcache") ||
795     die "$0: error open $itemcache for writing: $!\n";
796    
797     print CACHE join(":", @podpath) . "\n$podroot\n";
798     foreach my $key (keys %items) {
799     print CACHE "$key $items{$key}\n";
800     }
801    
802     close(CACHE);
803    
804     # cache the directory list for later use
805     warn "caching directories for later use\n" if $verbose;
806     open(CACHE, ">$dircache") ||
807     die "$0: error open $dircache for writing: $!\n";
808    
809     print CACHE join(":", @podpath) . "\n$podroot\n";
810     foreach my $key (keys %pages) {
811     print CACHE "$key $pages{$key}\n";
812     }
813    
814     close(CACHE);
815     }
816    
817     #
818     # scan_dir - scans the directory specified in $dir for subdirectories, .pod
819     # files, and .pm files. notes those that it finds. this information will
820     # be used later in order to figure out where the pages specified in L<>
821     # links are on the filesystem.
822     #
823     sub scan_dir {
824     my($dir, $recurse) = @_;
825     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
826     local $_;
827    
828     @subdirs = ();
829     @pods = ();
830    
831     opendir(DIR, $dir) ||
832     die "$0: error opening directory $dir: $!\n";
833     while (defined($_ = readdir(DIR))) {
834     if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
835     $pages{$_} = "" unless defined $pages{$_};
836     $pages{$_} .= "$dir/$_:";
837     push(@subdirs, $_);
838     } elsif (/\.pod$/) { # .pod
839     s/\.pod$//;
840     $pages{$_} = "" unless defined $pages{$_};
841     $pages{$_} .= "$dir/$_.pod:";
842     push(@pods, "$dir/$_.pod");
843     } elsif (/\.pm$/) { # .pm
844     s/\.pm$//;
845     $pages{$_} = "" unless defined $pages{$_};
846     $pages{$_} .= "$dir/$_.pm:";
847     push(@pods, "$dir/$_.pm");
848     }
849     }
850     closedir(DIR);
851    
852     # recurse on the subdirectories if necessary
853     if ($recurse) {
854     foreach my $subdir (@subdirs) {
855     scan_dir("$dir/$subdir", $recurse);
856     }
857     }
858     }
859    
860     #
861     # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
862     # build an index.
863     #
864     sub scan_headings {
865     my($sections, @data) = @_;
866     my($tag, $which_head, $title, $listdepth, $index);
867    
868     # here we need local $ignore = 0;
869     # unfortunately, we can't have it, because $ignore is lexical
870     $ignore = 0;
871    
872     $listdepth = 0;
873     $index = "";
874    
875     # scan for =head directives, note their name, and build an index
876     # pointing to each of them.
877     foreach my $line (@data) {
878     if ($line =~ /^=(head)([1-6])\s+(.*)/) {
879     ($tag,$which_head, $title) = ($1,$2,$3);
880     chomp($title);
881     $$sections{htmlify(0,$title)} = 1;
882    
883     while ($which_head != $listdepth) {
884     if ($which_head > $listdepth) {
885     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
886     $listdepth++;
887     } elsif ($which_head < $listdepth) {
888     $listdepth--;
889     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
890     }
891     }
892    
893     $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
894     "<A HREF=\"#" . htmlify(0,$title) . "\">" .
895     html_escape(process_text(\$title, 0)) . "</A>";
896     }
897     }
898    
899    
900     # finish off the lists
901     while ($listdepth--) {
902     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
903     }
904    
905     # get rid of bogus lists
906     $index =~ s,\t*<UL>\s*</UL>\n,,g;
907    
908     $ignore = 1; # restore old value;
909    
910     return $index;
911     }
912    
913     #
914     # scan_items - scans the pod specified by $pod for =item directives. we
915     # will use this information later on in resolving C<> links.
916     #
917     sub scan_items {
918     my($pod, @poddata) = @_;
919     my($i, $item);
920     local $_;
921    
922     $pod =~ s/\.pod$//;
923     $pod .= ".html" if $pod;
924    
925     foreach $i (0..$#poddata) {
926     $_ = $poddata[$i];
927    
928     # remove any formatting instructions
929     s,[A-Z]<([^<>]*)>,$1,g;
930    
931     # figure out what kind of item it is and get the first word of
932     # it's name.
933     if (/^=item\s+(\w*)\s*.*$/s) {
934     if ($1 eq "*") { # bullet list
935     /\A=item\s+\*\s*(.*?)\s*\Z/s;
936     $item = $1;
937     } elsif ($1 =~ /^\d+/) { # numbered list
938     /\A=item\s+\d+\.?(.*?)\s*\Z/s;
939     $item = $1;
940     } else {
941     # /\A=item\s+(.*?)\s*\Z/s;
942     /\A=item\s+(\w*)/s;
943     $item = $1;
944     }
945    
946     $items{$item} = "$pod" if $item;
947     }
948     }
949     }
950    
951     #
952     # process_head - convert a pod head[1-6] tag and convert it to HTML format.
953     #
954     sub process_head {
955     my($tag, $heading) = @_;
956     my $firstword;
957    
958     # figure out the level of the =head
959     $tag =~ /head([1-6])/;
960     my $level = $1;
961    
962     # can't have a heading full of spaces and speechmarks and so on
963     $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
964    
965     $OUT .= "<P>\n" unless $listlevel;
966     $OUT .= "$HR\n" unless $listlevel || $top;
967     $OUT .= "<H$level>"; # unless $listlevel;
968     #$OUT .= "<H$level>" unless $listlevel;
969     my $convert = $heading; process_text(\$convert, 0);
970     $convert = html_escape($convert);
971     $OUT .= '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
972     $OUT .= "</H$level>"; # unless $listlevel;
973     $OUT .= "\n";
974    
975     # add a valid tag to a list of tags
976     my $key = $curr_base."/".htmlify(0,$heading);
977     print STDERR "Duplicated Anchor: $key\n" if exists $r_valid_anchors->{$key};
978     $r_valid_anchors->{$key} = $convert;
979     }
980    
981     #
982     # process_item - convert a pod item tag and convert it to HTML format.
983     #
984     sub process_item {
985     my $text = $_[0];
986     my($i, $quote, $name);
987    
988     my $need_preamble = 0;
989     my $this_entry;
990    
991    
992     # lots of documents start a list without doing an =over. this is
993     # bad! but, the proper thing to do seems to be to just assume
994     # they did do an =over. so warn them once and then continue.
995     warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
996     unless $listlevel;
997     process_over() unless $listlevel;
998    
999     return unless $listlevel;
1000    
1001     # tempreparely convert E<> escape tags
1002     1 while $text =~ s/E<([^<>]*)>/E-_-$1-_-/g;
1003    
1004     # remove formatting instructions from the text
1005     1 while $text =~ s/[A-DF-Z]<([^<>]*)>/$1/g;
1006    
1007     pre_escape(\$text);
1008    
1009     # process the escaped Etag
1010     1 while $text =~ s/E-_-(.*?)-_-/process_E($1)/eg;
1011    
1012     $need_preamble = $items_seen[$listlevel]++ == 0;
1013    
1014     # check if this is the first =item after an =over
1015     $i = $listlevel - 1;
1016     my $need_new = $listlevel >= @listitem;
1017    
1018     if ($text =~ /\A\*/) { # bullet
1019    
1020     if ($need_preamble) {
1021     push(@listend, "</UL>");
1022     $OUT .= "<UL>\n";
1023     }
1024    
1025     $OUT .= '<P><LI>';
1026     if ($text =~ /\A\*\s*(.+)\Z/s) {
1027     $OUT .= '<STRONG>';
1028     if ($items_named{$1}++) {
1029     $OUT .= html_escape($1);
1030     } else {
1031     my $name = 'item_' . htmlify(1,$1);
1032     $OUT .= qq(<A NAME="$name">). html_escape($1). '</A>';
1033     }
1034     $OUT .= '</STRONG>';
1035     }
1036    
1037     } elsif ($text =~ /\A[\d#]+/) { # numbered list
1038    
1039     if ($need_preamble) {
1040     push(@listend, "</OL>");
1041     $OUT .= "<OL>\n";
1042     }
1043    
1044     $OUT .= '<P><LI>';
1045     if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
1046     $OUT .= '<STRONG>';
1047     if ($items_named{$1}++) {
1048     $OUT .= html_escape($1);
1049     } else {
1050     my $name = 'item_' . htmlify(0,$1);
1051     $OUT .= qq(<A NAME="$name">). html_escape($1). '</A>';
1052     }
1053     $OUT .= '</STRONG>';
1054     }
1055    
1056     } else { # all others
1057    
1058     if ($need_preamble) {
1059     push(@listend, '</DL>');
1060     $OUT .= "<DL>\n";
1061     }
1062    
1063     $OUT .= '<P><DT>';
1064     if ($text =~ /(\S+)/) {
1065     $OUT .= '<STRONG>';
1066     if ($items_named{$1}++) {
1067     $OUT .= html_escape($text);
1068     } else {
1069     my $name = 'item_' . htmlify(1,$text);
1070     $OUT .= qq(<A NAME="$name">). html_escape($text). '</A>';
1071     }
1072     $OUT .= '</STRONG>';
1073     }
1074     $OUT .= '<DD>';
1075     }
1076    
1077     $OUT .= "\n";
1078     }
1079    
1080     #
1081     # process_over - process a pod over tag and start a corresponding HTML
1082     # list.
1083     #
1084     sub process_over {
1085     # start a new list
1086     $listlevel++;
1087     }
1088    
1089     #
1090     # process_back - process a pod back tag and convert it to HTML format.
1091     #
1092     sub process_back {
1093     warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
1094     unless $listlevel;
1095     return unless $listlevel;
1096    
1097     # close off the list. note, I check to see if $listend[$listlevel] is
1098     # defined because an =item directive may have never appeared and thus
1099     # $listend[$listlevel] may have never been initialized.
1100     $listlevel--;
1101     $OUT .= $listend[$listlevel] if defined $listend[$listlevel];
1102     $OUT .= "\n";
1103    
1104     # don't need the corresponding perl code anymore
1105     pop(@listitem);
1106     pop(@listdata);
1107     pop(@listend);
1108    
1109     pop(@items_seen);
1110     }
1111    
1112     #
1113     # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1114     #
1115     sub process_cut {
1116     $ignore = 1;
1117     }
1118    
1119     #
1120     # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1121     # corresponding cut.
1122     #
1123     sub process_pod {
1124     # no need to set $ignore to 0 cause the main loop did it
1125     }
1126    
1127     #
1128     # process_example - process a =example newpod tag.
1129     # =example 1.1 This is a title
1130     # becomes
1131     # <p><i>Example 1.1: This is a title</i></p>
1132     #
1133     sub process_example {
1134     my($index, $title) = @_;
1135     $OUT .= qq{<p><i>Example $index: $title</i></p>};
1136     }
1137    
1138     #
1139     # process_figure - process a =figure newpod tag.
1140     # =figure 1.1 This is a title
1141     # becomes
1142     # <p><center><img src="fig1.1.gif"></center></p>
1143     # <p><center><b>Figure 1.1: This is a title</b></center></p>
1144     #
1145     sub process_figure {
1146     my($index, $title) = @_;
1147     $OUT .= qq{<p><center><img src="fig$index.gif"></center></p>
1148     <p><center><b>Figure $index: $title</b></center></p>
1149     };
1150     }
1151    
1152     #
1153     # process_code - process a =code newpod tag.
1154     # =code filename This is a comment
1155     # becomes
1156     # <p><a href="code/filename"><code>filename</code></a>This is a comment</p>
1157     #
1158     sub process_code {
1159     my($filename, $comment) = @_;
1160     $OUT .= qq{
1161     <p><a href="code/$filename"><code>$filename</code></a> -- $comment</p>
1162     };
1163     }
1164    
1165     #
1166     # process_for - process a =for pod tag. if it's for html, split
1167     # it out verbatim, if illustration, center it, otherwise ignore it.
1168     #
1169     sub process_for {
1170     my($whom, $text) = @_;
1171     if ( $whom =~ /^(pod2)?html$/i) {
1172     $OUT .= $text;
1173     } elsif ($whom =~ /^illustration$/i) {
1174     1 while chomp $text;
1175     for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1176     $text .= $ext, last if -r "$text$ext";
1177     }
1178     $OUT .= qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1179     }
1180     }
1181    
1182     #
1183     # process_begin - process a =begin pod tag. this pushes
1184     # whom we're beginning on the begin stack. if there's a
1185     # begin stack, we only print if it us.
1186     #
1187     sub process_begin {
1188     my($whom, $text) = @_;
1189     $whom = lc($whom);
1190     push (@begin_stack, $whom);
1191     if ( $whom =~ /^(pod2)?html$/) {
1192     $OUT .= $text if $text;
1193     }
1194     }
1195    
1196     #
1197     # process_end - process a =end pod tag. pop the
1198     # begin stack. die if we're mismatched.
1199     #
1200     sub process_end {
1201     my($whom, $text) = @_;
1202     $whom = lc($whom);
1203     if ($begin_stack[-1] ne $whom ) {
1204     warn "Unmatched begin/end at chunk $paragraph\n"
1205     }
1206     pop @begin_stack;
1207     }
1208    
1209     #
1210     # process_text - handles plaintext that appears in the input pod file.
1211     # there may be pod commands embedded within the text so those must be
1212     # converted to html commands.
1213     #
1214     sub process_text {
1215     my($text, $escapeQuotes) = @_;
1216     my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1217     my($podcommand, $params, $tag, $quote);
1218    
1219     return if $ignore;
1220    
1221     $quote = 0; # status of double-quote conversion
1222     $result = "";
1223     $rest = $$text;
1224    
1225     if ($rest =~ /^\s+/) { # preformatted text, no pod directives
1226     $rest =~ s/\n+\Z//;
1227     $rest =~ s#.*#
1228     my $line = $&;
1229     1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1230     $line;
1231     #eg;
1232    
1233     $rest =~ s/&/&amp;/g;
1234     $rest =~ s/</&lt;/g;
1235     $rest =~ s/>/&gt;/g;
1236     $rest =~ s/"/&quot;/g;
1237    
1238     # try and create links for all occurrences of perl.* within
1239     # the preformatted text.
1240     $rest =~ s{
1241     (\s*)(perl\w+)
1242     }{
1243     if (defined $pages{$2}) { # is a link
1244     qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1245     } elsif (defined $pages{dosify($2)}) { # is a link
1246     qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1247     } else {
1248     "$1$2";
1249     }
1250     }xeg;
1251     $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1252    
1253     my $urls = '(' . join ('|', qw{
1254     http
1255     telnet
1256     mailto
1257     news
1258     gopher
1259     file
1260     wais
1261     ftp
1262     } )
1263     . ')';
1264    
1265     my $ltrs = '\w';
1266     my $gunk = '/#~:.?+=&%@!\-';
1267     my $punc = '.:?\-;';
1268     my $any = "${ltrs}${gunk}${punc}";
1269    
1270     $rest =~ s{
1271     \b # start at word boundary
1272     ( # begin $1 {
1273     $urls : # need resource and a colon
1274     (?! :) # don't convert HTTP::Foo and alike
1275     [$any] +? # followed by on or more
1276     # of any valid character, but
1277     # be conservative and take only
1278     # what you need to....
1279     ) # end $1 }
1280     (?= # look-ahead non-consumptive assertion
1281     [$punc]* # either 0 or more puntuation
1282     [^$any] # followed by a non-url char
1283     | # or else
1284     $ # then end of the string
1285     )
1286     }{<A HREF="$1">$1</A>}igox;
1287    
1288     # text should be as it is (verbatim)
1289     $result = "<PRE>$rest</PRE>"
1290    
1291     } else { # formatted text
1292     # parse through the string, stopping each time we find a
1293     # pod-escape. once the string has been throughly processed
1294     # we can output it.
1295     while (length $rest) {
1296     # check to see if there are any possible pod directives in
1297     # the remaining part of the text.
1298     if ($rest =~ m/[BCEIFLSZ]</) {
1299     warn "\$rest\t= $rest\n" unless
1300     $rest =~ /\A
1301     ([^<]*?)
1302     ([BCEIFLSZ]?)
1303     <
1304     (.*)\Z/xs;
1305    
1306     $s1 = $1; # pure text
1307     $s2 = $2; # the type of pod-escape that follows
1308     $s3 = '<'; # '<'
1309     $s4 = $3; # the rest of the string
1310     } else {
1311     $s1 = $rest;
1312     $s2 = "";
1313     $s3 = "";
1314     $s4 = "";
1315     }
1316    
1317     if ($s3 eq '<' && $s2) { # a pod-escape
1318     $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1319     $podcommand = "$s2<";
1320     $rest = $s4;
1321    
1322     # find the matching '>'
1323     $match = 1;
1324     $bf = 0;
1325     while ($match && !$bf) {
1326     $bf = 1;
1327     if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1328     $bf = 0;
1329     $match++;
1330     $podcommand .= $1;
1331     $rest = $2;
1332     } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1333     $bf = 0;
1334     $match--;
1335     $podcommand .= $1;
1336     $rest = $2;
1337     }
1338     }
1339    
1340     if ($match != 0) {
1341     warn <<WARN;
1342     $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1343     WARN
1344     $result .= substr $podcommand, 0, 2;
1345     $rest = substr($podcommand, 2) . $rest;
1346     next;
1347     }
1348    
1349     # pull out the parameters to the pod-escape
1350     $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1351     $tag = $1;
1352     $params = $2;
1353    
1354     # process the text within the pod-escape so that any escapes
1355     # which must occur do.
1356     process_text(\$params, 0) unless $tag eq 'L';
1357    
1358     $s1 = $params;
1359     if (!$tag || $tag eq " ") { # <> : no tag
1360     $s1 = "&lt;$params&gt;";
1361     } elsif ($tag eq "L") { # L<> : link
1362     $s1 = process_L($params);
1363     } elsif ($tag eq "I" || # I<> : italicize text
1364     $tag eq "B" || # B<> : bold text
1365     $tag eq "F") { # F<> : file specification
1366     $s1 = process_BFI($tag, $params);
1367     } elsif ($tag eq "C") { # C<> : literal code
1368     $s1 = process_C($params, 1);
1369     } elsif ($tag eq "E") { # E<> : escape
1370     $s1 = process_E($params);
1371     } elsif ($tag eq "Z") { # Z<> : zero-width character
1372     $s1 = process_Z($params);
1373     } elsif ($tag eq "S") { # S<> : non-breaking space
1374     $s1 = process_S($params);
1375     } elsif ($tag eq "X") { # S<> : non-breaking space
1376     $s1 = process_X($params);
1377     } else {
1378     warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1379     }
1380    
1381     $result .= "$s1";
1382     } else {
1383     # for pure text we must deal with implicit links and
1384     # double-quotes among other things.
1385     $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1386     $rest = $s4;
1387     }
1388     }
1389     }
1390     $$text = $result;
1391     }
1392    
1393     sub html_escape {
1394     my $rest = $_[0];
1395    
1396     # don't break already escaped '<','>', by escaping the & from
1397     # &gt; or '&lt';
1398     return $rest if $rest =~ /&(gt|lt|amp|quot);/;
1399    
1400     $rest =~ s/&/&amp;/g;
1401     $rest =~ s/</&lt;/g;
1402     $rest =~ s/>/&gt;/g;
1403     $rest =~ s/"/&quot;/g;
1404     return $rest;
1405     }
1406    
1407     #
1408     # process_puretext - process pure text (without pod-escapes) converting
1409     # double-quotes and handling implicit C<> links.
1410     #
1411     sub process_puretext {
1412     my($text, $quote) = @_;
1413     my(@words, $result, $rest, $lead, $trail);
1414    
1415     # convert double-quotes to single-quotes
1416     $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1417     while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1418    
1419     $$quote = ($text =~ m/"/ ? 1 : 0);
1420     $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1421    
1422     # keep track of leading and trailing white-space
1423     $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1424     $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1425    
1426     # collapse all white space into a single space
1427     $text =~ s/\s+/ /g;
1428     @words = split(" ", $text);
1429    
1430     # process each word individually
1431     foreach my $word (@words) {
1432     # see if we can infer a link
1433     if ($word =~ /^\w+\(/) {
1434     # has parenthesis so should have been a C<> ref
1435     $word = process_C($word);
1436     # $word =~ /^[^()]*]\(/;
1437     # if (defined $items{$1} && $items{$1}) {
1438     # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1439     # . htmlify(0,$word)
1440     # . "\">$word</A></CODE>";
1441     # } elsif (defined $items{$word} && $items{$word}) {
1442     # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1443     # . htmlify(0,$word)
1444     # . "\">$word</A></CODE>";
1445     # } else {
1446     # $word = "\n<CODE><A HREF=\"#item_"
1447     # . htmlify(0,$word)
1448     # . "\">$word</A></CODE>";
1449     # }
1450     } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1451     # perl variables, should be a C<> ref
1452     $word = process_C($word, 1);
1453     } elsif ($word =~ m,^\w+://\w,) {
1454     # looks like a URL
1455     # $word = qq(<A HREF="$word">$word</A>);
1456    
1457     my $urls = '(' . join ('|', qw{
1458     http
1459     telnet
1460     mailto
1461     news
1462     gopher
1463     file
1464     wais
1465     ftp
1466     } )
1467     . ')';
1468    
1469     my $ltrs = '\w';
1470     my $gunk = '/#~:.?+=&%@!\-';
1471     my $punc = '.:?\-;';
1472     my $any = "${ltrs}${gunk}${punc}";
1473    
1474     $word =~ s{
1475     \b # start at word boundary
1476     ( # begin $1 {
1477     $urls : # need resource and a colon
1478     (?! :) # don't convert HTTP::Foo and alike
1479     [$any] +? # followed by one or more
1480     # of any valid character, but
1481     # be conservative and take only
1482     # what you need to....
1483     ) # end $1 }
1484     (?= # look-ahead non-consumptive assertion
1485     [$punc]* # either 0 or more puntuation
1486     [^$any] # followed by a non-url char
1487     | # or else
1488     $ # then end of the string
1489     )
1490     }{<A HREF="$1">$1</A>}igox;
1491    
1492    
1493     } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1494     # looks like an e-mail address
1495     my ($w1, $w2, $w3) = ("", $word, "");
1496     ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1497     ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1498     $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1499     } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1500     $word = html_escape($word) if $word =~ /["&<>]/;
1501     $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1502     } else {
1503     $word = html_escape($word) if $word =~ /["&<>]/;
1504     }
1505     }
1506    
1507     # build a new string based upon our conversion
1508     $result = "";
1509     $rest = join(" ", @words);
1510     while (length($rest) > 75) {
1511     if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1512     $rest =~ m/^(\S*)\s(.*?)$/o) {
1513    
1514     $result .= "$1\n";
1515     $rest = $2;
1516     } else {
1517     $result .= "$rest\n";
1518     $rest = "";
1519     }
1520     }
1521     $result .= $rest if $rest;
1522    
1523     # restore the leading and trailing white-space
1524     $result = "$lead$result$trail";
1525    
1526     return $result;
1527     }
1528    
1529     #
1530     # pre_escape - convert & in text to $amp;
1531     #
1532     sub pre_escape {
1533     my($str) = @_;
1534    
1535     $$str =~ s,&,&amp;,g;
1536     }
1537    
1538     #
1539     # dosify - convert filenames to 8.3
1540     #
1541     sub dosify {
1542     my($str) = @_;
1543     if ($Is83) {
1544     $str = lc $str;
1545     $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1546     $str =~ s/(\w+)/substr ($1,0,8)/ge;
1547     }
1548     return $str;
1549     }
1550    
1551     #
1552     # process_L - convert a pod L<> directive to a corresponding HTML link.
1553     # most of the links made are inferred rather than known about directly
1554     # (i.e it's not known whether the =head\d section exists in the target file,
1555     # or whether a .pod file exists in the case of split files). however, the
1556     # guessing usually works.
1557     #
1558     # Unlike the other directives, this should be called with an unprocessed
1559     # string, else tags in the link won't be matched.
1560     #
1561     sub process_L {
1562     my($str) = @_;
1563     my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
1564    
1565     $str =~ s/\n/ /g; # undo word-wrapped tags
1566     $s1 = $str;
1567     for ($s1) {
1568     # LREF: a la HREF L<show this text|man/section>
1569     $linktext = $1 if s:^([^|]+)\|::;
1570    
1571     # a :: acts like a /
1572     s,::,/,;
1573    
1574     # make sure sections start with a /
1575     s,^",/",g;
1576     s,^,/,g if (!m,/, && / /);
1577    
1578     # check if there's a section specified
1579     if (m,^(.*?)/"?(.*?)"?$,) { # yes
1580     ($page, $section) = ($1, $2);
1581     } else { # no
1582     ($page, $section) = ($_, "");
1583     }
1584    
1585     # check if we know that this is a section in this page
1586     if (!defined $pages{$page} && defined $sections{$page}) {
1587     $section = $page;
1588     $page = "";
1589     }
1590     }
1591    
1592     $page83=dosify($page);
1593     $page=$page83 if (defined $pages{$page83});
1594     if ($page eq "") {
1595     $link = "#" . htmlify(0,$section);
1596     $linktext = $section unless defined($linktext);
1597     } elsif (!defined $pages{$page}) {
1598     warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1599     $link = "";
1600     $linktext = $page unless defined($linktext);
1601     } else {
1602     $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1603     $section = htmlify(0,$section) if $section ne "";
1604    
1605     # if there is a directory by the name of the page, then assume that an
1606     # appropriate section will exist in the subdirectory
1607     if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1608     $link = "$htmlroot/$1/$section.html";
1609    
1610     # since there is no directory by the name of the page, the section will
1611     # have to exist within a .html of the same name. thus, make sure there
1612     # is a .pod or .pm that might become that .html
1613     } else {
1614     $section = "#$section" if $section;
1615     # check if there is a .pod with the page name
1616     if ($pages{$page} =~ /([^:]*)\.pod:/) {
1617     $link = "$htmlroot/$1.html$section";
1618     } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1619     $link = "$htmlroot/$1.html$section";
1620     } else {
1621     warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1622     "no .pod or .pm found\n";
1623     $link = "";
1624     $linktext = $section unless defined($linktext);
1625     }
1626     }
1627     }
1628    
1629     process_text(\$linktext, 0);
1630     if ($link) {
1631     $s1 = "<A HREF=\"$link\">$linktext</A>";
1632     $link =~ s|(\./)+||;
1633     $link =~ s|/+||;
1634     $link =~ s|(\.html)?#|/|;
1635     $link =~ s|^/|$curr_base/|; # normalize internal links
1636     push @{$r_links_to_check->{$curr_base}}, $link ;
1637     } else {
1638     $s1 = "<EM>$linktext</EM>";
1639     }
1640     return $s1;
1641     }
1642    
1643     #
1644     # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1645     # convert them to corresponding HTML directives.
1646     #
1647     sub process_BFI {
1648     my($tag, $str) = @_;
1649     my($s1); # work string
1650     my(%repltext) = ( 'B' => 'STRONG',
1651     'F' => 'EM',
1652     'I' => 'EM');
1653    
1654     # extract the modified text and convert to HTML
1655     $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1656     return $s1;
1657     }
1658    
1659     #
1660     # process_C - process the C<> pod-escape.
1661     #
1662     sub process_C {
1663     my($str, $doref) = @_;
1664     my($s1, $s2);
1665    
1666     $s1 = $str;
1667     $s1 =~ s/\([^()]*\)//g; # delete parentheses
1668     $s2 = $s1;
1669     $s1 =~ s/\W//g; # delete bogus characters
1670     $str = html_escape($str);
1671    
1672     # if there was a pod file that we found earlier with an appropriate
1673     # =item directive, then create a link to that page.
1674     if ($doref && defined $items{$s1}) {
1675     $s1 = ($items{$s1} ?
1676     "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" :
1677     "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>");
1678     $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1679     confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1680     } else {
1681     $s1 = "<CODE>$str</CODE>";
1682     # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1683     }
1684    
1685    
1686     return $s1;
1687     }
1688    
1689     #
1690     # process_E - process the E<> pod directive which seems to escape a character.
1691     #
1692     sub process_E {
1693     my($str) = @_;
1694    
1695     for ($str) {
1696     s,([^/].*),\&$1\;,g;
1697     }
1698    
1699     return $str;
1700     }
1701    
1702     #
1703     # process_Z - process the Z<> pod directive which really just amounts to
1704     # ignoring it. this allows someone to start a paragraph with an =
1705     #
1706     sub process_Z {
1707     my($str) = @_;
1708    
1709     # there is no equivalent in HTML for this so just ignore it.
1710     $str = "";
1711     return $str;
1712     }
1713    
1714     #
1715     # process_S - process the S<> pod directive which means to convert all
1716     # spaces in the string to non-breaking spaces (in HTML-eze).
1717     #
1718     sub process_S {
1719     my($str) = @_;
1720    
1721     # convert all spaces in the text to non-breaking spaces in HTML.
1722     $str =~ s/ /&nbsp;/g;
1723     return $str;
1724     }
1725    
1726     #
1727     # process_X - this is supposed to make an index entry. we'll just
1728     # ignore it.
1729     #
1730     sub process_X {
1731     return '';
1732     }
1733    
1734    
1735     #
1736     # finish_list - finish off any pending HTML lists. this should be called
1737     # after the entire pod file has been read and converted.
1738     #
1739     sub finish_list {
1740     while ($listlevel > 0) {
1741     $OUT .= "</DL>\n";
1742     $listlevel--;
1743     }
1744     }
1745    
1746     #
1747     # htmlify - converts a pod section specification to a suitable section
1748     # specification for HTML. if first arg is 1, only takes 1st word.
1749     #
1750     sub htmlify {
1751     my($compact, $heading) = @_;
1752    
1753     if ($compact) {
1754     $heading =~ /^(\w+)/;
1755     $heading = $1;
1756     }
1757    
1758     # $heading = lc($heading);
1759     $heading =~ s/[^\w\s]/_/g;
1760     $heading =~ s/(\s+)/ /g;
1761     $heading =~ s/^\s*(.*?)\s*$/$1/s;
1762     $heading =~ s/ /_/g;
1763     $heading =~ s/\A(.{32}).*\Z/$1/s;
1764     $heading =~ s/\s+\Z//;
1765     $heading =~ s/_{2,}/_/g;
1766    
1767     return $heading;
1768     }
1769    
1770     BEGIN {
1771     }
1772    
1773     1;

  ViewVC Help
Powered by ViewVC 1.1.22