# Pod/HtmlPsPdf/Html.pm

package Pod::HtmlPsPdf::Html;

use Pod::Functions;
use Getopt::Long;       # package for handling command-line parameters
require Exporter;
use vars qw($VERSION);
$VERSION = 1.01;
@ISA = Exporter;
@EXPORT = qw(pod2html htmlify);
use Cwd;

use Carp;

use strict;

use Config;

=head1 NAME

Pod::HtmlPsPdf::Html - module to convert pod files to HTML (derived from Pod::Html)

=head1 SYNOPSIS

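    use Pod::HtmlPsPdf::Html;
    pod2html(\@podpath, $podroot, $htmlroot, $verbose,
             \@pod_paragraphs, \%main_toc, $curr_page, $curr_page_index,
             \%valid_anchors, \%links_to_check);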

=head1 DESCRIPTION

Converts files from pod format (see L<perlpod>) to HTML format.  It
can automatically generate indexes and cross-references, and it keeps
a cache of things it knows how to cross-reference.

=head1 ARGUMENTS

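C<parse_command_line()> understands the following options, inherited
from Pod::Html.  Note that C<pod2html()> in this module receives its
settings as positional arguments and does not currently call
C<parse_command_line()>: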

=over 4

=item help

    --help

Displays the usage message.

=item htmlroot

    --htmlroot=name

Sets the base URL for the HTML files.  When cross-references are made,
the HTML root is prepended to the URL.

=item infile

    --infile=name

Specify the pod file to convert.  Input is taken from STDIN if no
infile is specified.

=item outfile

    --outfile=name

Specify the HTML file to create.  Output goes to STDOUT if no outfile
is specified.

=item podroot

    --podroot=name

Specify the base directory for finding library pods.

=item podpath

    --podpath=name:...:name

Specify which subdirectories of the podroot contain pod files whose
HTML converted forms can be linked-to in cross-references.

=item libpods

    --libpods=name:...:name

List of page names (eg, "perlfunc") which contain linkable C<=item>s.

=item netscape

    --netscape

Use Netscape HTML directives when applicable.

=item nonetscape

    --nonetscape

Do not use Netscape HTML directives (default).

=item index

    --index

Generate an index at the top of the HTML file (default behaviour).

=item noindex

    --noindex

Do not generate an index at the top of the HTML file.


=item recurse

    --recurse

Recurse into subdirectories specified in podpath (default behaviour).

=item norecurse

    --norecurse

Do not recurse into subdirectories specified in podpath.

=item title

    --title=title

Specify the title of the resulting HTML file.

=item verbose

    --verbose

Display progress messages.

=back

=head1 EXAMPLE

    pod2html("pod2html",
             "--podpath=lib:ext:pod:vms", 
             "--podroot=/usr/src/perl",
             "--htmlroot=/perl/nmanual",
             "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
             "--recurse",
             "--infile=foo.pod",
             "--outfile=/perl/nmanual/foo.html");

=head1 AUTHOR

Originally written by Tom Christiansen, E<lt>tchrist@perl.comE<gt>.

Modified by Stas Bekman E<lt>stas@stason.orgE<gt>.

=head1 BUGS

Has trouble with C<> etc in = commands.

=head1 SEE ALSO

L<perlpod>

=head1 COPYRIGHT

This program is distributed under the Artistic License.

=cut

use vars qw($OUT);              # package global: the HTML generated so far.
                                #   the emitters in this file append to it.

#my $HR = qq{<P><B><FONT SIZE=-1><A HREF="#toc">[TOC]</A></FONT></B></P><HR>};
my $HR = qq{<HR>};              # separator markup emitted between sections


my $dircache = "pod2html-dircache";     # file in which %pages is cached
my $itemcache = "pod2html-itemcache";   # file in which %items is cached

my @begin_stack = ();           # begin/end stack

my @libpods = ();               # files to search for links from C<> directives
my $htmlroot = "/";             # http-server base directory from which all
                                #   relative paths in $podpath stem.
my $htmlfile = "";              # write to stdout by default
my $podfile = "";               # read from stdin by default
my @podpath = ();               # list of directories containing library pods.
my $podroot = ".";              # filesystem base directory from which all
                                #   relative paths in $podpath stem.
my $recurse = 1;                # recurse on subdirectories in $podpath.
my $verbose = 0;                # not verbose by default
my $doindex = 1;                # non-zero if we should generate an index
my $listlevel = 0;              # current list depth
my @listitem = ();              # stack of HTML commands to use when a =item is
                                #   encountered.  the top of the stack is the
                                #   current list.
                                #   NOTE(review): in the part of the file
                                #   reviewed this is only reset and popped,
                                #   never pushed -- possibly vestigial.
my @listdata = ();              # similar to @listitem, but for the text after
                                #   an =item
my @listend = ();               # similar to @listitem, but the text to use to
                                #   end the list.
my $ignore = 1;                 # true while text must NOT be formatted.  we
                                #   don't format text until we hit our first
                                #   pod directive, and stop again at =cut.

my %items_named = ();           # for the multiples of the same item in perlfunc
my @items_seen = ();            # per list level: number of =item's seen so far
my $netscape = 0;               # whether or not to use netscape directives.
my $title;                      # title to give the pod(s)
my $top = 1;                    # true if we are at the top of the doc.  used
                                #   to prevent the first <HR> directive.
my $paragraph;                  # which paragraph we're processing (used
                                #   for error messages)
my %pages = ();                 # associative array used to find the location
                                #   of pages referenced by L<> links.  kept
                                #   between pod2html() calls as a cache.
my %sections = ();              # sections within this page
my %items = ();                 # associative array used to find the location
                                #   of =item directives referenced by C<> links.
                                #   kept between pod2html() calls as a cache.
my $Is83;                       # is dos with short filenames (8.3)


#
# init_globals - put the per-document conversion state back to its defaults.
#  pod2html() calls this first thing on every run.  %pages and %items are
#  deliberately NOT cleared here: they survive between runs as a cache (see
#  get_cache and the related cache management code).
#
sub init_globals {
    # the HTML produced so far
    $OUT = '';

    # names of the on-disk cache files
    ($dircache, $itemcache) = ("pod2html-dircache", "pod2html-itemcache");

    # where pods are read from and where the HTML will live
    $htmlroot = "/";            # http-server base directory
    $podroot  = ".";            # filesystem base directory for @podpath
    $htmlfile = "";             # write to stdout by default
    $podfile  = "";             # read from stdin by default
    @podpath  = ();             # directories containing library pods
    @libpods  = ();             # pages searched for C<> link targets

    # behaviour switches: recurse and index on, verbose and netscape off
    ($recurse, $verbose, $doindex, $netscape) = (1, 0, 1, 0);

    # =begin/=end and list bookkeeping
    @begin_stack = ();
    $listlevel   = 0;
    @listitem    = ();
    @listdata    = ();
    @listend     = ();
    @items_seen  = ();
    %items_named = ();

    # state of the document being converted
    $ignore    = 1;             # no text is formatted before the first directive
    $top       = 1;             # still at the top: suppress the first <HR>
    $title     = '';
    $paragraph = '';            # used for error messages
    %sections  = ();

    $Is83 = $^O eq 'dos';       # dos with short (8.3) filenames
}

use vars qw($r_valid_anchors $r_links_to_check $curr_base);

#
# pod2html - convert one pod document, already split into paragraphs, to HTML.
#
#  arguments (all positional):
#    \@podpath         - directories holding library pods (copied to @podpath)
#    $podroot          - filesystem base directory for @podpath
#    $htmlroot         - base URL for the generated HTML
#    $verbose          - true to get progress messages via warn()
#    \@pod_paragraphs  - in:  the pod paragraphs of the document
#                        out: replaced by ONE element holding the HTML
#    \%main_toc        - out: $main_toc{$curr_page} receives this page's index
#    $curr_page        - name of the HTML page being generated
#    $curr_page_index  - accepted for interface compatibility; unused here
#    \%valid_anchors   - out: "$curr_base/" and "$curr_base/<anchor>" keys are
#                        added for this page (stored in $r_valid_anchors)
#    \%links_to_check  - stored in $r_links_to_check; not used by this sub
#
#  returns early, leaving @pod_paragraphs untouched, when scan_headings()
#  yields an empty index.
#
sub pod2html {

    init_globals();

    @podpath  = @{+shift};
    $podroot  = shift;
    $htmlroot = shift;
    $verbose  = shift;
    my $r_html_data = shift;
    my $rh_main_toc = shift;  # hash ref: page name => that page's index
    my $curr_page   = shift || '';
    $podfile = $curr_page || '';
    my $curr_page_index = shift || '';   # unused, but must be consumed
    $r_valid_anchors  = shift || {};
    $r_links_to_check = shift || {};

    $curr_base = $curr_page;
    $curr_base =~ s/\.html//;

    my @poddata = @$r_html_data;

    local($/);
    local $_;

    $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());

    warn "Scanning for sections in input file(s)\n" if $verbose;

    # the title must be extracted before the index is built, so that the
    # first header does not enter the index but serves only as the title.
    $title = '';
    my $title_sec = '';

    # find the beginning of the pod: the first =head1 paragraph.  the
    # paragraphs are only consumed once we know such a paragraph exists, so
    # a document without any =head1 keeps all of its content and falls
    # through to the title fallbacks further down.
    my $head1_at;
    foreach my $i (0..$#poddata) {
        next unless $poddata[$i] =~ /^=head1/;
        $head1_at = $i;
        last;
    }

    if (defined $head1_at) {
        # drop whatever precedes the first =head1
        splice(@poddata, 0, $head1_at);

        # strip the directive itself ("=head1" or "=head1 NAME")
        $title_sec = shift @poddata;
        $title_sec =~ s/^=head1\s*(NAME)?//;

        # grab the rest of the first section: everything up to the next
        # =head directive (or the first empty/false paragraph)
        while ($poddata[0] && $poddata[0] !~ /^=head/) {
            $title_sec .= shift @poddata;
        }
    }

    # remove any excessive spaces, new lines
    $title_sec =~ s/\n/ /gs;
    $title_sec =~ s/\s+/ /g;

    # save away a clean header
    $title = $title_sec;

    # scan the pod for =head[1-6] directives and build an index
    my $index = scan_headings(\%sections, @poddata);

    unless($index) {
        warn "No pod in $podfile\n" if $verbose;
        return;
    }

    if (!$title and $podfile =~ /\.pod$/) {
        # probably a split pod so take first =head[12] as title
        for (my $i = 0; $i < @poddata; $i++) {
            last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
        }
        warn "adopted '$title' as title for $podfile\n"
            if $verbose and $title;
    }
    if ($title) {
        $title =~ s/\s*\(.*\)//;
    } else {
        warn "$0: no title for $podfile";
        # NOTE(review): the greedy (.*) swallows the extension too, so $1 is
        # the whole file name.  left as is because the fallback title is
        # user visible -- confirm before tightening.
        $podfile =~ /^(.*)(\.[^.\/]+)?$/;
        $title = ($podfile eq "-" ? 'No Title' : $1);
        warn "using $title" if $verbose;
    }

    # full pages index including the page's name and title
    my $full_index = '';
    $full_index = qq{<LI><A HREF="$curr_page"><B><FONT SIZE=+1>$title</FONT></B></A></LI>};
    $full_index .= "<P>";
    $full_index .= $index;
    $full_index =~ s/\#/$curr_page\#/gs;
    $full_index .= "<P>";

    # add a valid tag to a list of tags - since we might have
    # link like warnings# which points to the page itself
    $r_valid_anchors->{$curr_base."/"} = $title;

    # start the HTML
    add_header($title);

    $OUT .= $HR;

    # load/reload/validate/cache %pages and %items
    get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);

    # scan the pod for =item directives.  scan_items() takes the page name
    # followed by the paragraphs; the empty name stands for this page.
    scan_items("", @poddata);

    # put an index at the top of the file.  note, if $doindex is 0 we
    # still generate an index, but surround it with an html comment.
    # that way some other program can extract it if desired.
    $index =~ s/--+/-/g;
    $OUT .= "<!-- INDEX BEGIN -->\n";
    $OUT .= qq{<A NAME="toc"></A>\n};
    $OUT .= qq{<P><B>Table of Contents:</B></P>\n};
    $OUT .= "<!--\n" unless $doindex;
    $OUT .= $index;
    $OUT .= "-->\n" unless $doindex;
    $OUT .= "<!-- INDEX END -->\n\n";

    # now convert this file
    warn "Converting input file\n" if $verbose;
    foreach my $i (0..$#poddata) {
        $_ = $poddata[$i];
        $paragraph = $i+1;
        if (/^(=.*)/s) {        # is it a pod directive?
            $ignore = 0;
            $_ = $1;
            if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
                process_begin($1, $2);
            } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
                process_end($1, $2);
            } elsif (/^=cut/) {                 # =cut
                process_cut();
            } elsif (/^=pod/) {                 # =pod
                process_pod();
            } else {
                # inside a =begin block only 'html' content is processed
                next if @begin_stack && $begin_stack[-1] ne 'html';

                if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
                    process_head($1, $2);
                } elsif (/^=item\s*(.*\S)/sm) { # =item text
                    process_item($1);
                } elsif (/^=over\s*(.*)/) {             # =over N
                    process_over();
                } elsif (/^=back/) {            # =back
                    process_back();
                } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
                    process_for($1,$2);
                } elsif (/^=example\s+(\S+)\s+(.*)/si) {# =example
                    process_example($1,$2);
                } elsif (/^=figure\s+(\S+)\s+(.*)/si) { # =figure
                    process_figure($1,$2);
                } elsif (/^=code\s+(\S+)\s+(.*)/si) {   # =code
                    process_code($1,$2);
                } else {
                    /^=(\S*)\s*/;
                    warn "$0: $podfile: unknown pod directive '$1' in "
                       . "paragraph $paragraph.  ignoring.\n";
                }
            }
            $top = 0;
        }
        else {
            next if $ignore;
            next if @begin_stack && $begin_stack[-1] ne 'html';
            my $text = $_;
            process_text(\$text, 1);
            $OUT .= "<P>\n$text";
        }
    }

    # finish off any pending directives
    finish_list();

    # add the last <HR>
    $OUT .= $HR;

    # add the tail
    add_tail();

    $rh_main_toc->{$curr_page} = $full_index;

    # hand the HTML back through the caller's array
    @$r_html_data = $OUT;

    warn "Finished\n" if $verbose;
}


##################
#
# add_header - append the HTML prologue to $OUT: the doctype, the opening
#  HTML/HEAD/BODY markup, and the page title as both <TITLE> and a centered
#  <H1>.  a missing or false title is rendered as an empty string.
#
sub add_header {
    my ($title) = @_;
    $title ||= '';

    my $prologue = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
<HEAD>
   <TITLE>$title</TITLE>
<BODY>
<H1 ALIGN=CENTER>$title</H1>
            };

    $OUT .= $prologue;
} # end of add_header


#
# add_tail - append the closing </BODY></HTML> markup, preceded by a blank
#  line, to $OUT.
#
sub add_tail {
    $OUT .= join("\n", '', '', '</BODY>', '</HTML>', '');
} # end of add_tail

##############################################################################

my $usage;                      # the usage text; filled in right below the sub

#
# usage - complain (optionally) and die with the usage message.
#  the first argument names the pod file being processed; any further
#  arguments are joined with spaces and reported through warn() first.
#
sub usage {
    my ($podfile, @complaints) = @_;

    if (@complaints) {
        warn "$0: $podfile: @complaints\n";
    }

    die $usage;
}

$usage =<<END_OF_USAGE;
Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
           --podpath=<name>:...:<name> --podroot=<name>
           --libpods=<name>:...:<name> --recurse --verbose --index
           --netscape --norecurse --noindex

  --flush      - flushes the item and directory caches.
  --help       - prints this message.
  --htmlroot   - http-server base directory from which all relative paths
                 in podpath stem (default is /).
  --index      - generate an index at the top of the resulting html
                 (default).
  --infile     - filename for the pod to convert (input taken from stdin
                 by default).
  --libpods    - colon-separated list of pages to search for =item pod
                 directives in as targets of C<> and implicit links (empty
                 by default).  note, these are not filenames, but rather
                 page names like those that appear in L<> links.
  --netscape   - will use netscape html directives when applicable.
  --nonetscape - will not use netscape directives (default).
  --outfile    - filename for the resulting html file (output sent to
                 stdout by default).
  --podpath    - colon-separated list of directories containing library
                 pods.  empty by default.
  --podroot    - filesystem base directory from which all relative paths
                 in podpath stem (default is .).
  --noindex    - don't generate an index at the top of the resulting html.
  --norecurse  - don't recurse on those subdirectories listed in podpath.
  --recurse    - recurse on those subdirectories listed in podpath
                 (default behavior).
  --title      - title that will appear in resulting html file.
  --verbose    - self-explanatory

END_OF_USAGE

#
# parse_command_line - read the pod2html options from @ARGV (through
#  Getopt::Long) and copy every option that was given into the matching
#  file-scoped setting.  options that were not given leave the current
#  value alone, except $verbose which is always set to 0 or 1.
#  dies through usage() on invalid options or when --help is given.
#
sub parse_command_line {
    my ($opt_flush, $opt_help, $opt_htmlroot, $opt_index, $opt_infile,
        $opt_libpods, $opt_netscape, $opt_outfile, $opt_podpath,
        $opt_podroot, $opt_recurse, $opt_title, $opt_verbose);

    # 'recurse!' is negatable, so it already provides --norecurse; a
    # separate 'norecurse' entry would only bind a variable nobody reads.
    my $result = GetOptions(
                            'flush'      => \$opt_flush,
                            'help'       => \$opt_help,
                            'htmlroot=s' => \$opt_htmlroot,
                            'index!'     => \$opt_index,
                            'infile=s'   => \$opt_infile,
                            'libpods=s'  => \$opt_libpods,
                            'netscape!'  => \$opt_netscape,
                            'outfile=s'  => \$opt_outfile,
                            'podpath=s'  => \$opt_podpath,
                            'podroot=s'  => \$opt_podroot,
                            'recurse!'   => \$opt_recurse,
                            'title=s'    => \$opt_title,
                            'verbose'    => \$opt_verbose,
                           );
    usage("-", "invalid parameters") if not $result;

    usage("-") if defined $opt_help;    # see if the user asked for help

    $podfile  = $opt_infile if defined $opt_infile;
    $htmlfile = $opt_outfile if defined $opt_outfile;

    # both lists are colon separated on the command line
    @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
    @libpods  = split(":", $opt_libpods) if defined $opt_libpods;

    warn "Flushing item and directory caches\n"
        if $opt_verbose && defined $opt_flush;
    unlink($dircache, $itemcache) if defined $opt_flush;

    $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
    $podroot  = $opt_podroot if defined $opt_podroot;

    $doindex  = $opt_index if defined $opt_index;
    $recurse  = $opt_recurse if defined $opt_recurse;
    $title    = $opt_title if defined $opt_title;
    $verbose  = defined $opt_verbose ? 1 : 0;
    $netscape = $opt_netscape if defined $opt_netscape;
}


my $saved_cache_key;            # key of the cache state %pages/%items reflect

#
# get_cache - make sure %pages and %items are populated.
#  takes ($dircache, $itemcache, \@podpath, $podroot, $recurse).
#
#  first-level cache: when the key computed from the arguments (and the
#  stat() data of the cache files) equals the one remembered from the last
#  call, nothing is done.  otherwise the cache files are loaded if both
#  exist; if that does not succeed the pod path is scanned afresh.
#
sub get_cache {
    my @args = @_;
    my($dircache, $itemcache, $podpath, $podroot, $recurse) = @args;

    my $key_now = cache_key(@args);
    if ($saved_cache_key && $key_now eq $saved_cache_key) {
        return;
    }

    # $loaded becomes non-zero only if the cache files could be used
    my $loaded = 0;
    if (-f $dircache && -f $itemcache) {
        warn "scanning for item cache\n" if $verbose;
        $loaded = load_cache($dircache, $itemcache, $podpath, $podroot);
    }

    # no usable cache: (re)build %pages and %items
    unless ($loaded) {
        warn "scanning directories in pod-path\n" if $verbose;
        scan_podpath($podroot, $recurse, 0);
    }

    # recomputed on purpose: the cache files may just have been rewritten
    $saved_cache_key = cache_key(@args);
}

#
# cache_key - build a string identifying the cache state described by
#  ($dircache, $itemcache, \@podpath, $podroot, $recurse): the arguments plus
#  the stat() data of both cache files, joined with '!'.  a cache file that
#  does not exist contributes nothing.
#
sub cache_key {
    my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;

    my @parts = ($dircache, $itemcache, $recurse, @$podpath, $podroot);
    push(@parts, stat($dircache));
    push(@parts, stat($itemcache));

    return join('!', @parts);
}

#
# load_cache - tries to find if the caches stored in $dircache and $itemcache
#  are valid caches of %pages and %items.  if they are valid then it loads
#  them and returns a non-zero value.
#
#  takes ($dircache, $itemcache, \@podpath, $podroot).  the item cache is
#  checked and loaded first; 0 is returned as soon as one of the two files
#  turns out to be stale.  dies if a cache file cannot be opened.
#
sub load_cache {
    my($dircache, $itemcache, $podpath, $podroot) = @_;

    return 0
        unless _load_cache_file($itemcache, $podpath, $podroot,
                                \%items, "item");

    warn "scanning for directory cache\n" if $verbose;
    return 0
        unless _load_cache_file($dircache, $podpath, $podroot,
                                \%pages, "directory");

    return 1;
}

#
# _load_cache_file - load one cache file into the hash referenced by $store.
#  the first two lines of the file must equal the colon-joined pod path and
#  the pod root; otherwise the file is stale and 0 is returned without
#  touching $store.  every following "key value" line becomes an entry.
#
sub _load_cache_file {
    my($file, $podpath, $podroot, $store, $what) = @_;
    local $_;
    local $/ = "\n";    # read by line whatever the caller did to $/, and
                        #   give the caller's setting back on return

    open(CACHE, "<$file") ||
        die "$0: error opening $file for reading: $!\n";

    # is it the same podpath and the same podroot?  a file too short to
    # hold both header lines is never valid.
    my $cached_podpath = <CACHE>;
    my $cached_podroot = <CACHE>;
    my $valid = defined $cached_podpath && defined $cached_podroot;
    if ($valid) {
        chomp($cached_podpath, $cached_podroot);
        $valid = join(":", @$podpath) eq $cached_podpath
              && $podroot eq $cached_podroot;
    }

    unless ($valid) {
        close(CACHE);
        return 0;
    }

    # load the cache, it is good
    warn "loading $what cache\n" if $verbose;
    while (<CACHE>) {
        # skip malformed lines rather than reuse the captures of an
        # earlier line
        next unless /(.*?) (.*)$/;
        $store->{$1} = $2;
    }
    close(CACHE);

    return 1;
}

#
# scan_podpath - scans the directories specified in @podpath for directories,
#  .pod files, and .pm files.  it also scans the pod files specified in
#  @libpods for =item directives.  afterwards %items is written to
#  $itemcache and %pages to $dircache.
#
#    $podroot - directory to chdir() into while scanning; the previous
#               working directory is restored before the caches are written
#    $recurse - handed to scan_dir()
#    $append  - when false, %items and %pages are emptied first
#
#  dies when a directory or file cannot be opened or a cache file cannot
#  be written.
#
sub scan_podpath {
    my($podroot, $recurse, $append) = @_;
    my($pwd, $dir);
    my($libpod, $dirname, $pod, @files, @poddata);

    # read the library pods a paragraph at a time, whatever the caller did
    # to $/ (pod2html() runs with it undefined, which would hand each file
    # to scan_items() as one single chunk).
    local $/ = "";

    unless($append) {
        %items = ();
        %pages = ();
    }

    # scan each directory listed in @podpath
    $pwd = getcwd();
    chdir($podroot)
        || die "$0: error changing to directory $podroot: $!\n";
    foreach $dir (@podpath) {
        scan_dir($dir, $recurse);
    }

    # scan the pods listed in @libpods for =item directives
    foreach $libpod (@libpods) {
        # if the page isn't defined then we won't know where to find it
        # on the system.
        next unless defined $pages{$libpod} && $pages{$libpod};

        # $pages{...} is a list of "location:" entries.  a location that
        # does not end in .pod/.pm is a directory and takes precedence,
        # then the first .pod file, then the first .pm file.
        my @locations = grep { length } split(/:/, $pages{$libpod});
        my($dirloc) = grep { !/\.(?:pod|pm)$/ } @locations;
        my($podloc) = grep { /\.pod$/ } @locations;
        my($pmloc)  = grep { /\.pm$/ } @locations;

        if (defined $dirloc) {
            #  find all the .pod and .pm files within the directory
            $dirname = $dirloc;
            opendir(DIR, $dirname) ||
                die "$0: error opening directory $dirname: $!\n";
            @files = grep(/(\.pod|\.pm)$/ && ! -d "$dirname/$_",
                          readdir(DIR));
            closedir(DIR);

            # scan each .pod and .pm file for =item directives
            foreach $pod (@files) {
                open(POD, "<$dirname/$pod") ||
                    die "$0: error opening $dirname/$pod for input: $!\n";
                @poddata = <POD>;
                close(POD);

                scan_items("$dirname/$pod", @poddata);
            }

            # use the names of files as =item directives too.
            foreach $pod (@files) {
                $pod =~ /^(.*)(\.pod|\.pm)$/;
                $items{$1} = "$dirname/$1.html" if $1;
            }
        } elsif (defined $podloc || defined $pmloc) {
            # scan the .pod or .pm file for =item directives
            $pod = defined $podloc ? $podloc : $pmloc;
            open(POD, "<$pod") ||
                die "$0: error opening $pod for input: $!\n";
            @poddata = <POD>;
            close(POD);

            scan_items("$pod", @poddata);
        } else {
            warn "$0: shouldn't be here (line ".__LINE__."\n";
        }
    }
    @poddata = ();      # clean-up a bit

    chdir($pwd)
        || die "$0: error changing to directory $pwd: $!\n";

    # cache the item list for later use
    warn "caching items for later use\n" if $verbose;
    _write_cache_file($itemcache, \%items, $podroot);

    # cache the directory list for later use
    warn "caching directories for later use\n" if $verbose;
    _write_cache_file($dircache, \%pages, $podroot);
}

#
# _write_cache_file - write one cache file: the colon-joined @podpath and
#  $podroot as a two line header, then one "key value" line per entry of
#  the hash referenced by $entries.
#
sub _write_cache_file {
    my($file, $entries, $podroot) = @_;

    open(CACHE, ">$file") ||
        die "$0: error open $file for writing: $!\n";

    print CACHE join(":", @podpath) . "\n$podroot\n";
    foreach my $key (keys %$entries) {
        print CACHE "$key $entries->{$key}\n";
    }

    # buffered write errors only show up here; a truncated cache would
    # otherwise be trusted on the next run.
    close(CACHE) ||
        die "$0: error closing $file: $!\n";
}

#
# scan_dir - records, in %pages, every subdirectory, .pod file and .pm file
#  found directly in $dir, and descends into the subdirectories when
#  $recurse is true.  the page name is the entry name without its .pod/.pm
#  suffix; each location found is appended to $pages{name} followed by a
#  colon.  this information is used later to figure out where the pages
#  specified in L<> links are on the filesystem.
#
sub scan_dir {
    my($dir, $recurse) = @_;

    opendir(DIR, $dir) ||
        die "$0: error opening directory $dir: $!\n";
    my @entries = readdir(DIR);
    closedir(DIR);

    my @subdirs = ();
    foreach my $entry (@entries) {
        my($page, $location);

        if (-d "$dir/$entry" && $entry ne "." && $entry ne "..") {
            # directory: the page is named after the directory itself
            $page = $entry;
            $location = "$dir/$entry";
            push(@subdirs, $entry);
        } elsif ($entry =~ /\.pod$/) {
            # .pod
            ($page = $entry) =~ s/\.pod$//;
            $location = "$dir/$page.pod";
        } elsif ($entry =~ /\.pm$/) {
            # .pm
            ($page = $entry) =~ s/\.pm$//;
            $location = "$dir/$page.pm";
        } else {
            next;
        }

        $pages{$page}  = "" unless defined $pages{$page};
        $pages{$page} .= "$location:";
    }

    # recurse on the subdirectories if necessary
    return unless $recurse;
    foreach my $subdir (@subdirs) {
        scan_dir("$dir/$subdir", $recurse);
    }
}

#
# scan_headings - walk the pod paragraphs in @data looking for =head[1-6]
#  directives.  every heading is noted in %$sections (keyed by its htmlified
#  title) and added to a nested <UL> index whose entries link to the
#  heading's anchor.  returns the index as a string of HTML, which is empty
#  when there are no headings.
#
sub scan_headings {
    my($sections, @data) = @_;

    # we would like "local $ignore = 0" here, but $ignore is lexical, so it
    # is set by hand and put back to 1 before returning.
    $ignore = 0;

    my $depth = 0;      # how many <UL>s are currently open
    my $toc = "";

    foreach my $para (@data) {
        next unless $para =~ /^=(head)([1-6])\s+(.*)/;
        my($level, $title) = ($2, $3);
        chomp($title);
        $$sections{htmlify(0,$title)} = 1;

        # open or close lists until we are at the heading's level
        while ($depth < $level) {
            $toc .= "\n" . ("\t" x $depth) . "<UL>\n";
            $depth++;
        }
        while ($depth > $level) {
            $depth--;
            $toc .= "\n" . ("\t" x $depth) . "</UL>\n";
        }

        # the anchor is computed before process_text() gets the chance to
        # alter $title through the reference it is given
        my $anchor = htmlify(0,$title);
        my $label = html_escape(process_text(\$title, 0));
        $toc .= "\n" . ("\t" x $depth) . "<LI>" .
                "<A HREF=\"#" . $anchor . "\">" . $label . "</A>";
    }

    # finish off the lists, innermost first
    for (my $open = $depth - 1; $open >= 0; $open--) {
        $toc .= "\n" . ("\t" x $open) . "</UL>\n";
    }

    # get rid of bogus lists
    $toc =~ s,\t*<UL>\s*</UL>\n,,g;

    $ignore = 1;        # restore old value;

    return $toc;
}

#
# scan_items - scans the pod paragraphs in @poddata for =item directives
#  and records them in %items, mapping the item to the HTML page derived
#  from $pod (a trailing .pod is replaced by .html; an empty $pod stays
#  empty).  we will use this information later on in resolving C<> links.
#
#  what is recorded for an item:
#    =item * some text      - "some text"  (bullet list)
#    =item 1. some text     - "some text"  (numbered list)
#    =item word more text   - "word"       (first word only)
#  items whose recorded text is empty (or otherwise false) are skipped.
#
sub scan_items {
    my($pod, @poddata) = @_;
    local $_;

    $pod =~ s/\.pod$//;
    $pod .= ".html" if $pod;

    foreach my $para (@poddata) {
        $_ = $para;

        # remove any formatting instructions
        s,[A-Z]<([^<>]*)>,$1,g;

        # figure out what kind of item it is.  the bullet has to be looked
        # for explicitly: "*" is not a word character, so it can never show
        # up as the first \w* word of the item.
        my $item;
        if (/\A=item\s+\*\s*(.*?)\s*\Z/s) {             # bullet list
            $item = $1;
        } elsif (/\A=item\s+\d+\.?\s*(.*?)\s*\Z/s) {    # numbered list
            # the \s* keeps the blank after the number out of the key
            $item = $1;
        } elsif (/\A=item\s+(\w*)/s) {                  # first word of name
            $item = $1;
        } else {
            next;
        }

        $items{$item} = "$pod" if $item;
    }
}

#
# process_head - emit the HTML for a pod =head[1-6] directive.
#  $tag is the directive name ("head1" .. "head6"), $heading its text.
#  outside of lists the heading is preceded by a <P> and, unless we are at
#  the top of the document, by a rule.  the heading text is wrapped in a
#  named anchor, and that anchor is registered in %$r_valid_anchors under
#  "$curr_base/<anchor>"; a repeated anchor is reported on STDERR.
#
sub process_head {
    my($tag, $heading) = @_;

    # figure out the level of the =head
    $tag =~ /head([1-6])/;
    my $level = $1;

    unless ($listlevel) {
        $OUT .= "<P>\n";
        $OUT .= "$HR\n" unless $top;
    }
    $OUT .= "<H$level>";

    # the visible text: pod markup converted, then escaped for HTML
    my $convert = $heading;
    process_text(\$convert, 0);
    $convert = html_escape($convert);

    $OUT .= '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>"
          . "</H$level>" . "\n";

    # add a valid tag to a list of tags
    my $anchor_key = $curr_base . "/" . htmlify(0,$heading);
    if (exists $r_valid_anchors->{$anchor_key}) {
        print STDERR "Duplicated Anchor: $anchor_key\n";
    }
    $r_valid_anchors->{$anchor_key} = $convert;
}

#
# process_item - convert a pod item tag and convert it to HTML format.
#
#  $_[0] is the text following "=item".  depending on how that text starts
#  the item is emitted as part of a bullet list (<UL>), a numbered list
#  (<OL>) or a definition list (<DL>).  the first item seen at a list level
#  opens the list and pushes the matching closing tag onto @listend.  the
#  first occurrence of an item gets a named anchor ("item_..."); repeats,
#  tracked in %items_named, are emitted without one.
#
sub process_item {
    my $text = $_[0];
    my($i, $quote, $name);      # $quote is unused; $i is only assigned

    my $need_preamble = 0;      # true for the first =item at this list level
    my $this_entry;             # unused


    # lots of documents start a list without doing an =over.  this is
    # bad!  but, the proper thing to do seems to be to just assume
    # they did do an =over.  so warn them once and then continue.
    warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n"
        unless $listlevel;
    process_over() unless $listlevel;

    # process_over() has just raised $listlevel, so this is a safety net only
    return unless $listlevel;

    # temporarily convert E<> escape tags into E-_-...-_- placeholders so
    # that they survive the stripping and escaping below
    1 while $text =~ s/E<([^<>]*)>/E-_-$1-_-/g;

    # remove formatting instructions from the text (every X<...> but E<...>)
    1 while $text =~ s/[A-DF-Z]<([^<>]*)>/$1/g;

    pre_escape(\$text);

      # process the escaped Etag
    1 while $text =~ s/E-_-(.*?)-_-/process_E($1)/eg;

    # post-increment: true only when no item was seen at this level before
    $need_preamble = $items_seen[$listlevel]++ == 0;

    # check if this is the first =item after an =over
    # (neither $i nor $need_new is read again in this sub)
    $i = $listlevel - 1;
    my $need_new = $listlevel >= @listitem;

    if ($text =~ /\A\*/) {              # bullet

        if ($need_preamble) {
            push(@listend,  "</UL>");
            $OUT .= "<UL>\n";
        }

        $OUT .= '<P><LI>';
        # a bare "*" produces the <LI> only; otherwise $1 is the item text
        if ($text =~ /\A\*\s*(.+)\Z/s) {
            $OUT .= '<STRONG>';
            if ($items_named{$1}++) {
                $OUT .= html_escape($1);
            } else {
                my $name = 'item_' . htmlify(1,$1);
                $OUT .= qq(<A NAME="$name">). html_escape($1). '</A>';
            }
            $OUT .= '</STRONG>';
        }

    } elsif ($text =~ /\A[\d#]+/) {     # numbered list

        if ($need_preamble) {
            push(@listend,  "</OL>");
            $OUT .= "<OL>\n";
        }

        $OUT .= '<P><LI>';
        # $1 is the text after the number (and an optional dot)
        if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
            $OUT .= '<STRONG>';
            if ($items_named{$1}++) {
                $OUT .= html_escape($1);
            } else {
                my $name = 'item_' . htmlify(0,$1);
                $OUT .= qq(<A NAME="$name">). html_escape($1). '</A>';
            }
            $OUT .= '</STRONG>';
        }

    } else {                    # all others

        if ($need_preamble) {
            push(@listend,  '</DL>');
            $OUT .= "<DL>\n";
        }

        $OUT .= '<P><DT>';
        # repeats are detected by the first word ($1), while the anchor and
        # the visible text use the whole item text
        if ($text =~ /(\S+)/) {
            $OUT .= '<STRONG>';
            if ($items_named{$1}++) {
                $OUT .= html_escape($text);
            } else {
                my $name = 'item_' . htmlify(1,$text);
                $OUT .= qq(<A NAME="$name">). html_escape($text). '</A>';
            }
            $OUT .= '</STRONG>';
        }
       $OUT .= '<DD>';
    }

    $OUT .= "\n";
}

#
# process_over - process a pod over tag and start a corresponding HTML
# list.
#
sub process_over {
    # An =over only deepens the nesting; the list tag itself is emitted
    # later, by the first =item seen at the new level.
    return $listlevel++;
}

#
# process_back - process a pod back tag and convert it to HTML format.
#
sub process_back {
    # A =back with no list open is reported and otherwise ignored.
    unless ($listlevel) {
        warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n";
        return;
    }

    # Step out one level and emit that level's closing tag.  The tag may be
    # missing because the list never saw an =item, in which case only the
    # newline is written.
    $listlevel--;
    my $closing_tag = $listend[$listlevel];
    $OUT .= (defined $closing_tag ? $closing_tag : '') . "\n";

    # Drop the per-level bookkeeping that belonged to the closed list.
    pop @$_ for \(@listitem, @listdata, @listend);
    return pop @items_seen;
}

#
# process_cut - process a pod cut tag: set $ignore so the text that follows is skipped.
#
sub process_cut {
    # Raise the $ignore flag; process_text() returns early while it is set.
    return $ignore = 1;
}

#
# process_pod - process a pod pod tag.  nothing needs doing here: the main
# loop has already reset $ignore, so the text that follows is processed again.
#
sub process_pod {
    # Deliberately a no-op: the main loop has already set $ignore back to 0
    # by the time this handler is called.
    return;
}

#
# process_example - process a =example newpod tag.
# =example 1.1 This is a title
# becomes
# <p><i>Example 1.1: This is a title</i></p>
#
sub process_example {
    # $index is the example number (e.g. "1.1"), $title its caption; both
    # are appended to $OUT as one italic paragraph.
    my $index = shift;
    my $title = shift;
    $OUT .= '<p><i>Example ' . $index . ': ' . $title . '</i></p>';
}

#
# process_figure - process a =figure newpod tag.
# =figure 1.1 This is a title
# becomes
# <p><center><img src="fig1.1.gif"></center></p>
# <p><center><b>Figure 1.1: This is a title</b></center></p>
#
sub process_figure {
    # $index is the figure number, used both in the image name
    # (fig<index>.gif) and in the caption; $title is the caption text.
    my $index = shift;
    my $title = shift;

    my $markup = qq{<p><center><img src="fig$index.gif"></center></p>
               <p><center><b>Figure $index: $title</b></center></p>
              };
    $OUT .= $markup;
}

#
# process_code - process a =code newpod tag.
# =code filename This is a comment
# becomes
# <p><a href="code/filename"><code>filename</code></a> -- This is a comment</p>
#
sub process_code {
    # $filename is linked relative to the "code/" directory and shown in
    # <code>; $comment follows it after a " -- " separator.
    my $filename = shift;
    my $comment  = shift;

    my $markup = qq{
               <p><a href="code/$filename"><code>$filename</code></a> -- $comment</p>
              };
    $OUT .= $markup;
}

#
# process_for - process a =for pod tag.  if it's for html, split
# it out verbatim, if illustration, center it, otherwise ignore it.
#
sub process_for {
    # $whom is the =for target, $text the paragraph that follows it.
    #   html / pod2html : the text is copied to $OUT verbatim
    #   illustration    : the text names an image, emitted as a centred <img>
    #   anything else   : ignored
    my($whom, $text) = @_;

    if ($whom =~ /^(pod2)?html$/i) {
        $OUT .= $text;
    }
    elsif ($whom =~ /^illustration$/i) {
        # strip every trailing line terminator from the image name
        while (chomp $text) { }

        # if a readable file exists with one of the known extensions,
        # use the first such extension
        foreach my $suffix (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
            next unless -r "$text$suffix";
            $text .= $suffix;
            last;
        }
        $OUT .= qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
    }
}

#
# process_begin - process a =begin pod tag.  this pushes
# whom we're beginning on the begin stack.  if there's a
# begin stack, we only print if it us.
#
sub process_begin {
    # The lower-cased =begin target is always pushed on @begin_stack; the
    # accompanying text is copied to $OUT only for html / pod2html targets
    # and only when it is non-empty.
    my $target  = lc(shift);
    my $payload = shift;

    push @begin_stack, $target;
    $OUT .= $payload if $target =~ /^(pod2)?html$/ && $payload;
}

#
# process_end - process a =end pod tag.  pop the
# begin stack.  warn if we're mismatched.
#
sub process_end {
    # $whom is the target named on the =end line.  It is compared
    # case-insensitively with the innermost open =begin target; a mismatch,
    # or an =end with no =begin open at all, is reported with a warning.
    # The innermost entry (if any) is popped either way.
    my($whom) = @_;
    $whom = lc(defined $whom ? $whom : '');

    # Check for an empty stack explicitly: comparing against an undefined
    # top element would let "=end" with an empty target slip through.
    if (!@begin_stack || $begin_stack[-1] ne $whom) {
        warn "Unmatched begin/end at chunk $paragraph\n";
    }
    pop @begin_stack;
}

#
# process_text - handles plaintext that appears in the input pod file.
# there may be pod commands embedded within the text so those must be
# converted to html commands.
#
sub process_text {
    # Converts one pod paragraph to HTML in place.
    #   $text         - reference to the paragraph; overwritten with the result
    #   $escapeQuotes - when true, plain-text runs go through
    #                   process_puretext() (quote conversion, implicit links)
    # Does nothing while $ignore is set.  Paragraphs starting with white
    # space are emitted verbatim inside <PRE>; everything else is scanned
    # for pod escapes (B<> C<> E<> F<> I<> L<> S<> X<> Z<>), which are
    # dispatched to the matching process_* routine.
    my($text, $escapeQuotes) = @_;
    my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
    my($podcommand, $params, $tag, $quote);

    return if $ignore;

    $quote  = 0;                # status of double-quote conversion
    $result = "";
    $rest = $$text;

    if ($rest =~ /^\s+/) {      # preformatted text, no pod directives
        $rest =~ s/\n+\Z//;
        # expand tabs to the next multiple-of-8 column, line by line
        $rest =~ s#.*#
            my $line = $&;
            1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
            $line;
        #eg;

        $rest   =~ s/&/&amp;/g;
        $rest   =~ s/</&lt;/g;
        $rest   =~ s/>/&gt;/g;
        $rest   =~ s/"/&quot;/g;

        # try and create links for all occurrences of perl.* within
        # the preformatted text.
        $rest =~ s{
                    (\s*)(perl\w+)
                  }{
                    if (defined $pages{$2}) {   # is a link
                        qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
                    } elsif (defined $pages{dosify($2)}) {      # is a link
                        qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
                    } else {
                        "$1$2";
                    }
                  }xeg;
        $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;

        my $urls = '(' . join ('|', qw{
                http
                telnet
                mailto
                news
                gopher
                file
                wais
                ftp
            } )
            . ')';

        my $ltrs = '\w';
        my $gunk = '/#~:.?+=&%@!\-';
        my $punc = '.:?\-;';
        my $any  = "${ltrs}${gunk}${punc}";

        # turn anything that looks like a URL into a link
        $rest =~ s{
            \b                          # start at word boundary
            (                           # begin $1  {
              $urls     :               # need resource and a colon
             (?! :)                     # don't convert HTTP::Foo and alike
              [$any] +?                 # followed by one or more
                                        #  of any valid character, but
                                        #  be conservative and take only
                                        #  what you need to....
            )                           # end   $1  }
            (?=                         # look-ahead non-consumptive assertion
                    [$punc]*            # either 0 or more punctuation
                    [^$any]             #   followed by a non-url char
                |                       # or else
                    $                   #   then end of the string
            )
          }{<A HREF="$1">$1</A>}igox;

        # text should be as it is (verbatim)
        $result = "<PRE>$rest</PRE>";

    } else {                    # formatted text
        # parse through the string, stopping each time we find a
        # pod-escape.  once the string has been thoroughly processed
        # we can output it.
        while (length $rest) {
            # check to see if there are any possible pod directives in
            # the remaining part of the text.  X is part of the set so
            # that X<> index entries reach process_X() instead of being
            # copied into the output as literal text.
            if ($rest =~ m/[BCEIFLSXZ]</) {
                warn "\$rest\t= $rest\n" unless
                    $rest =~ /\A
                           ([^<]*?)
                           ([BCEIFLSXZ]?)
                           <
                           (.*)\Z/xs;

                $s1 = $1;       # pure text
                $s2 = $2;       # the type of pod-escape that follows
                $s3 = '<';      # '<'
                $s4 = $3;       # the rest of the string
            } else {
                $s1 = $rest;
                $s2 = "";
                $s3 = "";
                $s4 = "";
            }

            if ($s3 eq '<' && $s2) {    # a pod-escape
                $result    .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
                $podcommand = "$s2<";
                $rest       = $s4;

                # find the matching '>'.  $match counts the escapes still
                # open; $bf is set when neither pattern matched, which
                # ends the scan.
                $match = 1;
                $bf = 0;
                while ($match && !$bf) {
                    $bf = 1;
                    if ($rest =~ /\A([^<>]*[BCEIFLSXZ]<)(.*)\Z/s) {
                        $bf = 0;
                        $match++;
                        $podcommand .= $1;
                        $rest        = $2;
                    } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
                        $bf = 0;
                        $match--;
                        $podcommand .= $1;
                        $rest        = $2;
                    }
                }

                if ($match != 0) {
                    warn <<WARN;
$0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
WARN
                    # emit the two opening characters literally and
                    # rescan whatever followed them
                    $result .= substr $podcommand, 0, 2;
                    $rest = substr($podcommand, 2) . $rest;
                    next;
                }

                # pull out the parameters to the pod-escape
                $podcommand =~ /^([BCFEILSXZ]?)<(.*)>$/s;
                $tag    = $1;
                $params = $2;

                # process the text within the pod-escape so that any escapes
                # which must occur do.
                process_text(\$params, 0) unless $tag eq 'L';

                $s1 = $params;
                if (!$tag || $tag eq " ") {     #  <> : no tag
                    $s1 = "&lt;$params&gt;";
                } elsif ($tag eq "L") {         # L<> : link
                    $s1 = process_L($params);
                } elsif ($tag eq "I" ||         # I<> : italicize text
                         $tag eq "B" ||         # B<> : bold text
                         $tag eq "F") {         # F<> : file specification
                    $s1 = process_BFI($tag, $params);
                } elsif ($tag eq "C") {         # C<> : literal code
                    $s1 = process_C($params, 1);
                } elsif ($tag eq "E") {         # E<> : escape
                    $s1 = process_E($params);
                } elsif ($tag eq "Z") {         # Z<> : zero-width character
                    $s1 = process_Z($params);
                } elsif ($tag eq "S") {         # S<> : non-breaking space
                    $s1 = process_S($params);
                } elsif ($tag eq "X") {         # X<> : index entry
                    $s1 = process_X($params);
                } else {
                    warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
                }

                $result .= "$s1";
            } else {
                # for pure text we must deal with implicit links and
                # double-quotes among other things.
                $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
                $rest    = $s4;
            }
        }
    }
    $$text = $result;
}

sub html_escape {
    # Returns a copy of $_[0] with &, <, > and " replaced by HTML entities.
    # An ampersand that already starts one of &gt; &lt; &amp; &quot; is
    # left alone, so text that was escaped earlier is not escaped twice.
    # Each ampersand is judged on its own: one existing entity no longer
    # stops the other special characters in the string from being escaped.
    my $rest = $_[0];

    $rest   =~ s/&(?!(?:gt|lt|amp|quot);)/&amp;/g;
    $rest   =~ s/</&lt;/g;
    $rest   =~ s/>/&gt;/g;
    $rest   =~ s/"/&quot;/g;
    return $rest;
}

#
# process_puretext - process pure text (without pod-escapes) converting
#  double-quotes and handling implicit C<> links.
#
sub process_puretext {
    # Converts a run of plain text (no pod escapes) to HTML.
    #   $text  - the text to convert
    #   $quote - reference to a flag that is true while a double-quote
    #            opened in an earlier run is still waiting for its partner
    # Returns the converted text: "..." pairs become ``...'', words that
    # look like function calls, perl variables, URLs or e-mail addresses
    # are marked up, the remaining words are HTML-escaped, and the result
    # is re-wrapped at roughly 75 columns.
    my($text, $quote) = @_;
    my(@words, $result, $rest, $lead, $trail);

    # convert double-quotes to single-quotes
    $text =~ s/\A([^"]*)"/$1''/s if $$quote;
    while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}

    # an unpaired quote is left over: open it now and remember that the
    # closing half is still to come
    $$quote = ($text =~ m/"/ ? 1 : 0);
    $text =~ s/\A([^"]*)"/$1``/s if $$quote;

    # keep track of leading and trailing white-space
    $lead  = ($text =~ /\A(\s*)/s ? $1 : "");
    $trail = ($text =~ /(\s*)\Z/s ? $1 : "");

    # collapse all white space into a single space
    $text =~ s/\s+/ /g;
    @words = split(" ", $text);

    # process each word individually
    foreach my $word (@words) {
        # see if we can infer a link
        if ($word =~ /^\w+\(/) {
            # has parenthesis so should have been a C<> ref
            $word = process_C($word);
        } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
            # perl variables, should be a C<> ref
            $word = process_C($word, 1);
        } elsif ($word =~ m,^\w+://\w,) {
            # looks like a URL
            my $urls = '(' . join ('|', qw{
                                           http
                                           telnet
                                           mailto
                                           news
                                           gopher
                                           file
                                           wais
                                           ftp
                                          } )
              . ')';

            my $ltrs = '\w';
            my $gunk = '/#~:.?+=&%@!\-';
            my $punc = '.:?\-;';
            my $any  = "${ltrs}${gunk}${punc}";

            $word =~ s{
                  \b                          # start at word boundary
                  (                           # begin $1  {
                    $urls     :               # need resource and a colon
                 (?! :)                       # don't convert HTTP::Foo and alike
                    [$any] +?                 # followed by one or more
                                              #  of any valid character, but
                                              #  be conservative and take only
                                              #  what you need to....
                  )                           # end   $1  }
                  (?=                         # look-ahead non-consumptive assertion
                          [$punc]*            # either 0 or more punctuation
                          [^$any]             #   followed by a non-url char
                      |                       # or else
                          $                   #   then end of the string
                  )
                }{<A HREF="$1">$1</A>}igox;

        } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
            # looks like an e-mail address; keep surrounding (...) or <...>
            # (and a trailing comma) outside the link
            my ($w1, $w2, $w3) = ("", $word, "");
            ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
            ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
            $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
        } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
            $word = html_escape($word) if $word =~ /["&<>]/;
            $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
        } else {
            $word = html_escape($word) if $word =~ /["&<>]/;
        }
    }

    # build a new string based upon our conversion, breaking at the last
    # white space within the first 75 characters (or, failing that, at
    # the first white space found)
    $result = "";
    $rest   = join(" ", @words);
    while (length($rest) > 75) {
        if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
             $rest =~ m/^(\S*)\s(.*?)$/o) {

            $result .= "$1\n";
            $rest    = $2;
        } else {
            $result .= "$rest\n";
            $rest    = "";
        }
    }
    # test the length, not the truth value: a remainder of "0" is text too
    $result .= $rest if length $rest;

    # restore the leading and trailing white-space
    $result = "$lead$result$trail";

    return $result;
}

#
# pre_escape - convert & in text to &amp;
#
sub pre_escape {
    # Takes a reference to a string and rewrites that string in place,
    # replacing every "&" with "&amp;".
    my $text_ref = shift;

    ${$text_ref} =~ s,&,&amp;,g;
}

#
# dosify - convert filenames to 8.3
#
sub dosify {
    # Returns the given name unchanged unless $Is83 is set.  When it is,
    # the name is lower-cased, each ".ext" run is cut to the dot plus
    # three characters, and each word run is cut to eight characters.
    my $name = shift;

    return $name unless $Is83;

    $name = lc $name;
    $name =~ s/(\.\w+)/substr ($1,0,4)/ge;
    $name =~ s/(\w+)/substr ($1,0,8)/ge;
    return $name;
}

#
# process_L - convert a pod L<> directive to a corresponding HTML link.
#  most of the links made are inferred rather than known about directly
#  (i.e it's not known whether the =head\d section exists in the target file,
#   or whether a .pod file exists in the case of split files).  however, the
#  guessing usually works.
#
# Unlike the other directives, this should be called with an unprocessed
# string, else tags in the link won't be matched.
#
sub process_L {
    # Converts the contents of an L<> escape to HTML.
    #   $str - the raw, unprocessed text between "L<" and ">"
    # Returns an <A HREF="..."> element when a target could be worked out,
    # otherwise the link text wrapped in <EM>.  Every generated link is
    # also recorded in $r_links_to_check under $curr_base.
    my($str) = @_;
    my($s1, $linktext, $page, $page83, $section, $link);   # work strings

    $str =~ s/\n/ /g;                   # undo word-wrapped tags
    $s1 = $str;
    for ($s1) {
        # LREF: a la HREF L<show this text|man/section>
        $linktext = $1 if s:^([^|]+)\|::;

        # a :: acts like a /
        s,::,/,;

        # make sure sections start with a /
        s,^",/",g;
        s,^,/,g if (!m,/, && / /);

        # check if there's a section specified
        if (m,^(.*?)/"?(.*?)"?$,) {     # yes
            ($page, $section) = ($1, $2);
        } else {                        # no
            ($page, $section) = ($_, "");
        }

        # check if we know that this is a section in this page
        if (!defined $pages{$page} && defined $sections{$page}) {
            $section = $page;
            $page = "";
        }
    }

    # prefer the 8.3 form of the page name when that is what we know about
    $page83=dosify($page);
    $page=$page83 if (defined $pages{$page83});
    if ($page eq "") {
        # a section within the current page
        $link = "#" . htmlify(0,$section);
        $linktext = $section unless defined($linktext);
    } elsif (!defined $pages{$page}) {
        warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
        $link = "";
        $linktext = $page unless defined($linktext);
    } else {
        $linktext  = ($section ? "$section" : "the $page manpage") unless defined($linktext);
        $section = htmlify(0,$section) if $section ne "";

        # if there is a directory by the name of the page, then assume that an
        # appropriate section will exist in the subdirectory
        # NOTE(review): [^(\.pod|\.pm)] is a character class (it excludes the
        # single characters ( . p o d | m ) ), not a "does not end in .pod
        # or .pm" test -- confirm the intent before relying on this branch.
        if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
            $link = "$htmlroot/$1/$section.html";

        # since there is no directory by the name of the page, the section will
        # have to exist within a .html of the same name.  thus, make sure there
        # is a .pod or .pm that might become that .html
        } else {
            $section = "#$section" if $section;
            # check if there is a .pod with the page name
            if ($pages{$page} =~ /([^:]*)\.pod:/) {
                $link = "$htmlroot/$1.html$section";
            } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
                $link = "$htmlroot/$1.html$section";
            } else {
                warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: ".
                             "no .pod or .pm found\n";
                $link = "";
                $linktext = $section unless defined($linktext);
            }
        }
    }

    process_text(\$linktext, 0);
    if ($link) {
        $s1 = "<A HREF=\"$link\">$linktext</A>";
        # reduce the link to the form kept in the links-to-check table
        $link =~ s|(\./)+||;
        $link =~ s|/+||;
        $link =~ s|(\.html)?#|/|;
        $link =~ s|^/|$curr_base/|; # normalize internal links
        push @{$r_links_to_check->{$curr_base}}, $link ;
    } else {
        $s1 = "<EM>$linktext</EM>";
    }
    return $s1;
}

#
# process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
# convert them to corresponding HTML directives.
#
sub process_BFI {
    # $tag is the pod escape letter, $str its already-converted contents.
    # B maps to <STRONG>; F and I both map to <EM>.
    my($tag, $str) = @_;

    my $element = $tag eq 'B' ? 'STRONG'
                : $tag eq 'F' ? 'EM'
                : $tag eq 'I' ? 'EM'
                :               undef;

    return "<$element>$str</$element>";
}

#
# process_C - process the C<> pod-escape.
#
sub process_C {
    # Converts the contents of a C<> escape (or a word that looks like
    # code) to HTML.
    #   $str   - the code text
    #   $doref - when true, link to a known =item of that name if one exists
    # Returns an <A HREF> link when $doref is set and %items knows the
    # name, otherwise the escaped text wrapped in <CODE>.
    my($str, $doref) = @_;

    # anchor text: the code with every "(...)" group removed
    (my $anchor_text = $str) =~ s/\([^()]*\)//g;
    # lookup key: the anchor text reduced to word characters only
    (my $item_key = $anchor_text) =~ s/\W//g;

    $str = html_escape($str);

    unless ($doref && defined $items{$item_key}) {
        # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
        return "<CODE>$str</CODE>";
    }

    # a non-empty %items value is the page holding the item; an empty one
    # means the item lives in the current page
    my $target = $items{$item_key} ? "$htmlroot/$items{$item_key}" : "";
    my $html = "<A HREF=\"" . $target . "#item_" . htmlify(0,$anchor_text) . "\">$str</A>";

    # drop the anchor when it merely repeats the perl* page's own file name
    $html =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
    confess "s1 has space: $html" if $html =~ /HREF="[^"]*\s[^"]*"/;

    return $html;
}

#
# process_E - process the E<> pod directive which seems to escape a character.
#
sub process_E {
    # Converts the contents of an E<> escape to HTML.
    #   $str - the text between "E<" and ">"
    # Returns:
    #   "/" for sol and "|" for verbar (perlpod defines both as literals)
    #   "&#N;" for a number given in decimal, 0x-hex or 0-octal notation
    #   "&name;" for anything else, e.g. lt -> &lt;
    my($str) = @_;

    return $str unless defined $str && length $str;

    return '/' if $str eq 'sol';
    return '|' if $str eq 'verbar';

    # numeric escapes need the "&#" form; a bare "&123;" is not a valid
    # character reference
    return '&#' . hex($1) . ';' if $str =~ /\A0[xX]([0-9A-Fa-f]+)\z/;
    return '&#' . oct($1) . ';' if $str =~ /\A0([0-7]+)\z/;
    return '&#' . ($1 + 0) . ';' if $str =~ /\A(\d+)\z/;

    # named entity
    for ($str) {
        s,([^/].*),\&$1\;,g;
    }

    return $str;
}

#
# process_Z - process the Z<> pod directive which really just amounts to
# ignoring it.  this allows someone to start a paragraph with an =
#
sub process_Z {
    # Z<> has no HTML counterpart, so whatever it contained is discarded
    # and an empty string is returned.
    return "";
}

#
# process_S - process the S<> pod directive which means to convert all
# spaces in the string to non-breaking spaces (in HTML-eze).
#
sub process_S {
    # Returns $str with every space character replaced by "&nbsp;".
    my $str = shift;

    return $str unless defined $str;

    # split on single spaces, keeping empty leading and trailing fields so
    # that runs of spaces and edge spaces each become one &nbsp;
    return join('&nbsp;', split(/ /, $str, -1));
}

#
# process_X - this is supposed to make an index entry.  we'll just 
# ignore it.
#
sub process_X {
    # Index entries produce no output: the argument is ignored and an
    # empty string is returned.
    return q{};
}


#
# finish_list - finish off any pending HTML lists.  this should be called
# after the entire pod file has been read and converted.
#
sub finish_list {
    # Closes every list still open at the end of the document.  Each level
    # is unwound through process_back(), so it gets the closing tag
    # recorded for it (</UL>, </OL> or </DL>) rather than an unconditional
    # </DL>, and the per-level bookkeeping is released as well.
    # process_back() decrements $listlevel on every call made while it is
    # non-zero, so the loop terminates.
    process_back() while $listlevel > 0;
}

#
# htmlify - converts a pod section specification to a suitable section
# specification for HTML.  if first arg is 1, only takes 1st word.
#
sub htmlify {
    # Turns a pod heading or item into text usable as an HTML anchor name.
    #   $compact - when true, only the first word of $heading is used
    #   $heading - the heading text
    # Returns the heading with every character that is neither a word
    # character nor white space replaced by "_", white space trimmed and
    # turned into "_", the result cut to 32 characters and runs of "_"
    # squeezed to a single one.
    my($compact, $heading) = @_;

    $heading = '' unless defined $heading;

    if ($compact) {
        # Take the leading word.  When the heading does not start with a
        # word character, fall back to its first white-space delimited
        # token instead of reading a stale $1 from an earlier match.
        if ($heading =~ /^(\w+)/) {
            $heading = $1;
        } elsif ($heading =~ /^\s*(\S+)/) {
            $heading = $1;
        } else {
            $heading = '';
        }
    }

    # $heading = lc($heading);
    $heading =~ s/[^\w\s]/_/g;
    $heading =~ s/(\s+)/ /g;
    $heading =~ s/^\s*(.*?)\s*$/$1/s;
    $heading =~ s/ /_/g;
    $heading =~ s/\A(.{32}).*\Z/$1/s;
    $heading =~ s/\s+\Z//;
    $heading =~ s/_{2,}/_/g;

    return $heading;
}

BEGIN {
    # Empty block: no code is run here at compile time.
}

1;