#!/usr/bin/perl -w
# lineage.pl 
# $Header: /home01/cvs/vwr2/docs/doc-generator/lineage.pl,v 1.10 2001/01/17 16:07:32 stedav Exp $
# Purpose: Walk a directory tree specified on the command line printing out
# the file and package names, parents, and methods of all *.pm files in
# that directory tree
# First iteration implemented by Bill Jonas on 11/30/2000
# Much help with implementation and optimization provided by Kyle Burton.
# Thanks, Kyle!
 
use strict;
use constant OOINDEX => '00index';

# We must have at least one argument (but no more than two), the top of the
# directory tree that is to be parsed (and the directory in which to output
# our HTML files, if that's desired).
($#ARGV == 1 or $#ARGV == 0) or die <<EndOfHelp;
Usage: $0 <pathname> [<pathname>]

The first path is the directory tree containing the .pm files that are
to be traversed.  The optional second pathname is the directory where
the output .html files are to be created.

If only the first path is given, the output will be plain text.
If both paths are given, the output will be a set of HTML files.
EndOfHelp

use File::Find ();
my $data;
my $path = $ARGV[0];
my $htmlpath;
if ($ARGV[1]) {
	$htmlpath = $ARGV[1];
	unless (-d $htmlpath) {
		print "Creating $htmlpath\n";
		mkdir ($htmlpath,0777) or die "$htmlpath does not exist and can't be created: $!\n";
	}
	$htmlpath =~ s/\/$//;
} else {
	$htmlpath = undef;
}
my @files;
my @data;
my %package_data;
my $package;
my %html_tree;
#$|=1;  #debug only

# populate @files with the *.pm modules
File::Find::find({wanted => \&wanted}, $path);

&get_data;
unless ($htmlpath) {
	&print_text;
} else {
	&output_html;
}

sub get_data
{
	foreach my $file (@files) {
		my @packages;
		open (F, $file) or warn "Can't open $file: $!\n";
		#Slurp the whole file into one big scalar
		{ local $/=undef; $data=<F>; }
		close F or warn "Can't close $file: $!\n";

		#Get rid of the embedded POD
		$data =~ s/^=[a-z].*?^=cut//msg;
		@packages = ($data =~ m/^package ([^;]+)/msg);
		@data = split (/^package [^;]+;/ms, $data);
		shift @data unless $data[0]; # $data[0] appears to be null *every* time
		#Populate the data structure
		foreach $package (@packages) {
			$package_data{$package}->{'filenames'} ||= [];
			$package_data{$package}->{'parents'} ||= [];
			$package_data{$package}->{'seen_parents'} ||= '';
			$package_data{$package}->{'methods'} ||= [];
			my $data = shift @data;
			push (@{$package_data{$package}{'parents'}}, &OOINDEX) if !($data);
			push @{$package_data{$package}{'filenames'}}, $file;
			foreach ($data =~ m/^\s*\@ISA [^;]+;/msg) {
				my $parents = $_;
				my @parents;
				$parents =~ s/\@ISA/\@parents/;
				eval $parents;
				push @{$package_data{$package}{'parents'}}, @parents;
				# 'seen_parents' is necessary because if a
				# package declares no @ISA, it belongs to the
				# fictitious "grandfather" node, &OOINDEX, at
				# the very top. If we try to push &OOINDEX onto
				# the stack here, it won't work since this
				# block never gets executed without an @ISA.
				# 'seen_parents' is used once, below (see
				# the comments there).  (Yes, I had to justify
				# it, it seemed a little wasteful to me.)
				$package_data{$package}->{'seen_parents'} = 1;
			}
			# Get nethods for each package
			push @{$package_data{$package}{'methods'}}, ($data =~ m/^sub (\w+)/msg);
			# Get fields for each package
			push @{$package_data{$package}{'fields'}}, 
				($data =~ m/self\-\>\{\'(\w+)\'\}/msg);
		}
	}
	# Do a quick loop after *all* the info is gathered, in order to
	# determine which modules are top-level in this particular set of
	# packages, and make them children of the fictitious top-level node
	# &OOINDEX.  Oh, and thanks to mct for a nudge from the cluebat.
	foreach $package (keys %package_data) {
		push (@{$package_data{$package}{'parents'}}, &OOINDEX) unless $package_data{$package}->{'seen_parents'};
		push (@{$package_data{$package}{'parents'}}, &OOINDEX) if !grep(!/Export|AutoLoader|DynaLoader/, @{$package_data{$package}{'parents'}});
	}
	&get_children;
}

sub print_text
#Print some nicely formatted text, if desired
{
	foreach $package (sort keys %package_data) {
		next if $package eq &OOINDEX;
		print "Package Name: ", $package, "\n\nParent(s):\n";
		foreach (sort @{$package_data{$package}->{'parents'}}) {
			next if $_ eq &OOINDEX;
			print "$_\n";
		}
		print "\nFile Name(s):\n";
		foreach (sort @{$package_data{$package}->{'filenames'}}) {
			print "$_\n";
		}
		print "\nMethod(s):\n";
		foreach (sort @{$package_data{$package}->{'methods'}}) {
			print "$_\n";
		}
    print "\nField(s):\n";
		my $old_field = "";
    foreach (sort @{$package_data{$package}->{'fields'}}) {
			next if $_ eq $old_field;
      print "$_\n";
			$old_field = $_;
    }
		print "\nChild(ren):\n";
		foreach (sort @{$package_data{$package}->{'children'}}) {
			next if ($_ eq "Exporter" || $_ eq "AutoLoader" || $_ eq "DynaLoader");
			print "$_\n";
		}
		print "\n\n\n";
	}
}

sub get_children {
	foreach my $parent (keys %package_data) {
		next if ($parent eq &OOINDEX);
		$package_data{$parent}->{'children'} ||= [];
		push (@{$package_data{&OOINDEX}->{'children'}}, $parent) unless (grep(!/Exporter|AutoLoader|DynaLoader|00index|$parent/, @{$package_data{$parent}->{'parents'}}));
		foreach my $child (keys %package_data) {
			next if ($child eq &OOINDEX);
			push (@{$package_data{$parent}->{'children'}}, $child) if(grep /$parent/, @{$package_data{$child}->{'parents'}});
		}
	}
}


sub output_html {
	print "Preparing HTML...";
	my $time = localtime();
	$html_tree{&OOINDEX}->{'html_filename'} = join('/', $htmlpath, '00index.html');
	$html_tree{&OOINDEX}->{'html_data'} = "<html>\n<head><title>Top-Level Modules</title></head>\n<body><h1>Top-Level Modules</h1>\n<ul>";
	foreach (sort @{$package_data{&OOINDEX}->{'children'}}) {
		$html_tree{&OOINDEX}->{'html_data'} .= "<li><a href=\"" . $_ . ".html\">" . $_ . "</a>\n";
	}
	$html_tree{&OOINDEX}->{'html_data'} .= "</ul><hr>Auto-generated
	by lineage.pl on " . $time . ".\n</body></html>";
	open (F, '>'.$html_tree{&OOINDEX}->{'html_filename'}) or warn "Couldn't create $html_tree{&OOINDEX}->{'html_filename'}: $!\n";
	print F $html_tree{&OOINDEX}->{'html_data'};
	close (F) or warn "Couldn't close ",$html_tree{&OOINDEX}->{'html_filename'},": $!\n";
	foreach $package (keys %package_data) {
		next if $package eq &OOINDEX;
		$html_tree{$package}->{'html_filename'} = join('/', $htmlpath, $package . ".html");
		#$html_tree{$package}->{'html_data'} ||= '';
		$html_tree{$package}->{'html_data'} .= "<html>\n<head><title>" . $package . "</title></head>\n<body>";
		$html_tree{$package}->{'html_data'} .= "<h1>" . $package . "</h1>\n<hr>\n";
		$html_tree{$package}->{'html_data'} .= "<h2>Parents:</h2>\n<ul>";
		foreach (sort @{$package_data{$package}->{'parents'}}) {
			next if ($_ eq "Exporter" || $_ eq "AutoLoader" || $_ eq "DynaLoader");
			$html_tree{$package}->{'html_data'} .= "<li><a href=\"" . $_ . ".html\">" . $_ . "</a>\n";
		}
		$html_tree{$package}->{'html_data'} .= "</ul>\n<hr>\n<h2>File Name(s):</h2>\n<ul>";
		foreach (sort @{$package_data{$package}->{'filenames'}}) {
			$html_tree{$package}->{'html_data'} .= "<li>$_\n";
		}
		$html_tree{$package}->{'html_data'} .= "</ul><hr>\n<h2>Method(s):</h2>\n<ul>";
		foreach (sort @{$package_data{$package}->{'methods'}}) {
			$html_tree{$package}->{'html_data'} .= "<li>$_\n";
		}
		$html_tree{$package}->{'html_data'} .= "</ul><hr>\n<h2>Child(ren):</h2>\n<ul>";
		if (@{$package_data{$package}->{'children'}}) {
			foreach (sort @{$package_data{$package}->{'children'}}) {
				$html_tree{$package}->{'html_data'} .= "<li><a href=\"" . $_ . ".html\">" . $_ . "</a>\n";
			}
		} else {
			$html_tree{$package}->{'html_data'} .= "</ul><p>None</p>";
		}
		$html_tree{$package}->{'html_data'} .= "</ul><hr>Auto-generated by lineage.pl on " . $time . ".\n</body></html>";
		open (F, ">$html_tree{$package}->{'html_filename'}") or warn "Couldn't create $html_tree{$package}->{'html_filename'}: $!\n";
		print F $html_tree{$package}->{'html_data'};
		close (F) or warn "Couldn't close $html_tree{$package}->{'html_filename'}: $!\n";
	}
	print " done.\n";
}

sub wanted { /^.*\.pm\z/s && push(@files, "$File::Find::name"); }

__END__
=head1 NAME

lineage.pl - generate textual or HTML-based documentation of Perl modules

=head1 SYNOPSIS

lineage.pl <modules_directory> [<html_output_directory>]

=head1 DESCRIPTION

lineage.pl finds all the .pm files under a given directory tree, finds out
which packages they contain, which parents those packages have, what their
children are, and which methods they contain.  The program takes one
required argument and one optional argument.  The first argument is a
directory name under which there are Perl modules.  The second, optional
argument is a directory in which to place a series of HTML files.  If the
HTML output directory does not exist, it will be created.  If it is not
given, the output will be plain text on stdout.

=head1 AUTHOR

Bill Jonas, bjonas@quarterleaf.com
David Steuber dsteuber@quarterleaf.com
