ports//devel/cvsplot/work/cvsplot-1.7.4/cvsplot.pl

#!/usr/bin/env perl -w

###############################################################################
# cvsplot: Copyright (c) 2001, 2002, 2003, 2004 David Sitsky.
# All rights reserved.
#
# cvsplot is a perl script which is used to extract information from CVS and
# plots the total number of lines and number of files in a selected file set
# against time.
#
# File sets can be specified using regular expressions.
# The start and end dates may also be specified.
#
# This program is free software; you can redistribute it and modify it under
# the terms of the GPL.

use Config;
use Cwd;
use Symbol;
use IPC::Open3;

# Whether debugging is enabled or not.
$debug = 0;

# Additional global arguments to use with CVS commands.
$cvs_global_args = "";

# The start date from which to gather statistics.
$start_date = "";

# The final date up to which to gather statistics.
$end_date = "";

# Indicate whether to count the number of lines _changed_ (added plus
# removed), or the net number of lines added (added minus removed).
$count_lines_changed = 0;

# The directory in which to gather the cvs statistics (where the cvs log
# command is run from), or the directory of the CVS repository, if the
# -rlog option is used.
$cvsdir = "";

# The module to run cvs rlog over, if the -rlog option is specified.
$rlog_module = "";

# The branch that we are collecting statistics from.  By default, the main
# branch is used.
$branch_tag = "";

# Parallel arrays of file patterns which indicate whether it is an include
# or exclude pattern, and what the regular expression is.
@pattern_include = ();
@pattern_regexp = ();

# Where the number of lines statistics will be stored.
$linedata = "";

# Where the number of files statistics will be stored.
$filedata = "";

# Where the per-user line statistics will be stored.
$userdata = "";

# A set of users whose data needs to be examined individually.
%filterusers = ();

# A map of a set of group to user memberships.
%usergroups = ();

# The name of the default user group (if any).
$default_user_group = "";

# A hash (by date) of a hash (by filename) of lines added.
%line_stats = ();

# A hash (by date) of a hash (by filename) of the status.
%state_stats = ();

# A hash (by date) of a hash (by filename) of the revision.
%revision_stats = ();

# A hash (by date) of a hash (by filename) of the author.
%author_stats = ();

# A hash (by filename) of a hash (by version) of lines added.
%file_version_delta = ();

# A hash (by filename) of a hash (by version) of the file state.
%file_version_state = ();

# A hash (by filename) of a hash (by version) of the author.
%file_version_author = ();

# A hash (by filename) of the magic branch number.
%file_branch_number = ();

# A hash (by filename) of the number of branch revisions made.
%file_number_branch_revisions = ();

# A hash (by date) of the total number of lines.
%total_lines = ();

# A hash (by date) of the total number of files.
%total_files = ();

# A hash (by user) of a hash (by date) of their total number of lines.
%total_user_lines = ();

# Flag to indicate if gnuplot is to be used.
$use_gnuplot = 0;

# The location of the gnuplot binary.  It is assumed to be in the
# path, but may be over-ridden on the command-line with the
# -gnuplot argument.  For Windows platforms, this is useful, since
# the name of the binary is not "gnuplot", but often "wgnupl32"
# or "pgnuplot".
$gnuplot = "gnuplot";

# The gnuplot output filename to write the output of the line data.
$gnuplot_linedata = "";

# The gnuplot output filename to write the output of the file data.
$gnuplot_filedata = "";

# The gnuplot output filename to write the output of the user data.
$gnuplot_userdata = "";

# The gnuplot "set term" expression to use when generating output.
# By default, generate colour png files.
$gnuplot_setterm = "png color";

# A general gnuplot command that can be executed to change some
# aspect of the plotting command, such as the format of the x values.
$gnuplot_command = "";

# Flag to indicate if lines are to be used in the final plot, rather
# than discrete points.  For large plots, this can sometimes improve
# readability.
$use_linestyle = 0;

# Determine if this process is running under Windows.
$osname = $Config{'osname'};
$windows = (defined $osname && $osname eq "MSWin32") ? 1 : 0;

# Main program: run each phase in order.  The module check comes first
# because argument processing already calls into Date::Manip.
check_missing_modules();
process_command_line_arguments();
get_cvs_statistics();
analyse_statistics();
generate_data_files();
generate_plots();

###############################################################################
# Quote a single argument so it can be embedded in a shell command line.
# String::ShellQuote emits single-quoted strings, which suit UNIX shells but
# not DOS, so on Windows the argument is simply wrapped in double quotes.
#
# Takes the raw argument and returns its quoted form.
sub quote
{
    my ($arg) = @_;

    # DOS shells only understand double quotes.
    return '"' . $arg . '"' if $windows;

    # Everywhere else, defer to String::ShellQuote.
    return String::ShellQuote::shell_quote($arg);
}

###############################################################################
# Verify that the extra Perl modules cvsplot relies on can be loaded.
# Returns quietly when they are all available; otherwise prints
# platform-specific installation instructions and exits.
sub check_missing_modules
{
    my @missing = ();

    # Date::Manip is needed on every platform.
    eval { require Date::Manip; };
    push @missing, 'Date::Manip' if $@;

    # String::ShellQuote is only loaded on non-Windows platforms.
    eval { require String::ShellQuote unless $windows; };
    push @missing, 'String::ShellQuote' if $@;

    # Nothing more to do when every module loaded.
    return unless @missing;

    # First, the generic "missing module" message.
    print "\n";
    print "Cvsplot requires some Perl modules which are missing " .
          "from your system.\n";

    if ($windows)
    {
        # ppm is given the module name with the "::" separators removed.
        print "These can be installed by issuing the following commands:\n\n";
        for my $module (@missing)
        {
            (my $ppm_name = $module) =~ s/:://g;
            print 'C:\> ' . "ppm install $ppm_name\n";
        }
        print "\n";
    }
    else
    {
        print "They can be installed by running (as root) the following:\n";
        print "   perl -MCPAN -e 'install \"$_\"'\n" for @missing;
        print "\n";
        print "Modules can also be downloaded from http://www.cpan.org.\n\n";
    }
    exit;
}


###############################################################################
# Decide whether the supplied file is to be examined, based on the -include
# and -exclude options recorded in the parallel arrays @pattern_regexp and
# @pattern_include.
#
# The patterns are tried in the order they were given; the first one that
# matches decides the outcome.  Returns true if no patterns were supplied at
# all, the include flag of the first matching pattern otherwise, and false
# if patterns exist but none of them match.
#
sub include_file
{
    my ($filename) = @_;

    # If there are no settings, include everything.
    return 1 if ! @pattern_regexp;

    # A lexical index is used so that the package global $i, which the
    # command-line parser also uses as its loop counter, is left untouched.
    for my $index (0 .. $#pattern_regexp)
    {
        if ($filename =~ /$pattern_regexp[$index]/)
        {
            # First match wins: report whether it was an include or an
            # exclude pattern.
            return $pattern_include[$index];
        }
    }

    # No matches, don't include this file.
    return 0;
}

###############################################################################
# Using "cvs log" (or "cvs rlog" when -rlog was given) and a few other
# commands, gather all of the necessary statistics.
#
# The log output is parsed line by line with a two-state parser:
#   $search_file == 1  waiting for the "RCS file:" line of the next file;
#   $search_file == 0  inside the log entry of a file that is to be examined.
#
# For every revision found, the per-file tables (%file_version_delta,
# %file_version_state, %file_version_author) are filled in, and for revisions
# on the branch of interest the per-date tables (%line_stats, %state_stats,
# %revision_stats, %author_stats) as well.  Revisions whose log entry carries
# no "lines:" field have their size determined by fetching the revision with
# cvs and counting its lines.
#
# Dies if the working directory cannot be entered or the log command cannot
# be started, and exits with status 1 if a cvs checkout command fails or the
# -cvsdir argument does not match the repository path.
#
sub get_cvs_statistics
{
    if ($debug && defined $osname)
    {
        print "Platform is $osname\n";
    }

    # Explicitly set the timezone for window platforms, so that DateManip
    # works.
    if ($windows)
    {
        $ENV{TZ} = "C";
    }

    my $working_file = "";
    my $relative_working_file = "";
    my $working_cvsdir = "";
    my $search_file = 0;

    # Change to the directory nominated by $cvsdir, and save the current
    # directory, only if we aren't using the -rlog option.
    if ($rlog_module eq "")
    {
        $saved_cwd = cwd();

        # "or" is required here: with "||" the die was attached to $cvsdir
        # rather than to the result of chdir, so a failed chdir went
        # unnoticed.
        chdir($cvsdir)
            or die "Failed to change to directory \"$cvsdir\": $!";
    }
    else
    {
        # Remove the accessor part, and just get the pathname.
        $cvsdir =~ /([^:]+)$/;
        $working_cvsdir = $1;
        print "Got working_cvsdir as $working_cvsdir\n" if $debug;

        # Since this is used in a regexp below, need to make sure DOS pathnames
        # are correctly matched against.
        $working_cvsdir =~ s/\\/\\\\/g;
    }

    # Flag to indicate what the state is when parsing the output from cvs log.
    # true indicates that the parser is waiting for the start of a cvs log
    # entry.
    $search_file = 1;

    # Build up the command string appropriately, depending on what options
    # have been set.
    my $command = ($rlog_module eq '') ? "cvs $cvs_global_args log" :
        sprintf("cvs $cvs_global_args -d %s rlog %s",
                quote($cvsdir), quote($rlog_module));

    print "Executing \"$command\"\n" if $debug;

    open (CVSLOG, "$command |") || die "Couldn't execute \"$command\"";
    while (<CVSLOG>)
    {
        if ($search_file == 1)
        {
            # Need to locate the name of the working file
            if (/^RCS file: (.*),v$/)
            {
                $working_file = $1;

                # Removed files live in an Attic directory; strip it so the
                # name is the same as when the file was live.
                $working_file =~ s/Attic\///g;
                $relative_working_file = "";

                # Check if this file is to be included or not.
                if (include_file($working_file))
                {
                    # Yep, search for more details on this file.
                    $search_file = 0;

                    if ($branch_tag eq "")
                    {
                        # Main branch to be investigated only.
                        $file_branch_number{$working_file} = "1";
                        $file_number_branch_revisions{$working_file} = 0;
                    }
                    print "Including file \"$working_file\"\n" if $debug;
                }
                else
                {
                    print "Excluding file \"$working_file\"\n" if $debug;
                }
            }
        }
        else
        {
            # Collect the relative part for those runs that don't use
            # -rlog.
            if (/^Working file: (.*)$/)
            {
                $relative_working_file = $1;
            }
            # Handle repositories working off an explicit numbering scheme,
            # such as 8.1.  Only do this if the user hasn't specified an
            # explicit branch to gather statistics over.  In most cases,
            # the result will still be 1, but this handles the stranger
            # repositories out there.
            elsif ($branch_tag eq "" && /^head: (\d+)\./) {
                $file_branch_number{$working_file} = $1;
            }
            # If we are collecting statistics on a branch, determine the magic
            # branch number for this file.
            elsif ( (! defined $file_branch_number{$working_file}) &&
                 (/^\s*${branch_tag}: ([\d\.]+)\.0\.(\d+)$/) )
            {
                # The tag line holds the magic number X.Y.0.N; revisions on
                # that branch are numbered X.Y.N.*.
                $file_branch_number{$working_file} = "${1}.${2}";
                $file_number_branch_revisions{$working_file} = 0;
                if ($debug)
                {
                    print "Got branch $file_branch_number{$working_file}";
                    print " for file \"$working_file\"\n";
                }
            }
            elsif (/^keyword substitution: b$/)
            {
                # This is a binary file, ignore it.  Note the hash keys
                # remain, with undefined values.
                undef($file_branch_number{$working_file});
                undef($file_number_branch_revisions{$working_file});
                $search_file = 1;
                print "Excluding binary file \"$working_file\"\n" if $debug;
            }
            elsif (/^=============================================================================$/)
            {
                # End of the log entry for this file, start parsing for the
                # next file.
                $search_file = 1;
                next;
            }
            elsif (/^----------------------------$/)
            {
                # Matched the description separator.  If a branch has been
                # specified, but this file doesn't exist on it, skip this file.
                if (($branch_tag ne "") &&
                    (! defined $file_branch_number{$working_file}))
                {
                    if ($debug)
                    {
                        print "File \"$working_file\" not on branch\n";
                    }
                    $search_file = 1;
                    next;
                }

                # Read the revision line, and record the appropriate
                # information.
                $_ = <CVSLOG>;

                if (/^revision ([\d\.]+)$/)
                {
                    # Record the revision, and whether it is part of the tag
                    # of interest.
                    $revision = $1;
                    if ($revision =~
                        /^$file_branch_number{$working_file}\.\d+$/)
                    {
                        $file_on_branch = 1;
                        $file_number_branch_revisions{$working_file}++;
                    }
                    else
                    {
                        $file_on_branch = 0;
                    }
                    if ($debug)
                    {
                        print "Got branch number: $file_branch_number{$working_file} rev $revision on branch: $file_on_branch\n";
                    }
                }
                else
                {
                    # Problem in parsing, skip it.
                    print "Couldn't parse line: $_\n";
                    $search_file = 1;
                    next;
                }

                $_ = <CVSLOG>;		# Read the "date" line.
                if (/^date: (\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d); .* author: (.*); .* state: (.*);.*lines: \+(\d+) \-(\d+).*$/)
                {
                    # Ordinary revision with a "lines: +N -M" field.  Note
                    # that some CVS clients also present dead revisions in
                    # this form, in which case $state is "dead" here.
                    $date = $1;
                    $author = $2;
                    $users{$author} = 1;
                    $state = $3;
                    $lines_added = $4;
                    $lines_removed = $5;
                    $number_lines = $count_lines_changed ?
                        $lines_added + $lines_removed : $lines_added - $lines_removed;

                    $file_version_delta{$working_file}{$revision} =
                        $number_lines;
                    $file_version_state{$working_file}{$revision} = $state;
                    $file_version_author{$working_file}{$revision} = $author;

                    if ($file_on_branch)
                    {
                        # This revision lives on the branch of interest.
                        $line_stats{$date}{$working_file} += $number_lines;
                        $state_stats{$date}{$working_file} = $state;
                        $revision_stats{$date}{$working_file} = $revision;
                        $author_stats{$date}{$working_file} = $author;
                    }
                }
                elsif (/^date: (\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d); .* author: (.*); .* state: dead;.*$/)
                {
                    # File has been removed.
                    $date = $1;
                    $author = $2;
                    $users{$author} = 1;

                    $file_version_delta{$working_file}{$revision} = 0;
                    $file_version_state{$working_file}{$revision} = "dead";
                    $file_version_author{$working_file}{$revision} = $author;

                    if ($file_on_branch)
                    {
                        $line_stats{$date}{$working_file} = 0;
                        $state_stats{$date}{$working_file} = "dead";
                        $revision_stats{$date}{$working_file} = $revision;
                        $author_stats{$date}{$working_file} = $author;
                    }
                }
                elsif (/^date: (\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d); .* author: (.*); .* state: ([^;]*);.*$/)
                {
                    $date = $1;
                    $author = $2;
                    $users{$author} = 1;
                    $state = $3;

                    # Unfortunately, cvs log doesn't indicate the number of
                    # lines an initial revision is created with, so find this
                    # out using the following cvs command.  Note the regexp
                    # below has an optional drive delimiter to support DOS
                    # installations.
                    my $lccmd = "";
                    if ($rlog_module ne "")
                    {
                        print "Working cvsdir is: $working_cvsdir working file: $working_file\n" if $debug;

                        # For DOS-based repositories, the filename may contain
                        # a drive letter.  Also need to be flexible with the
                        # pathname separator.  The class is [A-Za-z] rather
                        # than [A-z], which would also accept the punctuation
                        # characters between "Z" and "a".
                        if (! ($working_file =~ /^([A-Za-z]:)?${working_cvsdir}[\/\\](.*)$/))
                        {
                            print STDERR "-cvsdir argument $working_cvsdir doesn't match ";
                            print STDERR "repository filename prefix $working_file\n";
                            print STDERR "Please correct your -cvsdir argument and try again\n";
                            exit 1;
                        }
                        $lccmd = sprintf("cvs $cvs_global_args -d %s co -r %s -p %s",
                                         quote($cvsdir),
                                         quote($revision),
                                         quote($2));
                    }
                    else
                    {
                        $lccmd = sprintf("cvs $cvs_global_args update -r %s -p %s",
                                         quote($revision),
                                         quote($relative_working_file));
                    }
                    print "Executing $lccmd\n" if $debug;

                    # Run the command, counting the lines it writes to
                    # stdout and keeping stderr for the failure report.
                    # NOTE(review): stdout is drained completely before
                    # stderr is read; this presumably relies on cvs writing
                    # little to stderr - confirm.
                    my $WTR = gensym();
                    my $RDR = gensym();
                    my $ERR = gensym();
                    my $pid = open3($WTR, $RDR, $ERR, $lccmd);
                    for ($number_lines = 0; defined <$RDR>; $number_lines++) {}
                    close ($RDR);
                    my $error_string = "";
                    while (<$ERR>)
                    {
                        $error_string .= $_;
                    }
                    waitpid $pid, 0;

                    if ($?) {
                        print "CVS command failed: \"$lccmd\" status $?\n";
                        print "$error_string\n";
                        exit 1;
                    }

                    print "$working_file 1.1 = $number_lines lines\n" if $debug;

                    $file_version_delta{$working_file}{$revision} =
                        $number_lines;
                    $file_version_state{$working_file}{$revision} = $state;
                    $file_version_author{$working_file}{$revision} = $author;

                    if ($file_on_branch)
                    {
                        $line_stats{$date}{$working_file} += $number_lines;
                        $state_stats{$date}{$working_file} = "Exp";
                        $revision_stats{$date}{$working_file} = $revision;
                        $author_stats{$date}{$working_file} = $author;
                    }
                }
                else
                {
                    print "Couldn't parse: $_";
                }
                if ($debug)
                {
                    print "File \"$working_file\" rev $revision ";
                    print "delta $file_version_delta{$working_file}{$revision} ";
                    print "state $file_version_state{$working_file}{$revision}\n";
                    print "author $file_version_author{$working_file}{$revision}\n";
                }
            }
        }
    }
    close(CVSLOG);

    # Go back to the original directory if we aren't using the -rlog option.
    if ($rlog_module eq "")
    {
        chdir $saved_cwd;
    }
}

# Cache of line counts filled in by get_line_count and get_line_count_inner:
# a hash (by filename) of a hash (by revision) of the line count computed
# for that revision.
%memorise_line_count = ();

###############################################################################
# Return the number of lines that make up the given revision of a file.
# The answer is worked out by get_line_count_inner and remembered in
# %memorise_line_count, so later requests that pass through this revision
# can reuse it.
#
sub get_line_count
{
    my ($filename, $revision) = @_;

    my $count = get_line_count_inner($filename, $revision);

    # Remember the result for future intermediate calculations.
    $memorise_line_count{$filename}{$revision} = $count;

    print "get_line_count($filename, $revision) = $count\n" if $debug;

    return $count;
}

###############################################################################
# Worker for get_line_count.  Starting at the given revision, walk back
# through its predecessors, adding up the per-revision deltas recorded in
# %file_version_delta, until a revision is reached whose total is already
# cached in %memorise_line_count, that is unknown to %file_version_state, or
# that is the base revision 1.1.
#
# Returns the accumulated line count.  Exits with a non-zero status if a
# revision number cannot be interpreted.
#
sub get_line_count_inner
{
    my ($filename, $revision) = @_;
    my $count = 0;

    while (1)
    {
        if (defined $memorise_line_count{$filename}{$revision})
        {
            # Total for this revision is already known; the walk is done.
            $count += $memorise_line_count{$filename}{$revision};
            last;
        }
        elsif (! defined($file_version_state{$filename}{$revision}))
        {
            # Case where we are looking for a revision that hasn't
            # been found in the output of the CVS log command. This is
            # usually because a developer decided to start the file
            # revision at something other than 1.1.
            $memorise_line_count{$filename}{$revision} = 0;
            last;
        }
        elsif ($revision eq "1.1")
        {
            # Base case where the revision is 1.1
            $memorise_line_count{$filename}{$revision} =
                $file_version_delta{$filename}{$revision};
            $count += $memorise_line_count{$filename}{$revision};
            last;
        }
        elsif ($file_version_state{$filename}{$revision} eq "dead")
        {
            # Case where file has been removed.  The file count is
            # effectively the previous version's count.  The match is
            # checked so that stale captures are never used.
            if ($revision =~ /^([\d\.]+)\.(\d+)$/)
            {
                $revision = $1 . "." . ($2 - 1);
            }
            else
            {
                print "Unhandled case for file $filename revision $revision\n";
                exit 1;
            }
        }
        elsif ($revision =~ /^([\d\.]+)\.\d+\.1$/)
        {
            # Case where we need to descend down to the branch point and
            # find the contributions made there.
            my $branch_point_revision = $1;
            if (! defined($file_version_delta{$filename}{$revision}))
            {
                print "file_version_data not defined for $filename $revision\n";
            }
            $count += $file_version_delta{$filename}{$revision};
            $revision = $branch_point_revision;
        }
        elsif ($revision =~ /^([\d\.]+)\.(\d+)$/)
        {
            # Need to determine previous revision number + this revision's
            # contribution.
            my $previous_revision = $1 . "." . ($2 - 1);
            if (! defined($file_version_delta{$filename}{$revision}))
            {
                print "[2] file_version_data not defined for $filename $revision\n";
            }
            $count += $file_version_delta{$filename}{$revision};
            $revision = $previous_revision;
        }
        else
        {
            # This is an error, so do not report success to the caller's
            # shell.
            print "Unhandled case for file $filename revision $revision\n";
            exit 1;
        }
    }

    return $count;
}
	

###############################################################################
# Walk the gathered per-date statistics in date order and compute, for each
# date, the total number of files present (%total_files), the total number
# of lines in those files (%total_lines) and each author's running line
# total (%total_user_lines, keyed by author and then by date).
#
# When a file has been removed, its contribution to the totals is removed
# completely; when it is later re-added, the contributions of all of its
# authors are restored.  Finally, dates outside the interval given by
# $start_date and $end_date are discarded.
#
sub analyse_statistics
{
    # Keep a record of what files are present when gathering statistics.
    my %files_present = ();

    # Keep a record of the current revision a file has when gathering
    # statistics.  Their initial revisions will be the revision that
    # they branched from if we are doing statistics on a branch.
    my %file_revision = ();

    # Keep a record of the current state of a file when gathering
    # statistics.
    my %file_state = ();

    # Keep a record of per-user line counts;
    my %user_total = ();

    # A hash of filenames to a hash of usernames to linecount totals.  This
    # is maintained as the statistics are processed, so that when files are
    # removed or re-added, all of the individual contributions are properly
    # maintained to the correct authors.
    my %file_author_linecount = ();

    if ($branch_tag ne "")
    {
        foreach my $file ( keys %file_branch_number )
        {
            # Files excluded as binary keep their key but have an undefined
            # branch number.  Skip them, and anything else that does not
            # parse, instead of reusing the capture left over from the
            # previous file.
            my $branch_number = $file_branch_number{$file};
            next unless defined $branch_number &&
                        $branch_number =~ /^([\d\.]+)\.\d+$/;

            my $base_revision = $1;
            $file_revision{$file} = $base_revision;

            # The file starts out present unless it was dead at the point
            # the branch was taken.
            my $base_state = $file_version_state{$file}{$base_revision};
            if (! defined $base_state || $base_state ne "dead")
            {
                $files_present{$file} = 1;
            }
        }
    }

    # Go through the records in order of earliest to latest.
    foreach my $date ( sort keys %line_stats )
    {
        # Investigate what CVS operations occurred on this date.  Record
        # individual author contributions.
        foreach my $file ( keys %{ $line_stats{$date} } )
        {
            # Update the current revision the file has.
            $file_revision{$file} = $revision_stats{$date}{$file};
            my $author = $author_stats{$date}{$file};

            # Record if a file was removed or not.
            if ($state_stats{$date}{$file} eq "dead")
            {
                delete $files_present{$file};

                # For each author contribution made to this file, deduct them
                # now.
                foreach my $innerauthor (keys %{ $file_author_linecount{$file} })
                {
                    my $amount = $file_author_linecount{$file}{$innerauthor};
                    $user_total{$innerauthor} -= $amount;
                    $total_user_lines{$innerauthor}{$date} = $user_total{$innerauthor};
                }
            }
            else
            {
                $files_present{$file} = 1;

                # If the file was removed in the previous commit, need to add
                # the entire line count broken amongst the appropriate authors,
                # otherwise just add the increment.
                my $state = $file_state{$file};
                if (defined $state && $state eq "dead")
                {
                    # For each author contribution made to this file, add them
                    # to the appropriate authors now.
                    foreach my $innerauthor (keys %{ $file_author_linecount{$file} })
                    {
                        my $amount = $file_author_linecount{$file}{$innerauthor};
                        $user_total{$innerauthor} += $amount;

                        # Record the author's running total, as is done when
                        # a file is removed, not just this file's share.
                        $total_user_lines{$innerauthor}{$date} = $user_total{$innerauthor};
                    }
                }

                # Now add in the increment done in this commit.
                $user_total{$author} += $line_stats{$date}{$file};
                $file_author_linecount{$file}{$author} +=
                    $line_stats{$date}{$file};
                $total_user_lines{$author}{$date} = $user_total{$author};
            }

            # Record the current state of this file.
            $file_state{$file} = $state_stats{$date}{$file};
        }

        # Calculate the total number of files present on this date.
        $total_files{$date} = scalar keys %files_present;

        # Count the total number of lines present for the current file set.
        my $total_line_count = 0;
        foreach my $file ( keys %file_revision )
        {
            if (defined $files_present{$file})
            {
                $total_line_count += get_line_count($file, $file_revision{$file});
            }
        }
        $total_lines{$date} = $total_line_count;
    }

    # Without a start or end date nothing can be filtered out, so there is
    # no need to parse every date.
    return if $start_date eq "" && $end_date eq "";

    # Filter out those entries to only contain what the user specified
    # in the date interval.  This could be done far more efficiently, but
    # for now...
    foreach my $date ( sort keys %total_lines )
    {
        my $current_date = Date::Manip::ParseDate($date);
        if (($start_date ne "" &&
             Date::Manip::Date_Cmp($current_date, $start_date) < 0) ||
            ($end_date ne "" &&
             Date::Manip::Date_Cmp($current_date, $end_date) > 0))
        {
            # This date is before the start date specified by the user, or
            # this date is after the end date specified by the user.
            # Delete it.
            delete $total_lines{$date};
            delete $total_files{$date};

            # Also handle per-user date filtering.
            foreach my $author ( keys %total_user_lines )
            {
                delete $total_user_lines{$author}{$date};
            }
        }
    }
}

###############################################################################
# Write the computed statistics to the LINEDATA, FILEDATA and (when -userdata
# was given) USERDATA filehandles, one record per line in date order, and
# close the handles afterwards.
#
# Each user, or each group when user groups are in force, is given a numeric
# id in sorted name order; the id is written alongside the name so that
# Gnuplot can select the records belonging to one user or group.
#
sub generate_data_files
{
    my (%user_map, %group_map);

    # Number the users, and when a default group has been nominated, place
    # every user that belongs to no other group into it.
    my $next_user_id = 0;
    for my $user (sort keys %total_user_lines)
    {
        $user_map{$user} = $next_user_id++;

        next if $default_user_group eq '';

        my $has_group = 0;
        for my $group (sort keys %usergroups)
        {
            next unless exists $usergroups{$group}{$user};
            $has_group = 1;
            last;
        }
        $usergroups{$default_user_group}{$user} = 1 unless $has_group;
    }

    # Number the groups, which by now include any default-group additions.
    my $next_group_id = 0;
    $group_map{$_} = $next_group_id++ for sort keys %usergroups;

    # Latest known line count of each member of each group.
    my %total_group_lines = ();

    # Determine if authors and/or groups are being filtered.
    my $filtering_users = %filterusers ? 1 : 0;
    my $filtering_groups = %usergroups ? 1 : 0;

    # Write out the data for each commit in date order.
    for my $when (sort keys %total_lines)
    {
        print LINEDATA "$when $total_lines{$when}\n";
        print FILEDATA "$when $total_files{$when}\n";

        # Nothing more to write unless author data was requested.
        next if $userdata eq "";

        for my $author (sort keys %user_map)
        {
            # Only output if it is an author of interest.
            next if ($filtering_users && $default_user_group eq '' &&
                     ! exists $filterusers{$author});

            # Authors without a data point on this date are skipped.
            my $data = $total_user_lines{$author}{$when};
            next unless defined $data;

            if (! $filtering_groups)
            {
                # Output author information.
                print USERDATA "$when $author $user_map{$author} $data\n";
                next;
            }

            # Update every group this author is a member of, and output
            # the new total for that group.
            for my $group (sort keys %usergroups)
            {
                next unless exists $usergroups{$group}{$author};

                $total_group_lines{$group}{$author} = $data;
                my $total = 0;
                $total += $_ for values %{ $total_group_lines{$group} };
                print USERDATA "$when $group $group_map{$group} $total\n";
            }
        }
    }

    close LINEDATA;
    close FILEDATA;
    close USERDATA if $userdata ne "";
}

###############################################################################
# Build the gnuplot command scripts for the requested images and pipe each
# one into the gnuplot binary named by $gnuplot.
#
# When gnuplot is in use ($use_gnuplot), an optional combined line/file plot
# is produced if -gnuplotlinefiledata was given, followed by the separate
# line and file plots.  When -gnuplotuserdata was given, a per-user (or
# per-group, if user groups are defined) plot is produced as well.
#
# Dies if gnuplot cannot be started.
#
# useful reference : http://www.cs.uni.edu/Help/gnuplot/TOC.html
#
sub generate_plots
{
    my $linestyle_command = $use_linestyle ? "set data style lines" : "";

    # Feed one complete command script to a fresh gnuplot process.
    my $run_gnuplot = sub
    {
        my ($script) = @_;

        print "Issuing command:\n\n$script\n\n to gnuplot: $gnuplot\n" if $debug;
        open(GNUPLOT, "| $gnuplot") || die "Failed to start gnuplot: $!";
        print GNUPLOT $script;
        close GNUPLOT;
    };

    if ($use_gnuplot)
    {
        # Generate the gnuplot command scripts to build the necessary images.

        if ($gnuplot_linefiledata)
        {
            # Produce a combined plot if the user has specified to build one.
            # Lines use the left-hand axis, files the right-hand one.
            my $command = <<EOF;

set xdata time
set timefmt '%Y/%m/%d %H:%M:%S'
set format x '%m/%y'
set xlabel 'Date'
set title 'CVS Line/File Statistics'
set key left top
set ylabel 'Number of lines'
set yrange [0:*]
set ytics
set y2label 'Number of files'
set y2range [0:*]
set y2tics
set terminal $gnuplot_setterm
set output '$gnuplot_linefiledata'
$linestyle_command
$gnuplot_command
plot '$linedata' using 1:3 axes x1y1 title 'Lines',\\
     '$filedata' using 1:3 axes x1y2 title 'Files'
exit
EOF

            $run_gnuplot->($command);
        }

        # Produce two separate plots, one for the file data, one for the
        # line data.
        my $command = <<EOF;

set xdata time
set timefmt '%Y/%m/%d %H:%M:%S'
set format x '%m/%y'
set xlabel 'Date'
set ylabel 'Number of lines'
set title 'CVS Line Statistics'
set yrange [0:*]
set nokey
set terminal $gnuplot_setterm
set output '$gnuplot_linedata'
$linestyle_command
$gnuplot_command
plot '$linedata' using 1:3
set title 'CVS File Statistics'
set ylabel 'Number of files'
set yrange [0:*]
set output '$gnuplot_filedata'
$gnuplot_command
plot '$filedata' using 1:3
exit
EOF

        $run_gnuplot->($command);
    }

    if ($gnuplot_userdata)
    {
        # Now build up the plot command, for the specified users.
        # Determine if authors are being filtered.
        my $filtering_users = scalar keys %filterusers != 0;
        my $filtering_groups = scalar keys %usergroups != 0;
        my $title = 'Per-' . ($filtering_groups ? 'group' : 'user') .
            ' CVS Line Statistics';

        # Produce a plot containing a list of per-user stats.
        my $command = <<EOF;

set xdata time
set timefmt '%Y/%m/%d %H:%M:%S'
set format x '%m/%y'
set xlabel 'Date'
set ylabel 'Number of lines'
set title '$title'
set yrange [0:*]
set key top left
set terminal $gnuplot_setterm
set output '$gnuplot_userdata'
$linestyle_command
$gnuplot_command
EOF

        # One plot clause per group or user.  Each clause selects the
        # records whose id column equals the id given to that group or user
        # when the data file was written, so $index must advance for every
        # name, including users that are filtered out of the plot.
        my @plot_clauses = ();
        my $index = 0;
        if ($filtering_groups)
        {
            foreach my $group ( sort keys %usergroups )
            {
                push @plot_clauses,
                    "'$userdata' using 1:(\$4 == $index ? \$5 : 1/0) t '$group'";
                $index++;
            }
        }
        else
        {
            foreach my $user ( sort keys %total_user_lines )
            {
                if (! $filtering_users || exists $filterusers{$user})
                {
                    push @plot_clauses,
                        "'$userdata' using 1:(\$4 == $index ? \$5 : 1/0) t '$user'";
                }
                $index++;
            }
        }

        # Joining the clauses places a separator only between clauses that
        # are actually emitted.  Deciding on the separator from $index put a
        # stray leading comma in the script whenever the first user was
        # filtered out.
        $command .= "\nplot " . join(", ", @plot_clauses) . "\nexit\n";

        $run_gnuplot->($command);
    }
}

###############################################################################
# Process the command line arguments and perform sanity checks.
#
# Reads @ARGV and stores every recognised option in its global variable.
# An unrecognised option, an option that is missing its argument, an
# unparsable date or an inconsistent combination of options prints an error
# message and terminates the script (through usage() or exit 1).
#
# On success the line data, file data and (if requested) user data output
# files have been opened for writing on the LINEDATA, FILEDATA and USERDATA
# file handles.
#
sub process_command_line_arguments
{
    # Switches: the presence of the option sets the variable to 1.
    my %switch_options =
        ("-debug"             => \$debug,
         "-linestyle"         => \$use_linestyle,
         "-countchangedlines" => \$count_lines_changed);

    # Options whose single argument is stored verbatim.
    my %value_options =
        ("-cvs-global-args" => \$cvs_global_args,
         "-branch"          => \$branch_tag,
         "-cvsdir"          => \$cvsdir,
         "-rlog"            => \$rlog_module,
         "-linedata"        => \$linedata,
         "-filedata"        => \$filedata,
         "-userdata"        => \$userdata);

    # Options whose single argument is stored verbatim and which also
    # switch gnuplot processing on.
    my %gnuplot_options =
        ("-gnuplot"             => \$gnuplot,
         "-gnuplotlinedata"     => \$gnuplot_linedata,
         "-gnuplotfiledata"     => \$gnuplot_filedata,
         "-gnuplotuserdata"     => \$gnuplot_userdata,
         "-gnuplotlinefiledata" => \$gnuplot_linefiledata,
         "-gnuplotsetterm"      => \$gnuplot_setterm,
         "-gnuplotcommand"      => \$gnuplot_command);

    # The remaining options take one argument but need special handling.
    my %special_options = map { $_ => 1 }
        ("-include", "-exclude", "-start", "-end",
         "-userlist", "-defaultusergroup");

    my $i = 0;
    while ($i <= $#ARGV)
    {
        my $option = $ARGV[$i];

        if (exists $switch_options{$option})
        {
            ${$switch_options{$option}} = 1;
            $i++;
            next;
        }

        if (! exists $value_options{$option} &&
            ! exists $gnuplot_options{$option} &&
            ! exists $special_options{$option})
        {
            print "Unrecognized option: $option\n";
            usage();
        }

        # Every option that reaches this point consumes the next word of
        # the command line, so make sure there is one.
        if ($i >= $#ARGV)
        {
            print "error: Option $option requires an argument.\n\n";
            usage();
        }
        my $value = $ARGV[$i+1];
        $i += 2;

        if (exists $value_options{$option})
        {
            ${$value_options{$option}} = $value;
        }
        elsif (exists $gnuplot_options{$option})
        {
            $use_gnuplot = 1;
            ${$gnuplot_options{$option}} = $value;
        }
        elsif ($option eq "-include" || $option eq "-exclude")
        {
            # @pattern_include and @pattern_regexp are parallel arrays.
            push @pattern_include, ($option eq "-include") ? 1 : 0;
            push @pattern_regexp, $value;
        }
        elsif ($option eq "-start" || $option eq "-end")
        {
            my $date = Date::Manip::ParseDate($value);
            $date = "" unless defined $date;

            # An empty result for a non-empty argument means the date could
            # not be parsed; report it rather than silently ignoring it.
            if ($value ne "" && $date eq "")
            {
                print "error: Unable to parse $option date \"$value\".\n\n";
                usage();
            }

            if ($option eq "-start")
            {
                $start_date = $date;
            }
            else
            {
                $end_date = $date;
            }
        }
        elsif ($option eq "-userlist")
        {
            # Either "groupname=user1,user2,..." (a list of users assigned
            # to a group) or just "user1,user2,..." (individual users).
            my $groupname = undef;
            my $users = $value;
            if ($value =~ /^(\w+)=(.*)$/)
            {
                ($groupname, $users) = ($1, $2);
            }
            foreach my $username (split /,/, $users)
            {
                $filterusers{$username} = 1;
                $usergroups{$groupname}{$username} = 1 if defined $groupname;
            }
        }
        elsif ($option eq "-defaultusergroup")
        {
            $default_user_group = $value;
            # Registers the group name as a key of %usergroups; the empty
            # list assigned in scalar context stores undef as its value.
            $usergroups{$default_user_group} = ();
        }
    }

    # If any -include or -exclude options have been specified, check that
    # there is at least one -include option, otherwise the resulting
    # file set will be empty.
    my $found_include = grep { $_ } @pattern_include;
    if (@pattern_include && ! $found_include)
    {
        print "error: empty file set specified: ";
        print "missing -include option\n";
        print " You probably want to add \"-include \'.*\'\"";
        print " to the end of your command.\n";
        exit 1;
    }

    # Check the mandatory arguments have been set.
    if ($cvsdir eq "" || $linedata eq "" || $filedata eq "")
    {
        print "error: Not all mandatory arguments specified.\n\n";
        usage();
    }

    # Both the line and file gnuplot output options need to be set if any
    # gnuplot option is used.
    if ($use_gnuplot)
    {
        if ($gnuplot_linedata eq "" || $gnuplot_filedata eq "")
        {
            print "error: Both the -gnuplotlinedata and -gnuplotfiledata ";
            print "options must be specified if using gnuplot.\n\n";
            usage();
        }
    }

    # If -userlist has been specified, make sure -userdata has.
    if (scalar keys %filterusers != 0 && $userdata eq "")
    {
        print "error: -userdata needs to be specified if -userlist is.\n\n";
        usage();
    }

    # If -gnuplotuserdata has been specified, make sure -userdata has.
    if ($gnuplot_userdata ne "" && $userdata eq "")
    {
        print "error: -userdata needs to be specified if -gnuplotuserdata is.\n\n";
        usage();
    }

    # If both the start and end dates are specified, check that the start date
    # occurs before the end date.
    if ($start_date ne "" && $end_date ne "" &&
        Date::Manip::Date_Cmp($start_date, $end_date) >= 0)
    {
        print "error: Start date specified must occur before the end date.\n\n";
        usage();
    }

    # If the -rlog option has been specified, need to make sure that the
    # installed CVS version is >= 1.11.1, as it is not supported in earlier
    # versions.  "cvs rlog" is run without a module and its standard error
    # is scanned for the word "deprecated", which is taken as the sign of
    # a CVS that is too old.
    if ($rlog_module ne "")
    {
        my $WTR = gensym();
        my $RDR = gensym();
        my $ERR = gensym();
        my $pid = open3($WTR, $RDR, $ERR, "cvs $cvs_global_args rlog");
        my $deprecated_found = 0;
        while (<$ERR>)
        {
            $deprecated_found = 1 if (/deprecated/);
        }
        close $WTR;
        close $RDR;
        close $ERR;
        waitpid $pid, 0;
        if ($deprecated_found)
        {
            print "error: -rlog option requires CVS version >= 1.11.1\n\n";
            exit 1;
        }
    }

    # Open the specified output files.  The three-argument form of open is
    # used so that the file names are never interpreted as part of the mode.
    open (LINEDATA, '>', $linedata)
        || die "Failed to create file \"$linedata\": $!";

    open (FILEDATA, '>', $filedata)
        || die "Failed to create file \"$filedata\": $!";

    if ($userdata ne "")
    {
        open (USERDATA, '>', $userdata)
            || die "Failed to create file \"$userdata\": $!";
    }
}
	    
###############################################################################
# Print out a usage message.
#
# Writes the version banner and the command line synopsis to standard output
# and then terminates the script with exit status 1; it never returns.
#
sub usage
{
    # Single-quoted here-document: nothing is interpolated, so the text is
    # printed exactly as written (including the '@' in the e-mail address).
    print <<'END_OF_USAGE';
cvsplot version 1.7.4 - Copyright David Sitsky: sits@users.sourceforge.net

cvsplot collects statistics from CVS controlled files.

usage:
cvsplot.pl -cvsdir <dir> [-rlog <module>] -linedata <file> -filedata <file>
           [-userdata <file>] [-userlist [groupname=]<user1,user2,...>]*
           [-defaultusergroup <groupname>]
           [-cvs-global-args <global-args>]
           [-countchangedlines]
           [-branch <branch name>] [-start <date>] [-end <date>]
           {-include <regexp> | -exclude <regexp>} [-linestyle]
           [-gnuplotlinedata <output file for line data>
            -gnuplotfiledata <output file for file data>
            [-gnuplot <path to gnuplot binary>]
            [-gnuplotlinefiledata <output file combined line and file data>]
            [-gnuplotuserdata <output file for user data>]
            [-gnuplotsetterm <gnuplot set terminal expression>]
            [-gnuplotcommand <general gnuplot command>]]

See http://cvsplot.sourceforge.net for updates.
END_OF_USAGE
    exit 1;
}
# syntax highlighted by Code2HTML, v. 0.9.1