#!/usr/bin/env perl -w
###############################################################################
# cvsplot: Copyright (c) 2001, 2002, 2003, 2004 David Sitsky.
# All rights reserved.
#
# cvsplot is a perl script which is used to extract information from CVS and
# plots the total number of lines and number of files in a selected file set
# against time.
#
# File sets can be specified using regular expressions.
# The start and end dates may also be specified.
#
# This program is free software; you can redistribute it and modify it under
# the terms of the GPL.
use Config;
use Cwd;
use Symbol;
use IPC::Open3;
# ---------------------------------------------------------------------------
# Script-wide state.  All of these are package globals (the script does not
# use strict); they are filled in by process_command_line_arguments and
# get_cvs_statistics and read by the later processing stages.
# ---------------------------------------------------------------------------
# Whether debugging is enabled or not.
$debug = 0;
# Additional global arguments to use with CVS commands.
$cvs_global_args = "";
# The start date from which to gather statistics.
$start_date = "";
# The final date up to which to gather statistics.
$end_date = "";
# Indicate whether to count the number of lines _changed_ (added + removed),
# or the net number of lines added (added - removed).
$count_lines_changed = 0;
# The directory in which to gather the cvs statistics (where the cvs log
# command is run from), or the directory of the CVS repository, if the
# -rlog option is used.
$cvsdir = "";
# The module to run cvs rlog over, if the -rlog option is specified.
$rlog_module = "";
# The branch that we are collecting statistics from. By default, the main
# branch is used.
$branch_tag = "";
# Parallel arrays of file patterns: @pattern_include holds 1 for an include
# pattern and 0 for an exclude pattern, and @pattern_regexp holds the
# regular expression at the same index.
@pattern_include = ();
@pattern_regexp = ();
# Where the number of lines statistics will be stored.
$linedata = "";
# Where the number of files statistics will be stored.
$filedata = "";
# Where the per-user line statistics will be stored.
$userdata = "";
# A set of users whose data needs to be examined individually.
%filterusers = ();
# A map of group name to the set of users that are members of that group.
%usergroups = ();
# The name of the default user group (if any).
$default_user_group = "";
# A hash (by date) of a hash (by filename) of lines added.
%line_stats = ();
# A hash (by date) of a hash (by filename) of the status.
%state_stats = ();
# A hash (by date) of a hash (by filename) of the revision.
%revision_stats = ();
# A hash (by date) of a hash (by filename) of the author.
%author_stats = ();
# A hash (by filename) of a hash (by version) of lines added.
%file_version_delta = ();
# A hash (by filename) of a hash (by version) of the file state.
%file_version_state = ();
# A hash (by filename) of a hash (by version) of the author.
%file_version_author = ();
# A hash (by filename) of the magic branch number.
%file_branch_number = ();
# A hash (by filename) of the number of branch revisions made.
%file_number_branch_revisions = ();
# A hash (by date) of the total number of lines.
%total_lines = ();
# A hash (by date) of the total number of files.
%total_files = ();
# A hash (by user) of a hash (by date) of their total number of lines.
%total_user_lines = ();
# Flag to indicate if gnuplot is to be used.
$use_gnuplot = 0;
# The location of the gnuplot binary. It is assumed to be in the
# path, but may be over-ridden on the command-line with the
# -gnuplot argument. For Windows platforms, this is useful, since
# the name of the binary is not "gnuplot", but often "wgnupl32"
# or "pgnuplot".
$gnuplot = "gnuplot";
# The gnuplot output filename to write the output of the line data.
$gnuplot_linedata = "";
# The gnuplot output filename to write the output of the file data.
$gnuplot_filedata = "";
# The gnuplot output filename to write the output of the user data.
# Note: the combined line/file output name, $gnuplot_linefiledata, is not
# initialised here; it only receives a value when -gnuplotlinefiledata is
# given on the command line.
$gnuplot_userdata = "";
# The gnuplot "set term" expression to use when generating output.
# By default, generate colour png files.
$gnuplot_setterm = "png color";
# A general gnuplot command that can be executed to change some
# aspect of the plotting command, such as the format of the x values.
$gnuplot_command = "";
# Flag to indicate if lines are to be used in the final plot, rather
# than discrete points. For large plots, this can sometimes improve
# readability.
$use_linestyle = 0;
# Determine if this process is running under Windows.
$osname = $Config{'osname'};
$windows = (defined $osname && $osname eq "MSWin32") ? 1 : 0;
# Main program.  check_missing_modules must run first: it loads
# Date::Manip, which process_command_line_arguments needs for -start/-end.
check_missing_modules();
process_command_line_arguments();
get_cvs_statistics();
analyse_statistics();
generate_data_files();
generate_plots();
###############################################################################
# Quote a single argument so it can be embedded in a shell command line.
#
# On Windows ($windows is true) the argument is simply wrapped in double
# quotes, since DOS shells do not understand the single quotes produced by
# String::ShellQuote.  On every other platform the work is delegated to
# String::ShellQuote::shell_quote.
#
# Parameters: $arg - the argument text to quote.
# Returns:    the quoted argument.
sub quote
{
    my $text = shift;

    return '"' . $text . '"' if $windows;
    return String::ShellQuote::shell_quote($text);
}
###############################################################################
# Load the non-core modules the script depends on, and print installation
# help if any of them are missing.
#
# Date::Manip is always required; String::ShellQuote is only required on
# non-Windows platforms (see quote()).  If everything loads, the sub simply
# returns.  Otherwise it prints platform-specific installation instructions
# and terminates the script with a non-zero exit status, so that calling
# scripts can detect the failure.
sub check_missing_modules
{
    my @missing = ();

    # Load the Date::Manip module.
    eval
    {
        require Date::Manip;
    };
    if ($@)
    {
        push @missing, 'Date::Manip';
    }

    # Load the String::ShellQuote module for UNIX platforms.
    eval
    {
        if (! $windows)
        {
            require String::ShellQuote;
        }
    };
    if ($@)
    {
        push @missing, 'String::ShellQuote';
    }

    # Check if there are any missing modules.
    return if $#missing == -1;

    # First, output the generic "missing module" message.
    print "\n";
    print "Cvsplot requires some Perl modules which are missing " .
        "from your system.\n";

    if ($windows) {
        print "These can be installed by issuing the following commands:\n\n";
        foreach my $module (@missing) {
            # The ppm command line is given the module name with the
            # "::" separators removed.
            $module =~ s/:://g;
            print 'C:\> ' . "ppm install $module\n";
        }
        print "\n";
    }
    else
    {
        print "They can be installed by running (as root) the following:\n";
        foreach my $module (@missing) {
            print " perl -MCPAN -e 'install \"$module\"'\n";
        }
        print "\n";
        print "Modules can also be downloaded from http://www.cpan.org.\n\n";
    }

    # Missing prerequisites are an error: do not report success.
    exit 1;
}
###############################################################################
# Check whether the supplied file is to be examined or not depending on what
# the user set for the -include and -exclude options.
#
# The patterns in @pattern_regexp are tried in the order they were given on
# the command line; the first one that matches decides the outcome, using
# the flag at the same index in @pattern_include.
#
# Parameters: $filename - the file name to test.
# Returns:    true if the file is to be included.  If no -include or
#             -exclude options have been set, everything is included.  If
#             patterns exist but none match, the file is excluded.
sub include_file
{
    my ($filename) = @_;

    # If there are no settings, include everything.
    return 1 if $#pattern_regexp == -1;

    # Use a lexical index so that the package global $i, which other parts
    # of the script use as a loop counter, is not clobbered.
    foreach my $index (0 .. $#pattern_regexp)
    {
        if ($filename =~ /$pattern_regexp[$index]/)
        {
            # Got a match, return whether or not the file should be
            # included.
            return $pattern_include[$index];
        }
    }

    # No matches, don't include this file.
    return 0;
}
###############################################################################
# Using "cvs log" (or "cvs rlog" when -rlog was given) and a few other
# commands, gather all of the necessary statistics.
#
# The log output is parsed with a two-state machine driven by $search_file:
# while it is 1 the parser is looking for the next "RCS file:" header, and
# while it is 0 it is reading the header and revision entries of a file that
# passed include_file().
#
# Results are recorded in the package globals %file_branch_number,
# %file_number_branch_revisions, %file_version_delta, %file_version_state,
# %file_version_author, %line_stats, %state_stats, %revision_stats,
# %author_stats and %users.  Several scratch values ($revision, $date,
# $author, $state, $lines_added, $lines_removed, $number_lines,
# $file_on_branch) are also package globals.
#
# Dies if the CVS directory cannot be entered or the log command cannot be
# started; exits with status 1 if a per-revision CVS command fails or the
# -cvsdir argument does not match the repository file names.
sub get_cvs_statistics
{
    if ($debug && defined $osname)
    {
        print "Platform is $osname\n";
    }

    # Explicitly set the timezone for Windows platforms, so that Date::Manip
    # works.
    if ($windows)
    {
        $ENV{TZ} = "C";
    }

    my $working_file = "";
    my $relative_working_file = "";
    my $working_cvsdir = "";
    my $saved_cwd = "";

    # Change to the directory nominated by $cvsdir, and save the current
    # directory, only if we aren't using the -rlog option.
    if ($rlog_module eq "")
    {
        $saved_cwd = cwd();
        # Parenthesised so that "||" applies to the result of chdir and
        # not to its argument.
        chdir($cvsdir)
            || die "Failed to change to directory \"$cvsdir\": $!";
    }
    else
    {
        # Remove the accessor part, and just get the pathname.
        $working_cvsdir = ($cvsdir =~ /([^:]+)$/) ? $1 : "";
        print "Got working_cvsdir as $working_cvsdir\n" if $debug;

        # Since this is used in a regexp below, need to make sure DOS
        # pathnames are correctly matched against.
        $working_cvsdir =~ s/\\/\\\\/g;
    }

    # Flag to indicate what the state is when parsing the output from cvs
    # log.  True indicates that the parser is waiting for the start of a
    # cvs log entry.
    my $search_file = 1;

    # Build up the command string appropriately, depending on what options
    # have been set.
    my $command = ($rlog_module eq '') ? "cvs $cvs_global_args log" :
        sprintf("cvs $cvs_global_args -d %s rlog %s",
                quote($cvsdir), quote($rlog_module));
    print "Executing \"$command\"\n" if $debug;

    open (CVSLOG, "$command |") || die "Couldn't execute \"$command\"";

    while (<CVSLOG>)
    {
        if ($search_file == 1)
        {
            # Need to locate the name of the working file
            if (/^RCS file: (.*),v$/)
            {
                $working_file = $1;
                $working_file =~ s/Attic\///g;
                $relative_working_file = "";

                # Check if this file is to be included or not.
                if (include_file($working_file))
                {
                    # Yep, search for more details on this file.
                    $search_file = 0;
                    if ($branch_tag eq "")
                    {
                        # Main branch to be investigated only.
                        $file_branch_number{$working_file} = "1";
                        $file_number_branch_revisions{$working_file} = 0;
                    }
                    print "Including file \"$working_file\"\n" if $debug;
                }
                else
                {
                    print "Excluding file \"$working_file\"\n" if $debug;
                }
            }
        }
        else
        {
            # Collect the relative part for those runs that don't use
            # -rlog.
            if (/^Working file: (.*)$/)
            {
                $relative_working_file = $1;
            }
            # Handle repositories working off an explicit numbering scheme,
            # such as 8.1.  Only do this if the user hasn't specified an
            # explicit branch to gather statistics over.  In most cases,
            # the result will still be 1, but this handles the stranger
            # repositories out there.
            elsif ($branch_tag eq "" && /^head: (\d+)\./) {
                $file_branch_number{$working_file} = $1;
            }
            # If we are collecting statistics on a branch, determine the
            # magic branch number for this file.
            elsif ( (! defined $file_branch_number{$working_file}) &&
                    (/^\s*${branch_tag}: ([\d\.]+)\.0\.(\d+)$/) )
            {
                $file_branch_number{$working_file} = "${1}.${2}";
                $file_number_branch_revisions{$working_file} = 0;
                if ($debug)
                {
                    print "Got branch $file_branch_number{$working_file}";
                    print " for file \"$working_file\"\n";
                }
            }
            elsif (/^keyword substitution: b$/)
            {
                # This is a binary file, ignore it.  Remove the entries
                # entirely so that later passes over these hashes do not
                # see undefined branch numbers.
                delete $file_branch_number{$working_file};
                delete $file_number_branch_revisions{$working_file};
                $search_file = 1;
                print "Excluding binary file \"$working_file\"\n" if $debug;
            }
            elsif (/^=============================================================================$/)
            {
                # End of the log entry for this file, start parsing for the
                # next file.
                $search_file = 1;
                next;
            }
            elsif (/^----------------------------$/)
            {
                # Matched the description separator.  If a branch has been
                # specified, but this file doesn't exist on it, skip this
                # file.
                if (($branch_tag ne "") &&
                    (! defined $file_branch_number{$working_file}))
                {
                    if ($debug)
                    {
                        print "File \"$working_file\" not on branch\n";
                    }
                    $search_file = 1;
                    next;
                }

                # Read the revision line, and record the appropriate
                # information.  Stop cleanly if the log ends here.
                $_ = <CVSLOG>;
                last if ! defined $_;
                if (/^revision ([\d\.]+)$/)
                {
                    # Record the revision, and whether it is part of the tag
                    # of interest.
                    $revision = $1;
                    if ($revision =~
                        /^$file_branch_number{$working_file}\.\d+$/)
                    {
                        $file_on_branch = 1;
                        $file_number_branch_revisions{$working_file}++;
                    }
                    else
                    {
                        $file_on_branch = 0;
                    }
                    if ($debug)
                    {
                        print "Got branch number: $file_branch_number{$working_file} rev $revision on branch: $file_on_branch\n";
                    }
                }
                else
                {
                    # Problem in parsing, skip it.
                    print "Couldn't parse line: $_\n";
                    $search_file = 1;
                    next;
                }

                $_ = <CVSLOG>;            # Read the "date" line.
                last if ! defined $_;
                if (/^date: (\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d); .* author: (.*); .* state: (.*);.*lines: \+(\d+) \-(\d+).*$/)
                {
                    # An ordinary revision with "lines: +A -R" counts.  Note
                    # for some CVS clients, state dead is presented in this
                    # way too.
                    $date = $1;
                    $author = $2;
                    $users{$author} = 1;
                    $state = $3;
                    $lines_added = $4;
                    $lines_removed = $5;
                    $number_lines = $count_lines_changed ?
                        $lines_added + $lines_removed : $lines_added - $lines_removed;
                    $file_version_delta{$working_file}{$revision} =
                        $number_lines;
                    $file_version_state{$working_file}{$revision} = $state;
                    $file_version_author{$working_file}{$revision} = $author;
                    if ($file_on_branch)
                    {
                        # This revision lives on the branch of interest.
                        $line_stats{$date}{$working_file} += $number_lines;
                        $state_stats{$date}{$working_file} = $state;
                        $revision_stats{$date}{$working_file} = $revision;
                        $author_stats{$date}{$working_file} = $author;
                    }
                }
                elsif (/^date: (\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d); .* author: (.*); .* state: dead;.*$/)
                {
                    # File has been removed.
                    $date = $1;
                    $author = $2;
                    $users{$author} = 1;
                    $file_version_delta{$working_file}{$revision} = 0;
                    $file_version_state{$working_file}{$revision} = "dead";
                    $file_version_author{$working_file}{$revision} = $author;
                    if ($file_on_branch)
                    {
                        $line_stats{$date}{$working_file} = 0;
                        $state_stats{$date}{$working_file} = "dead";
                        $revision_stats{$date}{$working_file} = $revision;
                        $author_stats{$date}{$working_file} = $author;
                    }
                }
                elsif (/^date: (\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d); .* author: (.*); .* state: ([^;]*);.*$/)
                {
                    $date = $1;
                    $author = $2;
                    $users{$author} = 1;
                    $state = $3;

                    # Unfortunately, cvs log doesn't indicate the number of
                    # lines an initial revision is created with, so find
                    # this out by checking the revision out to stdout and
                    # counting its lines.
                    my $lccmd = "";
                    if ($rlog_module ne "")
                    {
                        print "Working cvsdir is: $working_cvsdir working file: $working_file\n" if $debug;

                        # For DOS-based repositories, the filename may
                        # contain a drive letter.  Also need to be flexible
                        # with the pathname separator.
                        if (! ($working_file =~ /^([A-Za-z]:)?${working_cvsdir}[\/\\](.*)$/))
                        {
                            print STDERR "-cvsdir argument $working_cvsdir doesn't match ";
                            print STDERR "repository filename prefix $working_file\n";
                            print STDERR "Please correct your -cvsdir argument and try again\n";
                            exit 1;
                        }
                        $lccmd = sprintf("cvs $cvs_global_args -d %s co -r %s -p %s",
                                         quote($cvsdir),
                                         quote($revision),
                                         quote($2));
                    }
                    else
                    {
                        $lccmd = sprintf("cvs $cvs_global_args update -r %s -p %s",
                                         quote($revision),
                                         quote($relative_working_file));
                    }
                    print "Executing $lccmd\n" if $debug;

                    my $WTR = gensym();
                    my $RDR = gensym();
                    my $ERR = gensym();
                    my $pid = open3($WTR, $RDR, $ERR, $lccmd);
                    # Nothing is ever written to the command.
                    close($WTR);

                    # NOTE(review): stdout is drained completely before
                    # stderr is read; a command that writes a very large
                    # amount to stderr first could block.
                    for ($number_lines = 0; defined <$RDR>; $number_lines++) {}
                    close ($RDR);
                    my $error_string = "";
                    while (my $error_line = <$ERR>)
                    {
                        $error_string .= $error_line;
                    }
                    close($ERR);
                    waitpid $pid, 0;
                    if ($?) {
                        print "CVS command failed: \"$lccmd\" status $?\n";
                        print "$error_string\n";
                        exit 1;
                    }
                    print "$working_file 1.1 = $number_lines lines\n" if $debug;

                    $file_version_delta{$working_file}{$revision} =
                        $number_lines;
                    $file_version_state{$working_file}{$revision} = $state;
                    $file_version_author{$working_file}{$revision} = $author;
                    if ($file_on_branch)
                    {
                        $line_stats{$date}{$working_file} += $number_lines;
                        $state_stats{$date}{$working_file} = "Exp";
                        $revision_stats{$date}{$working_file} = $revision;
                        $author_stats{$date}{$working_file} = $author;
                    }
                }
                else
                {
                    print "Couldn't parse: $_";
                }

                if ($debug)
                {
                    print "File \"$working_file\" rev $revision ";
                    print "delta $file_version_delta{$working_file}{$revision} ";
                    print "state $file_version_state{$working_file}{$revision}\n";
                    print "author $file_version_author{$working_file}{$revision}\n";
                }
            }
        }
    }
    close(CVSLOG);

    # Go back to the original directory if we aren't using the -rlog option.
    if ($rlog_module eq "")
    {
        chdir($saved_cwd)
            || warn "Failed to return to directory \"$saved_cwd\": $!\n";
    }
}
# Cache of line counts, keyed by filename and then revision.  It is filled
# in by get_line_count and get_line_count_inner.
%memorise_line_count = ();

###############################################################################
# Return the number of lines that constitute a particular revision of a file.
#
# The actual calculation is done by get_line_count_inner; this wrapper
# records the answer in %memorise_line_count so later calls can reuse it,
# and prints the result when debugging is enabled.
#
# Parameters: $filename - the file of interest.
#             $revision - the revision of that file.
# Returns:    the line count for that revision.
sub get_line_count
{
    my $filename = shift;
    my $revision = shift;

    # Compute once, then remember for future intermediate calculations.
    my $line_total = get_line_count_inner($filename, $revision);
    $memorise_line_count{$filename}{$revision} = $line_total;

    print "get_line_count($filename, $revision) = $line_total\n" if $debug;

    return $line_total;
}
# Compute the number of lines in a given revision of a file by walking
# backwards through its history and summing the recorded per-revision
# deltas in %file_version_delta.
#
# Starting at $revision, each loop iteration either finishes (a memoised
# count exists, the revision is unknown to the log, or revision 1.1 was
# reached) or steps to an earlier revision:
#   - a "dead" revision contributes nothing and steps to its predecessor;
#   - the first revision on a branch (x.y.b.1) adds its delta and steps to
#     the branch point x.y;
#   - any other revision x.n adds its delta and steps to x.(n-1).
#
# Parameters: $filename - the file of interest.
#             $revision - the revision to compute the line count for.
# Returns:    the computed line count.
# Exits with a non-zero status if the revision string cannot be interpreted.
sub get_line_count_inner
{
    my ($filename, $revision) = @_;
    my $count = 0;
    my $finished = 0;

    while (!$finished)
    {
        if (defined $memorise_line_count{$filename}{$revision})
        {
            # Already computed: reuse the stored total.
            $count += $memorise_line_count{$filename}{$revision};
            $finished = 1;
        }
        elsif (! defined($file_version_state{$filename}{$revision}))
        {
            # Case where we are looking for a revision that hasn't
            # been found in the output of the CVS log command.  This is
            # usually because a developer decided to start the file
            # revision at something other than 1.1.
            $memorise_line_count{$filename}{$revision} = 0;
            $finished = 1;
        }
        elsif ($revision eq "1.1")
        {
            # Base case where the revision is 1.1
            $memorise_line_count{$filename}{$revision} =
                $file_version_delta{$filename}{$revision};
            $count += $memorise_line_count{$filename}{$revision};
            $finished = 1;
        }
        elsif ($file_version_state{$filename}{$revision} eq "dead" &&
               $revision =~ /^([\d\.]+)\.(\d+)$/)
        {
            # Case where file has been removed.  The file count is
            # effectively the previous version's count.
            $revision = "${1}." . ($2 - 1);
        }
        elsif ($revision =~ /^([\d\.]+)\.\d+\.1$/)
        {
            # Case where need to descend down to the branch point and find
            # the contributions made there.
            my $branch_point_revision = $1;
            my $delta = $file_version_delta{$filename}{$revision};
            if (! defined($delta))
            {
                print "file_version_data not defined for $filename $revision\n";
                $delta = 0;
            }
            $count += $delta;
            $revision = $branch_point_revision;
        }
        elsif ($revision =~ /^([\d\.]+)\.(\d+)$/)
        {
            # Need to determine previous revision number + this revision's
            # contribution.
            my $previous_revision = "${1}." . ($2 - 1);
            my $delta = $file_version_delta{$filename}{$revision};
            if (! defined($delta))
            {
                print "[2] file_version_data not defined for $filename $revision\n";
                $delta = 0;
            }
            $count += $delta;
            $revision = $previous_revision;
        }
        else
        {
            # This is a fatal condition, so do not report success.
            print "Unhandled case for file $filename revision $revision\n";
            exit 1;
        }
    }
    return $count;
}
###############################################################################
# Turn the raw per-commit records gathered by get_cvs_statistics into the
# time series that are written out later.
#
# For every commit date, in ascending order, this records:
#   %total_files{date}             - number of files present,
#   %total_lines{date}             - total line count of the present files,
#   %total_user_lines{user}{date}  - that user's running line total.
# When a file has been removed, its contribution is removed completely
# (including each author's share); when it is re-added the shares are
# restored.  Finally, entries outside the [-start, -end] interval are
# deleted from all three hashes.
sub analyse_statistics
{
    # Keep a record of what files are present when gathering statistics.
    my %files_present = ();

    # Keep a record of the current revision a file has when gathering
    # statistics.  Their initial revisions will be the revision that
    # they branched from if we are doing statistics on a branch.
    my %file_revision = ();

    # Keep a record of the current state of a file when gathering
    # statistics.
    my %file_state = ();

    # Keep a record of per-user line counts.
    my %user_total = ();

    # A hash of filenames to a hash of usernames to linecount totals.  This
    # is maintained as the statistics are processed, so that when files are
    # removed or re-added, all of the individual contributions are properly
    # maintained to the correct authors.
    my %file_author_linecount = ();

    if ($branch_tag ne "")
    {
        foreach my $file ( keys %file_branch_number )
        {
            # Skip entries with no usable branch number rather than reusing
            # a stale capture from an earlier match.
            my $branch_number = $file_branch_number{$file};
            next unless (defined($branch_number) &&
                         $branch_number =~ /^([\d\.]+)\.\d+$/);
            my $base_revision = $1;
            $file_revision{$file} = $base_revision;

            # The file starts out present unless the revision it branched
            # from is recorded as dead.
            my $base_state = $file_version_state{$file}{$base_revision};
            if (! defined($base_state) || $base_state ne "dead")
            {
                $files_present{$file} = 1;
            }
        }
    }

    # Go through the records in order of earliest to latest.
    foreach my $date ( sort keys %line_stats )
    {
        # Investigate what CVS operations occurred on this date.  Record
        # individual author contributions.
        foreach my $file ( keys %{ $line_stats{$date} } )
        {
            # Update the current revision the file has.
            $file_revision{$file} = $revision_stats{$date}{$file};
            my $author = $author_stats{$date}{$file};

            # Record if a file was removed or not.
            if ($state_stats{$date}{$file} eq "dead")
            {
                delete $files_present{$file};

                # For each author contribution made to this file, deduct
                # them now.
                foreach my $innerauthor (keys %{ $file_author_linecount{$file} })
                {
                    my $amount = $file_author_linecount{$file}{$innerauthor};
                    $user_total{$innerauthor} -= $amount;
                    $total_user_lines{$innerauthor}{$date} = $user_total{$innerauthor};
                }
            }
            else
            {
                $files_present{$file} = 1;

                # If the file was removed in the previous commit, need to
                # add the entire line count broken amongst the appropriate
                # authors, otherwise just add the increment.
                my $state = $file_state{$file};
                if (defined $state && $state eq "dead")
                {
                    # For each author contribution made to this file, add
                    # them to the appropriate authors now.  The data point
                    # is the author's running total, mirroring the removal
                    # case above.
                    foreach my $innerauthor (keys %{ $file_author_linecount{$file} })
                    {
                        my $amount = $file_author_linecount{$file}{$innerauthor};
                        $user_total{$innerauthor} += $amount;
                        $total_user_lines{$innerauthor}{$date} = $user_total{$innerauthor};
                    }
                }

                # Now add in the increment done in this commit.
                $user_total{$author} += $line_stats{$date}{$file};
                $file_author_linecount{$file}{$author} +=
                    $line_stats{$date}{$file};
                $total_user_lines{$author}{$date} = $user_total{$author};
            }

            # Record the current state of this file.
            $file_state{$file} = $state_stats{$date}{$file};
        }

        # Calculate the total number of files present on this date.
        $total_files{$date} = scalar keys %files_present;

        # Count the total number of lines present for the current file set.
        my $total_line_count = 0;
        foreach my $file ( keys %file_revision )
        {
            if (defined $files_present{$file})
            {
                $total_line_count += get_line_count($file, $file_revision{$file});
            }
        }
        $total_lines{$date} = $total_line_count;
    }

    # Filter out those entries to only contain what the user specified
    # in the date interval.  This could be done far more efficiently, but
    # for now...
    foreach my $date ( sort keys %total_lines )
    {
        my $current_date = Date::Manip::ParseDate($date);
        if (($start_date ne "" &&
             Date::Manip::Date_Cmp($current_date, $start_date) < 0) ||
            ($end_date ne "" &&
             Date::Manip::Date_Cmp($current_date, $end_date) > 0))
        {
            # This date is before the start date specified by the user, or
            # this date is after the end date specified by the user.
            # Delete it.
            delete $total_lines{$date};
            delete $total_files{$date};

            # Also handle per-user date filtering.
            foreach my $author ( keys %total_user_lines )
            {
                delete $total_user_lines{$author}{$date};
            }
        }
    }
}
###############################################################################
# Write the collected statistics to the already-open LINEDATA, FILEDATA and
# (when -userdata was given) USERDATA handles, then close them.
#
# Each line/file record is "<date> <value>".  Each user record is
# "<date> <name> <id> <value>", where <name>/<id> identify either an author
# or, when user groups are in effect, a group; the numeric id is what the
# gnuplot script later selects on.  Only dates remaining in %total_lines are
# written.
#
# Side effect: when a default user group is configured, every author who is
# not a member of any group is added to it in %usergroups.
sub generate_data_files
{
    # Number the authors in sorted order, topping up the default group as
    # we go.
    my %user_map = ();
    my $next_id = 0;
    for my $user (sort keys %total_user_lines)
    {
        $user_map{$user} = $next_id++;
        next if $default_user_group eq '';

        # Is this author already a member of some group?
        my $in_some_group = 0;
        for my $group (sort keys %usergroups)
        {
            next unless exists $usergroups{$group}{$user};
            $in_some_group = 1;
            last;
        }

        # If not, the author belongs to the default group.
        $usergroups{$default_user_group}{$user} = 1 unless $in_some_group;
    }

    # Number the groups in sorted order as well.
    my %group_map = ();
    $next_id = 0;
    for my $group (sort keys %usergroups)
    {
        $group_map{$group} = $next_id++;
    }

    # Latest known line count of each author, per group.
    my %total_group_lines = ();

    # Determine whether authors and/or groups are being filtered.
    my $filtering_users = (keys(%filterusers) > 0);
    my $filtering_groups = (keys(%usergroups) > 0);

    # Write out the data for each commit in date order.
    for my $date (sort keys %total_lines)
    {
        print LINEDATA "$date $total_lines{$date}\n";
        print FILEDATA "$date $total_files{$date}\n";

        # Nothing more to do for this date without a user data file.
        next if $userdata eq "";

        for my $author (sort keys %user_map)
        {
            # Only output if it is an author of interest.
            if ($filtering_users && $default_user_group eq '')
            {
                next unless exists $filterusers{$author};
            }

            # Skip authors with no data point on this date.
            my $data = $total_user_lines{$author}{$date};
            next unless defined $data;

            if (! $filtering_groups)
            {
                # Output author information.
                print USERDATA "$date $author $user_map{$author} $data\n";
                next;
            }

            # Update every group this author is a member of, and output the
            # group's new total.
            for my $group (sort keys %usergroups)
            {
                next unless exists $usergroups{$group}{$author};

                $total_group_lines{$group}{$author} = $data;
                my $total = 0;
                $total += $_ for values %{ $total_group_lines{$group} };
                print USERDATA "$date $group $group_map{$group} $total\n";
            }
        }
    }

    close LINEDATA;
    close FILEDATA;
    close USERDATA if $userdata ne "";
}
###############################################################################
# Generate the plots by piping command scripts into gnuplot.
#
# When gnuplot use is enabled, a combined line/file plot is produced if
# -gnuplotlinefiledata was given, followed by the separate line and file
# plots.  When -gnuplotuserdata was given, a per-user (or per-group) plot
# is produced from the user data file; column 4 of that file is the numeric
# id and column 5 the value.
#
# Dies if gnuplot cannot be started; warns if it reports a failure.
#
# useful reference : http://www.cs.uni.edu/Help/gnuplot/TOC.html
#
sub generate_plots
{
    my $linestyle_command = "";
    if ($use_linestyle)
    {
        $linestyle_command = "set data style lines";
    }

    # Pipe one complete command script into gnuplot.
    my $run_gnuplot = sub
    {
        my ($script) = @_;
        print "Issuing command:\n\n$script\n\n to gnuplot: $gnuplot\n" if $debug;
        open(GNUPLOT, "| $gnuplot") || die "Failed to start gnuplot: $!";
        print GNUPLOT $script;
        # For a pipe, close reports the exit status of the command.
        close(GNUPLOT)
            || warn "gnuplot command \"$gnuplot\" failed: status $? $!\n";
    };

    if ($use_gnuplot)
    {
        # Generate the gnuplot command scripts to build the necessary images.
        if ($gnuplot_linefiledata)
        {
            # Produce a combined plot if the user has specified to build one.
            my $command = <<EOF;
set xdata time
set timefmt '%Y/%m/%d %H:%M:%S'
set format x '%m/%y'
set xlabel 'Date'
set title 'CVS Line/File Statistics'
set key left top
set ylabel 'Number of lines'
set yrange [0:*]
set ytics
set y2label 'Number of files'
set y2range [0:*]
set y2tics
set terminal $gnuplot_setterm
set output '$gnuplot_linefiledata'
$linestyle_command
$gnuplot_command
plot '$linedata' using 1:3 axes x1y1 title 'Lines',\\
'$filedata' using 1:3 axes x1y2 title 'Files'
exit
EOF
            $run_gnuplot->($command);
        }

        # Produce two separate plots, one for the file data, one for the
        # line data.
        my $command = <<EOF;
set xdata time
set timefmt '%Y/%m/%d %H:%M:%S'
set format x '%m/%y'
set xlabel 'Date'
set ylabel 'Number of lines'
set title 'CVS Line Statistics'
set yrange [0:*]
set nokey
set terminal $gnuplot_setterm
set output '$gnuplot_linedata'
$linestyle_command
$gnuplot_command
plot '$linedata' using 1:3
set title 'CVS File Statistics'
set ylabel 'Number of files'
set yrange [0:*]
set output '$gnuplot_filedata'
$gnuplot_command
plot '$filedata' using 1:3
exit
EOF
        $run_gnuplot->($command);
    }

    if ($gnuplot_userdata)
    {
        # Determine if authors and/or groups are being filtered.
        my $filtering_users = scalar keys %filterusers != 0;
        my $filtering_groups = scalar keys %usergroups != 0;
        my $title = 'Per-' . ($filtering_groups ? 'group' : 'user') .
            ' CVS Line Statistics';

        # Produce a plot containing a list of per-user stats.
        my $command = <<EOF;
set xdata time
set timefmt '%Y/%m/%d %H:%M:%S'
set format x '%m/%y'
set xlabel 'Date'
set ylabel 'Number of lines'
set title '$title'
set yrange [0:*]
set key top left
set terminal $gnuplot_setterm
set output '$gnuplot_userdata'
$linestyle_command
$gnuplot_command
EOF

        # Build one plot clause per group or user.  $index must advance for
        # every group/user, plotted or not, so that it keeps matching the
        # ids in the data file; the clauses are collected in a list so that
        # separators only appear between clauses that are actually emitted.
        my @plot_clauses = ();
        my $index = 0;
        if ($filtering_groups)
        {
            foreach my $group ( sort keys %usergroups )
            {
                push @plot_clauses,
                    "'$userdata' using 1:(\$4 == $index ? \$5 : 1/0) t '$group'";
                $index++;
            }
        }
        else
        {
            foreach my $user ( sort keys %total_user_lines )
            {
                if (! $filtering_users || exists $filterusers{$user})
                {
                    push @plot_clauses,
                        "'$userdata' using 1:(\$4 == $index ? \$5 : 1/0) t '$user'";
                }
                $index++;
            }
        }

        # An empty plot command is a gnuplot error, so skip the plot.
        if (! @plot_clauses)
        {
            warn "No users or groups to plot, skipping \"$gnuplot_userdata\"\n";
            return;
        }

        $command .= "\nplot " . join(", ", @plot_clauses);
        $command .= "\nexit\n";
        $run_gnuplot->($command);
    }
}
###############################################################################
# Process the command line arguments and perform sanity checks.
#
# Options are read from @ARGV and stored in the script's package globals.
# After parsing, the combination of options is validated, the availability
# of "cvs rlog" is checked when -rlog was given, and the LINEDATA, FILEDATA
# and (optionally) USERDATA output handles are opened.
#
# Any invalid usage prints a message and terminates via usage() or exit 1;
# failure to create an output file dies.
sub process_command_line_arguments
{
    # Switches without a value: option name => variable set to 1.
    my %flag_for = (
        '-debug'             => \$debug,
        '-linestyle'         => \$use_linestyle,
        '-countchangedlines' => \$count_lines_changed,
    );

    # Options whose single value is stored verbatim.
    my %value_for = (
        '-cvs-global-args' => \$cvs_global_args,
        '-branch'          => \$branch_tag,
        '-cvsdir'          => \$cvsdir,
        '-rlog'            => \$rlog_module,
        '-linedata'        => \$linedata,
        '-filedata'        => \$filedata,
        '-userdata'        => \$userdata,
    );

    # As above, but giving any of these also switches gnuplot on.
    my %gnuplot_value_for = (
        '-gnuplot'             => \$gnuplot,
        '-gnuplotlinedata'     => \$gnuplot_linedata,
        '-gnuplotfiledata'     => \$gnuplot_filedata,
        '-gnuplotuserdata'     => \$gnuplot_userdata,
        '-gnuplotlinefiledata' => \$gnuplot_linefiledata,
        '-gnuplotsetterm'      => \$gnuplot_setterm,
        '-gnuplotcommand'      => \$gnuplot_command,
    );

    # Remaining options that take a value and need special handling.
    my %special_option = map { $_ => 1 }
        ('-include', '-exclude', '-start', '-end', '-userlist',
         '-defaultusergroup');

    my @args = @ARGV;
    while (@args)
    {
        my $option = shift @args;

        if (exists($flag_for{$option}))
        {
            ${ $flag_for{$option} } = 1;
            next;
        }

        if (! (exists($value_for{$option}) ||
               exists($gnuplot_value_for{$option}) ||
               exists($special_option{$option})))
        {
            print "Unrecognized option: $option\n";
            usage();
        }

        # Every option left at this point requires exactly one value.
        if (! @args)
        {
            print "error: Option $option requires an argument.\n\n";
            usage();
        }
        my $value = shift @args;

        if (exists($value_for{$option}))
        {
            ${ $value_for{$option} } = $value;
        }
        elsif (exists($gnuplot_value_for{$option}))
        {
            $use_gnuplot = 1;
            ${ $gnuplot_value_for{$option} } = $value;
        }
        elsif ($option eq "-include" || $option eq "-exclude")
        {
            # Keep the two parallel arrays in step.
            push @pattern_include, ($option eq "-include") ? 1 : 0;
            push @pattern_regexp, $value;
        }
        elsif ($option eq "-start" || $option eq "-end")
        {
            # An empty result means the date was not understood; treating
            # it as "no limit" would silently ignore the user's request.
            my $parsed = Date::Manip::ParseDate($value);
            if (! $parsed)
            {
                print "error: Unable to parse date \"$value\" given for $option.\n\n";
                usage();
            }
            if ($option eq "-start")
            {
                $start_date = $parsed;
            }
            else
            {
                $end_date = $parsed;
            }
        }
        elsif ($option eq "-userlist")
        {
            my $groupname = undef;
            my $members = $value;
            if ($value =~ /^(\w+)=(.*)$/)
            {
                # A list of users assigned to a group.
                $groupname = $1;
                $members = $2;
            }
            # Otherwise just a list of individual users to run over.
            foreach my $username (split /,/, $members)
            {
                $filterusers{$username} = 1;
                $usergroups{$groupname}{$username} = 1 if defined $groupname;
            }
        }
        else
        {
            # -defaultusergroup: make sure the group exists, without
            # discarding members already assigned to it by -userlist.
            $default_user_group = $value;
            $usergroups{$default_user_group} ||= {};
        }
    }

    # If any -include or -exclude options have been specified, check that
    # there is at least one -include option, otherwise the resulting
    # file set will be empty.
    if (@pattern_include && scalar(grep { $_ } @pattern_include) == 0)
    {
        print "error: empty file set specified: ";
        print "missing -include option\n";
        print " You probably want to add \"-include \'.*\'\"";
        print " to the end of your command.\n";
        exit 1;
    }

    # Check the mandatory arguments have been set.
    if ($cvsdir eq "" || $linedata eq "" || $filedata eq "")
    {
        print "error: Not all mandatory arguments specified.\n\n";
        usage();
    }

    # The line and file gnuplot options need to be set if a gnuplot option
    # is used.
    if ($use_gnuplot)
    {
        if ($gnuplot_linedata eq "" || $gnuplot_filedata eq "")
        {
            print "error: Both the -gnuplotlinedata and -gnuplotfiledata ";
            print "options must be specified if using gnuplot.\n\n";
            usage();
        }
    }

    # If -userlist has been specified, make sure -userdata has.
    if (scalar keys %filterusers != 0 && $userdata eq "")
    {
        print "error: -userdata needs to be specified if -userlist is.\n\n";
        usage();
    }

    # If -gnuplotuserdata has been specified, make sure -userdata has.
    if ($gnuplot_userdata ne "" && $userdata eq "")
    {
        print "error: -userdata needs to be specified if -gnuplotuserdata is.\n\n";
        usage();
    }

    # If both the start and end dates are specified, check that the start
    # date occurs before the end date.  Date::Manip is loaded with require,
    # so nothing is imported and the function must be fully qualified.
    if ($start_date ne "" && $end_date ne "" &&
        Date::Manip::Date_Cmp($start_date, $end_date) >= 0)
    {
        print "error: Start date specified must occur before the end date.\n\n";
        usage();
    }

    # If the -rlog option has been specified, need to make sure that the
    # CVS version installed is >= 1.11.1, as it is not supported in earlier
    # versions.  This is detected by looking for "deprecated" in what a
    # bare "cvs rlog" writes to stderr.
    if ($rlog_module ne "")
    {
        my $WTR = gensym();
        my $RDR = gensym();
        my $ERR = gensym();
        my $pid = open3($WTR, $RDR, $ERR, "cvs $cvs_global_args rlog");
        my $deprecated_found = 0;
        while (my $error_line = <$ERR>)
        {
            $deprecated_found = 1 if ($error_line =~ /deprecated/);
        }
        close $WTR;
        close $RDR;
        close $ERR;
        waitpid $pid, 0;
        if ($deprecated_found)
        {
            print "error: -rlog option requires CVS version >= 1.11.1\n\n";
            exit 1;
        }
    }

    # Open the specified output files.
    open (LINEDATA, ">$linedata")
        || die "Failed to create file \"$linedata\": $!";
    open (FILEDATA, ">$filedata")
        || die "Failed to create file \"$filedata\": $!";
    if ($userdata ne "")
    {
        open (USERDATA, ">$userdata")
            || die "Failed to create file \"$userdata\": $!";
    }
}
###############################################################################
# Print the version banner and a summary of the command line options to
# standard output, then terminate the script with exit status 1.
#
sub usage
{
    # The message, in output order.  Each element is printed separately.
    my @usage_text = (
        "cvsplot version 1.7.4 - ",
        "Copyright David Sitsky: sits\@users.sourceforge.net\n\n",
        "cvsplot collects statistics from CVS controlled files.\n\n",
        "usage:\n",
        "cvsplot.pl -cvsdir <dir> [-rlog <module>] -linedata <file> -filedata <file>\n",
        " [-userdata <file>] [-userlist [groupname=]<user1,user2,...>]*\n",
        " [-defaultusergroup <groupname>]\n",
        " [-cvs-global-args <global-args>]\n",
        " [-countchangedlines]\n",
        " [-branch <branch name>] [-start <date>] [-end <date>]\n",
        " {-include <regexp> | -exclude <regexp>} [-linestyle]\n",
        " [-gnuplotlinedata <output file for line data>\n",
        " -gnuplotfiledata <output file for file data>\n",
        " [-gnuplot <path to gnuplot binary>]\n",
        " [-gnuplotlinefiledata <output file combined line and file data>]\n",
        " [-gnuplotuserdata <output file for user data>\n",
        " [-gnuplotsetterm <gnuplot set terminal expression>]\n",
        " [-gnuplotcommand <general gnuplot command>]]\n\n",
        "See http://cvsplot.sourceforge.net for updates.\n",
    );

    foreach my $chunk (@usage_text)
    {
        print $chunk;
    }

    exit 1;
}
# syntax highlighted by Code2HTML, v. 0.9.1