#!/usr/bin/perl # # dbcoldiff # # Copyright (C) 1991-1998 by John Heidemann # $Id: dbcoldiff,v 1.20 2006/08/25 05:29:42 johnh Exp $ # # This program is distributed under terms of the GNU general # public license, version 2. See the file COPYING # in $dblibdir for details. # sub usage { print <=, =. T-tests assume normal distributions with common variances. Sample input (example 6.12 from Scheaffer and McClave): #h title mean2 stddev2 n2 mean1 stddev1 n1 example6.12 0.17 0.0020 5 0.22 0.0010 4 Sample command: cat data.jdb | dbcoldiff mean2 stddev2 n2 mean1 stddev1 n1 Sample output: #h title mean2 stddev2 n2 mean1 stddev1 n1 diff diff_pct diff_conf_half diff_conf_low diff_conf_high diff_conf_pct_half diff_conf_pct_low diff_conf_pct_high example6.12 0.17 0.0020 5 0.22 0.0010 4 0.05 29.412 0.0026138 0.047386 0.052614 1.5375 27.874 30.949 # | dbcoldiff mean2 stddev2 n2 mean1 stddev1 n1 Sample input (example 7.10 from Scheaffer and McClave): #h title x2 ss2 n2 x1 ss1 n1 example7.10 9 35.22 24.44 9 31.56 20.03 Run: dbcoldiff -h '<=0' x2 ss2 n2 x1 ss1 n1 Output: #h title n1 x1 ss1 n2 x2 ss2 diff diff_pct diff_conf_half diff_conf_low diff_conf_high diff_conf_pct_half diff_conf_pct_low diff_conf_pct_high t_test t_test_result example7.10 9 35.22 24.44 9 31.56 20.03 3.66 0.11597 4.7125 -1.0525 8.3725 0.14932 -0.033348 0.26529 1.6465 not-rejected # | /global/us/edu/ucla/cs/ficus/users/johnh/BIN/DB/dbcoldiff -h <=0 x2 ss2 n2 x1 ss1 n1 END exit 1; } BEGIN { $dblibdir = "/home/johnh/BIN/DB"; push(@INC, $dblibdir); } use DbGetopt; use DbTDistr; require "$dblibdir/dblib.pl"; $conf_pct = 0.95; $format = "%.5g"; $hyp_diff = undef; $hyp_class = undef; @orig_argv = @ARGV; my($prog) = &progname; my($dbopts) = new DbGetopt("c:h:f:?", \@ARGV); my($ch); while ($dbopts->getopt) { $ch = $dbopts->opt; if ($ch eq 'c') { $conf_pct = $dbopts->optarg; } elsif ($ch eq 'h') { die ("$prog: bad hypothesis difference " . $dbopts->optarg . ".\n") unless ($dbopts->optarg =~ /^\s*([<>=!]+)\s*([0-9\.]+)/); ($hyp_class, $hyp_diff) = ($1, $2); } elsif ($ch eq 'f') { $format = $dbopts->optarg; } else { &usage; }; }; if ($hyp_class =~ /\<\=/) { $hyp_class = -1; } elsif ($hyp_class =~ /\>\=/) { $hyp_class = 1; } elsif ($hyp_class =~ /==?/) { $hyp_class = 0; } elsif ($hyp_class ne undef) { die("$prog: bad hypothesis direction (must be <=, >=, or =, was $hyp_class).\n"); }; ($STATS_DIFF, $STATS_CONF, $STATS_T_TEST) = (0..10); # rest of args if ($#ARGV == 5) { ($m1c, $ss1c, $n1c, $m2c, $ss2c, $n2c) = @ARGV; $stats_kind = ($hyp_diff eq undef) ? $STATS_CONF : $STATS_T_TEST; } elsif ($#ARGV == 3) { ($m1c, $n1c, $m2c, $n2c) = @ARGV; $stats_kind = $STATS_DIFF; die("$prog: T-tests require standard deviations.\n") if ($hyp_diff ne undef); } else { &usage; }; # computed off args my $conf_alpha = (1.0 - $conf_pct) / 2.0; &readprocess_header; foreach (@ARGV) { die ("$prog: unknown column ``$_''.\n") if (!defined($colnametonum{$_})); }; ($m1c, $ss1c, $n1c, $m2c, $ss2c, $n2c) = ($colnametonum{$m1c}, $colnametonum{$ss1c}, $colnametonum{$n1c}, $colnametonum{$m2c}, $colnametonum{$ss2c}, $colnametonum{$n2c}); # # new columns # $diff_f = &col_create("diff"); $pctdiff_f = &col_create("diff_pct"); if ($stats_kind >= $STATS_CONF) { $confdelta_f = &col_create("diff_conf_half"); $conflow_f = &col_create("diff_conf_low"); $confhigh_f = &col_create("diff_conf_high"); $pctconfdelta_f = &col_create("diff_conf_pct_half"); $pctconflow_f = &col_create("diff_conf_pct_low"); $pctconfhigh_f = &col_create("diff_conf_pct_high"); }; if ($stats_kind >= $STATS_T_TEST) { $t_test_f = &col_create("t_test"); $t_test_result_f = &col_create("t_test_result"); $t_test_break_f = &col_create("t_test_break"); $t_test_break_pct_f = &col_create("t_test_break_pct"); }; &write_header(); while () { &pass_comments && next; &split_cols; $diff = $f[$m2c] - $f[$m1c]; if ($f[$m1c] == 0.0) { $pctdiff = "n/a"; } else { $pctdiff = $diff / $f[$m1c] * 100.0; }; $confdelta = $conflow = $confhigh = $pctconfdelta = $pctconflow = $pctconfhigh = $t_test = $t_test_result = $t_test_break = $t_test_break_pct = "n/a"; if ($stats_kind >= $STATS_CONF && $f[$n1c] > 1 && $f[$n2c] > 1) { # basic stuff $degrees_of_freedom = ($f[$n1c] + $f[$n2c] - 2); $ssp = (($f[$n1c] - 1) * $f[$ss1c] * $f[$ss1c] + ($f[$n2c] - 1) * $f[$ss2c] * $f[$ss2c]) / $degrees_of_freedom; $sqrt_ssp_inverses = sqrt($ssp * (1/$f[$n1c] + 1/$f[$n2c])); $t_value = &DbTDistr::t_distribution($degrees_of_freedom, $conf_alpha); # confidence intervals $confdelta = $t_value * $sqrt_ssp_inverses; $conflow = $diff - $confdelta; $confhigh = $diff + $confdelta; # t-test if ($stats_kind >= $STATS_T_DIST && $sqrt_ssp_inverses != 0.0) { $t_test = ($diff - $hyp_diff) / $sqrt_ssp_inverses; $t_test_result = "not-rejected"; if ($hyp_class < 0) { $t_test_result = "rejected" if ($t_test > $t_value); } elsif ($hyp_class > 0) { $t_test_result = "rejected" if ($t_test < $t_value); } else { $t_test_result = "rejected" if (&abs($t_test) > $t_value); }; # also compute the break-even point $t_test_break = $diff - $t_value * $sqrt_ssp_inverses; $t_test_break_pct = $t_test_break / $f[$m1c] * 100.0 if ($f[$m1c] != 0.0); }; # percentages if ($f[$m1c] != 0.0) { $pctconfdelta = $confdelta / $f[$m1c] * 100.0; $pctconflow = $conflow / $f[$m1c] * 100.0; $pctconfhigh = $confhigh / $f[$m1c] * 100.0; }; }; $f[$diff_f] = sprintf("$format", $diff); $f[$pctdiff_f] = sprintf("$format", $pctdiff); if ($stats_kind >= $STATS_CONF) { $f[$confdelta_f] = sprintf("$format", $confdelta); $f[$conflow_f] = sprintf("$format", $conflow); $f[$confhigh_f] = sprintf("$format", $confhigh); $f[$pctconfdelta_f] = sprintf("$format", $pctconfdelta); $f[$pctconflow_f] = sprintf("$format", $pctconflow); $f[$pctconfhigh_f] = sprintf("$format", $pctconfhigh); }; if ($stats_kind >= $STATS_T_TEST) { $f[$t_test_f] = sprintf("$format", $t_test); $f[$t_test_result_f] = $t_test_result; $f[$t_test_break_f] = $t_test_break; $f[$t_test_break_pct_f] = sprintf("$format", $t_test_break_pct); }; &write_cols; }; print "# | $prog " . join(" ", @orig_argv) . "\n"; exit 0;