#!/usr/bin/perl -w # # dbcolmultiscale # Copyright (C) 2001 by John Heidemann # $Id: dbcolmultiscale,v 1.6 2003/05/23 04:15:43 johnh Exp $ # # This program is distributed under terms of the GNU general # public license, version 2. See the file COPYING # in $dblibdir for details. # sub usage { print STDERR <getopt) { $ch = $dbopts->opt; if ($ch eq 's') { $smallest_timescale = $dbopts->optarg; } elsif ($ch eq 'd') { $debug = 1; } elsif ($ch eq 'S') { $sorting_required = 0; } else { &usage; }; }; &usage if ($#ARGV != 1); my($timecol, $datacol) = @ARGV; # handle sorting, if necessary if ($sorting_required) { open(SORTED, "$dbbindir/dbsort -n $timecol |") || die("$prog: cannot run dbsort over input.\n"); open(STDIN, "<&SORTED") || die "$0: cannot dup SORTED.\n"; }; &readprocess_header; die ("$prog: unknown column name ``$timecol''.\n") if (!defined($colnametonum{$timecol})); my($timef) = $colnametonum{$timecol}; die ("$prog: unknown column name ``$datacol''.\n") if (!defined($colnametonum{$datacol})); my($dataf) = $colnametonum{$datacol}; # # start the output # &write_header(qw(period_start timescale timescale_index jittered_index sum rate)); my($first_time, $last_time); my(@period_start) = undef; my(@timescale_duration) = ($smallest_timescale); my(@sum) = (0); my(@other_sum) = (undef); my(@other_period_start) = (undef); my($maxscale_summed_i) = 0; # counts from zero sub jitter { my($index, $time, $timescale) = @_; my($jitter) = int(($time - $first_time) / $timescale) % 30; return $index + ($jitter - 15) / 100.0; } sub output_timescale { my($index) = @_; &write_these_cols($period_start[$index], $timescale_duration[$index], $index, jitter($index, $period_start[$index], $timescale_duration[$index]), $sum[$index], $sum[$index] / $timescale_duration[$index]); }; sub complete_timescale { my($index) = @_; # are we finishing the first half or the second? if (!defined($other_sum[$index])) { # first half, just output the record &output_timescale($index); # now start work on the new record $other_sum[$index] = $sum[$index]; $other_period_start[$index] = $period_start[$index]; } else { # ok, we're doing the 2nd half, which means we have to recurse # output the record &output_timescale($index); # compute the next level $sum[$index+1] = $other_sum[$index] + $sum[$index]; $timescale_duration[$index+1] = 2 * $timescale_duration[$index]; $period_start[$index+1] = $other_period_start[$index]; &complete_timescale($index+1); # now reset our level $other_sum[$index] = undef; }; $sum[$index] = 0; $period_start[$index] += $timescale_duration[$index]; } # read data while () { &pass_comments() && next; &split_cols; my($time) = $f[$timef]; my($data) = $f[$dataf]; # initialization if (!defined($first_time)) { $first_time = $last_time = $period_start[0] = $time; }; die "$0: input is not sorted by time ($time < $last_time)\n" if ($time < $last_time); $last_time = $time; # handle the zero level by hand if ($time - $period_start[0] < $smallest_timescale) { # just more data in the current period $sum[0] += $data; next; }; # ...overflowed period, so percolate if necessary # until the data point is in the current period. while ($time - $period_start[0] >= $smallest_timescale) { &complete_timescale(0); # percolate }; # now initialize the new lowest level $sum[0] = $data; }; die "$0: no input\n" if (!defined($first_time)); # Terminte the final entry, assuming it's complete # (even though it may not be since we might have missed # additional data that would have fitted in that timescale). # Note that this only "completes" the lowest timescale # and not necessarily any partially complete large timescales. # It's not clear that this is any more justified. # (But this additional call here does guarantee that if we have # exactly 2^n evenly spaced datapoints we get them all--- # see the dbcolmultiscale_flat test case for an example). &complete_timescale(0); # close up shop print "# | $prog ", join(" ", @orig_argv), "\n"; exit 0; if (0) { ; };