# -*-Perl-*-
#
#  LTU.pm - A Perl module implementing linear threshold units.
#
#     Copyright (c) 1995 Tom Fawcett. All rights reserved.
#     This program is free software; you may redistribute it and/or
#     modify it under the same terms as Perl itself.
#
#  Original C code written by James Callan.
#  Rewritten for Perl from Version 2.4 by Tom Fawcett.
#  Bugs, comments, suggestions:  Tom Fawcett <fawcett@nynexst.com>
#
#  Description:
#
#  This module contains subroutines for creating, using, and destroying
#  linear threshold units (LTUs).  It offers four different training rules:
#
#  1)  The absolute correction rule (ACR), copied from Nils Nilsson's book
#      "Learning Machines:  Foundations of Trainable Pattern-Classification
#      Systems," published by McGraw-Hill in 1965.  This rule is identical
#      to the Perceptron rule.  Its advantage is that it converges quickly
#      when the training instances are linearly separable.
#  2)  The least-mean-square (LMS) rule, devised by Widrow and Hoff in 1960.
#      See the AI Handbook or Duda & Hart for information on the LMS rule.
#      The advantage of the LMS rule over absolute correction or fixed
#      increment is that it tends to minimize the mean-squared error, even
#      when the classes are not linearly separable.
#  3)  The recursive least square (RLS) rule, copied from Peter Young's book
#      "Recursive Estimation and Time-Series Analysis," published by
#      Springer-Verlag in 1984.  This rule is like the LMS rule, but far
#      superior to it.  It's faster, because each instance only needs to
#      be seen once.  It's also more accurate.
#  4)  The thermal perceptron (TACR) rule, copied from Marcus Frean's PhD
#      thesis "Learning in Single Perceptrons," published by University of
#      Edinburgh in 1990.  This rule is like the ACR rule, except that its
#      annealing capabilities enable it to handle classes that are not
#      linearly separable.
#
#  A "maybe-train" training strategy (i.e. train only when the linear
#  threshold unit misclassifies an instance) is implicit in the absolute
#  correction rule, the LMS rule and the thermal perceptron rule.  The RLS
#  rule always trains.
#
#  The training rules are most effective when attribute values are scaled
#  to a fixed range.  When values exceed 1.0, some of the rules (e.g. ACR,
#  TACR) may allow the magnitude of weights to grow without bound.  When
#  different attributes have different ranges, some of the rules (e.g. RLS)
#  give greater influence to attributes with larger ranges.  Therefore, you
#  can request automatic scaling, if you desire it, when the LTU is created.
#  If you enable scaling, your data is automatically scaled, so that its value
#  does not exceed 1.0.  Scales are computed and adjusted when necessary,
#  without any intervention from you.  
#
#  LTU's converge most quickly when the data includes both positive and
#  negative values.  Therefore, when your data is scaled, it is scaled so
#  that the midpoint of the range is 0.0.
#
#  Each time scales are adjusted, the weights in the LTU become inaccurate.
#  The scaling procedure cannot compensate for this inaccuracy.  Therefore,
#  the automatic scaling can affect the speed with which the algorithm
#  converges.  However, once the scales "settle down" (i.e. once the extreme
#  values for each attribute have been seen), the speed of convergence is not
#  affected.
#
#  Automatic scaling adversely affects the RLS rule, because the RLS rule
#  implicitly "remembers" each instance.  When scales are adjusted, the 
#  instances remembered by the LTU become noisy.  If you enable scaling with
#  the RLS rule, you should cycle through your instances several times, at
#  least until the extreme values (min/maxs) of each attribute have been seen.
#
#
##############################################################################
#  $Log: LTU.pm,v $
# Revision 2.7  1996/02/20  15:15:18  fawcett
# Fixed package confusion made in the original version.
# Now exports $LTU_MINUS, LTU_PLUS and LTU_THRESHOLD.
# Created Makefile.PL so that MakeMaker can handle installation,
# cleaning, distribution, etc.
#
#
# Revision 2.5  1995  fawcett
# Initial public offering, released as Statistics::LTU.pm.
# Based on Jamie's LTU.c version 2.4.
#
##############################################################################
package Statistics::LTU;
my($rcs) = ' $Id: LTU.pm,v 2.7 1996/02/20 15:15:18 fawcett Exp $ ' ;
require 5;
use Carp;

require Exporter;
@ISA = ('Exporter');
@EXPORT = qw($LTU_PLUS $LTU_MINUS $LTU_THRESHOLD);

($Statistics::LTU::VERSION) = $rcs =~ /Id: LTU\.pm.* ([\d\.]+) /;
die "Can't determine Statistics::LTU::VERSION from rcs string!"
    unless $Statistics::LTU::VERSION;

#  We understand LTU's dumped with 2.5 or later
my($read_LTU_files_back_to) = 2.5;


##
##  EXPORTED CONSTANTS
##

$LTU_MINUS =       -1.0;
$LTU_PLUS  =        1.0;
$LTU_THRESHOLD =     0;

##
##  PRIVATE CONSTANTS
##

my($LTU_ORIGIN_OFF) =   0;
my($LTU_ORIGIN_ON) =    1;

my($LTU_SCALING_OFF) =  0;
my($LTU_SCALING_ON) =   1;

##
##  Data Structure Definitions
##

##  Field indices for the LTU structure.
##  All index names end in "i";

my($LTU_LENGTH) = 15;		# Number of entries in an LTU

my($LENGTHi, 
   $SCALINGi,
   $ORIGIN_RESTRICTIONi,
   $UNUSEDi,
   $CYCLES_SINCE_WEIGHT_CHANGEi,
   # Remainder are references to vectors
   $WEIGHTi,			
   $WEIGHT_MINi,
   $WEIGHT_MAXi,
   $ATTRIB_MINi,
   $ATTRIB_MAXi,
   $ATTRIB_SCALEi,
   $ATTRIBUTEi,
   $RLS_Pi,
   $RLS_TMP_1i,
   $RLS_TMP_2i,
   ) = 
    (0 .. $LTU_LENGTH-1);

$LTU_ATTRIBUTE_MIN = -0.5;

##
##  new creates a new LTU with weights set to 0.
##
sub new {
    my($type, $nvars, $scaling) = @_;

    ##
    ##  First allocate the LTU, and fill in its basic information.
    ##

    my($self) = [ (0) x $LTU_LENGTH ];

    my($length) = $nvars + 1;

    $self->[$SCALINGi] = $scaling;
    $self->[$LENGTHi]  = $length;
    $self->[$ORIGIN_RESTRICTIONi] = $LTU_ORIGIN_OFF;

    ##
    ##  Create vectors used by all types.
    ##
    $self->[$WEIGHTi]     = [ (0.0) x $length ];
    $self->[$WEIGHT_MINi] = [ (0.0) x $length ];
    $self->[$WEIGHT_MAXi] = [ (0.0) x $length ];
    $self->[$ATTRIBUTEi]  = [ (0.0) x $length ];

    ##
    ##  Allocate scaling vectors if necessary.
    ##
    if ($scaling == $LTU_SCALING_ON) {

	## Signal need to initialize these values by setting attrib_min > max

	$self->[$ATTRIB_MINi]   = [ (1.0) x $length ];
	$self->[$ATTRIB_MAXi]   = [ (-1.0) x $length ];
	$self->[$ATTRIB_SCALEi] = [ (1.0) x $length ];

    };

    bless $self, $type;		# Bless into type
}


##
##  COPY creates a copy of itself, which it returns
##
sub copy {
    my($self) = @_;

    ##
    ##  First, create a new LTU.
    ##

    my($new) = new Statistics::LTU($self->[$LENGTHi]-1, 
		       $self->[$SCALINGi]);

    ##
    ##  Copy the basic information that all LTU's have.  (The attribute vector
    ##  isn't copied because its contents are temporary.)
    ##
    $new->[$CYCLES_SINCE_WEIGHT_CHANGEi] =
	$self->[$CYCLES_SINCE_WEIGHT_CHANGEi];
    $new->[$ORIGIN_RESTRICTIONi] = 
	$self->[$ORIGIN_RESTRICTIONi];

    $new->[$WEIGHTi] =     [@{$self->[$WEIGHTi]}];
    $new->[$WEIGHT_MINi] = [@{$self->[$WEIGHT_MINi]}];
    $new->[$WEIGHT_MAXi] = [@{$self->[$WEIGHT_MAXi]}];

    ##
    ##  Copy scaling data, if necessary.
    ##
    if ($new->[$SCALINGi] == $LTU_SCALING_ON) {
	$new->[$ATTRIB_MINi] =   $self->[$ATTRIB_MINi];
	$new->[$ATTRIB_MAXi] =   $self->[$ATTRIB_MAXi];
	$new->[$ATTRIB_SCALEi] = $self->[$ATTRIB_SCALEi];
    }

    $new;
}


##
##  LTU_DESTROY destroys an existing linear threshold unit.
#####  I don't know if this is necessary but it can't hurt
##
sub destroy {
    my($self) = @_;

    for (0 .. $LTU_LENGTH-1) {
	undef $self->[$_];
    }
    undef $self;

}


##
##  IS_CYCLING returns a boolean value which indicates whether or not the
##  LTU's weights are cycling.  If they are, further training will not produce
##  better results.  This judgement is based upon the heuristic that if the
##  weights are not cycling, then the min/max's of weights will change fairly
##  often.  The caller must specify what it thinks is "fairly often".  This is
##  done with a parameter that specifies how many times the training function
##  can adjust weights before the caller would expect at least one weight to
##  have its min/max adjusted.  Some common values for this parameter are the
##  number of attributes, or the log of the number of attributes.
##

sub is_cycling {
    my($self, $how_often_weights_must_change) = @_;

    $self->[$CYCLES_SINCE_WEIGHT_CHANGEi] > $how_often_weights_must_change;
}


##
##  PRINT describes the LTU and its weights.
##
sub print {
    my($self) = @_;

    print "The linear threshold unit is of type ", ref($self);
    print " and contains ", $self->[$LENGTHi], " weights.\n";

    printf("Origin restriction=%d.\n", 
	    $self->[$ORIGIN_RESTRICTIONi]);
    printf("Cycles since last change of a weight min/max:  %d.\n",
	    $self->[$CYCLES_SINCE_WEIGHT_CHANGEi]);

    ##
    ##  Print 8 weights to a line, just to make it easier to read.
    ##

    my($i);

    for $i (0 .. $self->[$LENGTHi]-1) {
	print $self->[$WEIGHTi]->[$i], " ";

	printf("\n") if $i%8 == 7;
    };

    print "\n";
}


##
##  RESTORE restores a linear threshold unit from the specified file.
##
sub restore {
    my($class, $file_name) = @_;

    if (!open(FILE, "<$file_name")) {
	carp "restore: $file_name: $!\n";
	return(0);
    }

    ##
    ##  Check that the LTU is consistent with this version
    ##
    my($ltu_version);
    $ltu_version = <FILE>;
    chop($ltu_version);

    if ($ltu_version < $read_LTU_files_back_to) {
	carp "restore: $file_name written with LTU version $ltu_version.\n";
	carp "This is version $Statistics::LTU::VERSION, which only understands\n";
	carp "LTU files back to $read_LTU_files_back_to.\n";
	carp "LTU not restored!\n";
	return(0);
    }

    ## First, read the type, the number of weights, and whether or not scaling
    ##  is enabled.  Then, create a linear threshold unit.  Note that the
    ##  number of variables in the training instances is one less than the
    ##  number of weights (one weight is a constant).
    ##

    my($line);
    $line = <FILE>;
    chop($line);
    my($type, $length, $scaling, $cycles_since_weight_change, $origin) =
	split(/ /, $line);

    my($new) = $type->new($length-1, $scaling);

    $new->[$CYCLES_SINCE_WEIGHT_CHANGEi] = $cycles_since_weight_change;
    $new->[$ORIGIN_RESTRICTIONi]         = $origin;

    ##
    ##  Now, read the weights, the minimum value of the attributes, and the
    ##  maximum value of the attributes, from the file, and store them in the
    ##  linear threshold unit.

    my($new_weight) = 		$new->[$WEIGHTi];
    my($new_weight_min) = 	$new->[$WEIGHT_MINi];
    my($new_weight_max) = 	$new->[$WEIGHT_MAXi];
    my($new_attrib_min) = 	$new->[$ATTRIB_MINi];
    my($new_attrib_max) = 	$new->[$ATTRIB_MAXi];
    my($new_attrib_scale) = 	$new->[$ATTRIB_SCALEi];
    
    my($i, @fields);

    for $i (0 .. $length-1) {
	$line = <FILE>;  chop($line);

	@fields =  split(/ /, $line);

	($new_weight->[$i], $new_weight_min->[$i], $new_weight_max->[$i]) =
	    @fields;

	if ($scaling == $LTU_SCALING_ON) {
	    
	    ($new_attrib_min->[$i], $new_attrib_max->[$i]) = @fields[3,4];

	    if ($new_attrib_max->[$i] > $new_attrib_min->[$i]) {
		$new_attrib_scale->[$i] = 1.0 /
		    ($new_attrib_max->[$i] - $new_attrib_min->[$i]);
	    }
	}
    }
	
    #####  Pick up final theta value
    $line = <FILE>;  chop($line);
    $new->[$WEIGHTi]->[$length-1] = $line;

    close(FILE);
    
    $new;
}


##
##  SAVE saves the linear threshold unit in the specified file.
##  If the file can't be created for some reason, 0 is returned.  Otherwise, 1
##  is returned.
##
sub save {
    my($self, $file_name) = @_;
    my($length) = $self->[$LENGTHi];

    if (!open(FILE, ">$file_name")) {
	carp "Statistics::LTU::save: $file_name: $!";
	return(0);
    }

    ##
    ##  Stamp the file with a version number.
    ##
    print FILE $Statistics::LTU::VERSION, "\n";

    ## First, print the type, the number of weights and whether or not scaling
    ##  is enabled.  Then, print the weights, the minimum value of the
    ##  attribute, and the maximum value of the attribute.  Print 1
    ##  attribute/line, just to make it easier to read.

    print FILE join(' ', ((ref $self), 
			  $self->[$LENGTHi],
			  $self->[$SCALINGi],
			  $self->[$CYCLES_SINCE_WEIGHT_CHANGEi],
			  $self->[$ORIGIN_RESTRICTIONi]
			  )), 
			      "\n";

    my($i);
    for $i (0 .. $length-1) {

	printf FILE "%lf %lf %lf ", 
		$self->[$WEIGHTi]->[$i],
		$self->[$WEIGHT_MINi]->[$i],
		$self->[$WEIGHT_MAXi]->[$i];

	if ($self->[$SCALINGi] == $LTU_SCALING_ON) {
	    printf FILE "%lf %lf ", 
	    $self->[$ATTRIB_MINi]->[$i],
	    $self->[$ATTRIB_MAXi]->[$i];
	}

	print FILE "\n";
    }

    printf FILE "%lf\n", $self->[$WEIGHTi]->[$length-1];

    close(FILE);

    1;
}


##
##  TEST computes and returns the result of applying the linear threshold
##  unit to the instance_vector.
##

sub test {
    my($self, $instance_vector) = @_;

    my($length) =    $self->[$LENGTHi];
    my($weight) =    $self->[$WEIGHTi];
    my($attribute) = $self->[$ATTRIBUTEi];

    $self->_scale_attributes($instance_vector);

    my($result) = $weight->[$length-1];

    my($i);

    for $i (0 .. $length-2) {
	$result += $attribute->[$i] * $weight->[$i];
    }

    $result;

}


##
##  _SCALE_ATTRIBUTES translates and scales the attributes comprising an
##  instance vector.  The scaled attributes are stored in the attributes slot
##  on the LTU, so that the original instance_vector will not be changed.
##
sub _scale_attributes {
    my($self, $instance_vector) = @_;
    my($length) = $self->[$LENGTHi];

    my($attribute) =    $self->[$ATTRIBUTEi];
    my($attrib_min) =   $self->[$ATTRIB_MINi];
    my($attrib_scale) = $self->[$ATTRIB_SCALEi];

    ##
    ##  If scaling is disabled, just copy the instance attribute values into
    ##  the temporary attribute vector.  This isolates the input values from
    ##  the caller's storage and converts to double precision.  Otherwise
    ##  translate and scale the attributes.  When min=max, only one value has
    ##  been seen for the attribute.  In that case, the attribute is mapped
    ##  to the minimum value.  It is advantageous to use one of the endpoints,
    ##  instead of the midpoint 0.0, because it lets training begin.  
    ##

    my($i);

    if ($self->[$SCALINGi] == $LTU_SCALING_OFF) {

	for $i (0 .. $#$instance_vector) {
	    $attribute->[$i] = $instance_vector->[$i];
	}
    } else {

	for $i (0 .. $#$instance_vector) {
	    $attribute->[$i] = (($instance_vector->[$i] - $attrib_min->[$i]) *
				$attrib_scale->[$i]) + $LTU_ATTRIBUTE_MIN;
	}
    }

    ##
    ##  A constant attribute must be added to the end of the instance vector
    ##  in order to learn some kinds of functions (e.g. f(x)=mx+b, the function
    ##  for a line in 2D space).  The constant attribute enables the training
    ##  function to learn a weight that translates the hyperplane.  However,
    ##  it is undesirable for other kinds of functions (e.g. f(x)=mx, which is
    ##  what some preference predicates look like).  Decide what to do.
    ##

    if ($self->[$ORIGIN_RESTRICTIONi] == $LTU_ORIGIN_ON) {
	$attribute->[$length-1] = 0.0;

    } else {
	$attribute->[$length-1] = 1.0;
    }

}


##
##  MAINTAIN_SCALING_FACTORS checks that the specified instance falls within
##  the min-max ranges of the specified LTU.  If it does, nothing changes.
##  If it does not, the min-max ranges and the scaling factors are adjusted
##  to accomodate this new instance.
##
sub maintain_scaling_factors {
    my($self, $instance_vector) = @_;

    ##
    ##  If scaling is disabled, don't do anything.
    ##
    return if $self->[$SCALINGi] == $LTU_SCALING_OFF;

    my($length) = $self->[$LENGTHi];
    my($attrib_min) =   $self->[$ATTRIB_MINi];
    my($attrib_max) =   $self->[$ATTRIB_MAXi];
    my($attrib_scale) = $self->[$ATTRIB_SCALEi];

    my($i);
    for $i (0 .. $length-2) {
	if ($attrib_min->[$i] > $attrib_max->[$i]) {

	    ##
	    ##  Initialization.
	    ##
	    $attrib_min->[$i] = $attrib_max->[$i] = 
		$instance_vector->[$i];

	} elsif ($instance_vector->[$i] > $attrib_max->[$i]) {

	    $attrib_max->[$i] = $instance_vector->[$i];
	    $attrib_scale->[$i] =
		1.0 / ($attrib_max->[$i] - $attrib_min->[$i]);

	} elsif ($instance_vector->[$i] < $self->[$ATTRIB_MINi]->[$i]) {

	    $attrib_min->[$i] = $instance_vector->[$i];
	    $attrib_scale->[$i] =
		1.0 / ($attrib_max->[$i] - $attrib_min->[$i]);
	}
    }
}

##
##   SET_ORIGIN_RESTRICTION
##
sub set_origin_restriction {
    my($self, $value) = @_;

    if (($value == $LTU_ORIGIN_OFF) or ($value == $LTU_ORIGIN_ON)) {
	$self->[$ORIGIN_RESTRICTIONi] = $value;
    } else {
	carp "$value is an unacceptable value for the origin restriction\n";
    }
}


##
##   LTU_WEIGHTS returns a ref to copy of the LTU weights.
##
sub weights {
    my($self) = @_;
    
    [@{$self->[$WEIGHTi]}];
}


##
##  UPDATE_WEIGHT_MIN_MAX updates an LTU's weight min/max's and its
##  cycles_since_weight_change field.  Although it is simple, it is
##  implemented as a separate routine because each training routine
##  should perform this function before returning the LTU to the caller.
##
sub update_weight_min_max {
    my($self) = @_;

    my($boundary_changed) = 0;
    
    my($weight)     = $self->[$WEIGHTi];
    my($weight_max) = $self->[$WEIGHT_MAXi];
    my($weight_min) = $self->[$WEIGHT_MINi];

    my($i);
    for $i (0 .. $self->[$LENGTHi]-2) {

	if ($weight_max->[$i] < $weight->[$i]) {
	    $weight_max->[$i] = $weight->[$i];
	    $boundary_changed = 1;

	} elsif ($weight_min->[$i] > $weight->[$i]) {
	    $weight_min->[$i] = $weight->[$i];
	    $boundary_changed = 1;
	}
    }    
	
    if ($boundary_changed) {
	$self->[$CYCLES_SINCE_WEIGHT_CHANGEi] = 0;
    } else {
	$self->[$CYCLES_SINCE_WEIGHT_CHANGEi]++;
    }
}


##
##  $LTU->correctly_classifies($instance, $value) 
##  Returns 1 iff $instance is on the same side of Statistics::LTU::THRESHOLD
##  as $value is.
##
sub correctly_classifies {
    my($self, $instance, $desired_value) = @_;

    my($actual_value) = $self->test($instance);
    
    ((($actual_value   < $LTU_THRESHOLD) and
      ($desired_value  < $LTU_THRESHOLD)) or
     (($actual_value  >= $LTU_THRESHOLD) and
      ($desired_value >= $LTU_THRESHOLD)));
}

##
##  ltu->eval_on_set(examples)
##
##  Evaluates an LTU on a set of example, returning 4 integers:
##  True negatives, false positives, false negatives and true positives.
##
##  Argument is a ref to a list of examples.
##  Each example is a ref to an array of [Vector, Value].
##  Each Vector is a feature vector.
##
##  Example:
##     @Results = $LTU->eval_on_set([[[1,4,-2],1], [[1,-2,2],-1]]);
##     ($TN, $FP, $FN, $TP) = @Results;
##
sub eval_on_set {
    my($self, $Examples) = @_;

    my($TN, $FP, $FN, $TP) = (0,0,0,0);
    my($example, $Instance, $DesiredValue, $ActualValue);

    foreach $example (@{$Examples}) {
	($Instance, $DesiredValue) = @{$example};
	my($ActualValue) = $self->test($Instance);
	if ($ActualValue >= $LTU_THRESHOLD) {
	    if ($DesiredValue >= $LTU_THRESHOLD) {
		$TP++;
	    } else {
		$FP++;
	    }
	} elsif ($DesiredValue >= $LTU_THRESHOLD) {
	    $FN++;
	} else {
	    $TN++;
	}
    }

    ($TN, $FP, $FN, $TP);
}


###
###  Specific LTU types built upon LTU.
###  1.  The absolute correction rule.

package Statistics::LTU::ACR;
@Statistics::LTU::ACR::ISA = qw( Statistics::LTU );
use Statistics::LTU;

##
##  ACR::TRAIN trains the specified linear threshold unit on a particular
##  instance_vector.  It returns 1 if the linear threshold unit already
##  classified the instance_vector correctly, otherwise it returns 0.
##  The training rule is taken from Nilsson's "Learning Machines" book.
##

sub train { 
    my($self, $instance_vector, $desired_value) = @_;
    my($length) = $self->[$LENGTHi];

    ##
    ##  Only train the linear threshold unit if it does not classify correctly.
    ##

    my($actual_value) = $self->test($instance_vector);

    die "\$Statistics::LTU::LTU_THRESHOLD undefined in ACR" unless defined($Statistics::LTU::LTU_THRESHOLD);

    return(1) if ((($actual_value   < $Statistics::LTU::LTU_THRESHOLD) && 
		   ($desired_value  < $Statistics::LTU::LTU_THRESHOLD)) ||
		  (($actual_value  >= $Statistics::LTU::LTU_THRESHOLD) && 
		   ($desired_value >= $Statistics::LTU::LTU_THRESHOLD)));

    ## The scale factor can only be changed when the weights are being
    ##  changed, because a change to the scale factor invalidates the current
    ##  set of weights.

    $self->maintain_scaling_factors($instance_vector);
    $self->_scale_attributes($instance_vector);

    ##
    ##  Decide how much to adjust the weights by.  The absolute correction rule
    ##  (which provides the fastest learning) requires the dot product of the
    ##  instance vector, so do that first.
    ##
    
    my($dot_product) = 0;
   
    foreach (@{$self->[$ATTRIBUTEi]}) {
	$dot_product += $_ * $_;
    }

    my($delta) = $dot_product==0 ? .1 
	: 1.0 + (int(abs($actual_value) / $dot_product));
    $delta = -$delta   if $desired_value < $Statistics::LTU::LTU_THRESHOLD;

    ##
    ##  Now, adjust the weights.  The last weight is handled differently, 
    ##  because it is always applied to the constant 1.
    ##

    my($weight) =    $self->[$WEIGHTi];
    my($attribute) = $self->[$ATTRIBUTEi];

    my($i);
    for $i (0 .. $length-1) {
	$weight->[$i] += $delta * $attribute->[$i];
    }
    $self->update_weight_min_max;

    0;
}


###
###  Recursive Least Squares linear threshold units
###
package Statistics::LTU::RLS;
use Statistics::LTU;
use Carp;

@Statistics::LTU::RLS::Inherit::ISA = @Statistics::LTU::RLS::ISA = 
    qw( Statistics::LTU );

sub new {
    my($type, $nvars, $scaling) = @_;

    #  I don't know how to get inheritance to work automatically here
    my($self) = new Statistics::LTU($nvars, $scaling);

    my($length) = $nvars + 1;

    ##
    ##  Allocate temporary space for the RLS rule.
    ##

    my($p) =               [ (0.0) x ($length * $length) ];
    $self->[$RLS_Pi] =     $p;
    $self->[$RLS_TMP_1i] = [ (0.0) x $length ];
    $self->[$RLS_TMP_2i] = [ (0.0) x $length ];

    ##  [Used to be rls_init_p]
    ##  Initialize the p matrix to an arbitrarily large value along the
    ##  diagonal, and zero everywhere else.  Suggested in the algorithm, p 27.
    ##
    ##  Young says that "large diagonal elements [for the p matrix] (say 10^6
    ##  in general) will yield convergence and performance comensurate with the
    ##  stage-wise solution of the same problem" (p 27).  I have found that
    ##  values larger than 10^6 sometimes converge to more accurate weights;
    ##  however, values that are too large (e.g. 2.0 * 10^15) produce less
    ##  accurate weights.  The value defined below is intended to be as large
    ##  as is possible without adversely affecting performance.  It was
    ##  determined empirically.
    ##

    my($DIAGONAL_VALUE) = 10.0e+6;

    my($i, $j);
    for $i (0 .. $length-1) {

	$p->[$i * $length + $i] = $DIAGONAL_VALUE;

	for $j ($i+1 .. $length-1) {

	    $p->[$i * $length + $j] = 0.0;
	    $p->[$j * $length + $i] = 0.0;
	}
    }

    bless $self;
}


sub copy {
    my($self) = @_;

    my($new) = $self->Statistics::LTU::RLS::Inherit::copy($self);

    ##
    ##  Copy the RLS data.  (The tmp_1 and tmp_2 vectors
    ##  aren't copied because their contents are temporary.)
    ##

    $new->[$RLS_Pi] = [@{$self->[$RLS_Pi]}];

    bless $new;
}

##
##  TRAIN trains the specified linear threshold unit on a particular
##  instance_vector.  The training rule is taken from Young's "Recursive
##  Estimation and Time-Series Analysis" book, pp26-27.
##  Return value is undefined.

sub train {
    my($self, $instance_vector, $desired_value) = @_;

    ##
    ##  First, translate and scale the attributes.  The scale factor can only
    ##  be changed when the weights are being changed, because a change to the
    ##  scale factor invalidates the current set of weights.
    ##

    $self->maintain_scaling_factors($instance_vector);
    $self->_scale_attributes($instance_vector);

    ##
    ##  Now, update the weights.  This is done in two parts, 
    ##  updating the P matrix and A vector, respectively.
    ##
    $self->rls_update_p;
    $self->rls_update_a($desired_value);

    $self->update_weight_min_max;
    undef;
}


##
##  RLS_UPDATE_P implements equation II(1) on p26.  This is the first half of
##  the RLS algorithm.
##
##  The terminology matches what is in the book.  The algorithm computes
##  new values for array p at time k, based upon the vector x at time k and
##  the array p at time k-1.
##
##  The temporary arrays pkm1_xk and xkT_pkm1 are provided by the caller, to
##  eliminate the overhead of creating and destroying temporary arrays.
##

sub rls_update_p {
    my($self) = @_;

    my($p) =        $self->[$RLS_Pi];
    my($length) =   $self->[$LENGTHi];
    my($xk) =       $self->[$ATTRIBUTEi];
    my($pkm1_xk) =  $self->[$RLS_TMP_1i];
    my($xkT_pkm1) = $self->[$RLS_TMP_2i];

    my($i, $j);

    ##
    ##  Multiply pkm1 (the array p at time k-1) by xk (x at time k).
    ##

    for ($i=0; $i<$length; $i++) {
	$pkm1_xk->[$i] = 0.0;

	for ($j=0; $j<$length; $j++) {
	    $pkm1_xk->[$i] += $p->[$i * $length + $j] * $xk->[$j];
	}
    }

    ##
    ##  Get the scalar value [1 + xkT_pkm1_xk] ^ -1.  Call it scalar_value.
    ##  This can't be done until after pkm1_xk has been computed.
    ##
    my($scalar_value) = 1.0;

    for ($i=0; $i<$length; $i++) {
	$scalar_value += $pkm1_xk->[$i] * $xk->[$i];
    }

    $scalar_value = 1.0 / $scalar_value;

    ##
    ##  Fold the scalar_value into pkm1_xk.  This is more efficient than doing
    ##  it later, because pkm1_xk is an nx1 array and later arrays will be nxn.
    ##

    for $i (0 .. $length-1) {
	$pkm1_xk->[$i] *= $scalar_value;
    }

    ##
    ##  Multiply xkT (x at time k, transposed) by pkm1 (the array p 
    ##  at time k-1).
    ##

    for $i (0 .. $length-1) {
	$xkT_pkm1->[$i] = 0.0;

	for $j (0 .. $length-1) {
	    $xkT_pkm1->[$i] += $xk->[$j] * $p->[$j * $length + $i];
	}
    }

    ##
    ##  Multiply pkm1_xk by xkT_pkm1.  The result is used to update p, so it
    ##  does not need to be stored explicitly.
    ##

    for $i (0 .. $length-1) {
	for $j (0 .. $length-1) {
	    $p->[$i * $length + $j] -= $pkm1_xk->[$i] * $xkT_pkm1->[$j];
	}
    }
}

##
##  RLS_UPDATE_A implements equation II(2) on p26.  This is the second half of
##  the RLS algorithm.
##
sub rls_update_a {
    my($self, $yk) = @_;

    my($a) = 		$self->[$WEIGHTi];
    my($p) = 		$self->[$RLS_Pi];
    my($length) = 	$self->[$LENGTHi];
    my($xk) = 		$self->[$ATTRIBUTEi];
    
    ##
    ##  Multiply xkT (x at time k, transposed) by akm1 (a at time k-1).  The
    ##  result is a scalar value.  Subtract yk (y at time k).
    ##

    my($scalar_value) = 0.0 - $yk;
    my($i, $j);

    for ($i=0 ; $i<$length ; $i++) {
	$scalar_value += $xk->[$i] * $a->[$i];
    }

    ##
    ##  Multiply pk (p at time k) by xk (x at time k).  This is what the
    ##  algorithm calls Kk.  Matrix Kk is used to update a, so it does not
    ##  need to be stored explicitly.
    ##

    my($sum_of_products);

    for $i (0 .. $length-1) {

	$sum_of_products = 0.0;

	for $j (0 .. $length-1) {

	    $sum_of_products += $p->[$i * $length + $j] * $xk->[$j];
	}

	$a->[$i] -= $sum_of_products * $scalar_value;
    }
}


##
##  RESTORE restores a linear threshold unit from the specified file.
##

sub restore {
  my($class, $file_name) = @_;
  
  if (!open(FILE, "<$file_name")) {
    carp("restore: $file_name: $!\n");
    return(0);
  }
  
  ##
  ##  Check that the LTU is consistent with this version.
  ##
  my($ltu_version);
  $ltu_version = <FILE>;
  chop($ltu_version);
  
  if ($ltu_version < $read_LTU_files_back_to) {
    carp "restore: $file_name written with LTU version $ltu_version.\n";
    carp "This is version $Statistics::LTU::VERSION, which only understands\n";
    carp "LTU files back to version $read_LTU_files_back_to\n";
    carp "LTU not restored!\n";
    return(0);
  }
  
  ## First, read the type, the number of weights, and whether or not scaling
  ##  is enabled.  Then, create a linear threshold unit.  Note that the
  ##  number of variables in the training instances is one less than the
  ##  number of weights (one weight is a constant).
  ##
  
  my($line);
  $line = <FILE>;
  chop($line);
  my($type, $length, $scaling, $cycles_since_weight_change, $origin) =
      split(/ /, $line);
  
  my($new) = $type->new($length-1, $scaling);
  
  $new->[$CYCLES_SINCE_WEIGHT_CHANGEi] = $cycles_since_weight_change;
  $new->[$ORIGIN_RESTRICTIONi]         = $origin;
  
  ##
  ##  Now, read the weights, the minimum value of the attributes, and the
  ##  maximum value of the attributes, from the file, and store them in the
  ##  linear threshold unit.
  
  my($new_weight) = 		$new->[$WEIGHTi];
  my($new_weight_min) = 	$new->[$WEIGHT_MINi];
  my($new_weight_max) = 	$new->[$WEIGHT_MAXi];
  my($new_attrib_min) = 	$new->[$ATTRIB_MINi];
  my($new_attrib_max) = 	$new->[$ATTRIB_MAXi];
  my($new_attrib_scale) = 	$new->[$ATTRIB_SCALEi];
  
  my($i, @fields);
  
  for $i (0 .. $length-1) {
    $line = <FILE>;  chop($line);
    
    @fields =  split(/ /, $line);
    
    ($new_weight->[$i], $new_weight_min->[$i], $new_weight_max->[$i]) =
	@fields;
    
    if ($scaling == $LTU_SCALING_ON) {
      
      ($new_attrib_min->[$i], $new_attrib_max->[$i]) = @fields[3,4];
      
      if ($new_attrib_max->[$i] > $new_attrib_min->[$i]) {
	$new_attrib_scale->[$i] = 1.0 /
	    ($new_attrib_max->[$i] - $new_attrib_min->[$i]);
      }
    }
  }
  
  #####  Pick up final theta value
  $line = <FILE>;  chop($line);
  $new->[$WEIGHTi]->[$length-1] = $line;
  
  ##
  ##  Read information specific to RLS LTU's.
  ##
  
  $line = <FILE>; chop($line);
  $new->[$RLS_Pi] = [(split(/ /, $line))];
  
  close(FILE);
  
  $new;
}


##
##  SAVE saves the linear threshold unit in the specified file.
##  If the file can't be created for some reason, 0 is returned.  Otherwise, 1
##  is returned.
##

sub save {
    my($self, $file_name) = @_;
    my($length) = $self->[$LENGTHi];

    if (!open(FILE, ">$file_name")) {
	carp("Statistics::LTU::save: $file_name: $!");
	return(0);
    }

    ##
    ##  Stamp the file with a version number.
    ##
    print FILE $Statistics::LTU::VERSION, "\n";

    ## First, print the type, the number of weights and whether or not scaling
    ##  is enabled.  Then, print the weights, the minimum value of the
    ##  attribute, and the maximum value of the attribute.  Print 1
    ##  attribute/line, just to make it easier to read.

    print FILE join(' ', ((ref $self),
			  $self->[$LENGTHi],
			  $self->[$SCALINGi],
			  $self->[$CYCLES_SINCE_WEIGHT_CHANGEi],
			  $self->[$ORIGIN_RESTRICTIONi]
			  )), 
			      "\n";

    my($i);
    for $i (0 .. $length-1) {

	printf FILE "%lf %lf %lf ", 
		$self->[$WEIGHTi]->[$i],
		$self->[$WEIGHT_MINi]->[$i],
	$self->[$WEIGHT_MAXi]->[$i];

	if ($self->[$SCALINGi] == $LTU_SCALING_ON) {
	    printf FILE "%lf %lf ",
		    $self->[$ATTRIB_MINi]->[$i],
	    $self->[$ATTRIB_MAXi]->[$i];
	}

	print FILE "\n";
    }

    printf FILE "%lf\n", $self->[$WEIGHTi]->[$length-1];

    ##
    ##  Write out information specific to RLS LTU's.
    ##

    print FILE join(' ', @{$self->[$RLS_Pi]}), "\n";
    
    close(FILE);

    1;
}


package Statistics::LTU::LMS;
use Statistics::LTU;

@Statistics::LTU::LMS::ISA = qw( Statistics::LTU );

##
##  TRAIN trains the specified linear threshold unit on a particular
##  instance_vector.  It returns 1 if the linear threshold unit already
##  classified the instance_vector correctly, otherwise it returns 0.
##  The training rule is the least-mean-square (LMS) rule, devised by
##  Widrow and Hoff in 1960.  The advantage of the LMS rule over absolute
##  correction or fixed increment is that it tends to minimize the
##  mean-squared error, even when the classes are not linearly separable.
##  See the AI Handbook or Duda & Hart for more information.
##

sub train {
    my($self, $instance_vector, $desired_value, $rho) = @_;

    my($length) =    $self->[$LENGTHi];
    my($weight) =    $self->[$WEIGHTi];
    my($attribute) = $self->[$ATTRIBUTEi];
    ##
    ##  Make sure that rho makes sense.  If it doesn't, default to 0.2.  This
    ##  number was chosen empirically, on the basis of limited experimentation.
    ##

    $rho = 0.2 if !defined($rho) or $rho <= 0.0;

    ##
    ##  Only train the linear threshold unit if it does not classify correctly.
    ##

    my($actual_value) = $self->test($instance_vector);

    return(1) if ((($actual_value   < $Statistics::LTU::LTU_THRESHOLD) && 
		   ($desired_value  < $Statistics::LTU::LTU_THRESHOLD)) ||
		  (($actual_value  >= $Statistics::LTU::LTU_THRESHOLD) && 
		   ($desired_value >= $Statistics::LTU::LTU_THRESHOLD)));

    ## The scale factor can only be changed when the weights are being
    ##  changed, because a change to the scale factor invalidates the current
    ##  set of weights.

    $self->maintain_scaling_factors($instance_vector);
    $self->_scale_attributes($instance_vector);

    ##
    ##  Decide how much to adjust the weights by.  If the actual_value is 0,
    ##  then the least-mean square rule won't change the weights (it multiplies
    ##  by 0).  Therefore, use the fixed-increment rule (which is slower, but
    ##  also guaranteed to converge, if convergence is possible) when the
    ##  actual value is 0.  As far as I know, this modification of the LMS rule
    ##  is original (and harmless).
    ##

    my($delta) = 
	($actual_value == 0.0) ?  
	    $rho * $desired_value
		:  $rho * (- $actual_value);

    ##
    ##  Now, adjust the weights.  The last weight is handled differently, 
    ##  because it is always applied to the constant 1.
    ##

    my($i);
    for $i (0 .. $length-1) {
	$weight->[$i] += $delta * $attribute->[$i];
    }
    $self->update_weight_min_max;

    0;

}


package Statistics::LTU::TACR;
use Statistics::LTU;

@Statistics::LTU::TACR::ISA = qw( Statistics::LTU );

##
##  TRAIN trains the specified linear threshold unit on a particular
##  instance_vector.  It returns 1 if the linear threshold unit already
##  classified the instance_vector correctly, otherwise it returns 0.
##  The training rule is the Thermal Absolute Correction Rule, taken from
##  Frean's "Learning in Single Perceptrons" dissertation.
##

sub train {
    my($self, $instance_vector, $desired_value, $temp, $rate) = @_;

    my($length) = $self->[$LENGTHi];

    ##
    ##  Only train the linear threshold unit if it does not classify correctly.
    ##

    my($actual_value) = $self->test($instance_vector);

    return(1) if ((($actual_value   < $Statistics::LTU::LTU_THRESHOLD) && 
		   ($desired_value  < $Statistics::LTU::LTU_THRESHOLD)) ||
		  (($actual_value  >= $Statistics::LTU::LTU_THRESHOLD) && 
		   ($desired_value >= $Statistics::LTU::LTU_THRESHOLD)));

    ##
    ##  If either temp or rate is 0, then no weight adjustment takes place.
    ##  This shouldn't occur, but if it does, handle it easily and quickly.
    ##  Note that not handling it allows a divide by zero later.
    ##

    return(0) if ($temp <= 0) or ($rate <= 0);

    ##
    ##  The scale factor can only be changed when the weights are being 
    ##  changed, because a change to the scale factor invalidates the current 
    ##  set of weights.
    ##
    
    $self->maintain_scaling_factors($instance_vector);
    $self->_scale_attributes($instance_vector);

    ##
    ##  Decide how much to adjust the weights by.  The absolute correction rule
    ##  (which provides the fastest learning) requires the dot product of the
    ##  instance vector, so do that first.
    ##
    my($attribute) = $self->[$ATTRIBUTEi];
    my($dot_product) = 0.0;
    my($i);
    
    for $i (0 .. $length-1) {
	$dot_product += ($attribute->[$i] * $attribute->[$i]);
    }
    my($delta) = 1.0 + int(abs($actual_value) / $dot_product);

    $delta = -$delta if $desired_value < $Statistics::LTU::LTU_THRESHOLD;

    ##
    ##  This is the thermal part of the rule.  
    ##
    $delta *= $rate * exp((- abs($actual_value) / $temp));

    ##
    ##  Now, adjust the weights.  The last weight is handled differently, 
    ##  because it is always applied to the constant 1.
    ##

    my($weight) = $self->[$WEIGHTi];

    for $i (0 .. $length-1) {
	$weight->[$i] += ($delta * $attribute->[$i]);
    }
    $self->update_weight_min_max;

    0;
}


{ package main; eval join('',<DATA>) || die $@ unless caller(); }

1;##### End of LTU.pm
__END__


#
#  Test code
#
package main;
use Statistics::LTU;

srand;

@LTUs = (new Statistics::LTU::ACR(2, 1),
	 new Statistics::LTU::RLS(2, 1),
	 new Statistics::LTU::TACR(2, 1),
	 new Statistics::LTU::LMS(2, 1)
	 );

#  Create examples
my($x, $y, $class);
for (1 .. 20) {
    $x = rand;  $y = rand;
    $class = (($x-.5) > $y) ? 1 : -1;
    push(@::Examples, [[$x,$y],$class]);
}

my($ltu, $save_name, $ltu_restored, $ltu_copied, $tolerance);

$tolerance = 0.0001;

my(@OtherArgs);
my($temp, $rate, $rho);
$temp = 0.1;
$rate = 0.01;
$rho = 0.1;


foreach $ltu (@::LTUs) {

    $ltu->set_origin_restriction(0);

    if (ref($ltu) =~ /TACR/) {
	@OtherArgs = ($temp, $rate);
    } elsif (ref($ltu) =~ /LMS/) {
	@OtherArgs = ($rho);
    } else {
	@OtherArgs = ();
    }

    my($example);

    foreach $example (@::Examples) {
	$ltu->maintain_scaling_factors($example->[0]);
    }

    for (1 .. 10) {
	print "\n\nITERATION $_\n";

	foreach $example (@::Examples) {
	    $ltu->train($example->[0], $example->[1], @OtherArgs);
	}
	$ltu->print;
    }

    $save_name = ref($ltu) . ".saved";
    $ltu->save($save_name);
    
    $ltu_restored = $ltu->restore($save_name);
    $ltu_copied   = $ltu->copy;
    
    foreach $example (@::Examples) {
	if (abs($ltu->test($example->[0]) - $ltu_restored->test($example->[0]))
	    > $tolerance) {
	    warn "Original and restored LTUs disagree!";
	    $ltu->print;
	    $ltu_restored->print;
	    die "SAVE/RESTORE TEST FAILED!";
	    }

	if (abs($ltu->test($example->[0]) - $ltu_copied->test($example->[0]))
	    > $tolerance) {
	    warn "Original and copied LTUs disagree!";
	    $ltu->print;
	    $ltu_copied->print;
	    die "COPY TEST FAILED!";
	}
    }
    $ltu->destroy;
    $ltu_restored->destroy;
    $ltu_copied->destroy;
}

print "Tests passed\n";

1;