package Data::TemporaryBag;

use strict;

use Fcntl qw/:DEFAULT :seek/;
use Carp;
use File::Temp 'tempfile';

use overload '""' => \&value, '.=' => \&add, '=' => \&clone, fallback => 1;
use constant BUFFER      => 0;
use constant FILENAME    => 1;
use constant FILEHANDLE  => 2;
use constant STARTPOS    => 3;
use constant RECENTNESS  => 4;
use constant FINGERPRINT => 4;
use constant LENGTH      => 5;

our ($VERSION, $Threshold, $TempPath, $MaxOpen);

$VERSION = '0.09';

$Threshold = 10; # KB
#$TempPath  = $::ENV{'TEMP'}||$::ENV{'TMP'}||'.';
$TempPath = '';
$MaxOpen = 10;

my %OpenFiles;

sub new {
    my $class = shift;
    my $self = [''];
    
    bless $self, ref($class)||$class;
    
    $self->[LENGTH] = 0;
    $self->add(@_) if @_;
    $self;
}

sub clear {
    my $self = $_[0];

    &_clear_buffer;
    $self->[LENGTH] = 0;
}

sub _clear_buffer {
    my $self = shift;
    my $fn = $self->[FILENAME];

    if ($fn) {
	$self->_close if $self->[FILEHANDLE];
	unlink $fn; 
	@{$self}[FILENAME..FINGERPRINT] = ();
    }
    $self->[BUFFER] = '';
}

sub add {
    my ($self, $data) = @_;
    my $buf = \$$self[BUFFER];

    $data = '' unless defined $data;
    $self->[LENGTH] += CORE::length($data);

    if ($self->[FILENAME]) {
	my $fh = $self->_open;
	seek $fh, 0, SEEK_END;
	print $fh $data;
    } else {
	if (CORE::length($data) + CORE::length($$buf) > $Threshold * 1024) {
	    my $fh = $self->_open;
	    seek $fh, 0, SEEK_END;
	    print $fh $$buf, $data;
	} else {
	    $$buf .= $data;
	}
    }
    $self;
}

sub substr {
    my ($self, $pos, $size, $replace) = @_;
    my $len = $self->[LENGTH];
   
    $pos  = $len + $pos  if $pos  < 0;
    if (not defined $size or $size+$pos > $len) {
	$size = $len - $pos;
    } elsif ($size < 0) { 
	$size = $len + $size;
    }
    my $rsize = defined($replace) ? CORE::length($replace) : 0;
    my $offset = $size - $rsize;
    my $newlen = $len - $offset;

    if ($self->[FILENAME]) {
	my $data;
	my $fh = $self->_open;
	my $startpos = $self->[STARTPOS];

	return '' if $pos >= $len;
	seek($fh, $startpos+$pos, SEEK_SET);
	read($fh, $data, $size);
	if (defined $replace) {

	    if ($offset == 0) {
		my $fh = $self->_open;
		seek($fh, $pos + $startpos, SEEK_SET);
		print $fh $replace;
	    } elsif ($newlen < $Threshold * 800) {
		my $data1 = $self->substr(0, $pos);
		my $data2 = $self->substr($pos + $size);
		$self->_clear_buffer;
		$self->[BUFFER] = $data1.$replace.$data2;
		$self->[LENGTH] = $newlen;
	    } elsif ($pos == 0 and $startpos >= -$offset) {
		$self->[STARTPOS] += $offset;
		if ($rsize>0) {
		    seek($fh, $self->[STARTPOS], SEEK_SET);
		    print $fh $replace;
		}
	    } elsif ($pos+$size == $len) {
		seek($fh, $startpos+$pos, SEEK_SET);
		print $fh $replace;
		truncate($fh, $startpos+$newlen) if $newlen<$len;
	    } elsif ($offset > 0) {
		my ($data, $pos2);

		if ($pos < $len - $pos - $size) {
		    seek($fh, $startpos+$pos+$offset, SEEK_SET);
		    print $fh $replace;
		    _blktf_fw($fh, $startpos, $pos, $offset);
		    $self->[STARTPOS] += $offset;
		} else {
		    seek($fh, $startpos+$pos, SEEK_SET);
		    print $fh $replace;
		    my $start = $startpos+$pos+$size;
		    _blktf_bw($fh, $startpos+$pos+$size, $len-$pos-$size, $offset);
		    truncate($fh, $startpos+$newlen);
		}
	    } else {
		my $offset = $rsize-$size;
		my ($data, $pos2);

		if ($startpos >= $offset) {
		    _blktf_bw($fh, $startpos, $pos, $offset);
		    seek($fh, $startpos+$pos-$offset, SEEK_SET);
		    print $fh $replace;
		    $self->[STARTPOS] -= $offset;
		} else {
		    _blktf_fw($fh, $startpos+$pos+$size, $len-$pos-$size, $offset);
		    seek($fh, $startpos+$pos, SEEK_SET);
		    print $fh $replace;
		}
	    }
	    $self->[LENGTH] = $newlen;
	}
	return $data;
    } else {
	if (defined $replace) {
	    $self->[LENGTH] = $newlen;
	    substr($self->[BUFFER], $pos, $size, $replace);
	} else {
	    substr($self->[BUFFER], $pos, $size);
	}
    }
}

sub _blktf_fw {
    my ($fh, $start, $size, $offset) = @_;
    my ($pos2, $data);

    for ($pos2 = $start + $size-1024; $pos2 > $start; $pos2-=1024) {
	seek($fh, $pos2, SEEK_SET);
	read($fh, $data, 1024);
	seek($fh, $pos2+$offset, SEEK_SET);
	print $fh $data;
    }
    seek($fh, $start, SEEK_SET);
    read($fh, $data, $pos2 - $start+1024);
    seek($fh, $start+$offset, SEEK_SET);
    print $fh $data;
}

sub _blktf_bw {
    my ($fh, $start, $size, $offset) = @_;
    my ($pos2, $data);

    for($pos2 = $start; $pos2 < $start+$size-1024; $pos2+=1024) {
	seek($fh, $pos2, SEEK_SET);
	read($fh, $data, 1024);
	seek($fh, $pos2-$offset, SEEK_SET);
	print $fh $data;
    }
    seek($fh, $pos2, SEEK_SET);
    read($fh, $data, $start+$size-$pos2);
    seek($fh, $pos2-$offset, SEEK_SET);
    print $fh $data;
}


sub clone {
    my ($self, $stream)=@_;
    my $size = $self->[LENGTH];
    my $pos = 0;
    my $new = $self->new;

    while ($size > $pos) {
	$new->add($self->substr($pos, 1024));
	$pos += 1024;
    }
    $new->[LENGTH] = $size;
    $new;
}

sub value {
    my ($self, $stream)=@_;
    my $size = $self->length;
    my $pos = 0;
    my $data = '';

    while ($size > $pos) {
	$data .= $self->substr($pos, 1024);
	$pos += 1024;
    }
    $data;
}

sub length {
    shift->[LENGTH];

=pod

    my $self = shift;
    my $fn = $self->[FILENAME];
    my $fh = $self->[FILEHANDLE];

    if ($fh) {
	seek $fh, 0, SEEK_END;
	return tell($fh)- $self->[STARTPOS];
    } elsif ($fn) {
	return (-s $fn) - $self->[STARTPOS];
    } else {
	return length($self->[BUFFER]);
    }

=cut

}

sub defined {
    defined shift->[BUFFER];
}

sub _open {
    my ($self, $mode) = @_;
    my ($fh, $fn);

    if (defined ($fh = $self->[FILEHANDLE])) {
	my $recent = $self->[RECENTNESS];
	return $fh if $recent == 1;
	$self->[RECENTNESS] = 0;
	while(my (undef, $obj) = each %OpenFiles) {
	    if ($obj->[RECENTNESS] <= $recent) {
		$obj->[RECENTNESS]++;
	    }
	}
	return $fh;
    }
    if (defined ($fn = $self->[FILENAME])) {
	croak "TemporaryBag object seems to be collapsed " if (!-e $fn) or (!-f _);
	sysopen($fh, $fn, O_RDWR) or croak "TemporaryBag object seems to be collapsed OP";
	croak "TemporaryBag object seems to be collapsed " if (-l $fn);
	binmode $fh;
	$self->[FILEHANDLE] = $fh;
	$self->_check_fingerprint or croak "TemporaryBag object seems to be collapsed CH";
    } else {
	($fh, $fn) = tempfile();
	$self->[STARTPOS] = 0;
	croak "TemporaryBag object seems to be collapsed CR" unless defined $fh;
	binmode $fh;
	$self->[FILEHANDLE] = $fh;
	$self->[FILENAME] = $fn;
    }

    while(my (undef, $obj) = each %OpenFiles) {
	++$obj->[RECENTNESS];
    }
    
    if (keys %OpenFiles >= $MaxOpen) {
	my $to_close;
	while(my (undef, $obj) = each %OpenFiles) {
	    if ($obj->[RECENTNESS] > $MaxOpen) {
		$to_close = $obj;
		last;
	    }
	}
	$to_close->_close;
    }

    $self->[RECENTNESS] = 1;
    $OpenFiles{overload::StrVal($self)} = $self;
    return $fh;
}

sub _close {
    my $self = shift;
    my $recent = $self->[RECENTNESS];
    my $fh = $self->[FILEHANDLE];
    my $i;

    delete $OpenFiles{overload::StrVal($self)};

    while(my (undef, $obj) = each %OpenFiles) {
	if (defined $obj and $obj->[RECENTNESS] > $recent) {
	    $obj->[RECENTNESS]--;
	}
    }
    $self->_set_fingerprint;
    undef $self->[FILEHANDLE];
    close $fh or croak "TemporaryBag object seems to be collapsed CL";
}


sub is_saved {
    return shift->[FILENAME];
}

sub _set_fingerprint {
    my $self = shift;
    my $fingerprint;
    my $fh =  $self->[FILEHANDLE];
    seek $fh, 0, SEEK_END;
    my $range = tell($fh) - $self->[STARTPOS] - 1024;

    for (1..3) {
	my $r = int(rand($range))+1024;
	my $data;
	seek $fh, -$r, SEEK_END;
	read($fh, $data, 1024);
	$fingerprint .= "[$r]".unpack('%32C*',$data);
    }
    $self->[FINGERPRINT] = $fingerprint;
}

sub _check_fingerprint {
    my $self = shift;
    my $fh =  $self->[FILEHANDLE];
    my $fingerprint = $self->[FINGERPRINT];
    my $flag = 1;

    while($fingerprint=~/\[([^]]+)\]([^[]+)/g) {
	my $pos = $1;
	my $sum = $2;
	my $data;

	seek $fh, -$pos, SEEK_END;
	read($fh, $data, 1024);
	$flag &&= (unpack('%32C*',$data) == $sum);
    }
    return $flag;
}



sub DESTROY {
    my $self = shift;
#    close $self->[FILEHANDLE] if defined $self->[FILEHANDLE];
    $self->_close if defined $self->[FILEHANDLE];
    unlink $self->[FILENAME] if defined $self->[FILENAME];
}



1;
__END__

=head1 NAME

Data::TemporaryBag - Handle long size data using temporary file .

=head1 SYNOPSIS

  use Data::TemporaryBag;

  $data = Data::TemporaryBag->new;
  # add long string
  $data->add('ABC' x 1000);
  # You can use an overridden operator
  $data .= 'DEF' x 1000;
  ...
  $substr = $data->substr(2997, 6);  # ABCDEF

=head1 DESCRIPTION

I<Data::TemporaryBag> module provides a I<bag> object class handling long size 
data.  The short size data are kept on memory.  When the data size becomes 
over I<$Threshold> size, they are saved into a temporary file internally.

=head2 METHOD

=over 4

=item Data::TemporaryBag->new( [$data] )

Creates a I<bag> object.

=item $bag->clear

Clears I<$bag>.

=item $bag->add( $data )

Adds I<$data> to I<$bag>.
You can use an assignment operator '.=' instead.

=item $bag->substr( $offset, $length, $replace )

Extracts a substring out of I<$bag>.  It behaves similar to 
CORE::substr except that it can't be an lvalue.

=item $bag->clone

Creates a clone of I<$bag>.

=item $bag->value

Gets data of I<$bag> as a string.  It is possible that the string is 
extremely long.

=item $bag->length

Gets length of data.

=item $bag->defined

Returns if the data in I<$bag> are defined or not.

=item $bag->is_saved

Returns the file name if I<$bag> is saved in a temporary file.

=back

=head2 GLOBAL VARIABLES

=over 4

=item $Data::TemporaryBag::Threshold

The threshold of the data size in kilobytes whether saved into file or not.
Default is 10.

=item $data::TemporaryBag::MaxOpen

The maximum number of the opened temporary files.
Default is 10.

=back

=head1 COPYRIGHT

Copyright 2001 Yasuhiro Sasama (ySas), <ysas@nmt.ne.jp>

This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=cut


syntax highlighted by Code2HTML, v. 0.9.1