# Sort::Array.pm # # Copyright (c) 2001 Michael Diekmann . All rights # reserved. This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. # Documentation could be found at the bottom or use (after install): # > perldoc Sort::Array package Sort::Array; require 5.003_03; require Exporter; use strict; use vars qw(@EXPORT_OK @ISA $VERSION $error); @ISA = qw(Exporter); # we export nothing by default :) @EXPORT_OK = qw( Sort_Table Discard_Duplicates ); $VERSION = '0.26'; #///////////////////////////////////////////////////////////////////////# # # #///////////////////////////////////////////////////////////////////////# sub Discard_Duplicates { # Get the args and put them into a Hash. my (%arg) = @_; $error = 0; # Check for content that should be sorted, # else return error-code. if (scalar(@{$arg{data}}) == 0) { $error = 104; return undef; } my $use_warn = 0; # Turn warnings off, because we do first a '<=>' and if that # fails, we do a 'cmp'. And then a warning comes up. # After working, we turn $^W to the same as before. if ($^W) { $use_warn = $^W; $^W = 0; } # Find duplicates and sort them out. my %seen = (); my @unique = grep { ! $seen{$_}++ } @{$arg{data}}; %seen = (); # Check if is set, if empty do not sort them. if ($arg{sorting} eq 'ascending') { # Sorting content ascending order. @unique = sort { $a <=> $b || $a cmp $b } @unique; } elsif ($arg{sorting} eq 'descending') { # Sorting content descending order. @unique = sort { $b <=> $a || $b cmp $a } @unique; } # Turn warnings to the same as before. if ($use_warn) { $^W = $use_warn; } # Remove all empty fields, if wished. if ($arg{empty_fields} eq 'delete') { @_ = (); foreach (@unique) { push(@_, $_) if $_; } @unique = @_; } # return @unique; @{$arg{data}} = @unique; } #///////////////////////////////////////////////////////////////////////# # # #///////////////////////////////////////////////////////////////////////# sub Sort_Table { # Get the args and put them into a Hash. my (%arg) = @_; $error = 0; # Check if is set, # else return error-code. if ((! $arg{cols}) && ($arg{cols} !~ /0-9/)) { $error = 100; return undef; } # Check if is set, # else return error-code. if ((! $arg{field}) && ($arg{field} !~ /0-9/)) { $error = 101; return undef; } # Check if is set, # else return error-code. if ((! $arg{sorting}) && (($arg{sorting} ne 'ascending') || ($arg{sorting} ne 'descending'))) { $error = 102; return undef; } # Check if set, # else return error-code. if (! $arg{structure}) { $error = 103; return undef; } # Check for content that should be sorted, # else return error-code. if (scalar(@{$arg{data}}) == 0) { $error = 104; return undef; } # Check is set, # else set the standard > ";" if (! $arg{separator}) { $arg{separator} = ';'; } # Subtract 1 for better readable Arrayfields -> # beginning count at 1 (not 0). ;) $arg{cols}--; $arg{field}--; if ($arg{structure} eq 'single') { # Array is not semicolon-separated and we must # convert it to semicolon-separated. @_ = (); my $i=0; while (defined ${$arg{data}}[$i] ne '') { my $tmp=''; for (0..$arg{cols}) { $tmp .= "${$arg{data}}[$i+$_]"; if ($_ != $arg{cols}) { $tmp .= "$arg{separator}"; } } push(@_, $tmp); $i += $arg{cols} + 1; } @{$arg{data}} = @_; } my $use_warn = 0; # Turn warnings off, because we do first a '<=>' and if that # fails, we do a 'cmp' and then a warning comes up. # After sorting, we turn $^W to the same as before. if ($^W) { $use_warn = $^W; $^W = 0; } if ($arg{sorting} eq 'ascending') { # Sorting content ascending order. @{$arg{data}} = map { $_->[0] } sort { $a->[1] <=> $b->[1] || $a->[1] cmp $b->[1] } map { [ $_, (split(/$arg{separator}/))[$arg{field}] ] } @{$arg{data}}; } elsif ($arg{sorting} eq 'descending') { # Sorting content descending order. @{$arg{data}} = map { $_->[0] } sort { $b->[1] <=> $a->[1] || $b->[1] cmp $a->[1] } map { [ $_, (split(/$arg{separator}/))[$arg{field}] ] } @{$arg{data}}; } # Turn warnings to the same as before. if ($use_warn) { $^W = $use_warn; } # Return the sorted Array in the # same format as input. if ($arg{structure} eq 'csv') { return @{$arg{data}}; } elsif ($arg{structure} eq 'single') { @_ = (); foreach (@{$arg{data}}) { push(@_, split(/$arg{separator}/)); } return @_; } } 1; __END__ #///////////////////////////////////////////////////////////////////////# # # #///////////////////////////////////////////////////////////////////////# =head1 NAME Sort::Array - This extended sorting algorithm allows you to a) sort an array by ANY field number, not only the first. b) find duplicates in your data-set and sort them out. The function is case-sensitive. Future versions might come without this limitation. =head1 SYNOPSIS use Sort::Array qw( Sort_Table Discard_Duplicates ); @data = Sort_Table( cols => '4', field => '4', sorting => 'descending', structure => 'csv', separator => '\*', data => \@data, ); @languages = Discard_Duplicates( sorting => 'ascending', empty_fields => 'delete', data => \@languages, ); =head1 DESCRIPTION Sort_Table() is capable of sorting table-form arrays by a particular value. Discard_Duplicates() discards doubles from an array and returns the sorted array. =head2 Usage @data = Sort_Table( cols => '4', field => '4', sorting => 'descending', structure => 'csv', separator => '\*', data => \@data, ); @languages = Discard_Duplicates( sorting => 'ascending', empty_fields => 'delete', data => \@languages, ); =over 1 =item cols How many columns in a line. Integer beginning at 1 (not 0) (for better readability). e.g.: '4' = Four fields at one line. ($array[0..3]) - Utilizable only in Sort_Table() - Must be declared =item field Which column should be used for sorting. Integer beginning at 1 (not 0). e.g.: '4' = Sorting the fourth field. ($array[3]) - Utilizable only in Sort_Table() - Must be declared =item sorting In which order should be sorted. e.g.: 'ascending' or 'descending' - Utilizable in Sort_Table() - Must be declared - Utilizable in Discard_Duplicates() - Can be declared (if empty, it does not sort the array) =item empty_fields Should empty fields removed e.g.: 'delete' or not specified - Utilizable only in Discard_Duplicates() - Can be declared =item structure Structure of that Array. e.g.: 'csv' or 'single' - Utilizable only in Sort_Table() - Must be declared =item separator Which separator should be used? Only needed when structure => 'csv' is set. If left empty default is ";". For ?+*{} as a separator you must mask it since it is a RegEx. e.g.: \? or \* ... - Utilizable only in Sort_Table() - Must be declared when using 'csv' or ';' will be used. =item data Reference to the array that should be sorted. - Utilizable in Sort_Table() and Discard_Duplicates() - Must be declared =back If everything went right, Sort_Table() returns an array containing your sorted Array. The structure from the imput-array is kept although it's sorted. ;) =head2 Returncodes If an error occurs, than will be returned an undefinied array and set $Sort::Array::error with one of the following code. Normally $Sort::Array::error is 0. The following codes are returned, if an error occurs: =over 2 =item '100' is empty or not set or contains wrong content. =item '101' is emtpy or not set or contains wrong content. =item '102' is empty or contains not 'ascending' or 'descending'. =item '103' is empty or contains not 'csv' or 'single'. =item '104' is empty (your reference array). =back =head1 EXAMPLES Here are some short samples. These should help you getting used to Sort::Array =head2 Sorting CSV-Lines in an array my @data = ( '00003*layout-3*19990803*0.30', '00002*layout-2*19990802*0.20', '00004*layout-4*19990804*0.40', '00001*layout-1*19990801*0.10', '00005*layout-5*19990805*0.50', '00007*layout-7*19990807*0.70', '00006*layout-6*19990806*0.60', ); @data = Sort_Table( cols => '4', field => '4', sorting => 'descending', structure => 'csv', separator => '\*', data => \@data, ); Returns an array (with CSV-Lines): 00007*layout-7*19990807*0.70 00006*layout-6*19990806*0.60 00005*layout-5*19990805*0.50 00004*layout-4*19990804*0.40 00003*layout-3*19990803*0.30 00002*layout-2*19990802*0.20 00001*layout-1*19990801*0.10 =head2 Sorting single-fields in an array my @data = ( '00003', 'layout-3', '19990803', '0.30', '00002', 'layout-2', '19990802', '0.20', '00004', 'layout-4', '19990804', '0.40', '00001', 'layout-1', '19990801', '0.10', '00005', 'layout-5', '19990805', '0.50', '00007', 'layout-7', '19990807', '0.70', '00006', 'layout-6', '19990806', '0.60', ); @data = Sort_Table( cols => '4', field => '4', sorting => 'descending', structure => 'single', data => \@data, ); Returns an array (with single fields) 00007 layout-7 19990807 0.70 00006 layout-6 19990806 0.60 00005 layout-5 19990805 0.50 00004 layout-4 19990804 0.40 00003 layout-3 19990803 0.30 00002 layout-2 19990802 0.20 00001 layout-1 19990801 0.10 =head2 Discard duplicates in an array: my @languages = ( '', 'German', 'Dutch', 'English', 'Spanish', '', 'German', 'Spanish', 'English', 'Dutch', ); @languages = Discard_Duplicates( sorting => 'ascending', empty_fields => 'delete', data => \@languages, ); Returns an array (with single fields): Dutch English German Spanish =head1 BUGS No Bugs known for now. ;) =head1 HISTORY =item - 2001-08-25 / 0.26 File permission fixed, now anybody can extract the archive, not only the user 'root'. =item - 2001-08-23 / 0.25 Changed the Discard_Duplicates() function to discard duplicates and only sort the array if wished. You can set to 'asending', 'desending' or let them empty to disable sorting. Some misspelling corrected. =item - 2001-08-17 / 0.24 Error codes are no longer returned in an array (that array that contains the sorted Data). $Sort::Array::error is used with the code instead. =item - 2001-07-28 / 0.23 First beta-release, non-public =head1 AUTHOR Michael Diekmann, =head1 THANKS Rainer Luedtke, =head1 COPYRIGHT Copyright (c) 2001 Michael Diekmann . All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO perl(1). =cut