"""
Statistics related stuff in igraph
"""
__license__ = """
Copyright (C) 2006-2007 Gabor Csardi <csardi@rmki.kfki.hu>,
Tamas Nepusz <ntamas@rmki.kfki.hu>
MTA RMKI, Konkoly-Thege Miklos st. 29-33, Budapest 1121, Hungary
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
"""
class Interval:
"""A class representing an interval over the real numbers"""
def __init__(self, left, right):
"""Constructor.
@param left: the left edge of the interval
@param right: the right edge of the interval"""
if left == right:
raise ArgumentError, "Interval can't have zero length"""
self._left = min(left, right)
self._right = max(left, right)
def __contains__(self, x):
"""Returns C{True} if x is in the interval, C{False} otherwise"""
return (x >= self._left and x < self._right)
def __eq__(self, x):
"""Tests for the equality of two intervals"""
return (isinstance(x, Interval) and \
x.left == self._left and x.right == self._right)
def __cmp__(self, x): return cmp(self._left, x.left)
def __getattr__(self, attr):
if attr == "left": return self._left
if attr == "right": return self._right
return object.__getattr__(self, attr)
def __hash__(self): return hash(self._left) | hash(self._right)
def __str__(self): return "[%f, %f)" % (self._left, self._right)
class Histogram(object):
"""Generic histogram class for real numbers
Example:
>>> h = Histogram(5) # Initializing, bin width = 5
>>> h << [2,3,2,7,8,5,5,0,7,9] # Adding more items
>>> print h
N = 10, mean +- sd: 4.8000 +- 2.9740
[ 0.000, 5.000): ****
[ 5.000, 10.000): ******
"""
def __init__(self, bin_width = 1, data = []):
"""Initializes the histogram with the given data set.
@param bin_width: the bin width of the histogram.
@param data: the data set to be used. Must contain real numbers.
"""
self._bin_width = float(bin_width)
self.clear()
self.add_many(data)
def _get_bin(self, num, create = False):
"""Returns the bin corresponding to the given number.
@param num: the number for which the bin is being sought
@param create: whether to create a new bin if no bin exists yet.
@return: the range of the bin or C{None} if no bin exists yet and
{create} is C{False}."""
for bin in self._bins.keys():
if num in bin: return bin
if create:
left = (num // self._bin_width) * self._bin_width
right = left + self._bin_width
bin = Interval(left, right)
self._bins[bin] = 0
if self._min is None or left < self._min: self._min = left
if self._max is None or right > self._max: self._max = right
return bin
return None
def _get_n(self): return self._n
def _get_mean(self): return self._mean
def _get_sd(self): return self._sd
def _get_var(self): return self._sd ** 2
n = property(_get_n, doc="Number of elements in the histogram")
mean = property(_get_mean, doc="Mean of the elements")
sd = property(_get_sd, doc="Standard deviation of the elements")
var = property(_get_var, doc="Variance of the elements")
def add(self, num):
"""Adds a single number to the histogram.
@param num: the number to be added"""
bin = self._get_bin(num, True)
self._bins[bin] += 1
self._n += 1
delta = num - self._mean
self._mean += (delta / self._n)
self._s += delta * (num - self._mean)
if self._n > 1:
self._sd = (self._s / (self._n-1)) ** 0.5
def add_many(self, data):
"""Adds a single number or elements of an iterable to the histogram.
@param data: the data to be added"""
try:
it = iter(data)
except:
it = iter([data])
for x in it: self.add(x)
__lshift__ = add_many
def clear(self):
"""Clears the collected data"""
self._bins = {}
self._min = None
self._max = None
self._n = 0
self._mean = 0.0
self._s = 0.0
self._sd = 0.0
def __str__(self):
"""Returns the string representation of the histogram"""
if self._min is None or self._max is None: return str()
num_length = max(len("%.3f" % self._min), \
len("%.3f" % self._max))
format_string = "[%%%d.3f, %%%d.3f): %%s" % (num_length, num_length)
#bins = self._bins.keys()
#bins.sort()
maxval = max(self._bins.itervalues())
scale = maxval // (70-2*num_length)
if scale<1: scale = 1
result=["N = %d, mean +- sd: %.4f +- %.4f " % \
(self.n, self.mean, self.sd)]
if scale>1: result.append("Each * represents %d items" % scale)
x = self._min
while x < self._max:
bin = self._get_bin(x)
if bin is None:
cnt = 0
else:
cnt = self._bins[bin] // scale
result.append(format_string % (x, x+self._bin_width, '*'*cnt))
x += self._bin_width
return "\n".join(result)
syntax highlighted by Code2HTML, v. 0.9.1