// //* \file rng.cc //* \brief Random number generator. // // This file is part of the libecc package. // Copyright (C) 2002, by // // Carlo Wood, Run on IRC // RSA-1024 0x624ACAD5 1997-01-26 Sign & Encrypt // Fingerprint16 = 32 EC A7 B6 AC DB 65 A6 F6 F6 55 DD 1C DC FF 61 // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // #include "sys.h" #include "debug.h" #include namespace libecc { /**\class rng rng.h libecc/rng.h * * \brief Pseudo Random Number Generator. * * This random number generator was designed from scratch. * It is fast (2.5 million bit/second on a 900 Mhz PC), * has a long period (2^521 - 1) and passes all known statistical * tests for randomness. * * \note * \htmlonly\endhtmlonly * The chosen period of 2^521 - 1 is by far large enough for any purpose; * when 15 billion people each using 10,000 PCs with a clock frequency of 100 Ghz * generate random numbers for the next 10 million years - then the chance that \e any two of * them ever generated a (partly) overlapping sequence of bits is less than * 0.000...(108 zeroes)...0001. * In order words, if you chose a good random seed, you \e will be * the only one who ever uses the resulting sequence of output bits. * \htmlonly\endhtmlonly * *

Theory

* * The basis of this Random Number Generator (RNG) is a shift register of 521 bit using nine feedback points. * Because such a feedback is linear, it is possible to * write any amount of shift as NEWSTATE = MATRIX * OLDSTATE. * The matrix that corresponds with a single shift can easily be determined * as function of the feedback points. * Let's call this matrix M0. * The matrix corresponding to a shift of 2 then corresponds to M1 = M0 * M0. * A shift of 4 corresponds to M2 = M1 * M1, etcetera. * * The period of the RNG will be equal to the number of different internal states that will * be used. In the optimal case all possible internal states, except all zeroes which would * lead to an output of only zeroes, will be used and the period of the RNG * will be equal to 2⁵²¹ - 1. * This hypothesis can be checked by confirming that M521 = M0^2⁵²¹ == M0 * and because 2⁵²¹ - 1 is a prime this value then must be the real period of the RNG. * Of course, one of the feedback points is fixed at bit 520 (feeding to bit 0). * By choosing the other feedback points close to bit 0, a very fast mangling of the bits is achieved. * * \note * \htmlonly\endhtmlonly * Even with a seed of '1' (a single bit) the first resulting 512 bits of output appear to be totally random already. * After 9 calls to generate_512_bits(), each bit in the pool has been set at least once. * \htmlonly\endhtmlonly * *

Verifying the period, an example

* * Suppose we have a shift register of 3 bits (2³-1 = 7 is a prime). * Let's use one feedback point from bit 1 (and from bit 2 of course), then we have: * *

 * bit        0    1    2(output)
 * state0:    a    b    c
 * state1:    b    c   a^b
 * state2:    c   a^b  b^c
 *   ...
 *

* * Which gives, with a starting seed of 001: * *

 * 001
 * 010
 * 101
 * 011
 * 111
 * 110
 * 100
 * and back to the top
 *

* * Or rotated 90 degrees: * *

 * bit
 *  0  0010111
 *  1  0101110
 *  2  1011100
 *

* * Each column can be considered as a vector: * *

 * a
 * b
 * c
 *

* * The matrix representing one shift now is: * *

 * m11 m12 m13  a   a m11 + b m12 + c m13    b
 * m21 m22 m23  b = a m21 + b m22 + c m23 =  c
 * m31 m32 m33  c   a m31 + b m32 + c m33    a+b mod 2 = a^b
 *

* * From which we can deduce the matrix * *

 *      0 1 0
 * M0 = 0 0 1
 *      1 1 0
 *

* * and that we have to do our calculation modulo 2. * * Note that * *

 *                0 1 0   0 1 0   0 0 1
 * M1 = M0 * M0 = 0 0 1   0 0 1 = 1 1 0
 *                1 1 0   1 1 0   0 1 1
 *
 *                0 0 1   0 0 1   0 1 1
 * M2 = M1 * M1 = 1 1 0   1 1 0 = 1 1 1
 *                0 1 1   0 1 1   1 0 1
 *

* * and * *

 *                0 1 1   0 1 1   0 1 0
 * M3 = M2 * M2 = 1 1 1   1 1 1 = 0 0 1 = M0
 *                1 0 1   1 0 1   1 1 0
 *
 *

* * Which proves that this repeats after 2³-1 = 7 times. * *

Predictability

* * Because the state of the RNG changes in a linear way (we can use matrices), * the output is predictable: the internal state can be calculated from a * small amount of output. * * In the above example assume that the RNG produces the output sequence xyz. * If the orginal state was abc then after three bits of output the state would * be: * *

 *       a   0 1 0   0 0 1   a   1 1 0   a   a+b
 * M0 M1 b = 0 0 1   1 1 0   b = 0 1 1   b = b+c      (mod 2)
 *       c   1 1 0   0 1 1   c   1 1 1   c   a+b+c
 *

* * while the output would be * *

 * x   0 0 1    a     0 0 0    a     0 0 0    a   1 1 0  a
 * y = 0 0 0 M0¹ b  +  0 0 1 M0² b  +  0 0 0 M0³ b = 0 1 1  b
 * z   0 0 0    c     0 0 0    c     0 0 1    c   1 1 1  c
 *

* * The inverse of the latter allows one to crack the RNG (calculate the current state from x,y,z). * *

Matrix compression

* * Although it might sound easy to check if the period of a given * RNG is maximal, in practise it still isn't when we want to use * a huge number of bits. * I wrote a program that does 4 Gigabit operations per second, * it was therefore able to do 3 times 127 matrix multiplications * of 127x127 matrices per second. * That means that it is reasonably fast to look for working * feedback points. Let's say, one minute was enough. * But if then we try to use this program to find feedback points * for a shift register of 1279 bits, it suddenly takes (1279/127)⁴ = 10286 minutes = 1 week! * And 1279 bits is still not really big... Moreover, storing large matrices * uses a lot of memory. A matrix of 19937 bits (2¹⁹⁹³⁷ - 1 is the period * of the Mersenne Twister) would * use 47.4 Mb per matrix already. * * Fortunately, the matrices that we need to prove the period of are not arbitrary. * As can be deduced from the paragraphs above, M0 can be written as * *

 *      0 1 0 0 0 0 0 ... 0 0
 *      0 0 1 0 0 0 0 ... 0 0
 *      0 0 0 1 0 0 0 ... 0 0
 *            . 
 * M0 =       . 
 *            . 
 *      0 0 0 0 0 0 0 ... 1 0
 *      0 0 0 0 0 0 0 ... 0 1
 *      1 0 0 ... 1 ... 1 ...
 *

* * Where the 1's on the bottom row are at the feedback point places. * * As a result, we can write M0 (for a pxp matrix) as * a vector of rows as follows * *

 *       (0 1 0 0 0 0 0 ... 0)
 *       (0 1 0 0 0 0 0 ... 0) M0¹
 *       (0 1 0 0 0 0 0 ... 0) M0²
 *       (0 1 0 0 0 0 0 ... 0) M0³
 * M0 =       . 
 *            . 
 *            . 
 *       (0 1 0 0 0 0 0 ... 0) M0^p-1
 *

* * If we multiply this an arbitrary number of times with M0 (say k times), * then we get: * *

 *         (0 1 0 0 0 0 0 ... 0) M0^k           (0 1 0 0 0 0 0 ... 0) M0^k
 *         (0 1 0 0 0 0 0 ... 0) M0¹ M0^k        (0 1 0 0 0 0 0 ... 0) M0^k M0¹
 *         (0 1 0 0 0 0 0 ... 0) M0² M0^k        (0 1 0 0 0 0 0 ... 0) M0^k M0²
 *         (0 1 0 0 0 0 0 ... 0) M0³ M0^k        (0 1 0 0 0 0 0 ... 0) M0^k M0³
 * M0 M0^k =       .                         =          . 
 *               .                                    . 
 *               .                                    . 
 *         (0 1 0 0 0 0 0 ... 0) M0^p-1 M0^k       (0 1 0 0 0 0 0 ... 0) M0^k M0^p-1
 *

* * which proves that every matrix that is a power of M0 can be expressed in terms of the top row * of the matrix and M0 as follows: * *

 * Row(N+1) = Row(N) M0
 *

* * Example * * Consider a RNG of 7 bits with a single feedback points at bit 7 - 3 = 4. * The first 7 powers of M0 for that RNG are * *

 *      1 0 0 0 0 0 0
 *      0 1 0 0 0 0 0
 *      0 0 1 0 0 0 0
 * M0⁰ = 0 0 0 1 0 0 0
 *      0 0 0 0 1 0 0
 *      0 0 0 0 0 1 0
 *      0 0 0 0 0 0 1
 *
 *      0 1 0 0 0 0 0          0 0 1 0 0 0 0          0 0 0 1 0 0 0
 *      0 0 1 0 0 0 0          0 0 0 1 0 0 0          0 0 0 0 1 0 0
 *      0 0 0 1 0 0 0          0 0 0 0 1 0 0          0 0 0 0 0 1 0
 * M0¹ = 0 0 0 0 1 0 0  ,  M0² = 0 0 0 0 0 1 0  ,  M0³ = 0 0 0 0 0 0 1
 *      0 0 0 0 0 1 0          0 0 0 0 0 0 1          1 0 0 0 1 0 0
 *      0 0 0 0 0 0 1          1 0 0 0 1 0 0          0 1 0 0 0 1 0
 *      1 0 0 0 1 0 0          0 1 0 0 0 1 0          0 0 1 0 0 0 1
 *
 *      0 0 0 0 1 0 0         0 0 0 0 0 1 0          0 0 0 0 0 0 1
 *      0 0 0 0 0 1 0         0 0 0 0 0 0 1          1 0 0 0 1 0 0
 *      0 0 0 0 0 0 1         1 0 0 0 1 0 0          0 1 0 0 0 1 0
 * M0⁴ = 1 0 0 0 1 0 0 ,  M0⁵ = 0 1 0 0 0 1 0  ,  M0⁶ = 0 0 1 0 0 0 1
 *      0 1 0 0 0 1 0         0 0 1 0 0 0 1          1 0 0 1 1 0 0
 *      0 0 1 0 0 0 1         1 0 0 1 1 0 0          0 1 0 0 1 1 0
 *      1 0 0 1 1 0 0         0 1 0 0 1 1 0          0 0 1 0 0 1 1
 *

* * And indeed, each second row of a sequential power of M0 is equivalent to a next row of M0 itself. * * Now let's consider an arbitrary power k of M0 (in the example below k = 48). * *

 *      0 0 1 1 0 1 0
 *      0 0 0 1 1 0 1
 *      1 0 0 0 0 1 0
 * M0^k = 0 1 0 0 0 0 1
 *      1 0 1 0 1 0 0
 *      0 1 0 1 0 1 0
 *      0 0 1 0 1 0 1
 *

* * and let's call the first row C_k^T * *

* * then * *

 * M0^k = C_k^T P
 *

* * where P is the vector of matrices * *

 *     M0⁰
 *     M0¹
 *     M0²
 * P = M0³
 *     M0⁴
 *     M0⁵
 *     M0⁶
 *

* *

Multiplying compressed matrices

* * Let I_n be the n-th column of I * and P_n = P I_n, a vector of the n-th column of each of the matrices of P. * Then the n-th column of M0^k is * *

 * M0^k I_n = C_k^T P I_n = C_k^T P_n
 *

* * or * *

 * I_n^T (M0^k)^T = P_n^T C_k
 *

* * Recall that C_k^T is the top row of M0^k. * That means we can write * *

 * C_k = (I₀^T M0^k)^T = (M0^k)^T I₀
 *

* * and can deduce that * *

 * C_m+k = (M0^m+k)^T I₀ = (M0^m M0^k)^T I₀ = (M0^k)^T (M0^m)^T I₀ = (M0^k)^T (I₀^T M0^m)^T = (M0^k)^T C_m =
 * [sum over n] (I_n^T (M0^k)^T C_m I_n) =
 * [sum over n] (P_n^T C_k C_m I_n) =
 * [sum over n] (I_n^T P^T C_k C_m I_n) =
 * P^T C_k C_m
 *

* * from which we conclude that bit n of C_k+m is * *

 * I_n^T C_k+m =
 * I_n^T P^T C_k C_m =
 * P_n^T C_k C_m
 *

* * Note that P_n^T is a row of rows, hence * P_n^T C_k is a row * which can also be written as C_k^T Q. * The n-th bit of C_k+m then becomes * *

 * I_n^T C_k+m = C_k^T Q_n C_m
 *

* * where Q_n is a matrix consisting of the n-th columns of the elements of P, * in other words * *

 * Q_n = Q I_n
 *

* * where * *

 * Q = (M0⁰ M0¹ M0² ... M0^p-1)
 *

* * Note that Q_n is symmetric so that Q_n^T = Q_n and therefore * C_k^T Q_n C_m = C_m^T Q_n C_k, * as it should be since M0^k M0^m = M0^m M0^k. * * Using this method it was possible to write a program that checks the period of the RNG * in a time of the order O(p² * f³), for pxp matrices with f feedback points, * instead of the order O(p⁴). Note that the following observations have been used * as well; the period of a RNG with feedback points f1, f2, f3 ... is equal to the period * of a RNG with feedback points p - f1, p - f2, p - f3, ... This allowed us to drastically reduce * the order of the number of feedback points. Finally note the observation that a RNG only has a * period of 2^p - 1 solution when using an odd number of feedback points. * * Considering that it took me something of the order of an hour to find working feedback points * for a RNG with 521 bits, it would now take about 9 weeks to find feedback points for a RNG * of 19937 bits instead of 262 year as would be the case when using ordinairy matrices. * * Implementation * * The implemented Random Number Generator of libecc uses 521 bits and 9 feedback points * at 2, 3, 7, 13, 31, 61, 131, 151 and 251. These feedback points are chosen to be primes * in order to garantee the least interference. The distance between the feedback points * is every time increased by a factor of two (except for the feedback point at 151 which was * added in order to make the RNG have its maximum period). The reason for this is again * to have the least interference between feedback frequencies, this way the different feedback * points nicely supplement each other in achieving bit mangling over the full range. */ static unsigned int const feed1 = rng::S_pool_size - 2; static unsigned int const feed2 = rng::S_pool_size - 3; static unsigned int const feed3 = rng::S_pool_size - 7; static unsigned int const feed4 = rng::S_pool_size - 13; static unsigned int const feed5 = rng::S_pool_size - 31; static unsigned int const feed6 = rng::S_pool_size - 61; static unsigned int const feed7 = rng::S_pool_size - 131; static unsigned int const feed8 = rng::S_pool_size - 151; static unsigned int const feed9 = rng::S_pool_size - 251; /** * \brief Constructor. * * \param seed A bitset of 521 bits, any value is allowed except all zeroes. */ rng::rng(pool_type const& seed) : M_out_cnt(0), M_entropy_ptr(M_pool), M_entropy_ptr_end(M_pool + sizeof(M_pool) / 4 - 1), M_head(M_pool, 0), M_fbp1(M_pool, feed1), M_fbp2(M_pool, feed2), M_fbp3(M_pool, feed3), M_fbp4(M_pool, feed4), M_fbp5(M_pool, feed5), M_fbp6(M_pool, feed6), M_fbp7(M_pool, feed7), M_fbp8(M_pool, feed8), M_fbp9(M_pool, feed9) { for (unsigned int d = 0; d < sizeof(M_pool) / 4; ++d) M_pool[d] = seed.digit32(d); } /** \brief Generate 512 bits. * * Fills the internal output buffer with 512 new random bits. * You can retrieve this output with the member function rng::get_512_bits(). */ void rng::generate_512_bits(void) { do { bitset_digit_t c = 0; for (bitset_digit_t out_mask = 1; out_mask != 0; out_mask <<= 1) { uint32_t a = M_head.increment_and_test(M_pool); a ^= M_fbp1.increment_and_test(M_pool); a ^= M_fbp2.increment_and_test(M_pool); a ^= M_fbp3.increment_and_test(M_pool); a ^= M_fbp4.increment_and_test(M_pool); a ^= M_fbp5.increment_and_test(M_pool); a ^= M_fbp6.increment_and_test(M_pool); a ^= M_fbp7.increment_and_test(M_pool); a ^= M_fbp8.increment_and_test(M_pool); a ^= M_fbp9.increment_and_test(M_pool); uint32_t b = a >> 16; a ^= b; b = a >> 8; a ^= b; if (libecc::oddnumberofbits[a & 0xff]) { M_head.set(); c |= out_mask; } else M_head.clear(); } M_out.rawdigit(M_out_cnt++) = c; M_out_cnt %= (512 / bitset_digit_bits); } while(M_out_cnt != 0); } /** \brief Add entropy to the random number generator pool. * * By adding entropy to the random number generator, the output * becomes more unpredictable. Its not really necessary to do * this however, its hard enough to 'guess' the 521 bits of * seed. It is advisable to use SHA-1 on the output though, * in order to make it harder to reverse engineer the internal * state of the rng from its output. */ void rng::add_entropy(uint32_t const* noise, unsigned int number_of_ints) { for (unsigned int cnt = 0; cnt < number_of_ints; ++cnt) { *M_entropy_ptr ^= noise[cnt]; if (++M_entropy_ptr == M_entropy_ptr_end) M_entropy_ptr = M_pool; } } unsigned int const rng::S_pool_size; } // namespace libecc