/*--------------------------------------------------------------------
This source distribution is placed in the public domain by its author,
Jason Papadopoulos. You may use it for any purpose, free of charge,
without having to notify anyone. I disclaim any responsibility for any
errors.
Optionally, please be nice and tell me if you find this source to be
useful. Again optionally, if you add to the functionality present here
please consider making those additions public too, so that others may
benefit from your work.
--jasonp@boo.net 6/3/07
--------------------------------------------------------------------*/
#include <common.h>
/* This SQUFOF implementation will race as many multipliers
of the input as will fit into 62 bits */
static const uint32 multipliers[] = {1, 3, 5, 7,
11, 3*5, 3*7, 3*11,
5*7, 5*11, 7*11,
3*5*7, 3*5*11, 3*7*11,
5*7*11, 3*5*7*11 };
#define MAX_MULTIPLIERS sizeof(multipliers)/sizeof(uint32)
#define QSIZE 50
/* the maximum number of inner loop iterations for all
multipliers combined */
#define MAX_CYCLES 100000
/* the number of iterations to do before switching to the
next multiplier */
#define ONE_CYCLE_ITER 300
typedef struct {
uint64 n;
uint32 sqrtn[MAX_MULTIPLIERS];
uint32 cutoff[MAX_MULTIPLIERS];
uint32 q0[MAX_MULTIPLIERS];
uint32 p1[MAX_MULTIPLIERS];
uint32 q1[MAX_MULTIPLIERS];
uint32 num_saved[MAX_MULTIPLIERS];
uint16 saved[MAX_MULTIPLIERS][QSIZE];
uint8 failed[MAX_MULTIPLIERS];
} squfof_data_t;
/*-----------------------------------------------------------------------*/
uint32 squfof_one_cycle(squfof_data_t *data, uint32 mult_idx,
uint32 num_iter, uint32 *factor) {
/* Perform one unit of work for SQUFOF with one multiplier */
uint32 sqrtn = data->sqrtn[mult_idx];
uint32 cutoff = data->cutoff[mult_idx];
uint32 num_saved = data->num_saved[mult_idx];
uint32 multiplier = 2 * multipliers[mult_idx];
uint32 coarse_cutoff = cutoff * multiplier;
uint16 *saved = data->saved[mult_idx];
uint32 q0 = data->q0[mult_idx];
uint32 p1 = data->p1[mult_idx];
uint32 q1 = data->q1[mult_idx];
uint32 i, j;
uint32 p0 = 0;
uint32 sqrtq = 0;
uint64 scaledn;
for (i = 0; i < num_iter; i++) {
uint32 q, bits, tmp;
/* compute (sqrtn+p1)/q1; since this is unity
more than half the time, special case the
division to save some effort */
tmp = sqrtn + p1 - q1;
q = 1;
if (tmp >= q1)
q += tmp / q1;
/* compute the next numerator and denominator */
p0 = q * q1 - p1;
q0 = q0 + (p1 - p0) * q;
/* In order to avoid trivial factorizations,
save q1 values that are small in a list. Only
values that do not contain factors of the
multiplier must be saved; however, the GCD
is 5x more expensive than all the rest of the
work performed in the loop. To avoid most GCDs,
only attempt one if q1 is less than the cutoff
times the full multiplier
If the queue overflows (very rare), signal that
this multiplier failed and move on to another one */
if (q1 < coarse_cutoff) {
tmp = q1 / mp_gcd_1(q1, multiplier);
if (tmp < cutoff) {
if (num_saved >= QSIZE) {
data->failed[mult_idx] = 1;
return i;
}
saved[num_saved++] = (uint16)tmp;
}
}
/* if q0 is a perfect square, then the factorization
has probably succeeded. Most of the squareness
tests out there require multiple divisions and
complicated loops. We can approximate these tests
by doing two things: testing that the number of
trailing zeros in q0 is even, and then testing
if q0 shifted right this many places is 1 mod 8. */
bits = 0;
tmp = q0;
while(!(tmp & 1)) {
bits++;
tmp >>= 1;
}
if (!(bits & 1) && ((tmp & 7) == 1)) {
/* q0 is probably a perfect square. Take the
square root by cheating */
sqrtq = (uint32)(sqrt((double)q0));
if (sqrtq * sqrtq == q0) {
/* it *is* a perfect square. If it has
not appeared previously in the list
for this multiplier, then we're almost
finished */
for (j = 0; j < num_saved; j++) {
if (saved[j] == sqrtq)
break;
}
if (j == num_saved)
break;
}
}
/* perform the odd half of the SQUFOF cycle */
tmp = sqrtn + p0 - q0;
q = 1;
if (tmp >= q0)
q += tmp / q0;
p1 = q * q0 - p0;
q1 = q1 + (p0 - p1) * q;
if (q0 < coarse_cutoff) {
tmp = q0 / mp_gcd_1(q0, multiplier);
if (tmp < cutoff) {
if (num_saved >= QSIZE) {
data->failed[mult_idx] = 1;
return i;
}
saved[num_saved++] = (uint16)tmp;
}
}
}
if (sqrtq == 1) {
/* the above found a trivial factor, so this
multiplier has failed */
data->failed[mult_idx] = 1;
return i;
}
else if (i == num_iter) {
/* no square root found; save the parameters
and go on to the next multiplier */
data->q0[mult_idx] = q0;
data->p1[mult_idx] = p1;
data->q1[mult_idx] = q1;
data->num_saved[mult_idx] = num_saved;
return i;
}
/* square root found; the algorithm cannot fail now.
Compute the inverse quadratic form and iterate */
q0 = sqrtq;
p1 = p0 + sqrtq * ((sqrtn - p0) / sqrtq);
scaledn = data->n * (uint64)multipliers[mult_idx];
q1 = (scaledn - (uint64)p1 * (uint64)p1) / (uint64)q0;
while (1) {
uint32 q, tmp;
tmp = sqrtn + p1 - q1;
q = 1;
if (tmp >= q1)
q += tmp / q1;
p0 = q * q1 - p1;
q0 = q0 + (p1 - p0) * q;
if (p0 == p1) {
q0 = q1;
break;
}
tmp = sqrtn + p0 - q0;
q = 1;
if (tmp >= q0)
q += tmp / q0;
p1 = q * q0 - p0;
q1 = q1 + (p0 - p1) * q;
if (p0 == p1)
break;
}
/* q0 is the factor of n. Remove factors that exist
in the multiplier and save whatever's left */
q0 = q0 / mp_gcd_1(q0, multiplier);
*factor = q0;
return i;
}
/*-----------------------------------------------------------------------*/
uint32 squfof(mp_t *n) {
/* Factor a number up to 62 bits in size using SQUFOF.
For n the product of two primes, this routine will
succeed with very high probability, although the
likelihood of failure goes up as n increases in size.
Empirically, 62-bit factorizations fail about 5% of the
time; for smaller n the failure rate is nearly zero.
If a factor is found, it is returned. If it is a trivial
factor, the return value is one. If SQUFOF failed, the
return value is zero */
uint32 num_mult;
uint32 factor_found = 0;
mp_t scaledn, sqrt_scaledn;
uint32 i, num_iter, num_failed;
squfof_data_t data;
/* turn n into a uint64 */
data.n = (uint64)(n->val[1]) << 32 | (uint64)(n->val[0]);
/* for each multiplier */
for (i = 0; i < MAX_MULTIPLIERS; i++) {
/* use the multiplier if the multiplier times n
will fit in 62 bits. Because multipliers are
initialized in order of increasing size, when
one is too big then all the rest are also too big */
mp_mul_1(n, multipliers[i], &scaledn);
if (mp_bits(&scaledn) > 62)
break;
mp_isqrt(&scaledn, &sqrt_scaledn);
/* initialize the rest of the fields for this
multiplier */
data.sqrtn[i] = sqrt_scaledn.val[0];
data.cutoff[i] = (uint32)(sqrt(2.0 * (double)data.sqrtn[i]));
data.num_saved[i] = 0;
data.failed[i] = 0;
data.q0[i] = 1;
data.p1[i] = data.sqrtn[i];
data.q1[i] = scaledn.val[0] - data.p1[i] * data.p1[i];
/* if n is a perfect square, don't run the algorithm;
the factorization has already taken place */
if (data.q1[i] == 0)
return data.p1[i];
}
if (i == 0)
return 0;
/* perform a block of work using each multiplier in
turn, until our budget of work for factoring n is
exhausted */
num_mult = i;
num_iter = 0;
num_failed = 0;
while (num_iter < MAX_CYCLES) {
/* for each cycle of multipliers, begin with the
multiplier that is largest. These have a higher
probability of factoring n quickly */
for (i = num_mult - 1; (int)i >= 0 ; i--) {
if (data.failed[i])
continue;
/* work on this multiplier for a little while */
num_iter += squfof_one_cycle(&data, i, ONE_CYCLE_ITER,
&factor_found);
/* if all multipliers have failed, then SQUFOF
has failed */
if (data.failed[i]) {
if (++num_failed == num_mult)
return 0;
}
if (factor_found)
return factor_found;
}
}
return 0;
}
syntax highlighted by Code2HTML, v. 0.9.1