/*-------------------------------------------------------------------- This source distribution is placed in the public domain by its author, Jason Papadopoulos. You may use it for any purpose, free of charge, without having to notify anyone. I disclaim any responsibility for any errors. Optionally, please be nice and tell me if you find this source to be useful. Again optionally, if you add to the functionality present here please consider making those additions public too, so that others may benefit from your work. --jasonp@boo.net 6/3/07 --------------------------------------------------------------------*/ #include /* This SQUFOF implementation will race as many multipliers of the input as will fit into 62 bits */ static const uint32 multipliers[] = {1, 3, 5, 7, 11, 3*5, 3*7, 3*11, 5*7, 5*11, 7*11, 3*5*7, 3*5*11, 3*7*11, 5*7*11, 3*5*7*11 }; #define MAX_MULTIPLIERS sizeof(multipliers)/sizeof(uint32) #define QSIZE 50 /* the maximum number of inner loop iterations for all multipliers combined */ #define MAX_CYCLES 100000 /* the number of iterations to do before switching to the next multiplier */ #define ONE_CYCLE_ITER 300 typedef struct { uint64 n; uint32 sqrtn[MAX_MULTIPLIERS]; uint32 cutoff[MAX_MULTIPLIERS]; uint32 q0[MAX_MULTIPLIERS]; uint32 p1[MAX_MULTIPLIERS]; uint32 q1[MAX_MULTIPLIERS]; uint32 num_saved[MAX_MULTIPLIERS]; uint16 saved[MAX_MULTIPLIERS][QSIZE]; uint8 failed[MAX_MULTIPLIERS]; } squfof_data_t; /*-----------------------------------------------------------------------*/ uint32 squfof_one_cycle(squfof_data_t *data, uint32 mult_idx, uint32 num_iter, uint32 *factor) { /* Perform one unit of work for SQUFOF with one multiplier */ uint32 sqrtn = data->sqrtn[mult_idx]; uint32 cutoff = data->cutoff[mult_idx]; uint32 num_saved = data->num_saved[mult_idx]; uint32 multiplier = 2 * multipliers[mult_idx]; uint32 coarse_cutoff = cutoff * multiplier; uint16 *saved = data->saved[mult_idx]; uint32 q0 = data->q0[mult_idx]; uint32 p1 = data->p1[mult_idx]; uint32 q1 = data->q1[mult_idx]; uint32 i, j; uint32 p0 = 0; uint32 sqrtq = 0; uint64 scaledn; for (i = 0; i < num_iter; i++) { uint32 q, bits, tmp; /* compute (sqrtn+p1)/q1; since this is unity more than half the time, special case the division to save some effort */ tmp = sqrtn + p1 - q1; q = 1; if (tmp >= q1) q += tmp / q1; /* compute the next numerator and denominator */ p0 = q * q1 - p1; q0 = q0 + (p1 - p0) * q; /* In order to avoid trivial factorizations, save q1 values that are small in a list. Only values that do not contain factors of the multiplier must be saved; however, the GCD is 5x more expensive than all the rest of the work performed in the loop. To avoid most GCDs, only attempt one if q1 is less than the cutoff times the full multiplier If the queue overflows (very rare), signal that this multiplier failed and move on to another one */ if (q1 < coarse_cutoff) { tmp = q1 / mp_gcd_1(q1, multiplier); if (tmp < cutoff) { if (num_saved >= QSIZE) { data->failed[mult_idx] = 1; return i; } saved[num_saved++] = (uint16)tmp; } } /* if q0 is a perfect square, then the factorization has probably succeeded. Most of the squareness tests out there require multiple divisions and complicated loops. We can approximate these tests by doing two things: testing that the number of trailing zeros in q0 is even, and then testing if q0 shifted right this many places is 1 mod 8. */ bits = 0; tmp = q0; while(!(tmp & 1)) { bits++; tmp >>= 1; } if (!(bits & 1) && ((tmp & 7) == 1)) { /* q0 is probably a perfect square. Take the square root by cheating */ sqrtq = (uint32)(sqrt((double)q0)); if (sqrtq * sqrtq == q0) { /* it *is* a perfect square. If it has not appeared previously in the list for this multiplier, then we're almost finished */ for (j = 0; j < num_saved; j++) { if (saved[j] == sqrtq) break; } if (j == num_saved) break; } } /* perform the odd half of the SQUFOF cycle */ tmp = sqrtn + p0 - q0; q = 1; if (tmp >= q0) q += tmp / q0; p1 = q * q0 - p0; q1 = q1 + (p0 - p1) * q; if (q0 < coarse_cutoff) { tmp = q0 / mp_gcd_1(q0, multiplier); if (tmp < cutoff) { if (num_saved >= QSIZE) { data->failed[mult_idx] = 1; return i; } saved[num_saved++] = (uint16)tmp; } } } if (sqrtq == 1) { /* the above found a trivial factor, so this multiplier has failed */ data->failed[mult_idx] = 1; return i; } else if (i == num_iter) { /* no square root found; save the parameters and go on to the next multiplier */ data->q0[mult_idx] = q0; data->p1[mult_idx] = p1; data->q1[mult_idx] = q1; data->num_saved[mult_idx] = num_saved; return i; } /* square root found; the algorithm cannot fail now. Compute the inverse quadratic form and iterate */ q0 = sqrtq; p1 = p0 + sqrtq * ((sqrtn - p0) / sqrtq); scaledn = data->n * (uint64)multipliers[mult_idx]; q1 = (scaledn - (uint64)p1 * (uint64)p1) / (uint64)q0; while (1) { uint32 q, tmp; tmp = sqrtn + p1 - q1; q = 1; if (tmp >= q1) q += tmp / q1; p0 = q * q1 - p1; q0 = q0 + (p1 - p0) * q; if (p0 == p1) { q0 = q1; break; } tmp = sqrtn + p0 - q0; q = 1; if (tmp >= q0) q += tmp / q0; p1 = q * q0 - p0; q1 = q1 + (p0 - p1) * q; if (p0 == p1) break; } /* q0 is the factor of n. Remove factors that exist in the multiplier and save whatever's left */ q0 = q0 / mp_gcd_1(q0, multiplier); *factor = q0; return i; } /*-----------------------------------------------------------------------*/ uint32 squfof(mp_t *n) { /* Factor a number up to 62 bits in size using SQUFOF. For n the product of two primes, this routine will succeed with very high probability, although the likelihood of failure goes up as n increases in size. Empirically, 62-bit factorizations fail about 5% of the time; for smaller n the failure rate is nearly zero. If a factor is found, it is returned. If it is a trivial factor, the return value is one. If SQUFOF failed, the return value is zero */ uint32 num_mult; uint32 factor_found = 0; mp_t scaledn, sqrt_scaledn; uint32 i, num_iter, num_failed; squfof_data_t data; /* turn n into a uint64 */ data.n = (uint64)(n->val[1]) << 32 | (uint64)(n->val[0]); /* for each multiplier */ for (i = 0; i < MAX_MULTIPLIERS; i++) { /* use the multiplier if the multiplier times n will fit in 62 bits. Because multipliers are initialized in order of increasing size, when one is too big then all the rest are also too big */ mp_mul_1(n, multipliers[i], &scaledn); if (mp_bits(&scaledn) > 62) break; mp_isqrt(&scaledn, &sqrt_scaledn); /* initialize the rest of the fields for this multiplier */ data.sqrtn[i] = sqrt_scaledn.val[0]; data.cutoff[i] = (uint32)(sqrt(2.0 * (double)data.sqrtn[i])); data.num_saved[i] = 0; data.failed[i] = 0; data.q0[i] = 1; data.p1[i] = data.sqrtn[i]; data.q1[i] = scaledn.val[0] - data.p1[i] * data.p1[i]; /* if n is a perfect square, don't run the algorithm; the factorization has already taken place */ if (data.q1[i] == 0) return data.p1[i]; } if (i == 0) return 0; /* perform a block of work using each multiplier in turn, until our budget of work for factoring n is exhausted */ num_mult = i; num_iter = 0; num_failed = 0; while (num_iter < MAX_CYCLES) { /* for each cycle of multipliers, begin with the multiplier that is largest. These have a higher probability of factoring n quickly */ for (i = num_mult - 1; (int)i >= 0 ; i--) { if (data.failed[i]) continue; /* work on this multiplier for a little while */ num_iter += squfof_one_cycle(&data, i, ONE_CYCLE_ITER, &factor_found); /* if all multipliers have failed, then SQUFOF has failed */ if (data.failed[i]) { if (++num_failed == num_mult) return 0; } if (factor_found) return factor_found; } } return 0; }