/*--------------------------------------------------------------------
This source distribution is placed in the public domain by its author,
Jason Papadopoulos. You may use it for any purpose, free of charge,
without having to notify anyone. I disclaim any responsibility for any
errors.
Optionally, please be nice and tell me if you find this source to be
useful. Again optionally, if you add to the functionality present here
please consider making those additions public too, so that others may
benefit from your work.
--jasonp@boo.net 6/3/07
--------------------------------------------------------------------*/
#include <common.h>
#include "mpqs.h"
int sort_ascending(const void *x, const void *y);
/*--------------------------------------------------------------------*/
void poly_init(sieve_conf_t *conf, uint32 sieve_size) {
uint32 i, j;
uint32 start_bits;
uint32 num_factors, rem;
uint32 num_derived_poly;
mp_t t0, t1;
msieve_obj *obj = conf->obj;
/* compute the optimal A value for MPQS polynomials.
This is sqrt(2*n)/sieve_size, and the closer a generated
A value gets to this number the smaller the average sieve
value will be */
mp_add(conf->n, conf->n, &t0);
mp_isqrt(&t0, &t1);
mp_add_1(&t1, 1, &t1);
mp_divrem_1(&t1, sieve_size, &t1);
mp_copy(&t1, &conf->target_a);
conf->a_bits = mp_bits(&t1);
/* Determine the allowable range for factors
of the 'a' coefficient in MPQS polynomials. We
simplify the factor selection code later by
marking the offsets where factor base primes
increase in size by one bit.
factor_bounds[i] gives the index in the factor
base of the first i-bit prime. If i-bit primes
really are present in the factor base, then
factor_bounds[i] and factor_bounds[i+1] must both
be nonzero. Otherwise the factor base *ends* in
an i-bit prime.
Note that any factor selected for use in 'a'
values is not sieved. However, for primes above
conf->sieve_large_fb_start the sieve code uses a hashtable
that doesn't know this. Hence the pool of primes
that SIQS can use is limited to factor base offsets
less than sieve_large_fb_start. This is desirable anyway,
since these primes are small and we get to choose
more of them, reusing 'a' values for longer */
i = 10;
memset(conf->factor_bounds, 0, sizeof(conf->factor_bounds));
memset(conf->factor_bits, 0, sizeof(conf->factor_bits));
memset(conf->poly_factors, 0, sizeof(conf->poly_factors));
memset(conf->poly_tmp_b, 0, sizeof(conf->poly_tmp_b));
start_bits = conf->factor_base[i].logprime;
conf->factor_bounds[start_bits] = i;
while (i < conf->fb_size) {
if (i >= conf->sieve_large_fb_start ||
conf->factor_base[i].logprime > 15)
break;
if (conf->factor_base[i].logprime > start_bits) {
start_bits = conf->factor_base[i].logprime;
conf->factor_bounds[start_bits] = i;
}
i++;
}
conf->factor_bounds[start_bits+1] = i;
/* Determine the number of factors that each 'a' value
will have, and also the number of bits in each factor.
Factors can have 7-14 bits, favoring a bit size that
produces many polynomials but not *too* many */
if (conf->a_bits > 210)
start_bits = 15;
else if (conf->a_bits > 210)
start_bits = 14;
else if (conf->a_bits > 190)
start_bits = 13;
else if (conf->a_bits > 180)
start_bits = 12;
else
start_bits = 11;
for (i = start_bits, num_factors = rem = 0; i >= 7; i--) {
num_factors = conf->a_bits / i;
rem = conf->a_bits % i;
/* stop searching for factor sizes if the
factor base can handle the current set */
if (conf->factor_bounds[i] == 0 || num_factors == 1)
continue;
if (rem == 0) {
/* the number of bits in the combined
list is exactly the right size */
if (num_factors > 2 && conf->factor_bounds[i+1] > 0)
break;
}
else if (rem <= num_factors) {
/* the number of bits in the combined
list is not large enough, but is
close enough so that some primes can
be one bit larger to compensate. This
will work if the pool of available primes
has candidates in the i and i+1 bin */
if (num_factors > 2 &&
conf->factor_bounds[i+1] > 0 &&
conf->factor_bounds[i+2] > 0)
break;
}
else if ((i - rem) <= num_factors) {
/* the number of bits in the combined
list is too small by more than the number
of primes. Add an extra prime to the
list and make several primes one bit smaller.
This will work if the pool of available primes
has candidates in the i and i-1 bin */
if (conf->factor_bounds[i+1] > 0 &&
conf->factor_bounds[i-1] > 0)
break;
}
}
if (i < 7 || num_factors < 2 || num_factors > MAX_POLY_FACTORS) {
logprintf(obj, "fatal error: poly selection failed\n");
exit(-1);
}
/* explicitly list out the number of bits each prime
in the list will have. Some primes may be one bit
too large or too small */
for (j = 0; j < num_factors; j++)
conf->factor_bits[j] = i;
if (rem <= num_factors) {
for (j = 0; j < rem; j++)
conf->factor_bits[j]++;
}
else {
conf->factor_bits[j] = i;
num_factors++;
for (j = 0; j < (i - rem); j++)
conf->factor_bits[j]--;
}
conf->num_poly_factors = num_factors;
/* verify the collection adds up to what is expected */
for (i = j = 0; j < num_factors; j++) {
i += conf->factor_bits[j];
}
if (i != conf->a_bits) {
logprintf(obj, "failure assigning bits of poly factors\n");
exit(-1);
}
/* for big factorizations, where there are a lot of primes,
it becomes really important to generate A values that are
as close as possible to the 'optimal' A value. In this
case, we make a few of the primes selected deliberately
too small, so that the poly generation code has room to
pick just the right factors */
if (num_factors >= 8 && num_factors < 15) {
if (conf->factor_bits[0] >
conf->factor_bits[num_factors - 1]) {
switch (num_factors) {
default:
conf->factor_bits[3]--;
conf->factor_bits[2]--;
case 9:
conf->factor_bits[1]--;
}
conf->factor_bits[0]--;
}
else {
switch (num_factors) {
default:
conf->factor_bits[num_factors-4]--;
conf->factor_bits[num_factors-3]--;
case 9:
conf->factor_bits[num_factors-2]--;
}
conf->factor_bits[num_factors-1]--;
}
}
/* allocate scratch structures */
num_derived_poly = 1 << (num_factors - 1);
conf->root_correction = (uint32 *)malloc(num_derived_poly *
sizeof(uint32));
conf->next_poly_action = (uint8 *)malloc(num_derived_poly *
sizeof(uint8));
conf->curr_b = (mp_t *)malloc(num_derived_poly * sizeof(mp_t));
conf->poly_b_small[0] = (uint32 *)malloc(conf->sieve_large_fb_start *
num_factors * sizeof(uint32));
for (i = 1; i < num_factors; i++) {
conf->poly_b_small[i] = conf->poly_b_small[i-1] +
conf->sieve_large_fb_start;
}
conf->poly_b_array = NULL;
if (conf->fb_size > conf->sieve_large_fb_start) {
conf->poly_b_array = (uint32 *)malloc(
num_factors * sizeof(uint32) *
(conf->fb_size - conf->sieve_large_fb_start));
}
logprintf(obj, "polynomial 'A' values have %u factors\n", num_factors);
}
/*--------------------------------------------------------------------*/
void poly_free(sieve_conf_t *conf) {
free(conf->root_correction);
free(conf->next_poly_action);
free(conf->poly_b_array);
free(conf->poly_b_small[0]);
free(conf->curr_b);
}
/*--------------------------------------------------------------------*/
void build_derived_poly(sieve_conf_t *conf) {
/* Given a config struct with conf->curr_a and
conf->poly_factors already filled in, calculate
all of the 'b' values corresponding to this 'a'
and also the two peices of scratch information
associated with each 'b'. */
uint32 i;
mp_t tmp;
mp_t *a = &conf->curr_a;
mp_t *b = conf->curr_b;
mp_t *poly_tmp_b = conf->poly_tmp_b;
uint32 num_factors = conf->num_poly_factors;
uint32 *poly_factors = conf->poly_factors;
fb_t *factor_base = conf->factor_base;
uint32 correction;
uint32 num_derived_poly;
uint32 *modsqrt_array = conf->modsqrt_array;
/* compute the intermediate values that will be
used to form the 'b' coefficient of this and
the next 1<<(num_factors-1) polynomials */
for (i = 0; i < num_factors; i++) {
mp_t adivp;
uint32 rem;
fb_t *fbptr = factor_base + poly_factors[i];
uint32 prime = fbptr->prime;
uint32 modsqrt = modsqrt_array[poly_factors[i]];
mp_divrem_1(a, prime, &adivp);
rem = mp_mod_1(&adivp, prime);
rem = mp_modinv_1(rem, prime);
rem = mp_modmul_1(rem, modsqrt, prime);
if (rem > prime / 2)
rem = prime - rem;
mp_mul_1(&adivp, rem, poly_tmp_b + i);
}
/* The first 'b' coefficient is the sum of all the
temporary b values. Also double each temporary value. */
mp_clear(b);
for (i = 0; i < num_factors; i++) {
mp_add(b, poly_tmp_b + i, b);
mp_add(poly_tmp_b + i, poly_tmp_b + i, poly_tmp_b + i);
}
/* There's a lot about self-initializing MPQS that is not
mentioned in Contini's paper. First, ordinary MPQS reduces
every 'b' value mod a, but we can't do that here. That's
because the precomputations below need to work with un-
reduced numbers, since (b mod a) mod p != b mod p in
general. But this also means that b can exceed a, and can
be negative. My homebrew multiple precision library
cannot deal with negative numbers, so the rest of the code
wants b to always be positive.
We work around the problem by cheating. The whole idea here
is to find a and b such that we can compute the values x for
which (a*x+b)^2-n is divisible by small primes. If b is between
a and 2*a, and we know the corresponding set of x values
(which the precomputing below will find), then we can implicitly
work with (a*(x+1) + (b-a))^2 - n. This forms a new b value
that is smaller than a, and we can find the set of x values
by just adding 1 to each x value that we had for the original b.
Negative values of b are treated similarly.
So we need to store a correction, equal to the integer quotient
(current b)/a, that is added to all the x values computed by
later polynomials. Every time a new 'b' value is computed, we
update the correction and then reduce b mod a. In this way,
the rest of the code gets the 'b' values it wants, and we still
initialize polynomials fast.
The first b value is always slightly larger than a, so actual
division is not needed to find the first correction */
correction = 0;
while (mp_cmp(b, a) >= 0) {
mp_sub(b, a, b);
correction++;
}
conf->root_correction[0] = correction;
/* Now derive all of the 'b' values that will use
the same 'a' value */
num_derived_poly = 1 << (num_factors - 1);
for (i = 1; i < num_derived_poly; i++) {
/* Determine which temporary value will be used to
compute the next b, and whether it will be added or
subtracted. See Contini's SIQS paper for details
on why this works. Also modify the correction to
account for the new b value.
When generating the roots for later polynomials,
we have to remember which temporary value was used;
its identity, along with a bit to choose addition
or subtraction, goes into conf->next_poly_action[] */
uint32 bitpos = 0;
b = conf->curr_b + i;
while ((i & (1 << bitpos)) == 0)
bitpos++;
conf->next_poly_action[i] = bitpos;
if (i & (1 << (bitpos+1)) ) {
mp_add(b-1, poly_tmp_b + bitpos, b);
if (mp_cmp(b, a) >= 0) {
mp_sub(b, a, b);
correction++;
}
}
else {
mp_copy(b-1, &tmp);
if (mp_cmp(poly_tmp_b + bitpos, &tmp) >= 0) {
mp_add(&tmp, a, &tmp);
correction--;
}
mp_sub(&tmp, poly_tmp_b + bitpos, b);
conf->next_poly_action[i] |= 0x80;
}
conf->root_correction[i] = correction;
}
}
/*--------------------------------------------------------------------*/
void build_base_poly(sieve_conf_t *conf) {
/* Build the next MPQS polynomial and prepare the
factor base for using it */
msieve_obj *obj = conf->obj;
char buf[256];
uint32 i, j, k;
mp_t *a = &conf->curr_a;
mp_t *b = conf->curr_b;
mp_t tmp;
fb_t *factor_base = conf->factor_base;
uint32 *poly_factors = conf->poly_factors;
uint32 num_factors = conf->num_poly_factors;
uint8 *factor_bits = conf->factor_bits;
uint32 *factor_bounds = conf->factor_bounds;
mp_t *poly_tmp_b;
uint32 curr_poly_factor;
uint32 *poly_b_array;
uint32 **poly_b_small;
uint32 sieve_size = conf->num_sieve_blocks *
conf->sieve_block_size / 2;
uint32 *modsqrt_array = conf->modsqrt_array;
/* compute the factors for this polynomial;
filter out duplicate factors. Note that we
store factor base offsets and not the primes
themselves
Also compute 'a', the product of these factors */
mp_clear(a);
a->nwords = a->val[0] = 1;
i = 0;
while (i < num_factors - 1) {
uint8 bits = factor_bits[i];
uint32 range = factor_bounds[bits+1] - factor_bounds[bits];
poly_factors[i] = factor_bounds[bits] +
get_rand(&obj->seed1, &obj->seed2) % range;
for (j = 0; j < i; j++) {
if (poly_factors[j] == poly_factors[i])
break;
}
if (j == i) { /* factor is accepted */
uint32 prime = factor_base[poly_factors[i]].prime;
mp_mul_1(a, prime, a);
i++;
}
}
/* the last factor is chosen to bring 'a' as close as
possible to the value of conf->target_a. The 'yield'
of the polynomial (in terms of number of relations to
expect) depends critically on 'a' being neither too
large nor too small. The sieve code will only work
if this factor is one of the first sieve_large_fb_start
factor base primes */
mp_div(&conf->target_a, a, &tmp);
i = tmp.val[0];
/* first bracket the desired factor */
for (j = MIN_FB_OFFSET+1; j < conf->sieve_large_fb_start - 3; j++) {
uint32 left = conf->factor_base[j].prime;
uint32 right = conf->factor_base[j+1].prime;
if (i >= left && i <= right)
break;
}
for (; j < conf->sieve_large_fb_start - 2; j++) {
uint32 left = conf->factor_base[j].prime;
uint32 right = conf->factor_base[j+1].prime;
uint32 prime, fb_offset;
/* pick the factor base prime closest to the
ideal value (stored in 'i') */
if (abs((int32)(i - right)) < abs((int32)(i - left))) {
fb_offset = j + 1;
prime = right;
}
else {
fb_offset = j;
prime = left;
}
/* keep it if it's not a duplicate */
for (k = 0; k < num_factors - 1; k++) {
if (poly_factors[k] == fb_offset)
break;
}
if (k == num_factors - 1) {
poly_factors[k] = fb_offset;
mp_mul_1(a, prime, a);
break;
}
}
qsort(poly_factors, (size_t)num_factors,
sizeof(uint32), sort_ascending);
/* Build all of the 'b' values corresponding to this 'a' */
build_derived_poly(conf);
/* Initialize the factor base */
curr_poly_factor = 0;
poly_b_array = conf->poly_b_array;
poly_b_small = conf->poly_b_small;
poly_tmp_b = conf->poly_tmp_b;
for (i = MIN_FB_OFFSET + 1; i < conf->fb_size; i++) {
fb_t *fbptr = factor_base + i;
uint32 root1, root2;
uint32 prime = fbptr->prime;
if (prime < 100 && conf->multiplier % prime == 0) {
/* prime is part of the multiplier. Don't
sieve with it */
root1 = root2 = INVALID_ROOT;
}
else if (curr_poly_factor < num_factors &&
i == poly_factors[curr_poly_factor]) {
/* the factor base prime divides 'a'. Skip it
(we can compute the necessary roots here
but it's harder for later polynomials) */
root1 = root2 = INVALID_ROOT;
curr_poly_factor++;
}
else {
/* compute the two polynomial-specific square
roots for each factor base prime. Note that
the roots represent an offset from -sieve_size,
*not* from zero.
Also, if B is the value of b that ordinary
SIQS must compute, we are actually finding
the roots of (a*(x+k) + (B-k*a))^2, for whatever
value of k makes the second term less than a.
Doing this right would require subtracting
k (actually conf->root_correction[0]) from the
roots generated below, but we can cheat and
do that during the sieving when it's cheaper */
uint32 a_modp = mp_mod_1(a, prime);
uint32 b_modp = prime - mp_mod_1(b, prime);
uint32 modsqrt = modsqrt_array[i];
root1 = b_modp + modsqrt;
root2 = b_modp + prime - modsqrt;
j = sieve_size % prime;
a_modp = mp_modinv_1(a_modp, prime);
root1 = mp_modmul_1(root1, a_modp, prime);
root2 = mp_modmul_1(root2, a_modp, prime);
root1 = mp_modadd_1(root1, j, prime);
root2 = mp_modadd_1(root2, j, prime);
/* also perform the precomputations for the
set of future polynomials that will reuse this
'a' value. */
if (i >= conf->sieve_large_fb_start) {
for (j = 0; j < num_factors; j++) {
b_modp = mp_mod_1(poly_tmp_b + j,
prime);
poly_b_array[j] = mp_modmul_1(a_modp,
b_modp, prime);
}
poly_b_array += num_factors;
}
else {
for (j = 0; j < num_factors; j++) {
b_modp = mp_mod_1(poly_tmp_b + j,
prime);
poly_b_small[j][i] = mp_modmul_1(
a_modp, b_modp, prime);
}
}
}
/* The sieving code uses shortcuts that
depend on root2 >= root1 at all times */
if (root1 < root2) {
fbptr->root1 = root1;
fbptr->root2 = root2;
}
else {
fbptr->root1 = root2;
fbptr->root2 = root1;
}
}
/* dump the 'a' value to disk */
i = sprintf(buf, "A");
for (j = 0; j < conf->num_poly_factors; j++)
i += sprintf(buf + i, " %x", conf->poly_factors[j]);
i += sprintf(buf + i, "\n");
qs_print_to_savefile(obj, buf);
}
/*--------------------------------------------------------------------*/
int sort_ascending(const void *x, const void *y) {
uint32 *xx = (uint32 *)x;
uint32 *yy = (uint32 *)y;
return xx[0] - yy[0];
}
syntax highlighted by Code2HTML, v. 0.9.1