Spaces:
Sleeping
Sleeping
| from sympy.core.random import _randint | |
| from sympy.external.gmpy import gcd, invert, sqrt as isqrt | |
| from sympy.ntheory.residue_ntheory import _sqrt_mod_prime_power | |
| from sympy.ntheory import isprime | |
| from math import log, sqrt | |
| class SievePolynomial: | |
| def __init__(self, modified_coeff=(), a=None, b=None): | |
| """This class denotes the seive polynomial. | |
| If ``g(x) = (a*x + b)**2 - N``. `g(x)` can be expanded | |
| to ``a*x**2 + 2*a*b*x + b**2 - N``, so the coefficient | |
| is stored in the form `[a**2, 2*a*b, b**2 - N]`. This | |
| ensures faster `eval` method because we dont have to | |
| perform `a**2, 2*a*b, b**2` every time we call the | |
| `eval` method. As multiplication is more expensive | |
| than addition, by using modified_coefficient we get | |
| a faster seiving process. | |
| Parameters | |
| ========== | |
| modified_coeff : modified_coefficient of sieve polynomial | |
| a : parameter of the sieve polynomial | |
| b : parameter of the sieve polynomial | |
| """ | |
| self.modified_coeff = modified_coeff | |
| self.a = a | |
| self.b = b | |
| def eval(self, x): | |
| """ | |
| Compute the value of the sieve polynomial at point x. | |
| Parameters | |
| ========== | |
| x : Integer parameter for sieve polynomial | |
| """ | |
| ans = 0 | |
| for coeff in self.modified_coeff: | |
| ans *= x | |
| ans += coeff | |
| return ans | |
| class FactorBaseElem: | |
| """This class stores an element of the `factor_base`. | |
| """ | |
| def __init__(self, prime, tmem_p, log_p): | |
| """ | |
| Initialization of factor_base_elem. | |
| Parameters | |
| ========== | |
| prime : prime number of the factor_base | |
| tmem_p : Integer square root of x**2 = n mod prime | |
| log_p : Compute Natural Logarithm of the prime | |
| """ | |
| self.prime = prime | |
| self.tmem_p = tmem_p | |
| self.log_p = log_p | |
| self.soln1 = None | |
| self.soln2 = None | |
| self.a_inv = None | |
| self.b_ainv = None | |
| def _generate_factor_base(prime_bound, n): | |
| """Generate `factor_base` for Quadratic Sieve. The `factor_base` | |
| consists of all the points whose ``legendre_symbol(n, p) == 1`` | |
| and ``p < num_primes``. Along with the prime `factor_base` also stores | |
| natural logarithm of prime and the residue n modulo p. | |
| It also returns the of primes numbers in the `factor_base` which are | |
| close to 1000 and 5000. | |
| Parameters | |
| ========== | |
| prime_bound : upper prime bound of the factor_base | |
| n : integer to be factored | |
| """ | |
| from sympy.ntheory.generate import sieve | |
| factor_base = [] | |
| idx_1000, idx_5000 = None, None | |
| for prime in sieve.primerange(1, prime_bound): | |
| if pow(n, (prime - 1) // 2, prime) == 1: | |
| if prime > 1000 and idx_1000 is None: | |
| idx_1000 = len(factor_base) - 1 | |
| if prime > 5000 and idx_5000 is None: | |
| idx_5000 = len(factor_base) - 1 | |
| residue = _sqrt_mod_prime_power(n, prime, 1)[0] | |
| log_p = round(log(prime)*2**10) | |
| factor_base.append(FactorBaseElem(prime, residue, log_p)) | |
| return idx_1000, idx_5000, factor_base | |
| def _initialize_first_polynomial(N, M, factor_base, idx_1000, idx_5000, seed=None): | |
| """This step is the initialization of the 1st sieve polynomial. | |
| Here `a` is selected as a product of several primes of the factor_base | |
| such that `a` is about to ``sqrt(2*N) / M``. Other initial values of | |
| factor_base elem are also initialized which includes a_inv, b_ainv, soln1, | |
| soln2 which are used when the sieve polynomial is changed. The b_ainv | |
| is required for fast polynomial change as we do not have to calculate | |
| `2*b*invert(a, prime)` every time. | |
| We also ensure that the `factor_base` primes which make `a` are between | |
| 1000 and 5000. | |
| Parameters | |
| ========== | |
| N : Number to be factored | |
| M : sieve interval | |
| factor_base : factor_base primes | |
| idx_1000 : index of prime number in the factor_base near 1000 | |
| idx_5000 : index of prime number in the factor_base near to 5000 | |
| seed : Generate pseudoprime numbers | |
| """ | |
| randint = _randint(seed) | |
| approx_val = sqrt(2*N) / M | |
| # `a` is a parameter of the sieve polynomial and `q` is the prime factors of `a` | |
| # randomly search for a combination of primes whose multiplication is close to approx_val | |
| # This multiplication of primes will be `a` and the primes will be `q` | |
| # `best_a` denotes that `a` is close to approx_val in the random search of combination | |
| best_a, best_q, best_ratio = None, None, None | |
| start = 0 if idx_1000 is None else idx_1000 | |
| end = len(factor_base) - 1 if idx_5000 is None else idx_5000 | |
| for _ in range(50): | |
| a = 1 | |
| q = [] | |
| while(a < approx_val): | |
| rand_p = 0 | |
| while(rand_p == 0 or rand_p in q): | |
| rand_p = randint(start, end) | |
| p = factor_base[rand_p].prime | |
| a *= p | |
| q.append(rand_p) | |
| ratio = a / approx_val | |
| if best_ratio is None or abs(ratio - 1) < abs(best_ratio - 1): | |
| best_q = q | |
| best_a = a | |
| best_ratio = ratio | |
| a = best_a | |
| q = best_q | |
| B = [] | |
| for val in q: | |
| q_l = factor_base[val].prime | |
| gamma = factor_base[val].tmem_p * invert(a // q_l, q_l) % q_l | |
| if gamma > q_l / 2: | |
| gamma = q_l - gamma | |
| B.append(a//q_l*gamma) | |
| b = sum(B) | |
| g = SievePolynomial([a*a, 2*a*b, b*b - N], a, b) | |
| for fb in factor_base: | |
| if a % fb.prime == 0: | |
| continue | |
| fb.a_inv = invert(a, fb.prime) | |
| fb.b_ainv = [2*b_elem*fb.a_inv % fb.prime for b_elem in B] | |
| fb.soln1 = (fb.a_inv*(fb.tmem_p - b)) % fb.prime | |
| fb.soln2 = (fb.a_inv*(-fb.tmem_p - b)) % fb.prime | |
| return g, B | |
| def _initialize_ith_poly(N, factor_base, i, g, B): | |
| """Initialization stage of ith poly. After we finish sieving 1`st polynomial | |
| here we quickly change to the next polynomial from which we will again | |
| start sieving. Suppose we generated ith sieve polynomial and now we | |
| want to generate (i + 1)th polynomial, where ``1 <= i <= 2**(j - 1) - 1`` | |
| where `j` is the number of prime factors of the coefficient `a` | |
| then this function can be used to go to the next polynomial. If | |
| ``i = 2**(j - 1) - 1`` then go to _initialize_first_polynomial stage. | |
| Parameters | |
| ========== | |
| N : number to be factored | |
| factor_base : factor_base primes | |
| i : integer denoting ith polynomial | |
| g : (i - 1)th polynomial | |
| B : array that stores a//q_l*gamma | |
| """ | |
| from sympy.functions.elementary.integers import ceiling | |
| v = 1 | |
| j = i | |
| while(j % 2 == 0): | |
| v += 1 | |
| j //= 2 | |
| if ceiling(i / (2**v)) % 2 == 1: | |
| neg_pow = -1 | |
| else: | |
| neg_pow = 1 | |
| b = g.b + 2*neg_pow*B[v - 1] | |
| a = g.a | |
| g = SievePolynomial([a*a, 2*a*b, b*b - N], a, b) | |
| for fb in factor_base: | |
| if a % fb.prime == 0: | |
| continue | |
| fb.soln1 = (fb.soln1 - neg_pow*fb.b_ainv[v - 1]) % fb.prime | |
| fb.soln2 = (fb.soln2 - neg_pow*fb.b_ainv[v - 1]) % fb.prime | |
| return g | |
| def _gen_sieve_array(M, factor_base): | |
| """Sieve Stage of the Quadratic Sieve. For every prime in the factor_base | |
| that does not divide the coefficient `a` we add log_p over the sieve_array | |
| such that ``-M <= soln1 + i*p <= M`` and ``-M <= soln2 + i*p <= M`` where `i` | |
| is an integer. When p = 2 then log_p is only added using | |
| ``-M <= soln1 + i*p <= M``. | |
| Parameters | |
| ========== | |
| M : sieve interval | |
| factor_base : factor_base primes | |
| """ | |
| sieve_array = [0]*(2*M + 1) | |
| for factor in factor_base: | |
| if factor.soln1 is None: #The prime does not divides a | |
| continue | |
| for idx in range((M + factor.soln1) % factor.prime, 2*M, factor.prime): | |
| sieve_array[idx] += factor.log_p | |
| if factor.prime == 2: | |
| continue | |
| #if prime is 2 then sieve only with soln_1_p | |
| for idx in range((M + factor.soln2) % factor.prime, 2*M, factor.prime): | |
| sieve_array[idx] += factor.log_p | |
| return sieve_array | |
| def _check_smoothness(num, factor_base): | |
| """Here we check that if `num` is a smooth number or not. If `a` is a smooth | |
| number then it returns a vector of prime exponents modulo 2. For example | |
| if a = 2 * 5**2 * 7**3 and the factor base contains {2, 3, 5, 7} then | |
| `a` is a smooth number and this function returns ([1, 0, 0, 1], True). If | |
| `a` is a partial relation which means that `a` a has one prime factor | |
| greater than the `factor_base` then it returns `(a, False)` which denotes `a` | |
| is a partial relation. | |
| Parameters | |
| ========== | |
| a : integer whose smootheness is to be checked | |
| factor_base : factor_base primes | |
| """ | |
| vec = [] | |
| if num < 0: | |
| vec.append(1) | |
| num *= -1 | |
| else: | |
| vec.append(0) | |
| #-1 is not included in factor_base add -1 in vector | |
| for factor in factor_base: | |
| if num % factor.prime != 0: | |
| vec.append(0) | |
| continue | |
| factor_exp = 0 | |
| while num % factor.prime == 0: | |
| factor_exp += 1 | |
| num //= factor.prime | |
| vec.append(factor_exp % 2) | |
| if num == 1: | |
| return vec, True | |
| if isprime(num): | |
| return num, False | |
| return None, None | |
| def _trial_division_stage(N, M, factor_base, sieve_array, sieve_poly, partial_relations, ERROR_TERM): | |
| """Trial division stage. Here we trial divide the values generetated | |
| by sieve_poly in the sieve interval and if it is a smooth number then | |
| it is stored in `smooth_relations`. Moreover, if we find two partial relations | |
| with same large prime then they are combined to form a smooth relation. | |
| First we iterate over sieve array and look for values which are greater | |
| than accumulated_val, as these values have a high chance of being smooth | |
| number. Then using these values we find smooth relations. | |
| In general, let ``t**2 = u*p modN`` and ``r**2 = v*p modN`` be two partial relations | |
| with the same large prime p. Then they can be combined ``(t*r/p)**2 = u*v modN`` | |
| to form a smooth relation. | |
| Parameters | |
| ========== | |
| N : Number to be factored | |
| M : sieve interval | |
| factor_base : factor_base primes | |
| sieve_array : stores log_p values | |
| sieve_poly : polynomial from which we find smooth relations | |
| partial_relations : stores partial relations with one large prime | |
| ERROR_TERM : error term for accumulated_val | |
| """ | |
| sqrt_n = isqrt(N) | |
| accumulated_val = log(M * sqrt_n)*2**10 - ERROR_TERM | |
| smooth_relations = [] | |
| proper_factor = set() | |
| partial_relation_upper_bound = 128*factor_base[-1].prime | |
| for idx, val in enumerate(sieve_array): | |
| if val < accumulated_val: | |
| continue | |
| x = idx - M | |
| v = sieve_poly.eval(x) | |
| vec, is_smooth = _check_smoothness(v, factor_base) | |
| if is_smooth is None:#Neither smooth nor partial | |
| continue | |
| u = sieve_poly.a*x + sieve_poly.b | |
| # Update the partial relation | |
| # If 2 partial relation with same large prime is found then generate smooth relation | |
| if is_smooth is False:#partial relation found | |
| large_prime = vec | |
| #Consider the large_primes under 128*F | |
| if large_prime > partial_relation_upper_bound: | |
| continue | |
| if large_prime not in partial_relations: | |
| partial_relations[large_prime] = (u, v) | |
| continue | |
| else: | |
| u_prev, v_prev = partial_relations[large_prime] | |
| partial_relations.pop(large_prime) | |
| try: | |
| large_prime_inv = invert(large_prime, N) | |
| except ZeroDivisionError:#if large_prime divides N | |
| proper_factor.add(large_prime) | |
| continue | |
| u = u*u_prev*large_prime_inv | |
| v = v*v_prev // (large_prime*large_prime) | |
| vec, is_smooth = _check_smoothness(v, factor_base) | |
| #assert u*u % N == v % N | |
| smooth_relations.append((u, v, vec)) | |
| return smooth_relations, proper_factor | |
| #LINEAR ALGEBRA STAGE | |
| def _build_matrix(smooth_relations): | |
| """Build a 2D matrix from smooth relations. | |
| Parameters | |
| ========== | |
| smooth_relations : Stores smooth relations | |
| """ | |
| matrix = [] | |
| for s_relation in smooth_relations: | |
| matrix.append(s_relation[2]) | |
| return matrix | |
| def _gauss_mod_2(A): | |
| """Fast gaussian reduction for modulo 2 matrix. | |
| Parameters | |
| ========== | |
| A : Matrix | |
| Examples | |
| ======== | |
| >>> from sympy.ntheory.qs import _gauss_mod_2 | |
| >>> _gauss_mod_2([[0, 1, 1], [1, 0, 1], [0, 1, 0], [1, 1, 1]]) | |
| ([[[1, 0, 1], 3]], | |
| [True, True, True, False], | |
| [[0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 1]]) | |
| Reference | |
| ========== | |
| .. [1] A fast algorithm for gaussian elimination over GF(2) and | |
| its implementation on the GAPP. Cetin K.Koc, Sarath N.Arachchige""" | |
| import copy | |
| matrix = copy.deepcopy(A) | |
| row = len(matrix) | |
| col = len(matrix[0]) | |
| mark = [False]*row | |
| for c in range(col): | |
| for r in range(row): | |
| if matrix[r][c] == 1: | |
| break | |
| mark[r] = True | |
| for c1 in range(col): | |
| if c1 == c: | |
| continue | |
| if matrix[r][c1] == 1: | |
| for r2 in range(row): | |
| matrix[r2][c1] = (matrix[r2][c1] + matrix[r2][c]) % 2 | |
| dependent_row = [] | |
| for idx, val in enumerate(mark): | |
| if val == False: | |
| dependent_row.append([matrix[idx], idx]) | |
| return dependent_row, mark, matrix | |
| def _find_factor(dependent_rows, mark, gauss_matrix, index, smooth_relations, N): | |
| """Finds proper factor of N. Here, transform the dependent rows as a | |
| combination of independent rows of the gauss_matrix to form the desired | |
| relation of the form ``X**2 = Y**2 modN``. After obtaining the desired relation | |
| we obtain a proper factor of N by `gcd(X - Y, N)`. | |
| Parameters | |
| ========== | |
| dependent_rows : denoted dependent rows in the reduced matrix form | |
| mark : boolean array to denoted dependent and independent rows | |
| gauss_matrix : Reduced form of the smooth relations matrix | |
| index : denoted the index of the dependent_rows | |
| smooth_relations : Smooth relations vectors matrix | |
| N : Number to be factored | |
| """ | |
| idx_in_smooth = dependent_rows[index][1] | |
| independent_u = [smooth_relations[idx_in_smooth][0]] | |
| independent_v = [smooth_relations[idx_in_smooth][1]] | |
| dept_row = dependent_rows[index][0] | |
| for idx, val in enumerate(dept_row): | |
| if val == 1: | |
| for row in range(len(gauss_matrix)): | |
| if gauss_matrix[row][idx] == 1 and mark[row] == True: | |
| independent_u.append(smooth_relations[row][0]) | |
| independent_v.append(smooth_relations[row][1]) | |
| break | |
| u = 1 | |
| v = 1 | |
| for i in independent_u: | |
| u *= i | |
| for i in independent_v: | |
| v *= i | |
| #assert u**2 % N == v % N | |
| v = isqrt(v) | |
| return gcd(u - v, N) | |
| def qs(N, prime_bound, M, ERROR_TERM=25, seed=1234): | |
| """Performs factorization using Self-Initializing Quadratic Sieve. | |
| In SIQS, let N be a number to be factored, and this N should not be a | |
| perfect power. If we find two integers such that ``X**2 = Y**2 modN`` and | |
| ``X != +-Y modN``, then `gcd(X + Y, N)` will reveal a proper factor of N. | |
| In order to find these integers X and Y we try to find relations of form | |
| t**2 = u modN where u is a product of small primes. If we have enough of | |
| these relations then we can form ``(t1*t2...ti)**2 = u1*u2...ui modN`` such that | |
| the right hand side is a square, thus we found a relation of ``X**2 = Y**2 modN``. | |
| Here, several optimizations are done like using multiple polynomials for | |
| sieving, fast changing between polynomials and using partial relations. | |
| The use of partial relations can speeds up the factoring by 2 times. | |
| Parameters | |
| ========== | |
| N : Number to be Factored | |
| prime_bound : upper bound for primes in the factor base | |
| M : Sieve Interval | |
| ERROR_TERM : Error term for checking smoothness | |
| threshold : Extra smooth relations for factorization | |
| seed : generate pseudo prime numbers | |
| Examples | |
| ======== | |
| >>> from sympy.ntheory import qs | |
| >>> qs(25645121643901801, 2000, 10000) | |
| {5394769, 4753701529} | |
| >>> qs(9804659461513846513, 2000, 10000) | |
| {4641991, 2112166839943} | |
| References | |
| ========== | |
| .. [1] https://pdfs.semanticscholar.org/5c52/8a975c1405bd35c65993abf5a4edb667c1db.pdf | |
| .. [2] https://www.rieselprime.de/ziki/Self-initializing_quadratic_sieve | |
| """ | |
| ERROR_TERM*=2**10 | |
| idx_1000, idx_5000, factor_base = _generate_factor_base(prime_bound, N) | |
| smooth_relations = [] | |
| ith_poly = 0 | |
| partial_relations = {} | |
| proper_factor = set() | |
| threshold = 5*len(factor_base) // 100 | |
| while True: | |
| if ith_poly == 0: | |
| ith_sieve_poly, B_array = _initialize_first_polynomial(N, M, factor_base, idx_1000, idx_5000) | |
| else: | |
| ith_sieve_poly = _initialize_ith_poly(N, factor_base, ith_poly, ith_sieve_poly, B_array) | |
| ith_poly += 1 | |
| if ith_poly >= 2**(len(B_array) - 1): # time to start with a new sieve polynomial | |
| ith_poly = 0 | |
| sieve_array = _gen_sieve_array(M, factor_base) | |
| s_rel, p_f = _trial_division_stage(N, M, factor_base, sieve_array, ith_sieve_poly, partial_relations, ERROR_TERM) | |
| smooth_relations += s_rel | |
| proper_factor |= p_f | |
| if len(smooth_relations) >= len(factor_base) + threshold: | |
| break | |
| matrix = _build_matrix(smooth_relations) | |
| dependent_row, mark, gauss_matrix = _gauss_mod_2(matrix) | |
| N_copy = N | |
| for index in range(len(dependent_row)): | |
| factor = _find_factor(dependent_row, mark, gauss_matrix, index, smooth_relations, N) | |
| if factor > 1 and factor < N: | |
| proper_factor.add(factor) | |
| while(N_copy % factor == 0): | |
| N_copy //= factor | |
| if isprime(N_copy): | |
| proper_factor.add(N_copy) | |
| break | |
| if(N_copy == 1): | |
| break | |
| return proper_factor | |