1 /* factor -- print prime factors of n.
2    Copyright (C) 1986-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18    Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19    Arbitrary-precision code adapted by James Youngman from Torbjörn
20    Granlund's factorize.c, from GNU MP version 4.2.2.
21    In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22    Contains code from GNU MP.  */
23 
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25    or, with GMP, numbers of any size.
26 
27   Code organization:
28 
29     There are several variants of many functions, for handling one word, two
30     words, and GMP's mpz_t type.  If the one-word variant is called foo, the
31     two-word variant will be foo2, and the one for mpz_t will be mp_foo.  In
32     some cases, the plain function variants will handle both one-word and
33     two-word numbers, evidenced by function arguments.
34 
35     The factoring code for two words will fall into the code for one word when
36     progress allows that.
37 
38   Algorithm:
39 
40     (1) Perform trial division using a small primes table, but without hardware
41         division since the primes table store inverses modulo the word base.
42         (The GMP variant of this code doesn't make use of the precomputed
43         inverses, but instead relies on GMP for fast divisibility testing.)
44     (2) Check the nature of any non-factored part using Miller-Rabin for
45         detecting composites, and Lucas for detecting primes.
46     (3) Factor any remaining composite part using the Pollard-Brent rho
47         algorithm or if USE_SQUFOF is defined to 1, try that first.
48         Status of found factors are checked again using Miller-Rabin and Lucas.
49 
50     We prefer using Hensel norm in the divisions, not the more familiar
51     Euclidean norm, since the former leads to much faster code.  In the
52     Pollard-Brent rho code and the prime testing code, we use Montgomery's
53     trick of multiplying all n-residues by the word base, allowing cheap Hensel
54     reductions mod n.
55 
56     The GMP code uses an algorithm that can be considerably slower;
57     for example, on a circa-2017 Intel Xeon Silver 4116, factoring
58     2^{127}-3 takes about 50 ms with the two-word algorithm but would
59     take about 750 ms with the GMP code.
60 
61   Improvements:
62 
63     * Use modular inverses also for exact division in the Lucas code, and
64       elsewhere.  A problem is to locate the inverses not from an index, but
65       from a prime.  We might instead compute the inverse on-the-fly.
66 
67     * Tune trial division table size (not forgetting that this is a standalone
68       program where the table will be read from secondary storage for
69       each invocation).
70 
71     * Implement less naive powm, using k-ary exponentiation for k = 3 or
72       perhaps k = 4.
73 
74     * Try to speed trial division code for single uintmax_t numbers, i.e., the
75       code using DIVBLOCK.  It currently runs at 2 cycles per prime (Intel SBR,
76       IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77       using gcc 4.6 and 4.7.  Some software pipelining should help; 1, 2, and 4
78       respectively cycles ought to be possible.
79 
80     * The redcify function could be vastly improved by using (plain Euclidean)
81       pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82       GMP's gmp-impl.h).  The redcify2 function could be vastly improved using
83       similar methods.  These functions currently dominate run time when using
84       the -w option.
85 */
86 
87 /* Whether to recursively factor to prove primality,
88    or run faster probabilistic tests.  */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
92 
93 /* Faster for certain ranges but less general.  */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
97 
98 /* Output SQUFOF statistics.  */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
102 
103 
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdio.h>
107 #include <gmp.h>
108 
109 #include "system.h"
110 #include "assure.h"
111 #include "full-write.h"
112 #include "quote.h"
113 #include "readtokens.h"
114 #include "xstrtol.h"
115 
116 /* The official name of this program (e.g., no 'g' prefix).  */
117 #define PROGRAM_NAME "factor"
118 
119 #define AUTHORS \
120   proper_name ("Paul Rubin"),                                           \
121   proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"),   \
122   proper_name_lite ("Niels Moller", "Niels M\303\266ller")
123 
124 /* Token delimiters when reading from a file.  */
125 #define DELIM "\n\t "
126 
127 #ifndef USE_LONGLONG_H
128 /* With the way we use longlong.h, it's only safe to use
129    when UWtype = UHWtype, as there were various cases
130    (as can be seen in the history for longlong.h) where
131    for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
132    to avoid compile time or run time issues.  */
133 # if LONG_MAX == INTMAX_MAX
134 #  define USE_LONGLONG_H 1
135 # endif
136 #endif
137 
138 #if USE_LONGLONG_H
139 
140 /* Make definitions for longlong.h to make it do what it can do for us */
141 
142 /* bitcount for uintmax_t */
143 # if UINTMAX_MAX == UINT32_MAX
144 #  define W_TYPE_SIZE 32
145 # elif UINTMAX_MAX == UINT64_MAX
146 #  define W_TYPE_SIZE 64
147 # elif UINTMAX_MAX == UINT128_MAX
148 #  define W_TYPE_SIZE 128
149 # endif
150 
151 # define UWtype  uintmax_t
152 # define UHWtype unsigned long int
153 # undef UDWtype
154 # if HAVE_ATTRIBUTE_MODE
155 typedef unsigned int UQItype    __attribute__ ((mode (QI)));
156 typedef          int SItype     __attribute__ ((mode (SI)));
157 typedef unsigned int USItype    __attribute__ ((mode (SI)));
158 typedef          int DItype     __attribute__ ((mode (DI)));
159 typedef unsigned int UDItype    __attribute__ ((mode (DI)));
160 # else
161 typedef unsigned char UQItype;
162 typedef          long SItype;
163 typedef unsigned long int USItype;
164 #  if HAVE_LONG_LONG_INT
165 typedef long long int DItype;
166 typedef unsigned long long int UDItype;
167 #  else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
168 typedef long int DItype;
169 typedef unsigned long int UDItype;
170 #  endif
171 # endif
172 # define LONGLONG_STANDALONE     /* Don't require GMP's longlong.h mdep files */
173 # define ASSERT(x)               /* FIXME make longlong.h really standalone */
174 # define __GMP_DECLSPEC          /* FIXME make longlong.h really standalone */
175 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
176 # ifndef __GMP_GNUC_PREREQ
177 #  define __GMP_GNUC_PREREQ(a,b) 1
178 # endif
179 
180 /* These stub macros are only used in longlong.h in certain system compiler
181    combinations, so ensure usage to avoid -Wunused-macros warnings.  */
182 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
183 ASSERT (1)
184 __GMP_DECLSPEC
185 # endif
186 
187 # if _ARCH_PPC
188 #  define HAVE_HOST_CPU_FAMILY_powerpc 1
189 # endif
190 # include "longlong.h"
191 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
192 const unsigned char factor_clz_tab[129] =
193 {
194   1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
195   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
196   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
197   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
198   9
199 };
200 # endif
201 
202 #else /* not USE_LONGLONG_H */
203 
204 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
205 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
206 # define __ll_lowpart(t)  ((uintmax_t) (t) & (__ll_B - 1))
207 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
208 
209 #endif
210 
211 #if !defined __clz_tab && !defined UHWtype
212 /* Without this seemingly useless conditional, gcc -Wunused-macros
213    warns that each of the two tested macros is unused on Fedora 18.
214    FIXME: this is just an ugly band-aid.  Fix it properly.  */
215 #endif
216 
217 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
218 #define MAX_NFACTS 26
219 
220 enum
221 {
222   DEV_DEBUG_OPTION = CHAR_MAX + 1
223 };
224 
225 static struct option const long_options[] =
226 {
227   {"exponents", no_argument, nullptr, 'h'},
228   {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION},
229   {GETOPT_HELP_OPTION_DECL},
230   {GETOPT_VERSION_OPTION_DECL},
231   {nullptr, 0, nullptr, 0}
232 };
233 
234 /* If true, use p^e output format.  */
235 static bool print_exponents;
236 
237 struct factors
238 {
239   uintmax_t     plarge[2]; /* Can have a single large factor */
240   uintmax_t     p[MAX_NFACTS];
241   unsigned char e[MAX_NFACTS];
242   unsigned char nfactors;
243 };
244 
245 struct mp_factors
246 {
247   mpz_t             *p;
248   unsigned long int *e;
249   idx_t nfactors;
250   idx_t nalloc;
251 };
252 
253 static void factor (uintmax_t, uintmax_t, struct factors *);
254 
255 #ifndef umul_ppmm
256 # define umul_ppmm(w1, w0, u, v)                                        \
257   do {                                                                  \
258     uintmax_t __x0, __x1, __x2, __x3;                                   \
259     unsigned long int __ul, __vl, __uh, __vh;                           \
260     uintmax_t __u = (u), __v = (v);                                     \
261                                                                         \
262     __ul = __ll_lowpart (__u);                                          \
263     __uh = __ll_highpart (__u);                                         \
264     __vl = __ll_lowpart (__v);                                          \
265     __vh = __ll_highpart (__v);                                         \
266                                                                         \
267     __x0 = (uintmax_t) __ul * __vl;                                     \
268     __x1 = (uintmax_t) __ul * __vh;                                     \
269     __x2 = (uintmax_t) __uh * __vl;                                     \
270     __x3 = (uintmax_t) __uh * __vh;                                     \
271                                                                         \
272     __x1 += __ll_highpart (__x0);/* This can't give carry.  */		\
273     __x1 += __x2;		/* But this indeed can.  */		\
274     if (__x1 < __x2)		/* Did we get it?  */			\
275       __x3 += __ll_B;		/* Yes, add it in the proper pos.  */	\
276                                                                         \
277     (w1) = __x3 + __ll_highpart (__x1);                                 \
278     (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0);             \
279   } while (0)
280 #endif
281 
282 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
283 /* Define our own, not needing normalization.  This function is
284    currently not performance critical, so keep it simple.  Similar to
285    the mod macro below.  */
286 # undef udiv_qrnnd
287 # define udiv_qrnnd(q, r, n1, n0, d)                                    \
288   do {                                                                  \
289     uintmax_t __d1, __d0, __q, __r1, __r0;                              \
290                                                                         \
291     __d1 = (d); __d0 = 0;                                               \
292     __r1 = (n1); __r0 = (n0);                                           \
293     affirm (__r1 < __d1);                                               \
294     __q = 0;                                                            \
295     for (int __i = W_TYPE_SIZE; __i > 0; __i--)                         \
296       {                                                                 \
297         rsh2 (__d1, __d0, __d1, __d0, 1);                               \
298         __q <<= 1;                                                      \
299         if (ge2 (__r1, __r0, __d1, __d0))                               \
300           {                                                             \
301             __q++;                                                      \
302             sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0);            \
303           }                                                             \
304       }                                                                 \
305     (r) = __r0;                                                         \
306     (q) = __q;                                                          \
307   } while (0)
308 #endif
309 
310 #if !defined add_ssaaaa
311 # define add_ssaaaa(sh, sl, ah, al, bh, bl)                             \
312   do {                                                                  \
313     uintmax_t _add_x;                                                   \
314     _add_x = (al) + (bl);                                               \
315     (sh) = (ah) + (bh) + (_add_x < (al));                               \
316     (sl) = _add_x;                                                      \
317   } while (0)
318 #endif
319 
320 #define rsh2(rh, rl, ah, al, cnt)                                       \
321   do {                                                                  \
322     (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt));           \
323     (rh) = (ah) >> (cnt);                                               \
324   } while (0)
325 
326 #define lsh2(rh, rl, ah, al, cnt)                                       \
327   do {                                                                  \
328     (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt)));             \
329     (rl) = (al) << (cnt);                                               \
330   } while (0)
331 
332 #define ge2(ah, al, bh, bl)                                             \
333   ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
334 
335 #define gt2(ah, al, bh, bl)                                             \
336   ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
337 
338 #ifndef sub_ddmmss
339 # define sub_ddmmss(rh, rl, ah, al, bh, bl)                             \
340   do {                                                                  \
341     uintmax_t _cy;                                                      \
342     _cy = (al) < (bl);                                                  \
343     (rl) = (al) - (bl);                                                 \
344     (rh) = (ah) - (bh) - _cy;                                           \
345   } while (0)
346 #endif
347 
348 #ifndef count_leading_zeros
349 # define count_leading_zeros(count, x) do {                             \
350     uintmax_t __clz_x = (x);                                            \
351     int __clz_c;                                                        \
352     for (__clz_c = 0;                                                   \
353          (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0;      \
354          __clz_c += 8)                                                  \
355       __clz_x <<= 8;                                                    \
356     for (; (intmax_t)__clz_x >= 0; __clz_c++)                           \
357       __clz_x <<= 1;                                                    \
358     (count) = __clz_c;                                                  \
359   } while (0)
360 #endif
361 
362 #ifndef count_trailing_zeros
363 # define count_trailing_zeros(count, x) do {                            \
364     uintmax_t __ctz_x = (x);                                            \
365     int __ctz_c = 0;                                                    \
366     while ((__ctz_x & 1) == 0)                                          \
367       {                                                                 \
368         __ctz_x >>= 1;                                                  \
369         __ctz_c++;                                                      \
370       }                                                                 \
371     (count) = __ctz_c;                                                  \
372   } while (0)
373 #endif
374 
375 /* Requires that a < n and b <= n */
376 #define submod(r,a,b,n)                                                 \
377   do {                                                                  \
378     uintmax_t _t = - (uintmax_t) (a < b);                               \
379     (r) = ((n) & _t) + (a) - (b);                                       \
380   } while (0)
381 
382 #define addmod(r,a,b,n)                                                 \
383   submod ((r), (a), ((n) - (b)), (n))
384 
385 /* Modular two-word addition and subtraction.  For performance reasons, the
386    most significant bit of n1 must be clear.  The destination variables must be
387    distinct from the mod operand.  */
388 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0)                         \
389   do {                                                                  \
390     add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0));                    \
391     if (ge2 ((r1), (r0), (n1), (n0)))                                   \
392       sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0));                  \
393   } while (0)
394 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0)                         \
395   do {                                                                  \
396     sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0));                    \
397     if ((intmax_t) (r1) < 0)                                            \
398       add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0));                  \
399   } while (0)
400 
401 #define HIGHBIT_TO_MASK(x)                                              \
402   (((intmax_t)-1 >> 1) < 0                                              \
403    ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1))                    \
404    : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1))                        \
405       ? UINTMAX_MAX : (uintmax_t) 0))
406 
407 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
408    Requires that d1 != 0.  */
409 static uintmax_t
mod2(uintmax_t * r1,uintmax_t a1,uintmax_t a0,uintmax_t d1,uintmax_t d0)410 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
411 {
412   int cntd, cnta;
413 
414   affirm (d1 != 0);
415 
416   if (a1 == 0)
417     {
418       *r1 = 0;
419       return a0;
420     }
421 
422   count_leading_zeros (cntd, d1);
423   count_leading_zeros (cnta, a1);
424   int cnt = cntd - cnta;
425   lsh2 (d1, d0, d1, d0, cnt);
426   for (int i = 0; i < cnt; i++)
427     {
428       if (ge2 (a1, a0, d1, d0))
429         sub_ddmmss (a1, a0, a1, a0, d1, d0);
430       rsh2 (d1, d0, d1, d0, 1);
431     }
432 
433   *r1 = a1;
434   return a0;
435 }
436 
437 ATTRIBUTE_CONST
438 static uintmax_t
gcd_odd(uintmax_t a,uintmax_t b)439 gcd_odd (uintmax_t a, uintmax_t b)
440 {
441   if ((b & 1) == 0)
442     {
443       uintmax_t t = b;
444       b = a;
445       a = t;
446     }
447   if (a == 0)
448     return b;
449 
450   /* Take out least significant one bit, to make room for sign */
451   b >>= 1;
452 
453   for (;;)
454     {
455       uintmax_t t;
456       uintmax_t bgta;
457 
458       while ((a & 1) == 0)
459         a >>= 1;
460       a >>= 1;
461 
462       t = a - b;
463       if (t == 0)
464         return (a << 1) + 1;
465 
466       bgta = HIGHBIT_TO_MASK (t);
467 
468       /* b <-- min (a, b) */
469       b += (bgta & t);
470 
471       /* a <-- |a - b| */
472       a = (t ^ bgta) - bgta;
473     }
474 }
475 
476 static uintmax_t
gcd2_odd(uintmax_t * r1,uintmax_t a1,uintmax_t a0,uintmax_t b1,uintmax_t b0)477 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
478 {
479   affirm (b0 & 1);
480 
481   if ((a0 | a1) == 0)
482     {
483       *r1 = b1;
484       return b0;
485     }
486 
487   while ((a0 & 1) == 0)
488     rsh2 (a1, a0, a1, a0, 1);
489 
490   for (;;)
491     {
492       if ((b1 | a1) == 0)
493         {
494           *r1 = 0;
495           return gcd_odd (b0, a0);
496         }
497 
498       if (gt2 (a1, a0, b1, b0))
499         {
500           sub_ddmmss (a1, a0, a1, a0, b1, b0);
501           do
502             rsh2 (a1, a0, a1, a0, 1);
503           while ((a0 & 1) == 0);
504         }
505       else if (gt2 (b1, b0, a1, a0))
506         {
507           sub_ddmmss (b1, b0, b1, b0, a1, a0);
508           do
509             rsh2 (b1, b0, b1, b0, 1);
510           while ((b0 & 1) == 0);
511         }
512       else
513         break;
514     }
515 
516   *r1 = a1;
517   return a0;
518 }
519 
520 static void
factor_insert_multiplicity(struct factors * factors,uintmax_t prime,int m)521 factor_insert_multiplicity (struct factors *factors,
522                             uintmax_t prime, int m)
523 {
524   int nfactors = factors->nfactors;
525   uintmax_t *p = factors->p;
526   unsigned char *e = factors->e;
527 
528   /* Locate position for insert new or increment e.  */
529   int i;
530   for (i = nfactors - 1; i >= 0; i--)
531     {
532       if (p[i] <= prime)
533         break;
534     }
535 
536   if (i < 0 || p[i] != prime)
537     {
538       for (int j = nfactors - 1; j > i; j--)
539         {
540           p[j + 1] = p[j];
541           e[j + 1] = e[j];
542         }
543       p[i + 1] = prime;
544       e[i + 1] = m;
545       factors->nfactors = nfactors + 1;
546     }
547   else
548     {
549       e[i] += m;
550     }
551 }
552 
553 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
554 
555 static void
factor_insert_large(struct factors * factors,uintmax_t p1,uintmax_t p0)556 factor_insert_large (struct factors *factors,
557                      uintmax_t p1, uintmax_t p0)
558 {
559   if (p1 > 0)
560     {
561       affirm (factors->plarge[1] == 0);
562       factors->plarge[0] = p0;
563       factors->plarge[1] = p1;
564     }
565   else
566     factor_insert (factors, p0);
567 }
568 
569 #ifndef mpz_inits
570 
571 # include <stdarg.h>
572 
573 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
574 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
575 
576 static void
mpz_va_init(void (* mpz_single_init)(mpz_t),...)577 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
578 {
579   va_list ap;
580 
581   va_start (ap, mpz_single_init);
582 
583   mpz_t *mpz;
584   while ((mpz = va_arg (ap, mpz_t *)))
585     mpz_single_init (*mpz);
586 
587   va_end (ap);
588 }
589 #endif
590 
591 static void mp_factor (mpz_t, struct mp_factors *);
592 
593 static void
mp_factor_init(struct mp_factors * factors)594 mp_factor_init (struct mp_factors *factors)
595 {
596   factors->p = nullptr;
597   factors->e = nullptr;
598   factors->nfactors = 0;
599   factors->nalloc = 0;
600 }
601 
602 static void
mp_factor_clear(struct mp_factors * factors)603 mp_factor_clear (struct mp_factors *factors)
604 {
605   for (idx_t i = 0; i < factors->nfactors; i++)
606     mpz_clear (factors->p[i]);
607 
608   free (factors->p);
609   free (factors->e);
610 }
611 
612 static void
mp_factor_insert(struct mp_factors * factors,mpz_t prime)613 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
614 {
615   idx_t nfactors = factors->nfactors;
616   mpz_t *p = factors->p;
617   unsigned long int *e = factors->e;
618   ptrdiff_t i;
619 
620   /* Locate position for insert new or increment e.  */
621   for (i = nfactors - 1; i >= 0; i--)
622     {
623       if (mpz_cmp (p[i], prime) <= 0)
624         break;
625     }
626 
627   if (i < 0 || mpz_cmp (p[i], prime) != 0)
628     {
629       if (factors->nfactors == factors->nalloc)
630         {
631           p = xpalloc (p, &factors->nalloc, 1, -1, sizeof *p);
632           e = xireallocarray (e, factors->nalloc, sizeof *e);
633         }
634 
635       mpz_init (p[nfactors]);
636       for (ptrdiff_t j = nfactors - 1; j > i; j--)
637         {
638           mpz_set (p[j + 1], p[j]);
639           e[j + 1] = e[j];
640         }
641       mpz_set (p[i + 1], prime);
642       e[i + 1] = 1;
643 
644       factors->p = p;
645       factors->e = e;
646       factors->nfactors = nfactors + 1;
647     }
648   else
649     {
650       e[i] += 1;
651     }
652 }
653 
654 static void
mp_factor_insert_ui(struct mp_factors * factors,unsigned long int prime)655 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
656 {
657   mpz_t pz;
658 
659   mpz_init_set_ui (pz, prime);
660   mp_factor_insert (factors, pz);
661   mpz_clear (pz);
662 }
663 
664 
665 /* Number of bits in an uintmax_t.  */
666 enum { W = sizeof (uintmax_t) * CHAR_BIT };
667 
668 /* Verify that uintmax_t does not have holes in its representation.  */
669 static_assert (UINTMAX_MAX >> (W - 1) == 1);
670 
671 #define P(a,b,c,d) a,
672 static const unsigned char primes_diff[] = {
673 #include "primes.h"
674 0,0,0,0,0,0,0                           /* 7 sentinels for 8-way loop */
675 };
676 #undef P
677 
678 #define PRIMES_PTAB_ENTRIES \
679   (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
680 
681 #define P(a,b,c,d) b,
682 static const unsigned char primes_diff8[] = {
683 #include "primes.h"
684 0,0,0,0,0,0,0                           /* 7 sentinels for 8-way loop */
685 };
686 #undef P
687 
688 struct primes_dtab
689 {
690   uintmax_t binv, lim;
691 };
692 
693 #define P(a,b,c,d) {c,d},
694 static const struct primes_dtab primes_dtab[] = {
695 #include "primes.h"
696 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
697 };
698 #undef P
699 
700 /* Verify that uintmax_t is not wider than
701    the integers used to generate primes.h.  */
702 static_assert (W <= WIDE_UINT_BITS);
703 
704 /* debugging for developers.  Enables devmsg().
705    This flag is used only in the GMP code.  */
706 static bool dev_debug = false;
707 
708 /* Prove primality or run probabilistic tests.  */
709 static bool flag_prove_primality = PROVE_PRIMALITY;
710 
711 /* Number of Miller-Rabin tests to run when not proving primality.  */
712 #define MR_REPS 25
713 
714 static void
factor_insert_refind(struct factors * factors,uintmax_t p,int i,int off)715 factor_insert_refind (struct factors *factors, uintmax_t p, int i, int off)
716 {
717   for (int j = 0; j < off; j++)
718     p += primes_diff[i + j];
719   factor_insert (factors, p);
720 }
721 
722 /* Trial division with odd primes uses the following trick.
723 
724    Let p be an odd prime, and B = 2^{W_TYPE_SIZE}.  For simplicity,
725    consider the case t < B (this is the second loop below).
726 
727    From our tables we get
728 
729      binv = p^{-1} (mod B)
730      lim = floor ((B-1) / p).
731 
732    First assume that t is a multiple of p, t = q * p.  Then 0 <= q <= lim
733    (and all quotients in this range occur for some t).
734 
735    Then t = q * p is true also (mod B), and p is invertible we get
736 
737      q = t * binv (mod B).
738 
739    Next, assume that t is *not* divisible by p.  Since multiplication
740    by binv (mod B) is a one-to-one mapping,
741 
742      t * binv (mod B) > lim,
743 
744    because all the smaller values are already taken.
745 
746    This can be summed up by saying that the function
747 
748      q(t) = binv * t (mod B)
749 
750    is a permutation of the range 0 <= t < B, with the curious property
751    that it maps the multiples of p onto the range 0 <= q <= lim, in
752    order, and the non-multiples of p onto the range lim < q < B.
753  */
754 
755 static uintmax_t
factor_using_division(uintmax_t * t1p,uintmax_t t1,uintmax_t t0,struct factors * factors)756 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
757                        struct factors *factors)
758 {
759   if (t0 % 2 == 0)
760     {
761       int cnt;
762 
763       if (t0 == 0)
764         {
765           count_trailing_zeros (cnt, t1);
766           t0 = t1 >> cnt;
767           t1 = 0;
768           cnt += W_TYPE_SIZE;
769         }
770       else
771         {
772           count_trailing_zeros (cnt, t0);
773           rsh2 (t1, t0, t1, t0, cnt);
774         }
775 
776       factor_insert_multiplicity (factors, 2, cnt);
777     }
778 
779   uintmax_t p = 3;
780   idx_t i;
781   for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
782     {
783       for (;;)
784         {
785           uintmax_t q1, q0, hi;
786           MAYBE_UNUSED uintmax_t lo;
787 
788           q0 = t0 * primes_dtab[i].binv;
789           umul_ppmm (hi, lo, q0, p);
790           if (hi > t1)
791             break;
792           hi = t1 - hi;
793           q1 = hi * primes_dtab[i].binv;
794           if (LIKELY (q1 > primes_dtab[i].lim))
795             break;
796           t1 = q1; t0 = q0;
797           factor_insert (factors, p);
798         }
799       p += primes_diff[i + 1];
800     }
801   if (t1p)
802     *t1p = t1;
803 
804 #define DIVBLOCK(I)                                                     \
805   do {                                                                  \
806     for (;;)                                                            \
807       {                                                                 \
808         q = t0 * pd[I].binv;                                            \
809         if (LIKELY (q > pd[I].lim))                                     \
810           break;                                                        \
811         t0 = q;                                                         \
812         factor_insert_refind (factors, p, i + 1, I);                    \
813       }                                                                 \
814   } while (0)
815 
816   for (; i < PRIMES_PTAB_ENTRIES; i += 8)
817     {
818       uintmax_t q;
819       const struct primes_dtab *pd = &primes_dtab[i];
820       DIVBLOCK (0);
821       DIVBLOCK (1);
822       DIVBLOCK (2);
823       DIVBLOCK (3);
824       DIVBLOCK (4);
825       DIVBLOCK (5);
826       DIVBLOCK (6);
827       DIVBLOCK (7);
828 
829       p += primes_diff8[i];
830       if (p * p > t0)
831         break;
832     }
833 
834   return t0;
835 }
836 
837 static void
mp_factor_using_division(mpz_t t,struct mp_factors * factors)838 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
839 {
840   mpz_t q;
841   mp_bitcnt_t p;
842 
843   devmsg ("[trial division] ");
844 
845   mpz_init (q);
846 
847   p = mpz_scan1 (t, 0);
848   mpz_fdiv_q_2exp (t, t, p);
849   while (p)
850     {
851       mp_factor_insert_ui (factors, 2);
852       --p;
853     }
854 
855   unsigned long int d = 3;
856   for (idx_t i = 1; i <= PRIMES_PTAB_ENTRIES;)
857     {
858       if (! mpz_divisible_ui_p (t, d))
859         {
860           d += primes_diff[i++];
861           if (mpz_cmp_ui (t, d * d) < 0)
862             break;
863         }
864       else
865         {
866           mpz_tdiv_q_ui (t, t, d);
867           mp_factor_insert_ui (factors, d);
868         }
869     }
870 
871   mpz_clear (q);
872 }
873 
874 /* Entry i contains (2i+1)^(-1) mod 2^8.  */
875 static const unsigned char  binvert_table[128] =
876 {
877   0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
878   0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
879   0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
880   0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
881   0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
882   0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
883   0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
884   0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
885   0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
886   0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
887   0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
888   0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
889   0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
890   0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
891   0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
892   0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
893 };
894 
895 /* Compute n^(-1) mod B, using a Newton iteration.  */
896 #define binv(inv,n)                                                     \
897   do {                                                                  \
898     uintmax_t  __n = (n);                                               \
899     uintmax_t  __inv;                                                   \
900                                                                         \
901     __inv = binvert_table[(__n / 2) & 0x7F]; /*  8 */                   \
902     if (W_TYPE_SIZE > 8)   __inv = 2 * __inv - __inv * __inv * __n;     \
903     if (W_TYPE_SIZE > 16)  __inv = 2 * __inv - __inv * __inv * __n;     \
904     if (W_TYPE_SIZE > 32)  __inv = 2 * __inv - __inv * __inv * __n;     \
905                                                                         \
906     if (W_TYPE_SIZE > 64)                                               \
907       {                                                                 \
908         int  __invbits = 64;                                            \
909         do {                                                            \
910           __inv = 2 * __inv - __inv * __inv * __n;                      \
911           __invbits *= 2;                                               \
912         } while (__invbits < W_TYPE_SIZE);                              \
913       }                                                                 \
914                                                                         \
915     (inv) = __inv;                                                      \
916   } while (0)
917 
918 /* q = u / d, assuming d|u.  */
919 #define divexact_21(q1, q0, u1, u0, d)                                  \
920   do {                                                                  \
921     uintmax_t _di, _q0;                                                 \
922     binv (_di, (d));                                                    \
923     _q0 = (u0) * _di;                                                   \
924     if ((u1) >= (d))                                                    \
925       {                                                                 \
926         uintmax_t _p1;                                                  \
927         MAYBE_UNUSED intmax_t _p0;                                      \
928         umul_ppmm (_p1, _p0, _q0, d);                                   \
929         (q1) = ((u1) - _p1) * _di;                                      \
930         (q0) = _q0;                                                     \
931       }                                                                 \
932     else                                                                \
933       {                                                                 \
934         (q0) = _q0;                                                     \
935         (q1) = 0;                                                       \
936       }                                                                 \
937   } while (0)
938 
939 /* x B (mod n).  */
940 #define redcify(r_prim, r, n)                                           \
941   do {                                                                  \
942     MAYBE_UNUSED uintmax_t _redcify_q;					\
943     udiv_qrnnd (_redcify_q, r_prim, r, 0, n);                           \
944   } while (0)
945 
946 /* x B^2 (mod n).  Requires x > 0, n1 < B/2.  */
947 #define redcify2(r1, r0, x, n1, n0)                                     \
948   do {                                                                  \
949     uintmax_t _r1, _r0, _i;                                             \
950     if ((x) < (n1))                                                     \
951       {                                                                 \
952         _r1 = (x); _r0 = 0;                                             \
953         _i = W_TYPE_SIZE;                                               \
954       }                                                                 \
955     else                                                                \
956       {                                                                 \
957         _r1 = 0; _r0 = (x);                                             \
958         _i = 2 * W_TYPE_SIZE;                                           \
959       }                                                                 \
960     while (_i-- > 0)                                                    \
961       {                                                                 \
962         lsh2 (_r1, _r0, _r1, _r0, 1);                                   \
963         if (ge2 (_r1, _r0, (n1), (n0)))                                 \
964           sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0));                  \
965       }                                                                 \
966     (r1) = _r1;                                                         \
967     (r0) = _r0;                                                         \
968   } while (0)
969 
970 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
971    Both a and b must be in redc form, the result will be in redc form too.  */
972 static inline uintmax_t
mulredc(uintmax_t a,uintmax_t b,uintmax_t m,uintmax_t mi)973 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
974 {
975   uintmax_t rh, rl, q, th, xh;
976   MAYBE_UNUSED uintmax_t tl;
977 
978   umul_ppmm (rh, rl, a, b);
979   q = rl * mi;
980   umul_ppmm (th, tl, q, m);
981   xh = rh - th;
982   if (rh < th)
983     xh += m;
984 
985   return xh;
986 }
987 
988 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
989    Both a and b must be in redc form, the result will be in redc form too.
990    For performance reasons, the most significant bit of m must be clear.  */
991 static uintmax_t
mulredc2(uintmax_t * r1p,uintmax_t a1,uintmax_t a0,uintmax_t b1,uintmax_t b0,uintmax_t m1,uintmax_t m0,uintmax_t mi)992 mulredc2 (uintmax_t *r1p,
993           uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
994           uintmax_t m1, uintmax_t m0, uintmax_t mi)
995 {
996   uintmax_t r1, r0, q, p1, t1, t0, s1, s0;
997   MAYBE_UNUSED uintmax_t p0;
998   mi = -mi;
999   affirm ((a1 >> (W_TYPE_SIZE - 1)) == 0);
1000   affirm ((b1 >> (W_TYPE_SIZE - 1)) == 0);
1001   affirm ((m1 >> (W_TYPE_SIZE - 1)) == 0);
1002 
1003   /* First compute a0 * <b1, b0> B^{-1}
1004         +-----+
1005         |a0 b0|
1006      +--+--+--+
1007      |a0 b1|
1008      +--+--+--+
1009         |q0 m0|
1010      +--+--+--+
1011      |q0 m1|
1012     -+--+--+--+
1013      |r1|r0| 0|
1014      +--+--+--+
1015   */
1016   umul_ppmm (t1, t0, a0, b0);
1017   umul_ppmm (r1, r0, a0, b1);
1018   q = mi * t0;
1019   umul_ppmm (p1, p0, q, m0);
1020   umul_ppmm (s1, s0, q, m1);
1021   r0 += (t0 != 0); /* Carry */
1022   add_ssaaaa (r1, r0, r1, r0, 0, p1);
1023   add_ssaaaa (r1, r0, r1, r0, 0, t1);
1024   add_ssaaaa (r1, r0, r1, r0, s1, s0);
1025 
1026   /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1027         +-----+
1028         |a1 b0|
1029         +--+--+
1030         |r1|r0|
1031      +--+--+--+
1032      |a1 b1|
1033      +--+--+--+
1034         |q1 m0|
1035      +--+--+--+
1036      |q1 m1|
1037     -+--+--+--+
1038      |r1|r0| 0|
1039      +--+--+--+
1040   */
1041   umul_ppmm (t1, t0, a1, b0);
1042   umul_ppmm (s1, s0, a1, b1);
1043   add_ssaaaa (t1, t0, t1, t0, 0, r0);
1044   q = mi * t0;
1045   add_ssaaaa (r1, r0, s1, s0, 0, r1);
1046   umul_ppmm (p1, p0, q, m0);
1047   umul_ppmm (s1, s0, q, m1);
1048   r0 += (t0 != 0); /* Carry */
1049   add_ssaaaa (r1, r0, r1, r0, 0, p1);
1050   add_ssaaaa (r1, r0, r1, r0, 0, t1);
1051   add_ssaaaa (r1, r0, r1, r0, s1, s0);
1052 
1053   if (ge2 (r1, r0, m1, m0))
1054     sub_ddmmss (r1, r0, r1, r0, m1, m0);
1055 
1056   *r1p = r1;
1057   return r0;
1058 }
1059 
1060 ATTRIBUTE_CONST
1061 static uintmax_t
powm(uintmax_t b,uintmax_t e,uintmax_t n,uintmax_t ni,uintmax_t one)1062 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1063 {
1064   uintmax_t y = one;
1065 
1066   if (e & 1)
1067     y = b;
1068 
1069   while (e != 0)
1070     {
1071       b = mulredc (b, b, n, ni);
1072       e >>= 1;
1073 
1074       if (e & 1)
1075         y = mulredc (y, b, n, ni);
1076     }
1077 
1078   return y;
1079 }
1080 
1081 static uintmax_t
powm2(uintmax_t * r1m,const uintmax_t * bp,const uintmax_t * ep,const uintmax_t * np,uintmax_t ni,const uintmax_t * one)1082 powm2 (uintmax_t *r1m,
1083        const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1084        uintmax_t ni, const uintmax_t *one)
1085 {
1086   uintmax_t r1, r0, b1, b0, n1, n0;
1087   int i;
1088   uintmax_t e;
1089 
1090   b0 = bp[0];
1091   b1 = bp[1];
1092   n0 = np[0];
1093   n1 = np[1];
1094 
1095   r0 = one[0];
1096   r1 = one[1];
1097 
1098   for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1099     {
1100       if (e & 1)
1101         {
1102           r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1103           r1 = *r1m;
1104         }
1105       b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1106       b1 = *r1m;
1107     }
1108   for (e = ep[1]; e > 0; e >>= 1)
1109     {
1110       if (e & 1)
1111         {
1112           r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1113           r1 = *r1m;
1114         }
1115       b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1116       b1 = *r1m;
1117     }
1118   *r1m = r1;
1119   return r0;
1120 }
1121 
1122 ATTRIBUTE_CONST
1123 static bool
millerrabin(uintmax_t n,uintmax_t ni,uintmax_t b,uintmax_t q,int k,uintmax_t one)1124 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1125              int k, uintmax_t one)
1126 {
1127   uintmax_t y = powm (b, q, n, ni, one);
1128 
1129   uintmax_t nm1 = n - one;      /* -1, but in redc representation.  */
1130 
1131   if (y == one || y == nm1)
1132     return true;
1133 
1134   for (int i = 1; i < k; i++)
1135     {
1136       y = mulredc (y, y, n, ni);
1137 
1138       if (y == nm1)
1139         return true;
1140       if (y == one)
1141         return false;
1142     }
1143   return false;
1144 }
1145 
1146 ATTRIBUTE_PURE static bool
millerrabin2(const uintmax_t * np,uintmax_t ni,const uintmax_t * bp,const uintmax_t * qp,int k,const uintmax_t * one)1147 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1148               const uintmax_t *qp, int k, const uintmax_t *one)
1149 {
1150   uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1151 
1152   y0 = powm2 (&r1m, bp, qp, np, ni, one);
1153   y1 = r1m;
1154 
1155   if (y0 == one[0] && y1 == one[1])
1156     return true;
1157 
1158   sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1159 
1160   if (y0 == nm1_0 && y1 == nm1_1)
1161     return true;
1162 
1163   for (int i = 1; i < k; i++)
1164     {
1165       y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1166       y1 = r1m;
1167 
1168       if (y0 == nm1_0 && y1 == nm1_1)
1169         return true;
1170       if (y0 == one[0] && y1 == one[1])
1171         return false;
1172     }
1173   return false;
1174 }
1175 
1176 static bool
mp_millerrabin(mpz_srcptr n,mpz_srcptr nm1,mpz_ptr x,mpz_ptr y,mpz_srcptr q,mp_bitcnt_t k)1177 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1178                 mpz_srcptr q, mp_bitcnt_t k)
1179 {
1180   mpz_powm (y, x, q, n);
1181 
1182   if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1183     return true;
1184 
1185   for (mp_bitcnt_t i = 1; i < k; i++)
1186     {
1187       mpz_powm_ui (y, y, 2, n);
1188       if (mpz_cmp (y, nm1) == 0)
1189         return true;
1190       if (mpz_cmp_ui (y, 1) == 0)
1191         return false;
1192     }
1193   return false;
1194 }
1195 
1196 /* Lucas' prime test.  The number of iterations vary greatly, up to a few dozen
1197    have been observed.  The average seem to be about 2.  */
1198 static bool ATTRIBUTE_PURE
prime_p(uintmax_t n)1199 prime_p (uintmax_t n)
1200 {
1201   mp_bitcnt_t k;
1202   bool is_prime;
1203   uintmax_t a_prim, one, ni;
1204   struct factors factors;
1205 
1206   if (n <= 1)
1207     return false;
1208 
1209   /* We have already cast out small primes.  */
1210   if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1211     return true;
1212 
1213   /* Precomputation for Miller-Rabin.  */
1214   uintmax_t q = n - 1;
1215   for (k = 0; (q & 1) == 0; k++)
1216     q >>= 1;
1217 
1218   uintmax_t a = 2;
1219   binv (ni, n);                 /* ni <- 1/n mod B */
1220   redcify (one, 1, n);
1221   addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1222 
1223   /* Perform a Miller-Rabin test, finds most composites quickly.  */
1224   if (!millerrabin (n, ni, a_prim, q, k, one))
1225     return false;
1226 
1227   if (flag_prove_primality)
1228     {
1229       /* Factor n-1 for Lucas.  */
1230       factor (0, n - 1, &factors);
1231     }
1232 
1233   /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1234      number composite.  */
1235   for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1236     {
1237       if (flag_prove_primality)
1238         {
1239           is_prime = true;
1240           for (int i = 0; i < factors.nfactors && is_prime; i++)
1241             {
1242               is_prime
1243                 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1244             }
1245         }
1246       else
1247         {
1248           /* After enough Miller-Rabin runs, be content.  */
1249           is_prime = (r == MR_REPS - 1);
1250         }
1251 
1252       if (is_prime)
1253         return true;
1254 
1255       a += primes_diff[r];      /* Establish new base.  */
1256 
1257       /* The following is equivalent to redcify (a_prim, a, n).  It runs faster
1258          on most processors, since it avoids udiv_qrnnd.  If we go down the
1259          udiv_qrnnd_preinv path, this code should be replaced.  */
1260       {
1261         uintmax_t s1, s0;
1262         umul_ppmm (s1, s0, one, a);
1263         if (LIKELY (s1 == 0))
1264           a_prim = s0 % n;
1265         else
1266           {
1267             MAYBE_UNUSED uintmax_t dummy;
1268             udiv_qrnnd (dummy, a_prim, s1, s0, n);
1269           }
1270       }
1271 
1272       if (!millerrabin (n, ni, a_prim, q, k, one))
1273         return false;
1274     }
1275 
1276   affirm (!"Lucas prime test failure.  This should not happen");
1277 }
1278 
1279 static bool ATTRIBUTE_PURE
prime2_p(uintmax_t n1,uintmax_t n0)1280 prime2_p (uintmax_t n1, uintmax_t n0)
1281 {
1282   uintmax_t q[2], nm1[2];
1283   uintmax_t a_prim[2];
1284   uintmax_t one[2];
1285   uintmax_t na[2];
1286   uintmax_t ni;
1287   int k;
1288   struct factors factors;
1289 
1290   if (n1 == 0)
1291     return prime_p (n0);
1292 
1293   nm1[1] = n1 - (n0 == 0);
1294   nm1[0] = n0 - 1;
1295   if (nm1[0] == 0)
1296     {
1297       count_trailing_zeros (k, nm1[1]);
1298 
1299       q[0] = nm1[1] >> k;
1300       q[1] = 0;
1301       k += W_TYPE_SIZE;
1302     }
1303   else
1304     {
1305       count_trailing_zeros (k, nm1[0]);
1306       rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1307     }
1308 
1309   uintmax_t a = 2;
1310   binv (ni, n0);
1311   redcify2 (one[1], one[0], 1, n1, n0);
1312   addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1313 
1314   /* FIXME: Use scalars or pointers in arguments?  Some consistency needed.  */
1315   na[0] = n0;
1316   na[1] = n1;
1317 
1318   if (!millerrabin2 (na, ni, a_prim, q, k, one))
1319     return false;
1320 
1321   if (flag_prove_primality)
1322     {
1323       /* Factor n-1 for Lucas.  */
1324       factor (nm1[1], nm1[0], &factors);
1325     }
1326 
1327   /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1328      number composite.  */
1329   for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1330     {
1331       bool is_prime;
1332       uintmax_t e[2], y[2];
1333 
1334       if (flag_prove_primality)
1335         {
1336           is_prime = true;
1337           if (factors.plarge[1])
1338             {
1339               uintmax_t pi;
1340               binv (pi, factors.plarge[0]);
1341               e[0] = pi * nm1[0];
1342               e[1] = 0;
1343               y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1344               is_prime = (y[0] != one[0] || y[1] != one[1]);
1345             }
1346           for (int i = 0; i < factors.nfactors && is_prime; i++)
1347             {
1348               /* FIXME: We always have the factor 2.  Do we really need to
1349                  handle it here?  We have done the same powering as part
1350                  of millerrabin.  */
1351               if (factors.p[i] == 2)
1352                 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1353               else
1354                 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1355               y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1356               is_prime = (y[0] != one[0] || y[1] != one[1]);
1357             }
1358         }
1359       else
1360         {
1361           /* After enough Miller-Rabin runs, be content.  */
1362           is_prime = (r == MR_REPS - 1);
1363         }
1364 
1365       if (is_prime)
1366         return true;
1367 
1368       a += primes_diff[r];      /* Establish new base.  */
1369       redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1370 
1371       if (!millerrabin2 (na, ni, a_prim, q, k, one))
1372         return false;
1373     }
1374 
1375   affirm (!"Lucas prime test failure.  This should not happen");
1376 }
1377 
1378 static bool
mp_prime_p(mpz_t n)1379 mp_prime_p (mpz_t n)
1380 {
1381   bool is_prime;
1382   mpz_t q, a, nm1, tmp;
1383   struct mp_factors factors;
1384 
1385   if (mpz_cmp_ui (n, 1) <= 0)
1386     return false;
1387 
1388   /* We have already cast out small primes.  */
1389   if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1390     return true;
1391 
1392   mpz_inits (q, a, nm1, tmp, nullptr);
1393 
1394   /* Precomputation for Miller-Rabin.  */
1395   mpz_sub_ui (nm1, n, 1);
1396 
1397   /* Find q and k, where q is odd and n = 1 + 2**k * q.  */
1398   mp_bitcnt_t k = mpz_scan1 (nm1, 0);
1399   mpz_tdiv_q_2exp (q, nm1, k);
1400 
1401   mpz_set_ui (a, 2);
1402 
1403   /* Perform a Miller-Rabin test, finds most composites quickly.  */
1404   if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1405     {
1406       is_prime = false;
1407       goto ret2;
1408     }
1409 
1410   if (flag_prove_primality)
1411     {
1412       /* Factor n-1 for Lucas.  */
1413       mpz_set (tmp, nm1);
1414       mp_factor (tmp, &factors);
1415     }
1416 
1417   /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1418      number composite.  */
1419   for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1420     {
1421       if (flag_prove_primality)
1422         {
1423           is_prime = true;
1424           for (idx_t i = 0; i < factors.nfactors && is_prime; i++)
1425             {
1426               mpz_divexact (tmp, nm1, factors.p[i]);
1427               mpz_powm (tmp, a, tmp, n);
1428               is_prime = mpz_cmp_ui (tmp, 1) != 0;
1429             }
1430         }
1431       else
1432         {
1433           /* After enough Miller-Rabin runs, be content.  */
1434           is_prime = (r == MR_REPS - 1);
1435         }
1436 
1437       if (is_prime)
1438         goto ret1;
1439 
1440       mpz_add_ui (a, a, primes_diff[r]);        /* Establish new base.  */
1441 
1442       if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1443         {
1444           is_prime = false;
1445           goto ret1;
1446         }
1447     }
1448 
1449   affirm (!"Lucas prime test failure.  This should not happen");
1450 
1451  ret1:
1452   if (flag_prove_primality)
1453     mp_factor_clear (&factors);
1454  ret2:
1455   mpz_clears (q, a, nm1, tmp, nullptr);
1456 
1457   return is_prime;
1458 }
1459 
1460 static void
factor_using_pollard_rho(uintmax_t n,unsigned long int a,struct factors * factors)1461 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1462                           struct factors *factors)
1463 {
1464   uintmax_t x, z, y, P, t, ni, g;
1465 
1466   unsigned long int k = 1;
1467   unsigned long int l = 1;
1468 
1469   redcify (P, 1, n);
1470   addmod (x, P, P, n);          /* i.e., redcify(2) */
1471   y = z = x;
1472 
1473   while (n != 1)
1474     {
1475       affirm (a < n);
1476 
1477       binv (ni, n);             /* FIXME: when could we use old 'ni' value?  */
1478 
1479       for (;;)
1480         {
1481           do
1482             {
1483               x = mulredc (x, x, n, ni);
1484               addmod (x, x, a, n);
1485 
1486               submod (t, z, x, n);
1487               P = mulredc (P, t, n, ni);
1488 
1489               if (k % 32 == 1)
1490                 {
1491                   if (gcd_odd (P, n) != 1)
1492                     goto factor_found;
1493                   y = x;
1494                 }
1495             }
1496           while (--k != 0);
1497 
1498           z = x;
1499           k = l;
1500           l = 2 * l;
1501           for (unsigned long int i = 0; i < k; i++)
1502             {
1503               x = mulredc (x, x, n, ni);
1504               addmod (x, x, a, n);
1505             }
1506           y = x;
1507         }
1508 
1509     factor_found:
1510       do
1511         {
1512           y = mulredc (y, y, n, ni);
1513           addmod (y, y, a, n);
1514 
1515           submod (t, z, y, n);
1516           g = gcd_odd (t, n);
1517         }
1518       while (g == 1);
1519 
1520       if (n == g)
1521         {
1522           /* Found n itself as factor.  Restart with different params.  */
1523           factor_using_pollard_rho (n, a + 1, factors);
1524           return;
1525         }
1526 
1527       n = n / g;
1528 
1529       if (!prime_p (g))
1530         factor_using_pollard_rho (g, a + 1, factors);
1531       else
1532         factor_insert (factors, g);
1533 
1534       if (prime_p (n))
1535         {
1536           factor_insert (factors, n);
1537           break;
1538         }
1539 
1540       x = x % n;
1541       z = z % n;
1542       y = y % n;
1543     }
1544 }
1545 
1546 static void
factor_using_pollard_rho2(uintmax_t n1,uintmax_t n0,unsigned long int a,struct factors * factors)1547 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1548                            struct factors *factors)
1549 {
1550   uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1551 
1552   unsigned long int k = 1;
1553   unsigned long int l = 1;
1554 
1555   redcify2 (P1, P0, 1, n1, n0);
1556   addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1557   y1 = z1 = x1;
1558   y0 = z0 = x0;
1559 
1560   while (n1 != 0 || n0 != 1)
1561     {
1562       binv (ni, n0);
1563 
1564       for (;;)
1565         {
1566           do
1567             {
1568               x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1569               x1 = r1m;
1570               addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1571 
1572               submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1573               P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1574               P1 = r1m;
1575 
1576               if (k % 32 == 1)
1577                 {
1578                   g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1579                   if (g1 != 0 || g0 != 1)
1580                     goto factor_found;
1581                   y1 = x1; y0 = x0;
1582                 }
1583             }
1584           while (--k != 0);
1585 
1586           z1 = x1; z0 = x0;
1587           k = l;
1588           l = 2 * l;
1589           for (unsigned long int i = 0; i < k; i++)
1590             {
1591               x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1592               x1 = r1m;
1593               addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1594             }
1595           y1 = x1; y0 = x0;
1596         }
1597 
1598     factor_found:
1599       do
1600         {
1601           y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1602           y1 = r1m;
1603           addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1604 
1605           submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1606           g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1607         }
1608       while (g1 == 0 && g0 == 1);
1609 
1610       if (g1 == 0)
1611         {
1612           /* The found factor is one word, and > 1.  */
1613           divexact_21 (n1, n0, n1, n0, g0);     /* n = n / g */
1614 
1615           if (!prime_p (g0))
1616             factor_using_pollard_rho (g0, a + 1, factors);
1617           else
1618             factor_insert (factors, g0);
1619         }
1620       else
1621         {
1622           /* The found factor is two words.  This is highly unlikely, thus hard
1623              to trigger.  Please be careful before you change this code!  */
1624           uintmax_t ginv;
1625 
1626           if (n1 == g1 && n0 == g0)
1627             {
1628               /* Found n itself as factor.  Restart with different params.  */
1629               factor_using_pollard_rho2 (n1, n0, a + 1, factors);
1630               return;
1631             }
1632 
1633           /* Compute n = n / g.  Since the result will fit one word,
1634              we can compute the quotient modulo B, ignoring the high
1635              divisor word.  */
1636           binv (ginv, g0);
1637           n0 = ginv * n0;
1638           n1 = 0;
1639 
1640           if (!prime2_p (g1, g0))
1641             factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1642           else
1643             factor_insert_large (factors, g1, g0);
1644         }
1645 
1646       if (n1 == 0)
1647         {
1648           if (prime_p (n0))
1649             {
1650               factor_insert (factors, n0);
1651               break;
1652             }
1653 
1654           factor_using_pollard_rho (n0, a, factors);
1655           return;
1656         }
1657 
1658       if (prime2_p (n1, n0))
1659         {
1660           factor_insert_large (factors, n1, n0);
1661           break;
1662         }
1663 
1664       x0 = mod2 (&x1, x1, x0, n1, n0);
1665       z0 = mod2 (&z1, z1, z0, n1, n0);
1666       y0 = mod2 (&y1, y1, y0, n1, n0);
1667     }
1668 }
1669 
1670 static void
mp_factor_using_pollard_rho(mpz_t n,unsigned long int a,struct mp_factors * factors)1671 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1672                              struct mp_factors *factors)
1673 {
1674   mpz_t x, z, y, P;
1675   mpz_t t, t2;
1676 
1677   devmsg ("[pollard-rho (%lu)] ", a);
1678 
1679   mpz_inits (t, t2, nullptr);
1680   mpz_init_set_si (y, 2);
1681   mpz_init_set_si (x, 2);
1682   mpz_init_set_si (z, 2);
1683   mpz_init_set_ui (P, 1);
1684 
1685   unsigned long long int k = 1;
1686   unsigned long long int l = 1;
1687 
1688   while (mpz_cmp_ui (n, 1) != 0)
1689     {
1690       for (;;)
1691         {
1692           do
1693             {
1694               mpz_mul (t, x, x);
1695               mpz_mod (x, t, n);
1696               mpz_add_ui (x, x, a);
1697 
1698               mpz_sub (t, z, x);
1699               mpz_mul (t2, P, t);
1700               mpz_mod (P, t2, n);
1701 
1702               if (k % 32 == 1)
1703                 {
1704                   mpz_gcd (t, P, n);
1705                   if (mpz_cmp_ui (t, 1) != 0)
1706                     goto factor_found;
1707                   mpz_set (y, x);
1708                 }
1709             }
1710           while (--k != 0);
1711 
1712           mpz_set (z, x);
1713           k = l;
1714           l = 2 * l;
1715           for (unsigned long long int i = 0; i < k; i++)
1716             {
1717               mpz_mul (t, x, x);
1718               mpz_mod (x, t, n);
1719               mpz_add_ui (x, x, a);
1720             }
1721           mpz_set (y, x);
1722         }
1723 
1724     factor_found:
1725       do
1726         {
1727           mpz_mul (t, y, y);
1728           mpz_mod (y, t, n);
1729           mpz_add_ui (y, y, a);
1730 
1731           mpz_sub (t, z, y);
1732           mpz_gcd (t, t, n);
1733         }
1734       while (mpz_cmp_ui (t, 1) == 0);
1735 
1736       mpz_divexact (n, n, t);   /* divide by t, before t is overwritten */
1737 
1738       if (!mp_prime_p (t))
1739         {
1740           devmsg ("[composite factor--restarting pollard-rho] ");
1741           mp_factor_using_pollard_rho (t, a + 1, factors);
1742         }
1743       else
1744         {
1745           mp_factor_insert (factors, t);
1746         }
1747 
1748       if (mp_prime_p (n))
1749         {
1750           mp_factor_insert (factors, n);
1751           break;
1752         }
1753 
1754       mpz_mod (x, x, n);
1755       mpz_mod (z, z, n);
1756       mpz_mod (y, y, n);
1757     }
1758 
1759   mpz_clears (P, t2, t, z, x, y, nullptr);
1760 }
1761 
1762 #if USE_SQUFOF
1763 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)?  If
1764    algorithm is replaced, consider also returning the remainder.  */
1765 ATTRIBUTE_CONST
1766 static uintmax_t
isqrt(uintmax_t n)1767 isqrt (uintmax_t n)
1768 {
1769   uintmax_t x;
1770   int c;
1771   if (n == 0)
1772     return 0;
1773 
1774   count_leading_zeros (c, n);
1775 
1776   /* Make x > sqrt(n).  This will be invariant through the loop.  */
1777   x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) >> 1);
1778 
1779   for (;;)
1780     {
1781       uintmax_t y = (x + n / x) / 2;
1782       if (y >= x)
1783         return x;
1784 
1785       x = y;
1786     }
1787 }
1788 
1789 ATTRIBUTE_CONST
1790 static uintmax_t
isqrt2(uintmax_t nh,uintmax_t nl)1791 isqrt2 (uintmax_t nh, uintmax_t nl)
1792 {
1793   int shift;
1794   uintmax_t x;
1795 
1796   /* Ensures the remainder fits in an uintmax_t.  */
1797   affirm (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1798 
1799   if (nh == 0)
1800     return isqrt (nl);
1801 
1802   count_leading_zeros (shift, nh);
1803   shift &= ~1;
1804 
1805   /* Make x > sqrt (n).  */
1806   x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1807   x <<= (W_TYPE_SIZE - shift) >> 1;
1808 
1809   /* Do we need more than one iteration?  */
1810   for (;;)
1811     {
1812       MAYBE_UNUSED uintmax_t r;
1813       uintmax_t q, y;
1814       udiv_qrnnd (q, r, nh, nl, x);
1815       y = (x + q) / 2;
1816 
1817       if (y >= x)
1818         {
1819           uintmax_t hi, lo;
1820           umul_ppmm (hi, lo, x + 1, x + 1);
1821           affirm (gt2 (hi, lo, nh, nl));
1822 
1823           umul_ppmm (hi, lo, x, x);
1824           affirm (ge2 (nh, nl, hi, lo));
1825           sub_ddmmss (hi, lo, nh, nl, hi, lo);
1826           affirm (hi == 0);
1827 
1828           return x;
1829         }
1830 
1831       x = y;
1832     }
1833 }
1834 
1835 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N.  */
1836 # define MAGIC64 0x0202021202030213ULL
1837 # define MAGIC63 0x0402483012450293ULL
1838 # define MAGIC65 0x218a019866014613ULL
1839 # define MAGIC11 0x23b
1840 
1841 /* Return the square root if the input is a square, otherwise 0.  */
1842 ATTRIBUTE_CONST
1843 static uintmax_t
is_square(uintmax_t x)1844 is_square (uintmax_t x)
1845 {
1846   /* Uses the tests suggested by Cohen.  Excludes 99% of the non-squares before
1847      computing the square root.  */
1848   if (((MAGIC64 >> (x & 63)) & 1)
1849       && ((MAGIC63 >> (x % 63)) & 1)
1850       /* Both 0 and 64 are squares mod (65).  */
1851       && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1852       && ((MAGIC11 >> (x % 11) & 1)))
1853     {
1854       uintmax_t r = isqrt (x);
1855       if (r * r == x)
1856         return r;
1857     }
1858   return 0;
1859 }
1860 
1861 /* invtab[i] = floor (0x10000 / (0x100 + i) */
1862 static short const invtab[0x81] =
1863   {
1864     0x200,
1865     0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1866     0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1867     0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1868     0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1869     0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1870     0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1871     0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1872     0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1873     0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1874     0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1875     0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1876     0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1877     0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1878     0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1879     0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1880     0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1881   };
1882 
1883 /* Compute q = [u/d], r = u mod d.  Avoids slow hardware division for the case
1884    that q < 0x40; here it instead uses a table of (Euclidean) inverses.  */
1885 # define div_smallq(q, r, u, d)                                          \
1886   do {                                                                  \
1887     if ((u) / 0x40 < (d))                                               \
1888       {                                                                 \
1889         int _cnt;                                                       \
1890         uintmax_t _dinv, _mask, _q, _r;                                 \
1891         count_leading_zeros (_cnt, (d));                                \
1892         _r = (u);                                                       \
1893         if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8)))                        \
1894           {                                                             \
1895             _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80];   \
1896             _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt);                \
1897           }                                                             \
1898         else                                                            \
1899           {                                                             \
1900             _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f];   \
1901             _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11;        \
1902           }                                                             \
1903         _r -= _q * (d);                                                 \
1904                                                                         \
1905         _mask = -(uintmax_t) (_r >= (d));                               \
1906         (r) = _r - (_mask & (d));                                       \
1907         (q) = _q - _mask;                                               \
1908         affirm ((q) * (d) + (r) == u);					\
1909       }                                                                 \
1910     else                                                                \
1911       {                                                                 \
1912         uintmax_t _q = (u) / (d);                                       \
1913         (r) = (u) - _q * (d);                                           \
1914         (q) = _q;                                                       \
1915       }                                                                 \
1916   } while (0)
1917 
1918 /* Notes: Example N = 22117019.  After first phase we find Q1 = 6314, Q
1919    = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025),
1920    with 3025 = 55^2.
1921 
1922    Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1923    representing G_0 = (-55, 2 * 4652, 8653).
1924 
1925    In the notation of the paper:
1926 
1927    S_{-1} = 55, S_0 = 8653, R_0 = 4652
1928 
1929    Put
1930 
1931      t_0 = floor([q_0 + R_0] / S0) = 1
1932      R_1 = t_0 * S_0 - R_0 = 4001
1933      S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1934 */
1935 
1936 /* Multipliers, in order of efficiency:
1937    0.7268  3*5*7*11 = 1155 = 3 (mod 4)
1938    0.7317  3*5*7    =  105 = 1
1939    0.7820  3*5*11   =  165 = 1
1940    0.7872  3*5      =   15 = 3
1941    0.8101  3*7*11   =  231 = 3
1942    0.8155  3*7      =   21 = 1
1943    0.8284  5*7*11   =  385 = 1
1944    0.8339  5*7      =   35 = 3
1945    0.8716  3*11     =   33 = 1
1946    0.8774  3        =    3 = 3
1947    0.8913  5*11     =   55 = 3
1948    0.8972  5        =    5 = 1
1949    0.9233  7*11     =   77 = 1
1950    0.9295  7        =    7 = 3
1951    0.9934  11       =   11 = 3
1952 */
1953 # define QUEUE_SIZE 50
1954 #endif
1955 
1956 #if STAT_SQUFOF
1957 # define Q_FREQ_SIZE 50
1958 /* Element 0 keeps the total */
1959 static int q_freq[Q_FREQ_SIZE + 1];
1960 #endif
1961 
1962 #if USE_SQUFOF
1963 /* Return true on success.  Expected to fail only for numbers
1964    >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit.  */
1965 static bool
factor_using_squfof(uintmax_t n1,uintmax_t n0,struct factors * factors)1966 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1967 {
1968   /* Uses algorithm and notation from
1969 
1970      SQUARE FORM FACTORIZATION
1971      JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1972 
1973      https://homes.cerias.purdue.edu/~ssw/squfof.pdf
1974    */
1975 
1976   static short const multipliers_1[] =
1977     { /* = 1 (mod 4) */
1978       105, 165, 21, 385, 33, 5, 77, 1, 0
1979     };
1980   static short const multipliers_3[] =
1981     { /* = 3 (mod 4) */
1982       1155, 15, 231, 35, 3, 55, 7, 11, 0
1983     };
1984 
1985   struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1986 
1987   if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1988     return false;
1989 
1990   uintmax_t sqrt_n = isqrt2 (n1, n0);
1991 
1992   if (n0 == sqrt_n * sqrt_n)
1993     {
1994       uintmax_t p1, p0;
1995 
1996       umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1997       affirm (p0 == n0);
1998 
1999       if (n1 == p1)
2000         {
2001           if (prime_p (sqrt_n))
2002             factor_insert_multiplicity (factors, sqrt_n, 2);
2003           else
2004             {
2005               struct factors f;
2006 
2007               f.nfactors = 0;
2008               if (!factor_using_squfof (0, sqrt_n, &f))
2009                 {
2010                   /* Try pollard rho instead */
2011                   factor_using_pollard_rho (sqrt_n, 1, &f);
2012                 }
2013               /* Duplicate the new factors */
2014               for (unsigned int i = 0; i < f.nfactors; i++)
2015                 factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]);
2016             }
2017           return true;
2018         }
2019     }
2020 
2021   /* Select multipliers so we always get n * mu = 3 (mod 4) */
2022   for (short const *m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2023        *m; m++)
2024     {
2025       uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2026       unsigned int i;
2027       unsigned int mu = *m;
2028       int qpos = 0;
2029 
2030       affirm (mu * n0 % 4 == 3);
2031 
2032       /* In the notation of the paper, with mu * n == 3 (mod 4), we
2033          get \Delta = 4 mu * n, and the paper's \mu is 2 mu.  As far as
2034          I understand it, the necessary bound is 4 \mu^3 < n, or 32
2035          mu^3 < n.
2036 
2037          However, this seems insufficient: With n = 37243139 and mu =
2038          105, we get a trivial factor, from the square 38809 = 197^2,
2039          without any corresponding Q earlier in the iteration.
2040 
2041          Requiring 64 mu^3 < n seems sufficient.  */
2042       if (n1 == 0)
2043         {
2044           if ((uintmax_t) mu * mu * mu >= n0 / 64)
2045             continue;
2046         }
2047       else
2048         {
2049           if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2050             continue;
2051         }
2052       umul_ppmm (Dh, Dl, n0, mu);
2053       Dh += n1 * mu;
2054 
2055       affirm (Dl % 4 != 1);
2056       affirm (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2057 
2058       S = isqrt2 (Dh, Dl);
2059 
2060       Q1 = 1;
2061       P = S;
2062 
2063       /* Square root remainder fits in one word, so ignore high part.  */
2064       Q = Dl - P * P;
2065       /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))?  */
2066       L = isqrt (2 * S);
2067       B = 2 * L;
2068       L1 = mu * 2 * L;
2069 
2070       /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2071          4 D.  */
2072 
2073       for (i = 0; i <= B; i++)
2074         {
2075           uintmax_t q, P1, t, rem;
2076 
2077           div_smallq (q, rem, S + P, Q);
2078           P1 = S - rem; /* P1 = q*Q - P */
2079 
2080           affirm (q > 0 && Q > 0);
2081 
2082 # if STAT_SQUFOF
2083           q_freq[0]++;
2084           q_freq[MIN (q, Q_FREQ_SIZE)]++;
2085 # endif
2086 
2087           if (Q <= L1)
2088             {
2089               uintmax_t g = Q;
2090 
2091               if ((Q & 1) == 0)
2092                 g /= 2;
2093 
2094               g /= gcd_odd (g, mu);
2095 
2096               if (g <= L)
2097                 {
2098                   if (qpos >= QUEUE_SIZE)
2099                     error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2100                   queue[qpos].Q = g;
2101                   queue[qpos].P = P % g;
2102                   qpos++;
2103                 }
2104             }
2105 
2106           /* I think the difference can be either sign, but mod
2107              2^W_TYPE_SIZE arithmetic should be fine.  */
2108           t = Q1 + q * (P - P1);
2109           Q1 = Q;
2110           Q = t;
2111           P = P1;
2112 
2113           if ((i & 1) == 0)
2114             {
2115               uintmax_t r = is_square (Q);
2116               if (r)
2117                 {
2118                   for (int j = 0; j < qpos; j++)
2119                     {
2120                       if (queue[j].Q == r)
2121                         {
2122                           if (r == 1)
2123                             /* Traversed entire cycle.  */
2124                             goto next_multiplier;
2125 
2126                           /* Need the absolute value for divisibility test.  */
2127                           if (P >= queue[j].P)
2128                             t = P - queue[j].P;
2129                           else
2130                             t = queue[j].P - P;
2131                           if (t % r == 0)
2132                             {
2133                               /* Delete entries up to and including entry
2134                                  j, which matched.  */
2135                               memmove (queue, queue + j + 1,
2136                                        (qpos - j - 1) * sizeof (queue[0]));
2137                               qpos -= (j + 1);
2138                             }
2139                           goto next_i;
2140                         }
2141                     }
2142 
2143                   /* We have found a square form, which should give a
2144                      factor.  */
2145                   Q1 = r;
2146                   affirm (S >= P); /* What signs are possible?  */
2147                   P += r * ((S - P) / r);
2148 
2149                   /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2150                      for the case D = 2N.  */
2151                   /* Compute Q = (D - P*P) / Q1, but we need double
2152                      precision.  */
2153                   uintmax_t hi, lo;
2154                   umul_ppmm (hi, lo, P, P);
2155                   sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2156                   udiv_qrnnd (Q, rem, hi, lo, Q1);
2157                   affirm (rem == 0);
2158 
2159                   for (;;)
2160                     {
2161                       /* Note: There appears to by a typo in the paper,
2162                          Step 4a in the algorithm description says q <--
2163                          floor([S+P]/\hat Q), but looking at the equations
2164                          in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2165                          (In this code, \hat Q is Q1).  */
2166                       div_smallq (q, rem, S + P, Q);
2167                       P1 = S - rem;     /* P1 = q*Q - P */
2168 
2169 # if STAT_SQUFOF
2170                       q_freq[0]++;
2171                       q_freq[MIN (q, Q_FREQ_SIZE)]++;
2172 # endif
2173                       if (P == P1)
2174                         break;
2175                       t = Q1 + q * (P - P1);
2176                       Q1 = Q;
2177                       Q = t;
2178                       P = P1;
2179                     }
2180 
2181                   if ((Q & 1) == 0)
2182                     Q /= 2;
2183                   Q /= gcd_odd (Q, mu);
2184 
2185                   affirm (Q > 1 && (n1 || Q < n0));
2186 
2187                   if (prime_p (Q))
2188                     factor_insert (factors, Q);
2189                   else if (!factor_using_squfof (0, Q, factors))
2190                     factor_using_pollard_rho (Q, 2, factors);
2191 
2192                   divexact_21 (n1, n0, n1, n0, Q);
2193 
2194                   if (prime2_p (n1, n0))
2195                     factor_insert_large (factors, n1, n0);
2196                   else
2197                     {
2198                       if (!factor_using_squfof (n1, n0, factors))
2199                         {
2200                           if (n1 == 0)
2201                             factor_using_pollard_rho (n0, 1, factors);
2202                           else
2203                             factor_using_pollard_rho2 (n1, n0, 1, factors);
2204                         }
2205                     }
2206 
2207                   return true;
2208                 }
2209             }
2210         next_i:;
2211         }
2212     next_multiplier:;
2213     }
2214   return false;
2215 }
2216 #endif
2217 
2218 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2219    results in FACTORS.  */
2220 static void
factor(uintmax_t t1,uintmax_t t0,struct factors * factors)2221 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2222 {
2223   factors->nfactors = 0;
2224   factors->plarge[1] = 0;
2225 
2226   if (t1 == 0 && t0 < 2)
2227     return;
2228 
2229   t0 = factor_using_division (&t1, t1, t0, factors);
2230 
2231   if (t1 == 0 && t0 < 2)
2232     return;
2233 
2234   if (prime2_p (t1, t0))
2235     factor_insert_large (factors, t1, t0);
2236   else
2237     {
2238 #if USE_SQUFOF
2239       if (factor_using_squfof (t1, t0, factors))
2240         return;
2241 #endif
2242 
2243       if (t1 == 0)
2244         factor_using_pollard_rho (t0, 1, factors);
2245       else
2246         factor_using_pollard_rho2 (t1, t0, 1, factors);
2247     }
2248 }
2249 
2250 /* Use Pollard-rho to compute the prime factors of
2251    arbitrary-precision T, and put the results in FACTORS.  */
2252 static void
mp_factor(mpz_t t,struct mp_factors * factors)2253 mp_factor (mpz_t t, struct mp_factors *factors)
2254 {
2255   mp_factor_init (factors);
2256 
2257   if (mpz_sgn (t) != 0)
2258     {
2259       mp_factor_using_division (t, factors);
2260 
2261       if (mpz_cmp_ui (t, 1) != 0)
2262         {
2263           devmsg ("[is number prime?] ");
2264           if (mp_prime_p (t))
2265             mp_factor_insert (factors, t);
2266           else
2267             mp_factor_using_pollard_rho (t, 1, factors);
2268         }
2269     }
2270 }
2271 
2272 static strtol_error
strto2uintmax(uintmax_t * hip,uintmax_t * lop,char const * s)2273 strto2uintmax (uintmax_t *hip, uintmax_t *lop, char const *s)
2274 {
2275   int lo_carry;
2276   uintmax_t hi = 0, lo = 0;
2277 
2278   strtol_error err = LONGINT_INVALID;
2279 
2280   /* Initial scan for invalid digits.  */
2281   char const *p = s;
2282   for (;;)
2283     {
2284       unsigned char c = *p++;
2285       if (c == 0)
2286         break;
2287 
2288       if (UNLIKELY (!ISDIGIT (c)))
2289         {
2290           err = LONGINT_INVALID;
2291           break;
2292         }
2293 
2294       err = LONGINT_OK;           /* we've seen at least one valid digit */
2295     }
2296 
2297   while (err == LONGINT_OK)
2298     {
2299       unsigned char c = *s++;
2300       if (c == 0)
2301         break;
2302 
2303       c -= '0';
2304 
2305       if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2306         {
2307           err = LONGINT_OVERFLOW;
2308           break;
2309         }
2310       hi = 10 * hi;
2311 
2312       lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2313       lo_carry += 10 * lo < 2 * lo;
2314 
2315       lo = 10 * lo;
2316       lo += c;
2317 
2318       lo_carry += lo < c;
2319       hi += lo_carry;
2320       if (UNLIKELY (hi < lo_carry))
2321         {
2322           err = LONGINT_OVERFLOW;
2323           break;
2324         }
2325     }
2326 
2327   *hip = hi;
2328   *lop = lo;
2329 
2330   return err;
2331 }
2332 
2333 /* Structure and routines for buffering and outputting full lines,
2334    to support parallel operation efficiently.  */
2335 static struct lbuf_
2336 {
2337   char *buf;
2338   char *end;
2339 } lbuf;
2340 
2341 /* 512 is chosen to give good performance,
2342    and also is the max guaranteed size that
2343    consumers can read atomically through pipes.
2344    Also it's big enough to cater for max line length
2345    even with 128 bit uintmax_t.  */
2346 #define FACTOR_PIPE_BUF 512
2347 
2348 static void
lbuf_alloc(void)2349 lbuf_alloc (void)
2350 {
2351   if (lbuf.buf)
2352     return;
2353 
2354   /* Double to ensure enough space for
2355      previous numbers + next number.  */
2356   lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2);
2357   lbuf.end = lbuf.buf;
2358 }
2359 
2360 /* Write complete LBUF to standard output.  */
2361 static void
lbuf_flush(void)2362 lbuf_flush (void)
2363 {
2364   size_t size = lbuf.end - lbuf.buf;
2365   if (full_write (STDOUT_FILENO, lbuf.buf, size) != size)
2366     write_error ();
2367   lbuf.end = lbuf.buf;
2368 }
2369 
2370 /* Add a character C to LBUF and if it's a newline
2371    and enough bytes are already buffered,
2372    then write atomically to standard output.  */
2373 static void
lbuf_putc(char c)2374 lbuf_putc (char c)
2375 {
2376   *lbuf.end++ = c;
2377 
2378   if (c == '\n')
2379     {
2380       size_t buffered = lbuf.end - lbuf.buf;
2381 
2382       /* Provide immediate output for interactive use.  */
2383       static int line_buffered = -1;
2384       if (line_buffered == -1)
2385         line_buffered = isatty (STDIN_FILENO) || isatty (STDOUT_FILENO);
2386       if (line_buffered)
2387         lbuf_flush ();
2388       else if (buffered >= FACTOR_PIPE_BUF)
2389         {
2390           /* Write output in <= PIPE_BUF chunks
2391              so consumers can read atomically.  */
2392           char const *tend = lbuf.end;
2393 
2394           /* Since a umaxint_t's factors must fit in 512
2395              we're guaranteed to find a newline here.  */
2396           char *tlend = lbuf.buf + FACTOR_PIPE_BUF;
2397           while (*--tlend != '\n');
2398           tlend++;
2399 
2400           lbuf.end = tlend;
2401           lbuf_flush ();
2402 
2403           /* Buffer the remainder.  */
2404           memcpy (lbuf.buf, tlend, tend - tlend);
2405           lbuf.end = lbuf.buf + (tend - tlend);
2406         }
2407     }
2408 }
2409 
2410 /* Buffer an int to the internal LBUF.  */
2411 static void
lbuf_putint(uintmax_t i,size_t min_width)2412 lbuf_putint (uintmax_t i, size_t min_width)
2413 {
2414   char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2415   char const *umaxstr = umaxtostr (i, buf);
2416   size_t width = sizeof (buf) - (umaxstr - buf) - 1;
2417   size_t z = width;
2418 
2419   for (; z < min_width; z++)
2420     *lbuf.end++ = '0';
2421 
2422   memcpy (lbuf.end, umaxstr, width);
2423   lbuf.end += width;
2424 }
2425 
2426 static void
print_uintmaxes(uintmax_t t1,uintmax_t t0)2427 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2428 {
2429   uintmax_t q, r;
2430 
2431   if (t1 == 0)
2432     lbuf_putint (t0, 0);
2433   else
2434     {
2435       /* Use very plain code here since it seems hard to write fast code
2436          without assuming a specific word size.  */
2437       q = t1 / 1000000000;
2438       r = t1 % 1000000000;
2439       udiv_qrnnd (t0, r, r, t0, 1000000000);
2440       print_uintmaxes (q, t0);
2441       lbuf_putint (r, 9);
2442     }
2443 }
2444 
2445 /* Single-precision factoring */
2446 static void
print_factors_single(uintmax_t t1,uintmax_t t0)2447 print_factors_single (uintmax_t t1, uintmax_t t0)
2448 {
2449   struct factors factors;
2450 
2451   print_uintmaxes (t1, t0);
2452   lbuf_putc (':');
2453 
2454   factor (t1, t0, &factors);
2455 
2456   for (int j = 0; j < factors.nfactors; j++)
2457     for (int k = 0; k < factors.e[j]; k++)
2458       {
2459         lbuf_putc (' ');
2460         print_uintmaxes (0, factors.p[j]);
2461         if (print_exponents && factors.e[j] > 1)
2462           {
2463             lbuf_putc ('^');
2464             lbuf_putint (factors.e[j], 0);
2465             break;
2466           }
2467       }
2468 
2469   if (factors.plarge[1])
2470     {
2471       lbuf_putc (' ');
2472       print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2473     }
2474 
2475   lbuf_putc ('\n');
2476 }
2477 
2478 /* Emit the factors of the indicated number.  If we have the option of using
2479    either algorithm, we select on the basis of the length of the number.
2480    For longer numbers, we prefer the MP algorithm even if the native algorithm
2481    has enough digits, because the algorithm is better.  The turnover point
2482    depends on the value.  */
2483 static bool
print_factors(char const * input)2484 print_factors (char const *input)
2485 {
2486   /* Skip initial spaces and '+'.  */
2487   char const *str = input;
2488   while (*str == ' ')
2489     str++;
2490   str += *str == '+';
2491 
2492   uintmax_t t1, t0;
2493 
2494   /* Try converting the number to one or two words.  If it fails, use GMP or
2495      print an error message.  The 2nd condition checks that the most
2496      significant bit of the two-word number is clear, in a typesize neutral
2497      way.  */
2498   strtol_error err = strto2uintmax (&t1, &t0, str);
2499 
2500   switch (err)
2501     {
2502     case LONGINT_OK:
2503       if (((t1 << 1) >> 1) == t1)
2504         {
2505           devmsg ("[using single-precision arithmetic] ");
2506           print_factors_single (t1, t0);
2507           return true;
2508         }
2509       break;
2510 
2511     case LONGINT_OVERFLOW:
2512       /* Try GMP.  */
2513       break;
2514 
2515     default:
2516       error (0, 0, _("%s is not a valid positive integer"), quote (input));
2517       return false;
2518     }
2519 
2520   devmsg ("[using arbitrary-precision arithmetic] ");
2521   mpz_t t;
2522   struct mp_factors factors;
2523 
2524   mpz_init_set_str (t, str, 10);
2525 
2526   mpz_out_str (stdout, 10, t);
2527   putchar (':');
2528   mp_factor (t, &factors);
2529 
2530   for (idx_t j = 0; j < factors.nfactors; j++)
2531     for (unsigned long int k = 0; k < factors.e[j]; k++)
2532       {
2533         putchar (' ');
2534         mpz_out_str (stdout, 10, factors.p[j]);
2535         if (print_exponents && factors.e[j] > 1)
2536           {
2537             printf ("^%lu", factors.e[j]);
2538             break;
2539           }
2540       }
2541 
2542   mp_factor_clear (&factors);
2543   mpz_clear (t);
2544   putchar ('\n');
2545   fflush (stdout);
2546   return true;
2547 }
2548 
2549 void
usage(int status)2550 usage (int status)
2551 {
2552   if (status != EXIT_SUCCESS)
2553     emit_try_help ();
2554   else
2555     {
2556       printf (_("\
2557 Usage: %s [OPTION] [NUMBER]...\n\
2558 "),
2559               program_name);
2560       fputs (_("\
2561 Print the prime factors of each specified integer NUMBER.  If none\n\
2562 are specified on the command line, read them from standard input.\n\
2563 \n\
2564 "), stdout);
2565       fputs ("\
2566   -h, --exponents   print repeated factors in form p^e unless e is 1\n\
2567 ", stdout);
2568       fputs (HELP_OPTION_DESCRIPTION, stdout);
2569       fputs (VERSION_OPTION_DESCRIPTION, stdout);
2570       emit_ancillary_info (PROGRAM_NAME);
2571     }
2572   exit (status);
2573 }
2574 
2575 static bool
do_stdin(void)2576 do_stdin (void)
2577 {
2578   bool ok = true;
2579   token_buffer tokenbuffer;
2580 
2581   init_tokenbuffer (&tokenbuffer);
2582 
2583   while (true)
2584     {
2585       size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2586                                        &tokenbuffer);
2587       if (token_length == (size_t) -1)
2588         {
2589           if (ferror (stdin))
2590             error (EXIT_FAILURE, errno, _("error reading input"));
2591           break;
2592         }
2593 
2594       ok &= print_factors (tokenbuffer.buffer);
2595     }
2596   free (tokenbuffer.buffer);
2597 
2598   return ok;
2599 }
2600 
2601 int
main(int argc,char ** argv)2602 main (int argc, char **argv)
2603 {
2604   initialize_main (&argc, &argv);
2605   set_program_name (argv[0]);
2606   setlocale (LC_ALL, "");
2607   bindtextdomain (PACKAGE, LOCALEDIR);
2608   textdomain (PACKAGE);
2609 
2610   lbuf_alloc ();
2611   atexit (close_stdout);
2612   atexit (lbuf_flush);
2613 
2614   int c;
2615   while ((c = getopt_long (argc, argv, "h", long_options, nullptr)) != -1)
2616     {
2617       switch (c)
2618         {
2619         case 'h':  /* NetBSD used -h for this functionality first.  */
2620           print_exponents = true;
2621           break;
2622 
2623         case DEV_DEBUG_OPTION:
2624           dev_debug = true;
2625           break;
2626 
2627         case_GETOPT_HELP_CHAR;
2628 
2629         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2630 
2631         default:
2632           usage (EXIT_FAILURE);
2633         }
2634     }
2635 
2636 #if STAT_SQUFOF
2637   memset (q_freq, 0, sizeof (q_freq));
2638 #endif
2639 
2640   bool ok;
2641   if (argc <= optind)
2642     ok = do_stdin ();
2643   else
2644     {
2645       ok = true;
2646       for (int i = optind; i < argc; i++)
2647         if (! print_factors (argv[i]))
2648           ok = false;
2649     }
2650 
2651 #if STAT_SQUFOF
2652   if (q_freq[0] > 0)
2653     {
2654       double acc_f;
2655       printf ("q  freq.  cum. freq.(total: %d)\n", q_freq[0]);
2656       for (int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2657         {
2658           double f = (double) q_freq[i] / q_freq[0];
2659           acc_f += f;
2660           printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2661                   100.0 * f, 100.0 * acc_f);
2662         }
2663     }
2664 #endif
2665 
2666   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
2667 }
2668