00001
00002
00003
00004
#include "pch.h"
00005
00006
#ifndef CRYPTOPP_IMPORTS
00007
00008
#include "integer.h"
00009
#include "modarith.h"
00010
#include "nbtheory.h"
00011
#include "asn.h"
00012
#include "oids.h"
00013
#include "words.h"
00014
#include "algparam.h"
00015
#include "pubkey.h"
00016
#include "sha.h"
00017
00018
#include <iostream>
00019
00020
#ifdef SSE2_INTRINSICS_AVAILABLE
00021
#ifdef __GNUC__
00022
#include <xmmintrin.h>
00023
#include <signal.h>
00024
#include <setjmp.h>
00025
#ifdef CRYPTOPP_MEMALIGN_AVAILABLE
00026
#include <malloc.h>
00027
#else
00028
#include <stdlib.h>
00029
#endif
00030
#else
00031
#include <emmintrin.h>
00032
#endif
00033
#elif defined(_MSC_VER) && defined(_M_IX86)
00034
#pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.")
00035
#elif defined(__GNUC__) && defined(__i386__)
00036
#warning "You do not have GCC 3.3 or later, or did not specify -msse2 compiler option, so use of SSE2 intrinsics will be disabled."
00037
#endif
00038
00039 NAMESPACE_BEGIN(CryptoPP)
00040
00041 bool FunctionAssignIntToInteger(const std::type_info &valueType,
void *pInteger, const
void *pInt)
00042 {
00043
if (valueType !=
typeid(
Integer))
00044
return false;
00045 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00046
return true;
00047 }
00048
00049
static const char s_RunAtStartup = (AssignIntToInteger = FunctionAssignIntToInteger, 0);
00050
00051
#ifdef SSE2_INTRINSICS_AVAILABLE
00052
template <
class T>
00053 CPP_TYPENAME AllocatorBase<T>::pointer AlignedAllocator<T>::allocate(size_type n,
const void *)
00054 {
00055 CheckSize(n);
00056
if (n == 0)
00057
return NULL;
00058
if (n >= 4)
00059 {
00060
void *p;
00061
#ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00062
while (!(p = _mm_malloc(
sizeof(T)*n, 16)))
00063
#elif defined(CRYPTOPP_MEMALIGN_AVAILABLE)
00064
while (!(p = memalign(16,
sizeof(T)*n)))
00065
#elif defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16)
00066
while (!(p = malloc(
sizeof(T)*n)))
00067
#else
00068
while (!(p = (byte *)malloc(
sizeof(T)*n + 8)))
00069
#endif
00070
CallNewHandler();
00071
00072
#ifdef CRYPTOPP_NO_ALIGNED_ALLOC
00073
assert(m_pBlock == NULL);
00074 m_pBlock = p;
00075
if (!IsAlignedOn(p, 16))
00076 {
00077 assert(IsAlignedOn(p, 8));
00078 p = (byte *)p + 8;
00079 }
00080
#endif
00081
00082 assert(IsAlignedOn(p, 16));
00083
return (T*)p;
00084 }
00085
return new T[n];
00086 }
00087
00088
template <
class T>
00089
void AlignedAllocator<T>::deallocate(
void *p, size_type n)
00090 {
00091 memset(p, 0, n*
sizeof(T));
00092
if (n >= 4)
00093 {
00094
#ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00095
_mm_free(p);
00096
#elif defined(CRYPTOPP_NO_ALIGNED_ALLOC)
00097
assert(m_pBlock == p || (byte *)m_pBlock+8 == p);
00098 free(m_pBlock);
00099 m_pBlock = NULL;
00100
#else
00101
free(p);
00102
#endif
00103
}
00104
else
00105
delete [] (T *)p;
00106 }
00107
#endif
00108
00109
static int Compare(
const word *A,
const word *B,
unsigned int N)
00110 {
00111
while (N--)
00112
if (A[N] > B[N])
00113
return 1;
00114
else if (A[N] < B[N])
00115
return -1;
00116
00117
return 0;
00118 }
00119
00120
static word Increment(word *A,
unsigned int N, word B=1)
00121 {
00122 assert(N);
00123 word t = A[0];
00124 A[0] = t+B;
00125
if (A[0] >= t)
00126
return 0;
00127
for (
unsigned i=1; i<N; i++)
00128
if (++A[i])
00129
return 0;
00130
return 1;
00131 }
00132
00133
static word Decrement(word *A,
unsigned int N, word B=1)
00134 {
00135 assert(N);
00136 word t = A[0];
00137 A[0] = t-B;
00138
if (A[0] <= t)
00139
return 0;
00140
for (
unsigned i=1; i<N; i++)
00141
if (A[i]--)
00142
return 0;
00143
return 1;
00144 }
00145
00146
static void TwosComplement(word *A,
unsigned int N)
00147 {
00148 Decrement(A, N);
00149
for (
unsigned i=0; i<N; i++)
00150 A[i] = ~A[i];
00151 }
00152
00153
static word AtomicInverseModPower2(word A)
00154 {
00155 assert(A%2==1);
00156
00157 word R=A%8;
00158
00159
for (
unsigned i=3; i<WORD_BITS; i*=2)
00160 R = R*(2-R*A);
00161
00162 assert(R*A==1);
00163
return R;
00164 }
00165
00166
00167
00168
class DWord
00169 {
00170
public:
00171 DWord() {}
00172
00173
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00174
explicit DWord(word low)
00175 {
00176 m_whole = low;
00177 }
00178
#else
00179
explicit DWord(word low)
00180 {
00181 m_halfs.low = low;
00182 m_halfs.high = 0;
00183 }
00184
#endif
00185
00186 DWord(word low, word high)
00187 {
00188 m_halfs.low = low;
00189 m_halfs.high = high;
00190 }
00191
00192
static DWord Multiply(word a, word b)
00193 {
00194 DWord r;
00195
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00196
r.m_whole = (dword)a * b;
00197
#elif defined(__alpha__)
00198
r.m_halfs.low = a*b; __asm__(
"umulh %1,%2,%0" :
"=r" (r.m_halfs.high) :
"r" (a),
"r" (b));
00199
#elif defined(__ia64__)
00200
r.m_halfs.low = a*b; __asm__(
"xmpy.hu %0=%1,%2" :
"=f" (r.m_halfs.high) :
"f" (a),
"f" (b));
00201
#elif defined(_ARCH_PPC64)
00202
r.m_halfs.low = a*b; __asm__(
"mulhdu %0,%1,%2" :
"=r" (r.m_halfs.high) :
"r" (a),
"r" (b) :
"cc");
00203
#elif defined(__x86_64__)
00204
__asm__(
"mulq %3" :
"=d" (r.m_halfs.high),
"=a" (r.m_halfs.low) :
"a" (a),
"rm" (b) :
"cc");
00205
#elif defined(__mips64)
00206
__asm__(
"dmultu %2,%3" :
"=h" (r.m_halfs.high),
"=l" (r.m_halfs.low) :
"r" (a),
"r" (b));
00207
#elif defined(_M_IX86)
00208
00209 word64 t = (word64)a * b;
00210 r.m_halfs.high = ((word32 *)(&t))[1];
00211 r.m_halfs.low = (word32)t;
00212
#else
00213
#error can not implement DWord
00214
#endif
00215
return r;
00216 }
00217
00218
static DWord MultiplyAndAdd(word a, word b, word c)
00219 {
00220 DWord r = Multiply(a, b);
00221
return r += c;
00222 }
00223
00224 DWord & operator+=(word a)
00225 {
00226
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00227
m_whole = m_whole + a;
00228
#else
00229
m_halfs.low += a;
00230 m_halfs.high += (m_halfs.low < a);
00231
#endif
00232
return *
this;
00233 }
00234
00235 DWord operator+(word a)
00236 {
00237 DWord r;
00238
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00239
r.m_whole = m_whole + a;
00240
#else
00241
r.m_halfs.low = m_halfs.low + a;
00242 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00243
#endif
00244
return r;
00245 }
00246
00247 DWord operator-(DWord a)
00248 {
00249 DWord r;
00250
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00251
r.m_whole = m_whole - a.m_whole;
00252
#else
00253
r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00254 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00255
#endif
00256
return r;
00257 }
00258
00259 DWord operator-(word a)
00260 {
00261 DWord r;
00262
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00263
r.m_whole = m_whole - a;
00264
#else
00265
r.m_halfs.low = m_halfs.low - a;
00266 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00267
#endif
00268
return r;
00269 }
00270
00271
00272 word operator/(word divisor);
00273
00274 word operator%(word a);
00275
00276
bool operator!()
const
00277
{
00278
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00279
return !m_whole;
00280
#else
00281
return !m_halfs.high && !m_halfs.low;
00282
#endif
00283
}
00284
00285 word GetLowHalf()
const {
return m_halfs.low;}
00286 word GetHighHalf()
const {
return m_halfs.high;}
00287 word GetHighHalfAsBorrow()
const {
return 0-m_halfs.high;}
00288
00289
private:
00290
union
00291
{
00292
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00293
dword m_whole;
00294
#endif
00295
struct
00296
{
00297
#ifdef IS_LITTLE_ENDIAN
00298
word low;
00299 word high;
00300
#else
00301
word high;
00302 word low;
00303
#endif
00304
} m_halfs;
00305 };
00306 };
00307
00308
class Word
00309 {
00310
public:
00311 Word() {}
00312
00313 Word(word value)
00314 {
00315 m_whole = value;
00316 }
00317
00318 Word(hword low, hword high)
00319 {
00320 m_whole = low | (word(high) << (WORD_BITS/2));
00321 }
00322
00323
static Word Multiply(hword a, hword b)
00324 {
00325 Word r;
00326 r.m_whole = (word)a * b;
00327
return r;
00328 }
00329
00330 Word operator-(Word a)
00331 {
00332 Word r;
00333 r.m_whole = m_whole - a.m_whole;
00334
return r;
00335 }
00336
00337 Word operator-(hword a)
00338 {
00339 Word r;
00340 r.m_whole = m_whole - a;
00341
return r;
00342 }
00343
00344
00345 hword operator/(hword divisor)
00346 {
00347
return hword(m_whole / divisor);
00348 }
00349
00350
bool operator!()
const
00351
{
00352
return !m_whole;
00353 }
00354
00355 word GetWhole()
const {
return m_whole;}
00356 hword GetLowHalf()
const {
return hword(m_whole);}
00357 hword GetHighHalf()
const {
return hword(m_whole>>(WORD_BITS/2));}
00358 hword GetHighHalfAsBorrow()
const {
return 0-hword(m_whole>>(WORD_BITS/2));}
00359
00360
private:
00361 word m_whole;
00362 };
00363
00364
00365
template <
class S,
class D>
00366 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00367 {
00368
00369 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00370
00371
00372 S Q;
00373
if (S(B1+1) == 0)
00374 Q = A[2];
00375
else
00376 Q = D(A[1], A[2]) / S(B1+1);
00377
00378
00379 D p = D::Multiply(B0, Q);
00380 D u = (D) A[0] - p.GetLowHalf();
00381 A[0] = u.GetLowHalf();
00382 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00383 A[1] = u.GetLowHalf();
00384 A[2] += u.GetHighHalf();
00385
00386
00387
while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00388 {
00389 u = (D) A[0] - B0;
00390 A[0] = u.GetLowHalf();
00391 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00392 A[1] = u.GetLowHalf();
00393 A[2] += u.GetHighHalf();
00394 Q++;
00395 assert(Q);
00396 }
00397
00398
return Q;
00399 }
00400
00401
00402
template <
class S,
class D>
00403
inline D DivideFourWordsByTwo(S *T,
const D &Al,
const D &Ah,
const D &B)
00404 {
00405
if (!B)
00406
return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00407
else
00408 {
00409 S Q[2];
00410 T[0] = Al.GetLowHalf();
00411 T[1] = Al.GetHighHalf();
00412 T[2] = Ah.GetLowHalf();
00413 T[3] = Ah.GetHighHalf();
00414 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00415 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00416
return D(Q[0], Q[1]);
00417 }
00418 }
00419
00420
00421
inline word DWord::operator/(word a)
00422 {
00423
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00424
return word(m_whole / a);
00425
#else
00426
hword r[4];
00427
return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00428
#endif
00429
}
00430
00431
inline word DWord::operator%(word a)
00432 {
00433
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00434
return word(m_whole % a);
00435
#else
00436
if (a < (word(1) << (WORD_BITS/2)))
00437 {
00438 hword h = hword(a);
00439 word r = m_halfs.high % h;
00440 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00441
return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00442 }
00443
else
00444 {
00445 hword r[4];
00446 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00447
return Word(r[0], r[1]).GetWhole();
00448 }
00449
#endif
00450
}
00451
00452
00453
00454
class Portable
00455 {
00456
public:
00457
static word Add(word *C,
const word *A,
const word *B,
unsigned int N);
00458
static word Subtract(word *C,
const word *A,
const word *B,
unsigned int N);
00459
00460
static inline void Multiply2(word *C,
const word *A,
const word *B);
00461
static inline word Multiply2Add(word *C,
const word *A,
const word *B);
00462
static void Multiply4(word *C,
const word *A,
const word *B);
00463
static void Multiply8(word *C,
const word *A,
const word *B);
00464
static inline unsigned int MultiplyRecursionLimit() {
return 8;}
00465
00466
static inline void Multiply2Bottom(word *C,
const word *A,
const word *B);
00467
static void Multiply4Bottom(word *C,
const word *A,
const word *B);
00468
static void Multiply8Bottom(word *C,
const word *A,
const word *B);
00469
static inline unsigned int MultiplyBottomRecursionLimit() {
return 8;}
00470
00471
static void Square2(word *R,
const word *A);
00472
static void Square4(word *R,
const word *A);
00473
static void Square8(word *R,
const word *A) {assert(
false);}
00474
static inline unsigned int SquareRecursionLimit() {
return 4;}
00475 };
00476
00477 word Portable::Add(word *C,
const word *A,
const word *B,
unsigned int N)
00478 {
00479 assert (N%2 == 0);
00480
00481 DWord u(0, 0);
00482
for (
unsigned int i = 0; i < N; i+=2)
00483 {
00484 u = DWord(A[i]) + B[i] + u.GetHighHalf();
00485 C[i] = u.GetLowHalf();
00486 u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf();
00487 C[i+1] = u.GetLowHalf();
00488 }
00489
return u.GetHighHalf();
00490 }
00491
00492 word Portable::Subtract(word *C,
const word *A,
const word *B,
unsigned int N)
00493 {
00494 assert (N%2 == 0);
00495
00496 DWord u(0, 0);
00497
for (
unsigned int i = 0; i < N; i+=2)
00498 {
00499 u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow();
00500 C[i] = u.GetLowHalf();
00501 u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow();
00502 C[i+1] = u.GetLowHalf();
00503 }
00504
return 0-u.GetHighHalf();
00505 }
00506
00507
void Portable::Multiply2(word *C,
const word *A,
const word *B)
00508 {
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00538
unsigned int ai = A[1] < A[0];
00539
unsigned int bi = B[0] < B[1];
00540
unsigned int di = ai & bi;
00541 DWord d = DWord::Multiply(D[di], D[di+2]);
00542 D[1] = D[3] = 0;
00543
unsigned int si = ai + !bi;
00544 word s = D[si];
00545
00546 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00547 C[0] = A0B0.GetLowHalf();
00548
00549 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00550 DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf();
00551 C[1] = t.GetLowHalf();
00552
00553 t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s;
00554 C[2] = t.GetLowHalf();
00555 C[3] = t.GetHighHalf();
00556 }
00557
00558
inline void Portable::Multiply2Bottom(word *C,
const word *A,
const word *B)
00559 {
00560 DWord t = DWord::Multiply(A[0], B[0]);
00561 C[0] = t.GetLowHalf();
00562 C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0];
00563 }
00564
00565 word Portable::Multiply2Add(word *C,
const word *A,
const word *B)
00566 {
00567 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00568
unsigned int ai = A[1] < A[0];
00569
unsigned int bi = B[0] < B[1];
00570
unsigned int di = ai & bi;
00571 DWord d = DWord::Multiply(D[di], D[di+2]);
00572 D[1] = D[3] = 0;
00573
unsigned int si = ai + !bi;
00574 word s = D[si];
00575
00576 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00577 DWord t = A0B0 + C[0];
00578 C[0] = t.GetLowHalf();
00579
00580 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00581 t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
00582 C[1] = t.GetLowHalf();
00583
00584 t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
00585 C[2] = t.GetLowHalf();
00586
00587 t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
00588 C[3] = t.GetLowHalf();
00589
return t.GetHighHalf();
00590 }
00591
00592
#define MulAcc(x, y) \
00593
p = DWord::MultiplyAndAdd(A[x], B[y], c); \
00594
c = p.GetLowHalf(); \
00595
p = (DWord) d + p.GetHighHalf(); \
00596
d = p.GetLowHalf(); \
00597
e += p.GetHighHalf();
00598
00599
#define SaveMulAcc(s, x, y) \
00600
R[s] = c; \
00601
p = DWord::MultiplyAndAdd(A[x], B[y], d); \
00602
c = p.GetLowHalf(); \
00603
p = (DWord) e + p.GetHighHalf(); \
00604
d = p.GetLowHalf(); \
00605
e = p.GetHighHalf();
00606
00607
#define SquAcc(x, y) \
00608
q = DWord::Multiply(A[x], A[y]); \
00609
p = q + c; \
00610
c = p.GetLowHalf(); \
00611
p = (DWord) d + p.GetHighHalf(); \
00612
d = p.GetLowHalf(); \
00613
e += p.GetHighHalf(); \
00614
p = q + c; \
00615
c = p.GetLowHalf(); \
00616
p = (DWord) d + p.GetHighHalf(); \
00617
d = p.GetLowHalf(); \
00618
e += p.GetHighHalf();
00619
00620
#define SaveSquAcc(s, x, y) \
00621
R[s] = c; \
00622
q = DWord::Multiply(A[x], A[y]); \
00623
p = q + d; \
00624
c = p.GetLowHalf(); \
00625
p = (DWord) e + p.GetHighHalf(); \
00626
d = p.GetLowHalf(); \
00627
e = p.GetHighHalf(); \
00628
p = q + c; \
00629
c = p.GetLowHalf(); \
00630
p = (DWord) d + p.GetHighHalf(); \
00631
d = p.GetLowHalf(); \
00632
e += p.GetHighHalf();
00633
00634
void Portable::Multiply4(word *R,
const word *A,
const word *B)
00635 {
00636 DWord p;
00637 word c, d, e;
00638
00639 p = DWord::Multiply(A[0], B[0]);
00640 R[0] = p.GetLowHalf();
00641 c = p.GetHighHalf();
00642 d = e = 0;
00643
00644 MulAcc(0, 1);
00645 MulAcc(1, 0);
00646
00647 SaveMulAcc(1, 2, 0);
00648 MulAcc(1, 1);
00649 MulAcc(0, 2);
00650
00651 SaveMulAcc(2, 0, 3);
00652 MulAcc(1, 2);
00653 MulAcc(2, 1);
00654 MulAcc(3, 0);
00655
00656 SaveMulAcc(3, 3, 1);
00657 MulAcc(2, 2);
00658 MulAcc(1, 3);
00659
00660 SaveMulAcc(4, 2, 3);
00661 MulAcc(3, 2);
00662
00663 R[5] = c;
00664 p = DWord::MultiplyAndAdd(A[3], B[3], d);
00665 R[6] = p.GetLowHalf();
00666 R[7] = e + p.GetHighHalf();
00667 }
00668
00669
void Portable::Square2(word *R,
const word *A)
00670 {
00671 DWord p, q;
00672 word c, d, e;
00673
00674 p = DWord::Multiply(A[0], A[0]);
00675 R[0] = p.GetLowHalf();
00676 c = p.GetHighHalf();
00677 d = e = 0;
00678
00679 SquAcc(0, 1);
00680
00681 R[1] = c;
00682 p = DWord::MultiplyAndAdd(A[1], A[1], d);
00683 R[2] = p.GetLowHalf();
00684 R[3] = e + p.GetHighHalf();
00685 }
00686
00687
void Portable::Square4(word *R,
const word *A)
00688 {
00689
#ifdef _MSC_VER
00690
00691
00692
00693
00694 Multiply4(R, A, A);
00695
#else
00696
const word *B = A;
00697 DWord p, q;
00698 word c, d, e;
00699
00700 p = DWord::Multiply(A[0], A[0]);
00701 R[0] = p.GetLowHalf();
00702 c = p.GetHighHalf();
00703 d = e = 0;
00704
00705 SquAcc(0, 1);
00706
00707 SaveSquAcc(1, 2, 0);
00708 MulAcc(1, 1);
00709
00710 SaveSquAcc(2, 0, 3);
00711 SquAcc(1, 2);
00712
00713 SaveSquAcc(3, 3, 1);
00714 MulAcc(2, 2);
00715
00716 SaveSquAcc(4, 2, 3);
00717
00718 R[5] = c;
00719 p = DWord::MultiplyAndAdd(A[3], A[3], d);
00720 R[6] = p.GetLowHalf();
00721 R[7] = e + p.GetHighHalf();
00722
#endif
00723
}
00724
00725
void Portable::Multiply8(word *R,
const word *A,
const word *B)
00726 {
00727 DWord p;
00728 word c, d, e;
00729
00730 p = DWord::Multiply(A[0], B[0]);
00731 R[0] = p.GetLowHalf();
00732 c = p.GetHighHalf();
00733 d = e = 0;
00734
00735 MulAcc(0, 1);
00736 MulAcc(1, 0);
00737
00738 SaveMulAcc(1, 2, 0);
00739 MulAcc(1, 1);
00740 MulAcc(0, 2);
00741
00742 SaveMulAcc(2, 0, 3);
00743 MulAcc(1, 2);
00744 MulAcc(2, 1);
00745 MulAcc(3, 0);
00746
00747 SaveMulAcc(3, 0, 4);
00748 MulAcc(1, 3);
00749 MulAcc(2, 2);
00750 MulAcc(3, 1);
00751 MulAcc(4, 0);
00752
00753 SaveMulAcc(4, 0, 5);
00754 MulAcc(1, 4);
00755 MulAcc(2, 3);
00756 MulAcc(3, 2);
00757 MulAcc(4, 1);
00758 MulAcc(5, 0);
00759
00760 SaveMulAcc(5, 0, 6);
00761 MulAcc(1, 5);
00762 MulAcc(2, 4);
00763 MulAcc(3, 3);
00764 MulAcc(4, 2);
00765 MulAcc(5, 1);
00766 MulAcc(6, 0);
00767
00768 SaveMulAcc(6, 0, 7);
00769 MulAcc(1, 6);
00770 MulAcc(2, 5);
00771 MulAcc(3, 4);
00772 MulAcc(4, 3);
00773 MulAcc(5, 2);
00774 MulAcc(6, 1);
00775 MulAcc(7, 0);
00776
00777 SaveMulAcc(7, 1, 7);
00778 MulAcc(2, 6);
00779 MulAcc(3, 5);
00780 MulAcc(4, 4);
00781 MulAcc(5, 3);
00782 MulAcc(6, 2);
00783 MulAcc(7, 1);
00784
00785 SaveMulAcc(8, 2, 7);
00786 MulAcc(3, 6);
00787 MulAcc(4, 5);
00788 MulAcc(5, 4);
00789 MulAcc(6, 3);
00790 MulAcc(7, 2);
00791
00792 SaveMulAcc(9, 3, 7);
00793 MulAcc(4, 6);
00794 MulAcc(5, 5);
00795 MulAcc(6, 4);
00796 MulAcc(7, 3);
00797
00798 SaveMulAcc(10, 4, 7);
00799 MulAcc(5, 6);
00800 MulAcc(6, 5);
00801 MulAcc(7, 4);
00802
00803 SaveMulAcc(11, 5, 7);
00804 MulAcc(6, 6);
00805 MulAcc(7, 5);
00806
00807 SaveMulAcc(12, 6, 7);
00808 MulAcc(7, 6);
00809
00810 R[13] = c;
00811 p = DWord::MultiplyAndAdd(A[7], B[7], d);
00812 R[14] = p.GetLowHalf();
00813 R[15] = e + p.GetHighHalf();
00814 }
00815
00816
void Portable::Multiply4Bottom(word *R,
const word *A,
const word *B)
00817 {
00818 DWord p;
00819 word c, d, e;
00820
00821 p = DWord::Multiply(A[0], B[0]);
00822 R[0] = p.GetLowHalf();
00823 c = p.GetHighHalf();
00824 d = e = 0;
00825
00826 MulAcc(0, 1);
00827 MulAcc(1, 0);
00828
00829 SaveMulAcc(1, 2, 0);
00830 MulAcc(1, 1);
00831 MulAcc(0, 2);
00832
00833 R[2] = c;
00834 R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00835 }
00836
00837
void Portable::Multiply8Bottom(word *R,
const word *A,
const word *B)
00838 {
00839 DWord p;
00840 word c, d, e;
00841
00842 p = DWord::Multiply(A[0], B[0]);
00843 R[0] = p.GetLowHalf();
00844 c = p.GetHighHalf();
00845 d = e = 0;
00846
00847 MulAcc(0, 1);
00848 MulAcc(1, 0);
00849
00850 SaveMulAcc(1, 2, 0);
00851 MulAcc(1, 1);
00852 MulAcc(0, 2);
00853
00854 SaveMulAcc(2, 0, 3);
00855 MulAcc(1, 2);
00856 MulAcc(2, 1);
00857 MulAcc(3, 0);
00858
00859 SaveMulAcc(3, 0, 4);
00860 MulAcc(1, 3);
00861 MulAcc(2, 2);
00862 MulAcc(3, 1);
00863 MulAcc(4, 0);
00864
00865 SaveMulAcc(4, 0, 5);
00866 MulAcc(1, 4);
00867 MulAcc(2, 3);
00868 MulAcc(3, 2);
00869 MulAcc(4, 1);
00870 MulAcc(5, 0);
00871
00872 SaveMulAcc(5, 0, 6);
00873 MulAcc(1, 5);
00874 MulAcc(2, 4);
00875 MulAcc(3, 3);
00876 MulAcc(4, 2);
00877 MulAcc(5, 1);
00878 MulAcc(6, 0);
00879
00880 R[6] = c;
00881 R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00882 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00883 }
00884
00885
#undef MulAcc
00886
#undef SaveMulAcc
00887
#undef SquAcc
00888
#undef SaveSquAcc
00889
00890
#ifdef CRYPTOPP_X86ASM_AVAILABLE
00891
00892
00893
00894
static bool s_sse2Enabled =
true;
00895
00896
static void CpuId(word32 input, word32 *output)
00897 {
00898
#ifdef __GNUC__
00899
__asm__
00900 (
00901
00902
"push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
00903 :
"=a" (output[0]),
"=D" (output[1]),
"=c" (output[2]),
"=d" (output[3])
00904 :
"a" (input)
00905 );
00906
#else
00907
__asm
00908 {
00909 mov eax, input
00910 cpuid
00911 mov edi, output
00912 mov [edi], eax
00913 mov [edi+4], ebx
00914 mov [edi+8], ecx
00915 mov [edi+12], edx
00916 }
00917
#endif
00918
}
00919
00920
#ifdef SSE2_INTRINSICS_AVAILABLE
00921
#ifndef _MSC_VER
00922
static jmp_buf s_env;
00923
static void SigIllHandler(
int)
00924 {
00925 longjmp(s_env, 1);
00926 }
00927
#endif
00928
00929
static bool HasSSE2()
00930 {
00931
if (!s_sse2Enabled)
00932
return false;
00933
00934 word32 cpuid[4];
00935 CpuId(1, cpuid);
00936
if ((cpuid[3] & (1 << 26)) == 0)
00937
return false;
00938
00939
#ifdef _MSC_VER
00940
__try
00941 {
00942 __asm xorpd xmm0, xmm0
00943 }
00944 __except (1)
00945 {
00946
return false;
00947 }
00948
return true;
00949
#else
00950
typedef void (*SigHandler)(
int);
00951
00952 SigHandler oldHandler = signal(SIGILL, SigIllHandler);
00953
if (oldHandler == SIG_ERR)
00954
return false;
00955
00956
bool result =
true;
00957
if (setjmp(s_env))
00958 result =
false;
00959
else
00960 __asm __volatile (
"xorps %xmm0, %xmm0");
00961
00962 signal(SIGILL, oldHandler);
00963
return result;
00964
#endif
00965
}
00966
#endif
00967
00968
static bool IsP4()
00969 {
00970 word32 cpuid[4];
00971
00972 CpuId(0, cpuid);
00973 std::swap(cpuid[2], cpuid[3]);
00974
if (memcmp(cpuid+1,
"GenuineIntel", 12) != 0)
00975
return false;
00976
00977 CpuId(1, cpuid);
00978
return ((cpuid[0] >> 8) & 0xf) == 0xf;
00979 }
00980
00981
00982
00983
class PentiumOptimized :
public Portable
00984 {
00985
public:
00986
static word CRYPTOPP_CDECL Add(word *C,
const word *A,
const word *B,
unsigned int N);
00987
static word CRYPTOPP_CDECL Subtract(word *C,
const word *A,
const word *B,
unsigned int N);
00988
static void CRYPTOPP_CDECL Multiply4(word *C,
const word *A,
const word *B);
00989
static void CRYPTOPP_CDECL Multiply8(word *C,
const word *A,
const word *B);
00990
static void CRYPTOPP_CDECL Multiply8Bottom(word *C,
const word *A,
const word *B);
00991 };
00992
00993
class P4Optimized
00994 {
00995
public:
00996
static word CRYPTOPP_CDECL Add(word *C,
const word *A,
const word *B,
unsigned int N);
00997
static word CRYPTOPP_CDECL Subtract(word *C,
const word *A,
const word *B,
unsigned int N);
00998
#ifdef SSE2_INTRINSICS_AVAILABLE
00999
static void CRYPTOPP_CDECL Multiply4(word *C,
const word *A,
const word *B);
01000
static void CRYPTOPP_CDECL Multiply8(word *C,
const word *A,
const word *B);
01001
static void CRYPTOPP_CDECL Multiply8Bottom(word *C,
const word *A,
const word *B);
01002
#endif
01003
};
01004
01005
typedef word (CRYPTOPP_CDECL * PAddSub)(word *C,
const word *A,
const word *B,
unsigned int N);
01006
typedef void (CRYPTOPP_CDECL * PMul)(word *C,
const word *A,
const word *B);
01007
01008
static PAddSub s_pAdd, s_pSub;
01009
#ifdef SSE2_INTRINSICS_AVAILABLE
01010
static PMul s_pMul4, s_pMul8, s_pMul8B;
01011
#endif
01012
01013
static void SetPentiumFunctionPointers()
01014 {
01015
if (IsP4())
01016 {
01017 s_pAdd = &P4Optimized::Add;
01018 s_pSub = &P4Optimized::Subtract;
01019 }
01020
else
01021 {
01022 s_pAdd = &PentiumOptimized::Add;
01023 s_pSub = &PentiumOptimized::Subtract;
01024 }
01025
01026
#ifdef SSE2_INTRINSICS_AVAILABLE
01027
if (HasSSE2())
01028 {
01029 s_pMul4 = &P4Optimized::Multiply4;
01030 s_pMul8 = &P4Optimized::Multiply8;
01031 s_pMul8B = &P4Optimized::Multiply8Bottom;
01032 }
01033
else
01034 {
01035 s_pMul4 = &PentiumOptimized::Multiply4;
01036 s_pMul8 = &PentiumOptimized::Multiply8;
01037 s_pMul8B = &PentiumOptimized::Multiply8Bottom;
01038 }
01039
#endif
01040
}
01041
01042
static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0);
01043
01044
void DisableSSE2()
01045 {
01046 s_sse2Enabled =
false;
01047 SetPentiumFunctionPointers();
01048 }
01049
01050
class LowLevel :
public PentiumOptimized
01051 {
01052
public:
01053
inline static word Add(word *C,
const word *A,
const word *B,
unsigned int N)
01054 {
return s_pAdd(C, A, B, N);}
01055
inline static word Subtract(word *C,
const word *A,
const word *B,
unsigned int N)
01056 {
return s_pSub(C, A, B, N);}
01057
inline static void Square4(word *R,
const word *A)
01058 {Multiply4(R, A, A);}
01059
#ifdef SSE2_INTRINSICS_AVAILABLE
01060
inline static void Multiply4(word *C,
const word *A,
const word *B)
01061 {s_pMul4(C, A, B);}
01062
inline static void Multiply8(word *C,
const word *A,
const word *B)
01063 {s_pMul8(C, A, B);}
01064
inline static void Multiply8Bottom(word *C,
const word *A,
const word *B)
01065 {s_pMul8B(C, A, B);}
01066
#endif
01067
};
01068
01069
01070
#ifdef _MSC_VER
01071
#define CRYPTOPP_NAKED __declspec(naked)
01072
#define AS1(x) __asm x
01073
#define AS2(x, y) __asm x, y
01074
#define AddPrologue \
01075
__asm push ebp \
01076
__asm push ebx \
01077
__asm push esi \
01078
__asm push edi \
01079
__asm mov ecx, [esp+20] \
01080
__asm mov edx, [esp+24] \
01081
__asm mov ebx, [esp+28] \
01082
__asm mov esi, [esp+32]
01083
#define AddEpilogue \
01084
__asm pop edi \
01085
__asm pop esi \
01086
__asm pop ebx \
01087
__asm pop ebp \
01088
__asm ret
01089
#define MulPrologue \
01090
__asm push ebp \
01091
__asm push ebx \
01092
__asm push esi \
01093
__asm push edi \
01094
__asm mov ecx, [esp+28] \
01095
__asm mov esi, [esp+24] \
01096
__asm push [esp+20]
01097
#define MulEpilogue \
01098
__asm add esp, 4 \
01099
__asm pop edi \
01100
__asm pop esi \
01101
__asm pop ebx \
01102
__asm pop ebp \
01103
__asm ret
01104
#else
01105
#define CRYPTOPP_NAKED
01106
#define AS1(x) #x ";"
01107
#define AS2(x, y) #x ", " #y ";"
01108
#define AddPrologue \
01109
__asm__ __volatile__ \
01110
( \
01111
"push %%ebx;" \
01112 "mov %2, %%ebx;" \
01113 ".intel_syntax noprefix;" \
01114 "push ebp;"
01115
#define AddEpilogue \
01116
"pop ebp;" \
01117
".att_syntax prefix;" \
01118
"pop %%ebx;" \
01119
: \
01120
: "c" (C), "d" (A), "m" (B), "S" (N) \
01121
: "%edi", "memory", "cc" \
01122
);
01123
#define MulPrologue \
01124
__asm__ __volatile__ \
01125
( \
01126
"push %%ebx;" \
01127 "push %%ebp;" \
01128 "push %0;" \
01129 ".intel_syntax noprefix;"
01130
#define MulEpilogue \
01131
"add esp, 4;" \
01132
"pop ebp;" \
01133
"pop ebx;" \
01134
".att_syntax prefix;" \
01135
: \
01136
: "rm" (Z), "S" (X), "c" (Y) \
01137
: "%eax", "%edx", "%edi", "memory", "cc" \
01138
);
01139
#endif
01140
01141 CRYPTOPP_NAKED word PentiumOptimized::Add(word *C,
const word *A,
const word *B,
unsigned int N)
01142 {
01143 AddPrologue
01144
01145
01146 AS2( sub ecx, edx)
01147 AS2( xor eax, eax)
01148
01149 AS2( sub eax, esi)
01150 AS2( lea ebx, [ebx+4*esi])
01151
01152 AS2( sar eax, 1)
01153 AS1( jz loopendAdd)
01154
01155 AS1(loopstartAdd:)
01156 AS2( mov esi,[edx])
01157 AS2( mov ebp,[edx+4])
01158
01159 AS2( mov edi,[ebx+8*eax])
01160 AS2( lea edx,[edx+8])
01161
01162 AS2( adc esi,edi)
01163 AS2( mov edi,[ebx+8*eax+4])
01164
01165 AS2( adc ebp,edi)
01166 AS1( inc eax)
01167
01168 AS2( mov [edx+ecx-8],esi)
01169 AS2( mov [edx+ecx-4],ebp)
01170
01171 AS1( jnz loopstartAdd)
01172
01173 AS1(loopendAdd:)
01174 AS2( adc eax, 0)
01175
01176 AddEpilogue
01177 }
01178
01179 CRYPTOPP_NAKED word PentiumOptimized::Subtract(word *C, const word *A, const word *B,
unsigned int N)
01180 {
01181 AddPrologue
01182
01183
01184 AS2( sub ecx, edx)
01185 AS2( xor eax, eax)
01186
01187 AS2( sub eax, esi)
01188 AS2( lea ebx, [ebx+4*esi])
01189
01190 AS2( sar eax, 1)
01191 AS1( jz loopendSub)
01192
01193 AS1(loopstartSub:)
01194 AS2( mov esi,[edx])
01195 AS2( mov ebp,[edx+4])
01196
01197 AS2( mov edi,[ebx+8*eax])
01198 AS2( lea edx,[edx+8])
01199
01200 AS2( sbb esi,edi)
01201 AS2( mov edi,[ebx+8*eax+4])
01202
01203 AS2( sbb ebp,edi)
01204 AS1( inc eax)
01205
01206 AS2( mov [edx+ecx-8],esi)
01207 AS2( mov [edx+ecx-4],ebp)
01208
01209 AS1( jnz loopstartSub)
01210
01211 AS1(loopendSub:)
01212 AS2( adc eax, 0)
01213
01214 AddEpilogue
01215 }
01216
01217
01218
01219 CRYPTOPP_NAKED word P4Optimized::Add(word *C, const word *A, const word *B,
unsigned int N)
01220 {
01221 AddPrologue
01222
01223
01224 AS2( xor eax, eax)
01225 AS1( neg esi)
01226 AS1( jz loopendAddP4)
01227
01228 AS2( mov edi, [edx])
01229 AS2( mov ebp, [ebx])
01230 AS1( jmp carry1AddP4)
01231
01232 AS1(loopstartAddP4:)
01233 AS2( mov edi, [edx+8])
01234 AS2( add ecx, 8)
01235 AS2( add edx, 8)
01236 AS2( mov ebp, [ebx])
01237 AS2( add edi, eax)
01238 AS1( jc carry1AddP4)
01239 AS2( xor eax, eax)
01240
01241 AS1(carry1AddP4:)
01242 AS2( add edi, ebp)
01243 AS2( mov ebp, 1)
01244 AS2( mov [ecx], edi)
01245 AS2( mov edi, [edx+4])
01246 AS2( cmovc eax, ebp)
01247 AS2( mov ebp, [ebx+4])
01248 AS2( add ebx, 8)
01249 AS2( add edi, eax)
01250 AS1( jc carry2AddP4)
01251 AS2( xor eax, eax)
01252
01253 AS1(carry2AddP4:)
01254 AS2( add edi, ebp)
01255 AS2( mov ebp, 1)
01256 AS2( cmovc eax, ebp)
01257 AS2( mov [ecx+4], edi)
01258 AS2( add esi, 2)
01259 AS1( jnz loopstartAddP4)
01260
01261 AS1(loopendAddP4:)
01262
01263 AddEpilogue
01264 }
01265
01266 CRYPTOPP_NAKED word P4Optimized::Subtract(word *C, const word *A, const word *B,
unsigned int N)
01267 {
01268 AddPrologue
01269
01270
01271 AS2( xor eax, eax)
01272 AS1( neg esi)
01273 AS1( jz loopendSubP4)
01274
01275 AS2( mov edi, [edx])
01276 AS2( mov ebp, [ebx])
01277 AS1( jmp carry1SubP4)
01278
01279 AS1(loopstartSubP4:)
01280 AS2( mov edi, [edx+8])
01281 AS2( add edx, 8)
01282 AS2( add ecx, 8)
01283 AS2( mov ebp, [ebx])
01284 AS2( sub edi, eax)
01285 AS1( jc carry1SubP4)
01286 AS2( xor eax, eax)
01287
01288 AS1(carry1SubP4:)
01289 AS2( sub edi, ebp)
01290 AS2( mov ebp, 1)
01291 AS2( mov [ecx], edi)
01292 AS2( mov edi, [edx+4])
01293 AS2( cmovc eax, ebp)
01294 AS2( mov ebp, [ebx+4])
01295 AS2( add ebx, 8)
01296 AS2( sub edi, eax)
01297 AS1( jc carry2SubP4)
01298 AS2( xor eax, eax)
01299
01300 AS1(carry2SubP4:)
01301 AS2( sub edi, ebp)
01302 AS2( mov ebp, 1)
01303 AS2( cmovc eax, ebp)
01304 AS2( mov [ecx+4], edi)
01305 AS2( add esi, 2)
01306 AS1( jnz loopstartSubP4)
01307
01308 AS1(loopendSubP4:)
01309
01310 AddEpilogue
01311 }
01312
01313
01314
01315 #define MulStartup \
01316 AS2(xor ebp, ebp) \
01317 AS2(xor edi, edi) \
01318 AS2(xor ebx, ebx)
01319
01320 #define MulShiftCarry \
01321 AS2(mov ebp, edx) \
01322 AS2(mov edi, ebx) \
01323 AS2(xor ebx, ebx)
01324
01325 #define MulAccumulateBottom(i,j) \
01326 AS2(mov eax, [ecx+4*j]) \
01327 AS2(imul eax, dword ptr [esi+4*i]) \
01328 AS2(add ebp, eax)
01329
01330 #define MulAccumulate(i,j) \
01331 AS2(mov eax, [ecx+4*j]) \
01332 AS1(mul dword ptr [esi+4*i]) \
01333 AS2(add ebp, eax) \
01334 AS2(adc edi, edx) \
01335 AS2(adc bl, bh)
01336
01337 #define MulStoreDigit(i) \
01338 AS2(mov edx, edi) \
01339 AS2(mov edi, [esp]) \
01340 AS2(mov [edi+4*i], ebp)
01341
01342 #define MulLastDiagonal(digits) \
01343 AS2(mov eax, [ecx+4*(digits-1)]) \
01344 AS1(mul dword ptr [esi+4*(digits-1)]) \
01345 AS2(add ebp, eax) \
01346 AS2(adc edx, edi) \
01347 AS2(mov edi, [esp]) \
01348 AS2(mov [edi+4*(2*digits-2)], ebp) \
01349 AS2(mov [edi+4*(2*digits-1)], edx)
01350
01351 CRYPTOPP_NAKED
void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
01352 {
01353 MulPrologue
01354
01355 MulStartup
01356 MulAccumulate(0,0)
01357 MulStoreDigit(0)
01358 MulShiftCarry
01359
01360 MulAccumulate(1,0)
01361 MulAccumulate(0,1)
01362 MulStoreDigit(1)
01363 MulShiftCarry
01364
01365 MulAccumulate(2,0)
01366 MulAccumulate(1,1)
01367 MulAccumulate(0,2)
01368 MulStoreDigit(2)
01369 MulShiftCarry
01370
01371 MulAccumulate(3,0)
01372 MulAccumulate(2,1)
01373 MulAccumulate(1,2)
01374 MulAccumulate(0,3)
01375 MulStoreDigit(3)
01376 MulShiftCarry
01377
01378 MulAccumulate(3,1)
01379 MulAccumulate(2,2)
01380 MulAccumulate(1,3)
01381 MulStoreDigit(4)
01382 MulShiftCarry
01383
01384 MulAccumulate(3,2)
01385 MulAccumulate(2,3)
01386 MulStoreDigit(5)
01387 MulShiftCarry
01388
01389 MulLastDiagonal(4)
01390 MulEpilogue
01391 }
01392
01393 CRYPTOPP_NAKED
void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
01394 {
01395 MulPrologue
01396
01397 MulStartup
01398 MulAccumulate(0,0)
01399 MulStoreDigit(0)
01400 MulShiftCarry
01401
01402 MulAccumulate(1,0)
01403 MulAccumulate(0,1)
01404 MulStoreDigit(1)
01405 MulShiftCarry
01406
01407 MulAccumulate(2,0)
01408 MulAccumulate(1,1)
01409 MulAccumulate(0,2)
01410 MulStoreDigit(2)
01411 MulShiftCarry
01412
01413 MulAccumulate(3,0)
01414 MulAccumulate(2,1)
01415 MulAccumulate(1,2)
01416 MulAccumulate(0,3)
01417 MulStoreDigit(3)
01418 MulShiftCarry
01419
01420 MulAccumulate(4,0)
01421 MulAccumulate(3,1)
01422 MulAccumulate(2,2)
01423 MulAccumulate(1,3)
01424 MulAccumulate(0,4)
01425 MulStoreDigit(4)
01426 MulShiftCarry
01427
01428 MulAccumulate(5,0)
01429 MulAccumulate(4,1)
01430 MulAccumulate(3,2)
01431 MulAccumulate(2,3)
01432 MulAccumulate(1,4)
01433 MulAccumulate(0,5)
01434 MulStoreDigit(5)
01435 MulShiftCarry
01436
01437 MulAccumulate(6,0)
01438 MulAccumulate(5,1)
01439 MulAccumulate(4,2)
01440 MulAccumulate(3,3)
01441 MulAccumulate(2,4)
01442 MulAccumulate(1,5)
01443 MulAccumulate(0,6)
01444 MulStoreDigit(6)
01445 MulShiftCarry
01446
01447 MulAccumulate(7,0)
01448 MulAccumulate(6,1)
01449 MulAccumulate(5,2)
01450 MulAccumulate(4,3)
01451 MulAccumulate(3,4)
01452 MulAccumulate(2,5)
01453 MulAccumulate(1,6)
01454 MulAccumulate(0,7)
01455 MulStoreDigit(7)
01456 MulShiftCarry
01457
01458 MulAccumulate(7,1)
01459 MulAccumulate(6,2)
01460 MulAccumulate(5,3)
01461 MulAccumulate(4,4)
01462 MulAccumulate(3,5)
01463 MulAccumulate(2,6)
01464 MulAccumulate(1,7)
01465 MulStoreDigit(8)
01466 MulShiftCarry
01467
01468 MulAccumulate(7,2)
01469 MulAccumulate(6,3)
01470 MulAccumulate(5,4)
01471 MulAccumulate(4,5)
01472 MulAccumulate(3,6)
01473 MulAccumulate(2,7)
01474 MulStoreDigit(9)
01475 MulShiftCarry
01476
01477 MulAccumulate(7,3)
01478 MulAccumulate(6,4)
01479 MulAccumulate(5,5)
01480 MulAccumulate(4,6)
01481 MulAccumulate(3,7)
01482 MulStoreDigit(10)
01483 MulShiftCarry
01484
01485 MulAccumulate(7,4)
01486 MulAccumulate(6,5)
01487 MulAccumulate(5,6)
01488 MulAccumulate(4,7)
01489 MulStoreDigit(11)
01490 MulShiftCarry
01491
01492 MulAccumulate(7,5)
01493 MulAccumulate(6,6)
01494 MulAccumulate(5,7)
01495 MulStoreDigit(12)
01496 MulShiftCarry
01497
01498 MulAccumulate(7,6)
01499 MulAccumulate(6,7)
01500 MulStoreDigit(13)
01501 MulShiftCarry
01502
01503 MulLastDiagonal(8)
01504 MulEpilogue
01505 }
01506
01507 CRYPTOPP_NAKED
void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, const word* Y)
01508 {
01509 MulPrologue
01510
01511 MulStartup
01512 MulAccumulate(0,0)
01513 MulStoreDigit(0)
01514 MulShiftCarry
01515
01516 MulAccumulate(1,0)
01517 MulAccumulate(0,1)
01518 MulStoreDigit(1)
01519 MulShiftCarry
01520
01521 MulAccumulate(2,0)
01522 MulAccumulate(1,1)
01523 MulAccumulate(0,2)
01524 MulStoreDigit(2)
01525 MulShiftCarry
01526
01527 MulAccumulate(3,0)
01528 MulAccumulate(2,1)
01529 MulAccumulate(1,2)
01530 MulAccumulate(0,3)
01531 MulStoreDigit(3)
01532 MulShiftCarry
01533
01534 MulAccumulate(4,0)
01535 MulAccumulate(3,1)
01536 MulAccumulate(2,2)
01537 MulAccumulate(1,3)
01538 MulAccumulate(0,4)
01539 MulStoreDigit(4)
01540 MulShiftCarry
01541
01542 MulAccumulate(5,0)
01543 MulAccumulate(4,1)
01544 MulAccumulate(3,2)
01545 MulAccumulate(2,3)
01546 MulAccumulate(1,4)
01547 MulAccumulate(0,5)
01548 MulStoreDigit(5)
01549 MulShiftCarry
01550
01551 MulAccumulate(6,0)
01552 MulAccumulate(5,1)
01553 MulAccumulate(4,2)
01554 MulAccumulate(3,3)
01555 MulAccumulate(2,4)
01556 MulAccumulate(1,5)
01557 MulAccumulate(0,6)
01558 MulStoreDigit(6)
01559 MulShiftCarry
01560
01561 MulAccumulateBottom(7,0)
01562 MulAccumulateBottom(6,1)
01563 MulAccumulateBottom(5,2)
01564 MulAccumulateBottom(4,3)
01565 MulAccumulateBottom(3,4)
01566 MulAccumulateBottom(2,5)
01567 MulAccumulateBottom(1,6)
01568 MulAccumulateBottom(0,7)
01569 MulStoreDigit(7)
01570 MulEpilogue
01571 }
01572
01573 #undef AS1
01574 #undef AS2
01575
01576 #else
01577
01578 typedef Portable LowLevel;
01579
01580 #endif
01581
01582 #ifdef SSE2_INTRINSICS_AVAILABLE
01583
01584 #ifdef __GNUC__
01585 #define CRYPTOPP_FASTCALL
01586 #else
01587 #define CRYPTOPP_FASTCALL __fastcall
01588 #endif
01589
01590 static
void CRYPTOPP_FASTCALL P4_Mul(__m128i *C, const __m128i *A, const __m128i *B)
01591 {
01592 __m128i a3210 = _mm_load_si128(A);
01593 __m128i b3210 = _mm_load_si128(B);
01594
01595 __m128i sum;
01596
01597 __m128i z = _mm_setzero_si128();
01598 __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
01599 C[0] = a2b2_a0b0;
01600
01601 __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
01602 __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
01603 __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
01604 __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
01605 __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
01606 C[1] = _mm_add_epi64(a1b0, a0b1);
01607
01608 __m128i a31 = _mm_srli_epi64(a3210, 32);
01609 __m128i b31 = _mm_srli_epi64(b3210, 32);
01610 __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
01611 C[6] = a3b3_a1b1;
01612
01613 __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
01614 __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
01615 __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
01616 __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
01617 __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
01618 sum = _mm_add_epi64(a1b1, a0b2);
01619 C[2] = _mm_add_epi64(sum, a2b0);
01620
01621 __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
01622 __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
01623 __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
01624 __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
01625 __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
01626 __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
01627 __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
01628 __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
01629 __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
01630 sum = _mm_add_epi64(a2b1, a0b3);
01631 C[3] = _mm_add_epi64(sum, sum1);
01632
01633 __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
01634 __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
01635 __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
01636 __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
01637 sum = _mm_add_epi64(a2b2, a3b1);
01638 C[4] = _mm_add_epi64(sum, a1b3);
01639
01640 __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
01641 __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
01642 __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
01643 __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
01644 __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
01645 C[5] = _mm_add_epi64(a3b2, a2b3);
01646 }
01647
01648
void P4Optimized::Multiply4(word *C,
const word *A,
const word *B)
01649 {
01650 __m128i temp[7];
01651
const word *w = (word *)temp;
01652
const __m64 *mw = (__m64 *)w;
01653
01654 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01655
01656 C[0] = w[0];
01657
01658 __m64 s1, s2;
01659
01660 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01661 __m64 w4 = mw[2];
01662 __m64 w6 = mw[3];
01663 __m64 w8 = mw[4];
01664 __m64 w10 = mw[5];
01665 __m64 w12 = mw[6];
01666 __m64 w14 = mw[7];
01667 __m64 w16 = mw[8];
01668 __m64 w18 = mw[9];
01669 __m64 w20 = mw[10];
01670 __m64 w22 = mw[11];
01671 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01672
01673 s1 = _mm_add_si64(w1, w4);
01674 C[1] = _mm_cvtsi64_si32(s1);
01675 s1 = _mm_srli_si64(s1, 32);
01676
01677 s2 = _mm_add_si64(w6, w8);
01678 s1 = _mm_add_si64(s1, s2);
01679 C[2] = _mm_cvtsi64_si32(s1);
01680 s1 = _mm_srli_si64(s1, 32);
01681
01682 s2 = _mm_add_si64(w10, w12);
01683 s1 = _mm_add_si64(s1, s2);
01684 C[3] = _mm_cvtsi64_si32(s1);
01685 s1 = _mm_srli_si64(s1, 32);
01686
01687 s2 = _mm_add_si64(w14, w16);
01688 s1 = _mm_add_si64(s1, s2);
01689 C[4] = _mm_cvtsi64_si32(s1);
01690 s1 = _mm_srli_si64(s1, 32);
01691
01692 s2 = _mm_add_si64(w18, w20);
01693 s1 = _mm_add_si64(s1, s2);
01694 C[5] = _mm_cvtsi64_si32(s1);
01695 s1 = _mm_srli_si64(s1, 32);
01696
01697 s2 = _mm_add_si64(w22, w26);
01698 s1 = _mm_add_si64(s1, s2);
01699 C[6] = _mm_cvtsi64_si32(s1);
01700 s1 = _mm_srli_si64(s1, 32);
01701
01702 C[7] = _mm_cvtsi64_si32(s1) + w[27];
01703 _mm_empty();
01704 }
01705
01706
void P4Optimized::Multiply8(word *C,
const word *A,
const word *B)
01707 {
01708 __m128i temp[28];
01709
const word *w = (word *)temp;
01710
const __m64 *mw = (__m64 *)w;
01711
const word *x = (word *)temp+7*4;
01712
const __m64 *mx = (__m64 *)x;
01713
const word *y = (word *)temp+7*4*2;
01714
const __m64 *my = (__m64 *)y;
01715
const word *z = (word *)temp+7*4*3;
01716
const __m64 *mz = (__m64 *)z;
01717
01718 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01719
01720 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01721
01722 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01723
01724 P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
01725
01726 C[0] = w[0];
01727
01728 __m64 s1, s2, s3, s4;
01729
01730 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01731 __m64 w4 = mw[2];
01732 __m64 w6 = mw[3];
01733 __m64 w8 = mw[4];
01734 __m64 w10 = mw[5];
01735 __m64 w12 = mw[6];
01736 __m64 w14 = mw[7];
01737 __m64 w16 = mw[8];
01738 __m64 w18 = mw[9];
01739 __m64 w20 = mw[10];
01740 __m64 w22 = mw[11];
01741 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01742 __m64 w27 = _mm_cvtsi32_si64(w[27]);
01743
01744 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01745 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01746 __m64 x4 = mx[2];
01747 __m64 x6 = mx[3];
01748 __m64 x8 = mx[4];
01749 __m64 x10 = mx[5];
01750 __m64 x12 = mx[6];
01751 __m64 x14 = mx[7];
01752 __m64 x16 = mx[8];
01753 __m64 x18 = mx[9];
01754 __m64 x20 = mx[10];
01755 __m64 x22 = mx[11];
01756 __m64 x26 = _mm_cvtsi32_si64(x[26]);
01757 __m64 x27 = _mm_cvtsi32_si64(x[27]);
01758
01759 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01760 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01761 __m64 y4 = my[2];
01762 __m64 y6 = my[3];
01763 __m64 y8 = my[4];
01764 __m64 y10 = my[5];
01765 __m64 y12 = my[6];
01766 __m64 y14 = my[7];
01767 __m64 y16 = my[8];
01768 __m64 y18 = my[9];
01769 __m64 y20 = my[10];
01770 __m64 y22 = my[11];
01771 __m64 y26 = _mm_cvtsi32_si64(y[26]);
01772 __m64 y27 = _mm_cvtsi32_si64(y[27]);
01773
01774 __m64 z0 = _mm_cvtsi32_si64(z[0]);
01775 __m64 z1 = _mm_cvtsi32_si64(z[1]);
01776 __m64 z4 = mz[2];
01777 __m64 z6 = mz[3];
01778 __m64 z8 = mz[4];
01779 __m64 z10 = mz[5];
01780 __m64 z12 = mz[6];
01781 __m64 z14 = mz[7];
01782 __m64 z16 = mz[8];
01783 __m64 z18 = mz[9];
01784 __m64 z20 = mz[10];
01785 __m64 z22 = mz[11];
01786 __m64 z26 = _mm_cvtsi32_si64(z[26]);
01787
01788 s1 = _mm_add_si64(w1, w4);
01789 C[1] = _mm_cvtsi64_si32(s1);
01790 s1 = _mm_srli_si64(s1, 32);
01791
01792 s2 = _mm_add_si64(w6, w8);
01793 s1 = _mm_add_si64(s1, s2);
01794 C[2] = _mm_cvtsi64_si32(s1);
01795 s1 = _mm_srli_si64(s1, 32);
01796
01797 s2 = _mm_add_si64(w10, w12);
01798 s1 = _mm_add_si64(s1, s2);
01799 C[3] = _mm_cvtsi64_si32(s1);
01800 s1 = _mm_srli_si64(s1, 32);
01801
01802 s3 = _mm_add_si64(x0, y0);
01803 s2 = _mm_add_si64(w14, w16);
01804 s1 = _mm_add_si64(s1, s3);
01805 s1 = _mm_add_si64(s1, s2);
01806 C[4] = _mm_cvtsi64_si32(s1);
01807 s1 = _mm_srli_si64(s1, 32);
01808
01809 s3 = _mm_add_si64(x1, y1);
01810 s4 = _mm_add_si64(x4, y4);
01811 s1 = _mm_add_si64(s1, w18);
01812 s3 = _mm_add_si64(s3, s4);
01813 s1 = _mm_add_si64(s1, w20);
01814 s1 = _mm_add_si64(s1, s3);
01815 C[5] = _mm_cvtsi64_si32(s1);
01816 s1 = _mm_srli_si64(s1, 32);
01817
01818 s3 = _mm_add_si64(x6, y6);
01819 s4 = _mm_add_si64(x8, y8);
01820 s1 = _mm_add_si64(s1, w22);
01821 s3 = _mm_add_si64(s3, s4);
01822 s1 = _mm_add_si64(s1, w26);
01823 s1 = _mm_add_si64(s1, s3);
01824 C[6] = _mm_cvtsi64_si32(s1);
01825 s1 = _mm_srli_si64(s1, 32);
01826
01827 s3 = _mm_add_si64(x10, y10);
01828 s4 = _mm_add_si64(x12, y12);
01829 s1 = _mm_add_si64(s1, w27);
01830 s3 = _mm_add_si64(s3, s4);
01831 s1 = _mm_add_si64(s1, s3);
01832 C[7] = _mm_cvtsi64_si32(s1);
01833 s1 = _mm_srli_si64(s1, 32);
01834
01835 s3 = _mm_add_si64(x14, y14);
01836 s4 = _mm_add_si64(x16, y16);
01837 s1 = _mm_add_si64(s1, z0);
01838 s3 = _mm_add_si64(s3, s4);
01839 s1 = _mm_add_si64(s1, s3);
01840 C[8] = _mm_cvtsi64_si32(s1);
01841 s1 = _mm_srli_si64(s1, 32);
01842
01843 s3 = _mm_add_si64(x18, y18);
01844 s4 = _mm_add_si64(x20, y20);
01845 s1 = _mm_add_si64(s1, z1);
01846 s3 = _mm_add_si64(s3, s4);
01847 s1 = _mm_add_si64(s1, z4);
01848 s1 = _mm_add_si64(s1, s3);
01849 C[9] = _mm_cvtsi64_si32(s1);
01850 s1 = _mm_srli_si64(s1, 32);
01851
01852 s3 = _mm_add_si64(x22, y22);
01853 s4 = _mm_add_si64(x26, y26);
01854 s1 = _mm_add_si64(s1, z6);
01855 s3 = _mm_add_si64(s3, s4);
01856 s1 = _mm_add_si64(s1, z8);
01857 s1 = _mm_add_si64(s1, s3);
01858 C[10] = _mm_cvtsi64_si32(s1);
01859 s1 = _mm_srli_si64(s1, 32);
01860
01861 s3 = _mm_add_si64(x27, y27);
01862 s1 = _mm_add_si64(s1, z10);
01863 s1 = _mm_add_si64(s1, z12);
01864 s1 = _mm_add_si64(s1, s3);
01865 C[11] = _mm_cvtsi64_si32(s1);
01866 s1 = _mm_srli_si64(s1, 32);
01867
01868 s3 = _mm_add_si64(z14, z16);
01869 s1 = _mm_add_si64(s1, s3);
01870 C[12] = _mm_cvtsi64_si32(s1);
01871 s1 = _mm_srli_si64(s1, 32);
01872
01873 s3 = _mm_add_si64(z18, z20);
01874 s1 = _mm_add_si64(s1, s3);
01875 C[13] = _mm_cvtsi64_si32(s1);
01876 s1 = _mm_srli_si64(s1, 32);
01877
01878 s3 = _mm_add_si64(z22, z26);
01879 s1 = _mm_add_si64(s1, s3);
01880 C[14] = _mm_cvtsi64_si32(s1);
01881 s1 = _mm_srli_si64(s1, 32);
01882
01883 C[15] = z[27] + _mm_cvtsi64_si32(s1);
01884 _mm_empty();
01885 }
01886
01887
void P4Optimized::Multiply8Bottom(word *C,
const word *A,
const word *B)
01888 {
01889 __m128i temp[21];
01890
const word *w = (word *)temp;
01891
const __m64 *mw = (__m64 *)w;
01892
const word *x = (word *)temp+7*4;
01893
const __m64 *mx = (__m64 *)x;
01894
const word *y = (word *)temp+7*4*2;
01895
const __m64 *my = (__m64 *)y;
01896
01897 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01898
01899 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01900
01901 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01902
01903 C[0] = w[0];
01904
01905 __m64 s1, s2, s3, s4;
01906
01907 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01908 __m64 w4 = mw[2];
01909 __m64 w6 = mw[3];
01910 __m64 w8 = mw[4];
01911 __m64 w10 = mw[5];
01912 __m64 w12 = mw[6];
01913 __m64 w14 = mw[7];
01914 __m64 w16 = mw[8];
01915 __m64 w18 = mw[9];
01916 __m64 w20 = mw[10];
01917 __m64 w22 = mw[11];
01918 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01919
01920 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01921 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01922 __m64 x4 = mx[2];
01923 __m64 x6 = mx[3];
01924 __m64 x8 = mx[4];
01925
01926 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01927 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01928 __m64 y4 = my[2];
01929 __m64 y6 = my[3];
01930 __m64 y8 = my[4];
01931
01932 s1 = _mm_add_si64(w1, w4);
01933 C[1] = _mm_cvtsi64_si32(s1);
01934 s1 = _mm_srli_si64(s1, 32);
01935
01936 s2 = _mm_add_si64(w6, w8);
01937 s1 = _mm_add_si64(s1, s2);
01938 C[2] = _mm_cvtsi64_si32(s1);
01939 s1 = _mm_srli_si64(s1, 32);
01940
01941 s2 = _mm_add_si64(w10, w12);
01942 s1 = _mm_add_si64(s1, s2);
01943 C[3] = _mm_cvtsi64_si32(s1);
01944 s1 = _mm_srli_si64(s1, 32);
01945
01946 s3 = _mm_add_si64(x0, y0);
01947 s2 = _mm_add_si64(w14, w16);
01948 s1 = _mm_add_si64(s1, s3);
01949 s1 = _mm_add_si64(s1, s2);
01950 C[4] = _mm_cvtsi64_si32(s1);
01951 s1 = _mm_srli_si64(s1, 32);
01952
01953 s3 = _mm_add_si64(x1, y1);
01954 s4 = _mm_add_si64(x4, y4);
01955 s1 = _mm_add_si64(s1, w18);
01956 s3 = _mm_add_si64(s3, s4);
01957 s1 = _mm_add_si64(s1, w20);
01958 s1 = _mm_add_si64(s1, s3);
01959 C[5] = _mm_cvtsi64_si32(s1);
01960 s1 = _mm_srli_si64(s1, 32);
01961
01962 s3 = _mm_add_si64(x6, y6);
01963 s4 = _mm_add_si64(x8, y8);
01964 s1 = _mm_add_si64(s1, w22);
01965 s3 = _mm_add_si64(s3, s4);
01966 s1 = _mm_add_si64(s1, w26);
01967 s1 = _mm_add_si64(s1, s3);
01968 C[6] = _mm_cvtsi64_si32(s1);
01969 s1 = _mm_srli_si64(s1, 32);
01970
01971 C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01972 _mm_empty();
01973 }
01974
01975
#endif // #ifdef SSE2_INTRINSICS_AVAILABLE
01976
01977
01978
01979
#define A0 A
01980
#define A1 (A+N2)
01981
#define B0 B
01982
#define B1 (B+N2)
01983
01984
#define T0 T
01985
#define T1 (T+N2)
01986
#define T2 (T+N)
01987
#define T3 (T+N+N2)
01988
01989
#define R0 R
01990
#define R1 (R+N2)
01991
#define R2 (R+N)
01992
#define R3 (R+N+N2)
01993
01994
01995
01996
01997
01998
01999
void RecursiveMultiply(word *R, word *T,
const word *A,
const word *B,
unsigned int N)
02000 {
02001 assert(N>=2 && N%2==0);
02002
02003
if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
02004 LowLevel::Multiply8(R, A, B);
02005
else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
02006 LowLevel::Multiply4(R, A, B);
02007
else if (N==2)
02008 LowLevel::Multiply2(R, A, B);
02009
else
02010 {
02011
const unsigned int N2 = N/2;
02012
int carry;
02013
02014
int aComp = Compare(A0, A1, N2);
02015
int bComp = Compare(B0, B1, N2);
02016
02017
switch (2*aComp + aComp + bComp)
02018 {
02019
case -4:
02020 LowLevel::Subtract(R0, A1, A0, N2);
02021 LowLevel::Subtract(R1, B0, B1, N2);
02022 RecursiveMultiply(T0, T2, R0, R1, N2);
02023 LowLevel::Subtract(T1, T1, R0, N2);
02024 carry = -1;
02025
break;
02026
case -2:
02027 LowLevel::Subtract(R0, A1, A0, N2);
02028 LowLevel::Subtract(R1, B0, B1, N2);
02029 RecursiveMultiply(T0, T2, R0, R1, N2);
02030 carry = 0;
02031
break;
02032
case 2:
02033 LowLevel::Subtract(R0, A0, A1, N2);
02034 LowLevel::Subtract(R1, B1, B0, N2);
02035 RecursiveMultiply(T0, T2, R0, R1, N2);
02036 carry = 0;
02037
break;
02038
case 4:
02039 LowLevel::Subtract(R0, A1, A0, N2);
02040 LowLevel::Subtract(R1, B0, B1, N2);
02041 RecursiveMultiply(T0, T2, R0, R1, N2);
02042 LowLevel::Subtract(T1, T1, R1, N2);
02043 carry = -1;
02044
break;
02045
default:
02046 SetWords(T0, 0, N);
02047 carry = 0;
02048 }
02049
02050 RecursiveMultiply(R0, T2, A0, B0, N2);
02051 RecursiveMultiply(R2, T2, A1, B1, N2);
02052
02053
02054
02055 carry += LowLevel::Add(T0, T0, R0, N);
02056 carry += LowLevel::Add(T0, T0, R2, N);
02057 carry += LowLevel::Add(R1, R1, T0, N);
02058
02059 assert (carry >= 0 && carry <= 2);
02060 Increment(R3, N2, carry);
02061 }
02062 }
02063
02064
02065
02066
02067
02068
void RecursiveSquare(word *R, word *T,
const word *A,
unsigned int N)
02069 {
02070 assert(N && N%2==0);
02071
if (LowLevel::SquareRecursionLimit() >= 8 && N==8)
02072 LowLevel::Square8(R, A);
02073
if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
02074 LowLevel::Square4(R, A);
02075
else if (N==2)
02076 LowLevel::Square2(R, A);
02077
else
02078 {
02079
const unsigned int N2 = N/2;
02080
02081 RecursiveSquare(R0, T2, A0, N2);
02082 RecursiveSquare(R2, T2, A1, N2);
02083 RecursiveMultiply(T0, T2, A0, A1, N2);
02084
02085 word carry = LowLevel::Add(R1, R1, T0, N);
02086 carry += LowLevel::Add(R1, R1, T0, N);
02087 Increment(R3, N2, carry);
02088 }
02089 }
02090
02091
02092
02093
02094
02095
02096
void RecursiveMultiplyBottom(word *R, word *T,
const word *A,
const word *B,
unsigned int N)
02097 {
02098 assert(N>=2 && N%2==0);
02099
if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
02100 LowLevel::Multiply8Bottom(R, A, B);
02101
else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
02102 LowLevel::Multiply4Bottom(R, A, B);
02103
else if (N==2)
02104 LowLevel::Multiply2Bottom(R, A, B);
02105
else
02106 {
02107
const unsigned int N2 = N/2;
02108
02109 RecursiveMultiply(R, T, A0, B0, N2);
02110 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02111 LowLevel::Add(R1, R1, T0, N2);
02112 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02113 LowLevel::Add(R1, R1, T0, N2);
02114 }
02115 }
02116
02117
02118
02119
02120
02121
02122
02123
void RecursiveMultiplyTop(word *R, word *T,
const word *L,
const word *A,
const word *B,
unsigned int N)
02124 {
02125 assert(N>=2 && N%2==0);
02126
02127
if (N==4)
02128 {
02129 LowLevel::Multiply4(T, A, B);
02130 memcpy(R, T+4, 4*WORD_SIZE);
02131 }
02132
else if (N==2)
02133 {
02134 LowLevel::Multiply2(T, A, B);
02135 memcpy(R, T+2, 2*WORD_SIZE);
02136 }
02137
else
02138 {
02139
const unsigned int N2 = N/2;
02140
int carry;
02141
02142
int aComp = Compare(A0, A1, N2);
02143
int bComp = Compare(B0, B1, N2);
02144
02145
switch (2*aComp + aComp + bComp)
02146 {
02147
case -4:
02148 LowLevel::Subtract(R0, A1, A0, N2);
02149 LowLevel::Subtract(R1, B0, B1, N2);
02150 RecursiveMultiply(T0, T2, R0, R1, N2);
02151 LowLevel::Subtract(T1, T1, R0, N2);
02152 carry = -1;
02153
break;
02154
case -2:
02155 LowLevel::Subtract(R0, A1, A0, N2);
02156 LowLevel::Subtract(R1, B0, B1, N2);
02157 RecursiveMultiply(T0, T2, R0, R1, N2);
02158 carry = 0;
02159
break;
02160
case 2:
02161 LowLevel::Subtract(R0, A0, A1, N2);
02162 LowLevel::Subtract(R1, B1, B0, N2);
02163 RecursiveMultiply(T0, T2, R0, R1, N2);
02164 carry = 0;
02165
break;
02166
case 4:
02167 LowLevel::Subtract(R0, A1, A0, N2);
02168 LowLevel::Subtract(R1, B0, B1, N2);
02169 RecursiveMultiply(T0, T2, R0, R1, N2);
02170 LowLevel::Subtract(T1, T1, R1, N2);
02171 carry = -1;
02172
break;
02173
default:
02174 SetWords(T0, 0, N);
02175 carry = 0;
02176 }
02177
02178 RecursiveMultiply(T2, R0, A1, B1, N2);
02179
02180
02181
02182 word c2 = LowLevel::Subtract(R0, L+N2, L, N2);
02183 c2 += LowLevel::Subtract(R0, R0, T0, N2);
02184 word t = (Compare(R0, T2, N2) == -1);
02185
02186 carry += t;
02187 carry += Increment(R0, N2, c2+t);
02188 carry += LowLevel::Add(R0, R0, T1, N2);
02189 carry += LowLevel::Add(R0, R0, T3, N2);
02190 assert (carry >= 0 && carry <= 2);
02191
02192 CopyWords(R1, T3, N2);
02193 Increment(R1, N2, carry);
02194 }
02195 }
02196
02197
inline word Add(word *C,
const word *A,
const word *B,
unsigned int N)
02198 {
02199
return LowLevel::Add(C, A, B, N);
02200 }
02201
02202
inline word Subtract(word *C,
const word *A,
const word *B,
unsigned int N)
02203 {
02204
return LowLevel::Subtract(C, A, B, N);
02205 }
02206
02207
inline void Multiply(word *R, word *T,
const word *A,
const word *B,
unsigned int N)
02208 {
02209 RecursiveMultiply(R, T, A, B, N);
02210 }
02211
02212
inline void Square(word *R, word *T,
const word *A,
unsigned int N)
02213 {
02214 RecursiveSquare(R, T, A, N);
02215 }
02216
02217
inline void MultiplyBottom(word *R, word *T,
const word *A,
const word *B,
unsigned int N)
02218 {
02219 RecursiveMultiplyBottom(R, T, A, B, N);
02220 }
02221
02222
inline void MultiplyTop(word *R, word *T,
const word *L,
const word *A,
const word *B,
unsigned int N)
02223 {
02224 RecursiveMultiplyTop(R, T, L, A, B, N);
02225 }
02226
02227
static word LinearMultiply(word *C,
const word *A, word B,
unsigned int N)
02228 {
02229 word carry=0;
02230
for(
unsigned i=0; i<N; i++)
02231 {
02232 DWord p = DWord::MultiplyAndAdd(A[i], B, carry);
02233 C[i] = p.GetLowHalf();
02234 carry = p.GetHighHalf();
02235 }
02236
return carry;
02237 }
02238
02239
02240
02241
02242
02243
02244
void AsymmetricMultiply(word *R, word *T,
const word *A,
unsigned int NA,
const word *B,
unsigned int NB)
02245 {
02246
if (NA == NB)
02247 {
02248
if (A == B)
02249
Square(R, T, A, NA);
02250
else
02251 Multiply(R, T, A, B, NA);
02252
02253
return;
02254 }
02255
02256
if (NA > NB)
02257 {
02258 std::swap(A, B);
02259 std::swap(NA, NB);
02260 }
02261
02262 assert(NB % NA == 0);
02263 assert((NB/NA)%2 == 0);
02264
02265
if (NA==2 && !A[1])
02266 {
02267
switch (A[0])
02268 {
02269
case 0:
02270 SetWords(R, 0, NB+2);
02271
return;
02272
case 1:
02273 CopyWords(R, B, NB);
02274 R[NB] = R[NB+1] = 0;
02275
return;
02276
default:
02277 R[NB] = LinearMultiply(R, B, A[0], NB);
02278 R[NB+1] = 0;
02279
return;
02280 }
02281 }
02282
02283 Multiply(R, T, A, B, NA);
02284 CopyWords(T+2*NA, R+NA, NA);
02285
02286
unsigned i;
02287
02288
for (i=2*NA; i<NB; i+=2*NA)
02289 Multiply(T+NA+i, T, A, B+i, NA);
02290
for (i=NA; i<NB; i+=2*NA)
02291 Multiply(R+i, T, A, B+i, NA);
02292
02293
if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02294 Increment(R+NB, NA);
02295 }
02296
02297
02298
02299
02300
02301
void RecursiveInverseModPower2(word *R, word *T,
const word *A,
unsigned int N)
02302 {
02303
if (N==2)
02304 {
02305 T[0] = AtomicInverseModPower2(A[0]);
02306 T[1] = 0;
02307 LowLevel::Multiply2Bottom(T+2, T, A);
02308 TwosComplement(T+2, 2);
02309 Increment(T+2, 2, 2);
02310 LowLevel::Multiply2Bottom(R, T, T+2);
02311 }
02312
else
02313 {
02314
const unsigned int N2 = N/2;
02315 RecursiveInverseModPower2(R0, T0, A0, N2);
02316 T0[0] = 1;
02317 SetWords(T0+1, 0, N2-1);
02318 MultiplyTop(R1, T1, T0, R0, A0, N2);
02319 MultiplyBottom(T0, T1, R0, A1, N2);
02320 Add(T0, R1, T0, N2);
02321 TwosComplement(T0, N2);
02322 MultiplyBottom(R1, T1, R0, T0, N2);
02323 }
02324 }
02325
02326
02327
02328
02329
02330
02331
02332
void MontgomeryReduce(word *R, word *T,
const word *X,
const word *M,
const word *U,
unsigned int N)
02333 {
02334 MultiplyBottom(R, T, X, U, N);
02335 MultiplyTop(T, T+N, X, R, M, N);
02336 word borrow = Subtract(T, X+N, T, N);
02337
02338 word carry = Add(T+N, T, M, N);
02339 assert(carry || !borrow);
02340 CopyWords(R, T + (borrow ? N : 0), N);
02341 }
02342
02343
02344
02345
02346
02347
02348
02349
02350
void HalfMontgomeryReduce(word *R, word *T,
const word *X,
const word *M,
const word *U,
const word *V,
unsigned int N)
02351 {
02352 assert(N%2==0 && N>=4);
02353
02354
#define M0 M
02355
#define M1 (M+N2)
02356
#define V0 V
02357
#define V1 (V+N2)
02358
02359
#define X0 X
02360
#define X1 (X+N2)
02361
#define X2 (X+N)
02362
#define X3 (X+N+N2)
02363
02364
const unsigned int N2 = N/2;
02365 Multiply(T0, T2, V0, X3, N2);
02366
int c2 = Add(T0, T0, X0, N);
02367 MultiplyBottom(T3, T2, T0, U, N2);
02368 MultiplyTop(T2, R, T0, T3, M0, N2);
02369 c2 -= Subtract(T2, T1, T2, N2);
02370 Multiply(T0, R, T3, M1, N2);
02371 c2 -= Subtract(T0, T2, T0, N2);
02372
int c3 = -(
int)Subtract(T1, X2, T1, N2);
02373 Multiply(R0, T2, V1, X3, N2);
02374 c3 += Add(R, R, T, N);
02375
02376
if (c2>0)
02377 c3 += Increment(R1, N2);
02378
else if (c2<0)
02379 c3 -= Decrement(R1, N2, -c2);
02380
02381 assert(c3>=-1 && c3<=1);
02382
if (c3>0)
02383 Subtract(R, R, M, N);
02384
else if (c3<0)
02385 Add(R, R, M, N);
02386
02387
#undef M0
02388
#undef M1
02389
#undef V0
02390
#undef V1
02391
02392
#undef X0
02393
#undef X1
02394
#undef X2
02395
#undef X3
02396
}
02397
02398
#undef A0
02399
#undef A1
02400
#undef B0
02401
#undef B1
02402
02403
#undef T0
02404
#undef T1
02405
#undef T2
02406
#undef T3
02407
02408
#undef R0
02409
#undef R1
02410
#undef R2
02411
#undef R3
02412
02413
02414
02415
02416
02417
02418
02419
02420
02421
02422
02423
02424
02425
02426
02427
02428
02429
02430
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444
02445
02446
02447
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
static inline void AtomicDivide(word *Q,
const word *A,
const word *B)
02478 {
02479 word T[4];
02480 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02481 Q[0] = q.GetLowHalf();
02482 Q[1] = q.GetHighHalf();
02483
02484
#ifndef NDEBUG
02485
if (B[0] || B[1])
02486 {
02487
02488 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02489 word P[4];
02490 Portable::Multiply2(P, Q, B);
02491 Add(P, P, T, 4);
02492 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02493 }
02494
#endif
02495
}
02496
02497
02498
static void CorrectQuotientEstimate(word *R, word *T, word *Q,
const word *B,
unsigned int N)
02499 {
02500 assert(N && N%2==0);
02501
02502
if (Q[1])
02503 {
02504 T[N] = T[N+1] = 0;
02505
unsigned i;
02506
for (i=0; i<N; i+=4)
02507 LowLevel::Multiply2(T+i, Q, B+i);
02508
for (i=2; i<N; i+=4)
02509
if (LowLevel::Multiply2Add(T+i, Q, B+i))
02510 T[i+5] += (++T[i+4]==0);
02511 }
02512
else
02513 {
02514 T[N] = LinearMultiply(T, B, Q[0], N);
02515 T[N+1] = 0;
02516 }
02517
02518 word borrow = Subtract(R, R, T, N+2);
02519 assert(!borrow && !R[N+1]);
02520
02521
while (R[N] || Compare(R, B, N) >= 0)
02522 {
02523 R[N] -= Subtract(R, R, B, N);
02524 Q[1] += (++Q[0]==0);
02525 assert(Q[0] || Q[1]);
02526 }
02527 }
02528
02529
02530
02531
02532
02533
02534
02535
void Divide(word *R, word *Q, word *T,
const word *A,
unsigned int NA,
const word *B,
unsigned int NB)
02536 {
02537 assert(NA && NB && NA%2==0 && NB%2==0);
02538 assert(B[NB-1] || B[NB-2]);
02539 assert(NB <= NA);
02540
02541
02542 word *
const TA=T;
02543 word *
const TB=T+NA+2;
02544 word *
const TP=T+NA+2+NB;
02545
02546
02547
unsigned shiftWords = (B[NB-1]==0);
02548 TB[0] = TB[NB-1] = 0;
02549 CopyWords(TB+shiftWords, B, NB-shiftWords);
02550
unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02551 assert(shiftBits < WORD_BITS);
02552 ShiftWordsLeftByBits(TB, NB, shiftBits);
02553
02554
02555 TA[0] = TA[NA] = TA[NA+1] = 0;
02556 CopyWords(TA+shiftWords, A, NA);
02557 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02558
02559
if (TA[NA+1]==0 && TA[NA] <= 1)
02560 {
02561 Q[NA-NB+1] = Q[NA-NB] = 0;
02562
while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02563 {
02564 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02565 ++Q[NA-NB];
02566 }
02567 }
02568
else
02569 {
02570 NA+=2;
02571 assert(Compare(TA+NA-NB, TB, NB) < 0);
02572 }
02573
02574 word BT[2];
02575 BT[0] = TB[NB-2] + 1;
02576 BT[1] = TB[NB-1] + (BT[0]==0);
02577
02578
02579
for (
unsigned i=NA-2; i>=NB; i-=2)
02580 {
02581 AtomicDivide(Q+i-NB, TA+i-2, BT);
02582 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02583 }
02584
02585
02586 CopyWords(R, TA+shiftWords, NB);
02587 ShiftWordsRightByBits(R, NB, shiftBits);
02588 }
02589
02590
static inline unsigned int EvenWordCount(
const word *X,
unsigned int N)
02591 {
02592
while (N && X[N-2]==0 && X[N-1]==0)
02593 N-=2;
02594
return N;
02595 }
02596
02597
02598
02599
02600
02601
02602
02603
unsigned int AlmostInverse(word *R, word *T,
const word *A,
unsigned int NA,
const word *M,
unsigned int N)
02604 {
02605 assert(NA<=N && N && N%2==0);
02606
02607 word *b = T;
02608 word *c = T+N;
02609 word *f = T+2*N;
02610 word *g = T+3*N;
02611
unsigned int bcLen=2, fgLen=EvenWordCount(M, N);
02612
unsigned int k=0, s=0;
02613
02614 SetWords(T, 0, 3*N);
02615 b[0]=1;
02616 CopyWords(f, A, NA);
02617 CopyWords(g, M, N);
02618
02619
while (1)
02620 {
02621 word t=f[0];
02622
while (!t)
02623 {
02624
if (EvenWordCount(f, fgLen)==0)
02625 {
02626 SetWords(R, 0, N);
02627
return 0;
02628 }
02629
02630 ShiftWordsRightByWords(f, fgLen, 1);
02631
if (c[bcLen-1]) bcLen+=2;
02632 assert(bcLen <= N);
02633 ShiftWordsLeftByWords(c, bcLen, 1);
02634 k+=WORD_BITS;
02635 t=f[0];
02636 }
02637
02638
unsigned int i=0;
02639
while (t%2 == 0)
02640 {
02641 t>>=1;
02642 i++;
02643 }
02644 k+=i;
02645
02646
if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02647 {
02648
if (s%2==0)
02649 CopyWords(R, b, N);
02650
else
02651 Subtract(R, M, b, N);
02652
return k;
02653 }
02654
02655 ShiftWordsRightByBits(f, fgLen, i);
02656 t=ShiftWordsLeftByBits(c, bcLen, i);
02657
if (t)
02658 {
02659 c[bcLen] = t;
02660 bcLen+=2;
02661 assert(bcLen <= N);
02662 }
02663
02664
if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02665 fgLen-=2;
02666
02667
if (Compare(f, g, fgLen)==-1)
02668 {
02669 std::swap(f, g);
02670 std::swap(b, c);
02671 s++;
02672 }
02673
02674 Subtract(f, f, g, fgLen);
02675
02676
if (Add(b, b, c, bcLen))
02677 {
02678 b[bcLen] = 1;
02679 bcLen+=2;
02680 assert(bcLen <= N);
02681 }
02682 }
02683 }
02684
02685
02686
02687
02688
02689
void DivideByPower2Mod(word *R,
const word *A,
unsigned int k,
const word *M,
unsigned int N)
02690 {
02691 CopyWords(R, A, N);
02692
02693
while (k--)
02694 {
02695
if (R[0]%2==0)
02696 ShiftWordsRightByBits(R, N, 1);
02697
else
02698 {
02699 word carry = Add(R, R, M, N);
02700 ShiftWordsRightByBits(R, N, 1);
02701 R[N-1] += carry<<(WORD_BITS-1);
02702 }
02703 }
02704 }
02705
02706
02707
02708
02709
02710
void MultiplyByPower2Mod(word *R,
const word *A,
unsigned int k,
const word *M,
unsigned int N)
02711 {
02712 CopyWords(R, A, N);
02713
02714
while (k--)
02715
if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02716 Subtract(R, R, M, N);
02717 }
02718
02719
02720
02721
static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02722
02723
static inline unsigned int RoundupSize(
unsigned int n)
02724 {
02725
if (n<=8)
02726
return RoundupSizeTable[n];
02727
else if (n<=16)
02728
return 16;
02729
else if (n<=32)
02730
return 32;
02731
else if (n<=64)
02732
return 64;
02733
else return 1U << BitPrecision(n-1);
02734 }
02735
02736 Integer::Integer()
02737 : reg(2), sign(POSITIVE)
02738 {
02739 reg[0] = reg[1] = 0;
02740 }
02741
02742 Integer::Integer(
const Integer& t)
02743 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02744 {
02745 CopyWords(reg, t.reg, reg.
size());
02746 }
02747
02748 Integer::Integer(Sign s, lword value)
02749 : reg(2), sign(s)
02750 {
02751 reg[0] = word(value);
02752 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02753 }
02754
02755 Integer::Integer(
signed long value)
02756 : reg(2)
02757 {
02758
if (value >= 0)
02759 sign = POSITIVE;
02760
else
02761 {
02762 sign = NEGATIVE;
02763 value = -value;
02764 }
02765 reg[0] = word(value);
02766 reg[1] = word(SafeRightShift<WORD_BITS>((
unsigned long)value));
02767 }
02768
02769 Integer::Integer(Sign s, word high, word low)
02770 : reg(2), sign(s)
02771 {
02772 reg[0] = low;
02773 reg[1] = high;
02774 }
02775
02776 bool Integer::IsConvertableToLong()
const
02777
{
02778
if (
ByteCount() >
sizeof(
long))
02779
return false;
02780
02781
unsigned long value = reg[0];
02782 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02783
02784
if (sign==POSITIVE)
02785
return (
signed long)value >= 0;
02786
else
02787
return -(
signed long)value < 0;
02788 }
02789
02790 signed long Integer::ConvertToLong()
const
02791
{
02792 assert(
IsConvertableToLong());
02793
02794
unsigned long value = reg[0];
02795 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02796
return sign==POSITIVE ? value : -(
signed long)value;
02797 }
02798
02799 Integer::Integer(
BufferedTransformation &encodedInteger,
unsigned int byteCount, Signedness s)
02800 {
02801 Decode(encodedInteger, byteCount, s);
02802 }
02803
02804 Integer::Integer(
const byte *encodedInteger,
unsigned int byteCount, Signedness s)
02805 {
02806 Decode(encodedInteger, byteCount, s);
02807 }
02808
02809 Integer::Integer(
BufferedTransformation &bt)
02810 {
02811
BERDecode(bt);
02812 }
02813
02814 Integer::Integer(
RandomNumberGenerator &rng,
unsigned int bitcount)
02815 {
02816
Randomize(rng, bitcount);
02817 }
02818
02819 Integer::Integer(
RandomNumberGenerator &rng,
const Integer &min,
const Integer &max, RandomNumberType rnType,
const Integer &equiv,
const Integer &mod)
02820 {
02821
if (!
Randomize(rng, min, max, rnType, equiv, mod))
02822
throw Integer::RandomNumberNotFound();
02823 }
02824
02825 Integer Integer::Power2(
unsigned int e)
02826 {
02827
Integer r((word)0, BitsToWords(e+1));
02828 r.
SetBit(e);
02829
return r;
02830 }
02831
02832
template <
long i>
02833
struct NewInteger
02834 {
02835
Integer * operator()()
const
02836
{
02837
return new Integer(i);
02838 }
02839 };
02840
02841 const Integer &
Integer::Zero()
02842 {
02843
return Singleton<Integer>().Ref();
02844 }
02845
02846 const Integer &
Integer::One()
02847 {
02848
return Singleton<Integer, NewInteger<1> >().Ref();
02849 }
02850
02851 const Integer &
Integer::Two()
02852 {
02853
return Singleton<Integer, NewInteger<2> >().Ref();
02854 }
02855
02856
bool Integer::operator!()
const
02857
{
02858
return IsNegative() ?
false : (reg[0]==0 &&
WordCount()==0);
02859 }
02860
02861
Integer& Integer::operator=(
const Integer& t)
02862 {
02863
if (
this != &t)
02864 {
02865 reg.
New(RoundupSize(t.WordCount()));
02866 CopyWords(reg, t.reg, reg.
size());
02867 sign = t.sign;
02868 }
02869
return *
this;
02870 }
02871
02872 bool Integer::GetBit(
unsigned int n)
const
02873
{
02874
if (n/WORD_BITS >= reg.
size())
02875
return 0;
02876
else
02877
return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02878 }
02879
02880 void Integer::SetBit(
unsigned int n,
bool value)
02881 {
02882
if (value)
02883 {
02884 reg.
CleanGrow(RoundupSize(BitsToWords(n+1)));
02885 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02886 }
02887
else
02888 {
02889
if (n/WORD_BITS < reg.
size())
02890 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02891 }
02892 }
02893
02894 byte
Integer::GetByte(
unsigned int n)
const
02895
{
02896
if (n/WORD_SIZE >= reg.
size())
02897
return 0;
02898
else
02899
return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02900 }
02901
02902 void Integer::SetByte(
unsigned int n, byte value)
02903 {
02904 reg.
CleanGrow(RoundupSize(BytesToWords(n+1)));
02905 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02906 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02907 }
02908
02909 unsigned long Integer::GetBits(
unsigned int i,
unsigned int n)
const
02910
{
02911 assert(n <=
sizeof(
unsigned long)*8);
02912
unsigned long v = 0;
02913
for (
unsigned int j=0; j<n; j++)
02914 v |=
GetBit(i+j) << j;
02915
return v;
02916 }
02917
02918
Integer Integer::operator-()
const
02919
{
02920
Integer result(*
this);
02921 result.Negate();
02922
return result;
02923 }
02924
02925
Integer Integer::AbsoluteValue()
const
02926
{
02927
Integer result(*
this);
02928 result.sign = POSITIVE;
02929
return result;
02930 }
02931
02932
void Integer::swap(
Integer &a)
02933 {
02934 reg.
swap(a.reg);
02935 std::swap(sign, a.sign);
02936 }
02937
02938
Integer::Integer(word value,
unsigned int length)
02939 : reg(RoundupSize(length)), sign(POSITIVE)
02940 {
02941 reg[0] = value;
02942 SetWords(reg+1, 0, reg.
size()-1);
02943 }
02944
02945
template <
class T>
02946
static Integer StringToInteger(
const T *str)
02947 {
02948 word radix;
02949
02950
02951
02952
unsigned int length;
02953
for (length = 0; str[length] != 0; length++) {}
02954
02955
Integer v;
02956
02957
if (length == 0)
02958
return v;
02959
02960
switch (str[length-1])
02961 {
02962
case 'h':
02963
case 'H':
02964 radix=16;
02965
break;
02966
case 'o':
02967
case 'O':
02968 radix=8;
02969
break;
02970
case 'b':
02971
case 'B':
02972 radix=2;
02973
break;
02974
default:
02975 radix=10;
02976 }
02977
02978
if (length > 2 && str[0] ==
'0' && str[1] ==
'x')
02979 radix = 16;
02980
02981
for (
unsigned i=0; i<length; i++)
02982 {
02983 word digit;
02984
02985
if (str[i] >=
'0' && str[i] <=
'9')
02986 digit = str[i] -
'0';
02987
else if (str[i] >=
'A' && str[i] <=
'F')
02988 digit = str[i] -
'A' + 10;
02989
else if (str[i] >=
'a' && str[i] <=
'f')
02990 digit = str[i] -
'a' + 10;
02991
else
02992 digit = radix;
02993
02994
if (digit < radix)
02995 {
02996 v *= radix;
02997 v += digit;
02998 }
02999 }
03000
03001
if (str[0] ==
'-')
03002 v.
Negate();
03003
03004
return v;
03005 }
03006
03007 Integer::Integer(
const char *str)
03008 : reg(2), sign(POSITIVE)
03009 {
03010 *
this = StringToInteger(str);
03011 }
03012
03013
Integer::Integer(
const wchar_t *str)
03014 : reg(2), sign(POSITIVE)
03015 {
03016 *
this = StringToInteger(str);
03017 }
03018
03019 unsigned int Integer::WordCount()
const
03020
{
03021
return CountWords(reg, reg.
size());
03022 }
03023
03024 unsigned int Integer::ByteCount()
const
03025
{
03026
unsigned wordCount =
WordCount();
03027
if (wordCount)
03028
return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03029
else
03030
return 0;
03031 }
03032
03033 unsigned int Integer::BitCount()
const
03034
{
03035
unsigned wordCount =
WordCount();
03036
if (wordCount)
03037
return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03038
else
03039
return 0;
03040 }
03041
03042
void Integer::Decode(
const byte *input,
unsigned int inputLen, Signedness s)
03043 {
03044
StringStore store(input, inputLen);
03045 Decode(store, inputLen, s);
03046 }
03047
03048
void Integer::Decode(
BufferedTransformation &bt,
unsigned int inputLen, Signedness s)
03049 {
03050 assert(bt.
MaxRetrievable() >= inputLen);
03051
03052 byte b;
03053 bt.
Peek(b);
03054 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03055
03056
while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03057 {
03058 bt.
Skip(1);
03059 inputLen--;
03060 bt.
Peek(b);
03061 }
03062
03063 reg.
CleanNew(RoundupSize(BytesToWords(inputLen)));
03064
03065
for (
unsigned int i=inputLen; i > 0; i--)
03066 {
03067 bt.
Get(b);
03068 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03069 }
03070
03071
if (sign == NEGATIVE)
03072 {
03073
for (
unsigned i=inputLen; i<reg.
size()*WORD_SIZE; i++)
03074 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03075 TwosComplement(reg, reg.
size());
03076 }
03077 }
03078
03079 unsigned int Integer::MinEncodedSize(Signedness signedness)
const
03080
{
03081
unsigned int outputLen = STDMAX(1U,
ByteCount());
03082
if (signedness == UNSIGNED)
03083
return outputLen;
03084
if (NotNegative() && (
GetByte(outputLen-1) & 0x80))
03085 outputLen++;
03086
if (IsNegative() && *
this < -
Power2(outputLen*8-1))
03087 outputLen++;
03088
return outputLen;
03089 }
03090
03091 unsigned int Integer::Encode(byte *output,
unsigned int outputLen, Signedness signedness)
const
03092
{
03093
ArraySink sink(output, outputLen);
03094
return Encode(sink, outputLen, signedness);
03095 }
03096
03097
unsigned int Integer::Encode(
BufferedTransformation &bt,
unsigned int outputLen, Signedness signedness)
const
03098
{
03099
if (signedness == UNSIGNED || NotNegative())
03100 {
03101
for (
unsigned int i=outputLen; i > 0; i--)
03102 bt.
Put(
GetByte(i-1));
03103 }
03104
else
03105 {
03106
03107
Integer temp =
Integer::Power2(8*STDMAX(
ByteCount(), outputLen)) + *
this;
03108
for (
unsigned i=0; i<outputLen; i++)
03109 bt.
Put(temp.
GetByte(outputLen-i-1));
03110 }
03111
return outputLen;
03112 }
03113
03114 void Integer::DEREncode(
BufferedTransformation &bt)
const
03115
{
03116
DERGeneralEncoder enc(bt, INTEGER);
03117
Encode(enc,
MinEncodedSize(SIGNED), SIGNED);
03118 enc.
MessageEnd();
03119 }
03120
03121
void Integer::BERDecode(
const byte *input,
unsigned int len)
03122 {
03123
StringStore store(input, len);
03124
BERDecode(store);
03125 }
03126
03127 void Integer::BERDecode(
BufferedTransformation &bt)
03128 {
03129
BERGeneralDecoder dec(bt, INTEGER);
03130
if (!dec.
IsDefiniteLength() || dec.
MaxRetrievable() < dec.
RemainingLength())
03131 BERDecodeError();
03132 Decode(dec, dec.
RemainingLength(), SIGNED);
03133 dec.
MessageEnd();
03134 }
03135
03136 void Integer::DEREncodeAsOctetString(
BufferedTransformation &bt,
unsigned int length)
const
03137
{
03138
DERGeneralEncoder enc(bt, OCTET_STRING);
03139
Encode(enc, length);
03140 enc.
MessageEnd();
03141 }
03142
03143 void Integer::BERDecodeAsOctetString(
BufferedTransformation &bt,
unsigned int length)
03144 {
03145
BERGeneralDecoder dec(bt, OCTET_STRING);
03146
if (!dec.
IsDefiniteLength() || dec.
RemainingLength() != length)
03147 BERDecodeError();
03148 Decode(dec, length);
03149 dec.
MessageEnd();
03150 }
03151
03152 unsigned int Integer::OpenPGPEncode(byte *output,
unsigned int len)
const
03153
{
03154
ArraySink sink(output, len);
03155
return OpenPGPEncode(sink);
03156 }
03157
03158 unsigned int Integer::OpenPGPEncode(
BufferedTransformation &bt)
const
03159
{
03160 word16 bitCount =
BitCount();
03161 bt.
PutWord16(bitCount);
03162
return 2 +
Encode(bt, BitsToBytes(bitCount));
03163 }
03164
03165
void Integer::OpenPGPDecode(
const byte *input,
unsigned int len)
03166 {
03167
StringStore store(input, len);
03168 OpenPGPDecode(store);
03169 }
03170
03171
void Integer::OpenPGPDecode(
BufferedTransformation &bt)
03172 {
03173 word16 bitCount;
03174
if (bt.
GetWord16(bitCount) != 2 || bt.
MaxRetrievable() < BitsToBytes(bitCount))
03175
throw OpenPGPDecodeErr();
03176 Decode(bt, BitsToBytes(bitCount));
03177 }
03178
03179
void Integer::Randomize(
RandomNumberGenerator &rng,
unsigned int nbits)
03180 {
03181
const unsigned int nbytes = nbits/8 + 1;
03182
SecByteBlock buf(nbytes);
03183 rng.
GenerateBlock(buf, nbytes);
03184
if (nbytes)
03185 buf[0] = (byte)Crop(buf[0], nbits % 8);
03186 Decode(buf, nbytes, UNSIGNED);
03187 }
03188
03189
void Integer::Randomize(
RandomNumberGenerator &rng,
const Integer &min,
const Integer &max)
03190 {
03191
if (min > max)
03192
throw InvalidArgument(
"Integer: Min must be no greater than Max");
03193
03194
Integer range = max - min;
03195
const unsigned int nbits = range.
BitCount();
03196
03197
do
03198 {
03199
Randomize(rng, nbits);
03200 }
03201
while (*
this > range);
03202
03203 *
this += min;
03204 }
03205
03206 bool Integer::Randomize(
RandomNumberGenerator &rng,
const Integer &min,
const Integer &max, RandomNumberType rnType,
const Integer &equiv,
const Integer &mod)
03207 {
03208
return GenerateRandomNoThrow(rng, MakeParameters(
"Min", min)(
"Max", max)(
"RandomNumberType", rnType)(
"EquivalentTo", equiv)(
"Mod", mod));
03209 }
03210
03211
class KDF2_RNG :
public RandomNumberGenerator
03212 {
03213
public:
03214 KDF2_RNG(
const byte *seed,
unsigned int seedSize)
03215 : m_counter(0), m_counterAndSeed(seedSize + 4)
03216 {
03217 memcpy(m_counterAndSeed + 4, seed, seedSize);
03218 }
03219
03220 byte
GenerateByte()
03221 {
03222 byte b;
03223
GenerateBlock(&b, 1);
03224
return b;
03225 }
03226
03227
void GenerateBlock(byte *output,
unsigned int size)
03228 {
03229 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03230 ++m_counter;
03231
P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03232 }
03233
03234
private:
03235 word32 m_counter;
03236
SecByteBlock m_counterAndSeed;
03237 };
03238
03239
bool Integer::GenerateRandomNoThrow(
RandomNumberGenerator &i_rng,
const NameValuePairs ¶ms)
03240 {
03241
Integer min = params.
GetValueWithDefault(
"Min", Integer::Zero());
03242
Integer max;
03243
if (!params.
GetValue(
"Max", max))
03244 {
03245
int bitLength;
03246
if (params.
GetIntValue(
"BitLength", bitLength))
03247 max =
Integer::Power2(bitLength);
03248
else
03249
throw InvalidArgument(
"Integer: missing Max argument");
03250 }
03251
if (min > max)
03252
throw InvalidArgument(
"Integer: Min must be no greater than Max");
03253
03254
Integer equiv = params.
GetValueWithDefault(
"EquivalentTo", Integer::Zero());
03255
Integer mod = params.
GetValueWithDefault(
"Mod", Integer::One());
03256
03257
if (equiv.
IsNegative() || equiv >= mod)
03258
throw InvalidArgument(
"Integer: invalid EquivalentTo and/or Mod argument");
03259
03260 Integer::RandomNumberType rnType = params.
GetValueWithDefault(
"RandomNumberType", Integer::ANY);
03261
03262 member_ptr<KDF2_RNG> kdf2Rng;
03263
ConstByteArrayParameter seed;
03264
if (params.
GetValue(
"Seed", seed))
03265 {
03266
ByteQueue bq;
03267
DERSequenceEncoder seq(bq);
03268 min.
DEREncode(seq);
03269 max.
DEREncode(seq);
03270 equiv.
DEREncode(seq);
03271 mod.
DEREncode(seq);
03272 DEREncodeUnsigned(seq, rnType);
03273 DEREncodeOctetString(seq, seed.
begin(), seed.
size());
03274 seq.MessageEnd();
03275
03276
SecByteBlock finalSeed(bq.
MaxRetrievable());
03277 bq.
Get(finalSeed, finalSeed.size());
03278 kdf2Rng.reset(
new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03279 }
03280
RandomNumberGenerator &rng = kdf2Rng.get() ? (
RandomNumberGenerator &)*kdf2Rng : i_rng;
03281
03282
switch (rnType)
03283 {
03284
case ANY:
03285
if (mod ==
One())
03286
Randomize(rng, min, max);
03287
else
03288 {
03289
Integer min1 = min + (equiv-min)%mod;
03290
if (max < min1)
03291
return false;
03292
Randomize(rng,
Zero(), (max - min1) / mod);
03293 *
this *= mod;
03294 *
this += min1;
03295 }
03296
return true;
03297
03298
case PRIME:
03299 {
03300
const PrimeSelector *pSelector = params.
GetValueWithDefault(Name::PointerToPrimeSelector(), (
const PrimeSelector *)NULL);
03301
03302
int i;
03303 i = 0;
03304
while (1)
03305 {
03306
if (++i==16)
03307 {
03308
03309
Integer first = min;
03310
if (FirstPrime(first, max, equiv, mod, pSelector))
03311 {
03312
03313 *
this = first;
03314
if (!FirstPrime(first, max, equiv, mod, pSelector))
03315
return true;
03316 }
03317
else
03318
return false;
03319 }
03320
03321
Randomize(rng, min, max);
03322
if (FirstPrime(*
this, STDMIN(*
this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03323
return true;
03324 }
03325 }
03326
03327
default:
03328
throw InvalidArgument(
"Integer: invalid RandomNumberType argument");
03329 }
03330 }
03331
03332 std::istream& operator>>(std::istream& in,
Integer &a)
03333 {
03334
char c;
03335
unsigned int length = 0;
03336
SecBlock<char> str(length + 16);
03337
03338 std::ws(in);
03339
03340
do
03341 {
03342 in.read(&c, 1);
03343 str[length++] = c;
03344
if (length >= str.size())
03345 str.Grow(length + 16);
03346 }
03347
while (in && (c==
'-' || c==
'x' || (c>=
'0' && c<='9') || (c>=
'a' && c<='f') || (c>=
'A' && c<=
'F') || c==
'h' || c==
'H' || c==
'o' || c==
'O' || c==
',' || c==
'.'));
03348
03349
if (in.gcount())
03350 in.putback(c);
03351 str[length-1] =
'\0';
03352 a =
Integer(str);
03353
03354
return in;
03355 }
03356
03357 std::ostream& operator<<(std::ostream& out,
const Integer &a)
03358 {
03359
03360
long f = out.flags() & std::ios::basefield;
03361
int base, block;
03362
char suffix;
03363
switch(f)
03364 {
03365
case std::ios::oct :
03366 base = 8;
03367 block = 8;
03368 suffix =
'o';
03369
break;
03370
case std::ios::hex :
03371 base = 16;
03372 block = 4;
03373 suffix =
'h';
03374
break;
03375
default :
03376 base = 10;
03377 block = 3;
03378 suffix =
'.';
03379 }
03380
03381
SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03382
Integer temp1=a, temp2;
03383
unsigned i=0;
03384
const char vec[]=
"0123456789ABCDEF";
03385
03386
if (a.IsNegative())
03387 {
03388 out <<
'-';
03389 temp1.
Negate();
03390 }
03391
03392
if (!a)
03393 out <<
'0';
03394
03395
while (!!temp1)
03396 {
03397 word digit;
03398
Integer::Divide(digit, temp2, temp1, base);
03399 s[i++]=vec[digit];
03400 temp1=temp2;
03401 }
03402
03403
while (i--)
03404 {
03405 out << s[i];
03406
03407
03408 }
03409
return out << suffix;
03410 }
03411
03412
Integer& Integer::operator++()
03413 {
03414
if (NotNegative())
03415 {
03416
if (Increment(reg, reg.
size()))
03417 {
03418 reg.
CleanGrow(2*reg.
size());
03419 reg[reg.
size()/2]=1;
03420 }
03421 }
03422
else
03423 {
03424 word borrow = Decrement(reg, reg.
size());
03425 assert(!borrow);
03426
if (
WordCount()==0)
03427 *
this =
Zero();
03428 }
03429
return *
this;
03430 }
03431
03432
Integer& Integer::operator--()
03433 {
03434
if (IsNegative())
03435 {
03436
if (Increment(reg, reg.
size()))
03437 {
03438 reg.
CleanGrow(2*reg.
size());
03439 reg[reg.
size()/2]=1;
03440 }
03441 }
03442
else
03443 {
03444
if (Decrement(reg, reg.
size()))
03445 *
this = -
One();
03446 }
03447
return *
this;
03448 }
03449
03450
void PositiveAdd(
Integer &sum,
const Integer &a,
const Integer& b)
03451 {
03452 word carry;
03453
if (a.reg.size() == b.reg.size())
03454 carry = Add(sum.
reg, a.reg, b.reg, a.reg.size());
03455
else if (a.reg.size() > b.reg.size())
03456 {
03457 carry = Add(sum.
reg, a.reg, b.reg, b.reg.size());
03458 CopyWords(sum.
reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03459 carry = Increment(sum.
reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03460 }
03461
else
03462 {
03463 carry = Add(sum.
reg, a.reg, b.reg, a.reg.size());
03464 CopyWords(sum.
reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03465 carry = Increment(sum.
reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03466 }
03467
03468
if (carry)
03469 {
03470 sum.
reg.
CleanGrow(2*sum.
reg.
size());
03471 sum.
reg[sum.
reg.
size()/2] = 1;
03472 }
03473 sum.
sign = Integer::POSITIVE;
03474 }
03475
03476
void PositiveSubtract(
Integer &diff,
const Integer &a,
const Integer& b)
03477 {
03478
unsigned aSize = a.WordCount();
03479 aSize += aSize%2;
03480
unsigned bSize = b.WordCount();
03481 bSize += bSize%2;
03482
03483
if (aSize == bSize)
03484 {
03485
if (Compare(a.reg, b.reg, aSize) >= 0)
03486 {
03487 Subtract(diff.
reg, a.reg, b.reg, aSize);
03488 diff.
sign = Integer::POSITIVE;
03489 }
03490
else
03491 {
03492 Subtract(diff.
reg, b.reg, a.reg, aSize);
03493 diff.
sign = Integer::NEGATIVE;
03494 }
03495 }
03496
else if (aSize > bSize)
03497 {
03498 word borrow = Subtract(diff.
reg, a.reg, b.reg, bSize);
03499 CopyWords(diff.
reg+bSize, a.reg+bSize, aSize-bSize);
03500 borrow = Decrement(diff.
reg+bSize, aSize-bSize, borrow);
03501 assert(!borrow);
03502 diff.
sign = Integer::POSITIVE;
03503 }
03504
else
03505 {
03506 word borrow = Subtract(diff.
reg, b.reg, a.reg, aSize);
03507 CopyWords(diff.
reg+aSize, b.reg+aSize, bSize-aSize);
03508 borrow = Decrement(diff.
reg+aSize, bSize-aSize, borrow);
03509 assert(!borrow);
03510 diff.
sign = Integer::NEGATIVE;
03511 }
03512 }
03513
03514
Integer Integer::Plus(
const Integer& b)
const
03515
{
03516
Integer sum((word)0, STDMAX(reg.
size(), b.reg.size()));
03517
if (NotNegative())
03518 {
03519
if (b.NotNegative())
03520 PositiveAdd(sum, *
this, b);
03521
else
03522 PositiveSubtract(sum, *
this, b);
03523 }
03524
else
03525 {
03526
if (b.NotNegative())
03527 PositiveSubtract(sum, b, *
this);
03528
else
03529 {
03530 PositiveAdd(sum, *
this, b);
03531 sum.sign = Integer::NEGATIVE;
03532 }
03533 }
03534
return sum;
03535 }
03536
03537
Integer& Integer::operator+=(
const Integer& t)
03538 {
03539 reg.
CleanGrow(t.reg.size());
03540
if (NotNegative())
03541 {
03542
if (t.NotNegative())
03543 PositiveAdd(*
this, *
this, t);
03544
else
03545 PositiveSubtract(*
this, *
this, t);
03546 }
03547
else
03548 {
03549
if (t.NotNegative())
03550 PositiveSubtract(*
this, t, *
this);
03551
else
03552 {
03553 PositiveAdd(*
this, *
this, t);
03554 sign = Integer::NEGATIVE;
03555 }
03556 }
03557
return *
this;
03558 }
03559
03560
Integer Integer::Minus(
const Integer& b)
const
03561
{
03562
Integer diff((word)0, STDMAX(reg.
size(), b.reg.size()));
03563
if (NotNegative())
03564 {
03565
if (b.NotNegative())
03566 PositiveSubtract(diff, *
this, b);
03567
else
03568 PositiveAdd(diff, *
this, b);
03569 }
03570
else
03571 {
03572
if (b.NotNegative())
03573 {
03574 PositiveAdd(diff, *
this, b);
03575 diff.sign = Integer::NEGATIVE;
03576 }
03577
else
03578 PositiveSubtract(diff, b, *
this);
03579 }
03580
return diff;
03581 }
03582
03583
Integer& Integer::operator-=(
const Integer& t)
03584 {
03585 reg.
CleanGrow(t.reg.size());
03586
if (NotNegative())
03587 {
03588
if (t.NotNegative())
03589 PositiveSubtract(*
this, *
this, t);
03590
else
03591 PositiveAdd(*
this, *
this, t);
03592 }
03593
else
03594 {
03595
if (t.NotNegative())
03596 {
03597 PositiveAdd(*
this, *
this, t);
03598 sign = Integer::NEGATIVE;
03599 }
03600
else
03601 PositiveSubtract(*
this, t, *
this);
03602 }
03603
return *
this;
03604 }
03605
03606
Integer& Integer::operator<<=(
unsigned int n)
03607 {
03608
const unsigned int wordCount =
WordCount();
03609
const unsigned int shiftWords = n / WORD_BITS;
03610
const unsigned int shiftBits = n % WORD_BITS;
03611
03612 reg.
CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03613 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03614 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03615
return *
this;
03616 }
03617
03618
Integer& Integer::operator>>=(
unsigned int n)
03619 {
03620
const unsigned int wordCount =
WordCount();
03621
const unsigned int shiftWords = n / WORD_BITS;
03622
const unsigned int shiftBits = n % WORD_BITS;
03623
03624 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03625
if (wordCount > shiftWords)
03626 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03627
if (IsNegative() &&
WordCount()==0)
03628 *
this =
Zero();
03629
return *
this;
03630 }
03631
03632
void PositiveMultiply(
Integer &product,
const Integer &a,
const Integer &b)
03633 {
03634
unsigned aSize = RoundupSize(a.WordCount());
03635
unsigned bSize = RoundupSize(b.WordCount());
03636
03637 product.
reg.
CleanNew(RoundupSize(aSize+bSize));
03638 product.
sign = Integer::POSITIVE;
03639
03640
SecAlignedWordBlock workspace(aSize + bSize);
03641 AsymmetricMultiply(product.
reg, workspace, a.reg, aSize, b.reg, bSize);
03642 }
03643
03644
void Multiply(
Integer &product,
const Integer &a,
const Integer &b)
03645 {
03646 PositiveMultiply(product, a, b);
03647
03648
if (a.NotNegative() != b.NotNegative())
03649 product.
Negate();
03650 }
03651
03652
Integer Integer::Times(
const Integer &b)
const
03653
{
03654
Integer product;
03655 Multiply(product, *
this, b);
03656
return product;
03657 }
03658
03659
03660
03661
03662
03663
03664
03665
03666
03667
03668
03669
03670
03671
03672
03673
03674
03675
03676
03677
03678
03679
03680
03681
void PositiveDivide(
Integer &remainder,
Integer "ient,
03682
const Integer &a,
const Integer &b)
03683 {
03684
unsigned aSize = a.WordCount();
03685
unsigned bSize = b.WordCount();
03686
03687
if (!bSize)
03688
throw Integer::DivideByZero();
03689
03690
if (a.PositiveCompare(b) == -1)
03691 {
03692 remainder = a;
03693 remainder.
sign = Integer::POSITIVE;
03694 quotient =
Integer::Zero();
03695
return;
03696 }
03697
03698 aSize += aSize%2;
03699 bSize += bSize%2;
03700
03701 remainder.
reg.
CleanNew(RoundupSize(bSize));
03702 remainder.
sign = Integer::POSITIVE;
03703 quotient.
reg.
CleanNew(RoundupSize(aSize-bSize+2));
03704 quotient.
sign = Integer::POSITIVE;
03705
03706
SecAlignedWordBlock T(aSize+2*bSize+4);
03707 Divide(remainder.
reg, quotient.
reg, T, a.reg, aSize, b.reg, bSize);
03708 }
03709
03710 void Integer::Divide(
Integer &remainder,
Integer "ient,
const Integer ÷nd,
const Integer &divisor)
03711 {
03712 PositiveDivide(remainder, quotient, dividend, divisor);
03713
03714
if (dividend.
IsNegative())
03715 {
03716 quotient.
Negate();
03717
if (remainder.
NotZero())
03718 {
03719 --quotient;
03720 remainder = divisor.
AbsoluteValue() - remainder;
03721 }
03722 }
03723
03724
if (divisor.
IsNegative())
03725 quotient.
Negate();
03726 }
03727
03728 void Integer::DivideByPowerOf2(
Integer &r,
Integer &q,
const Integer &a,
unsigned int n)
03729 {
03730 q = a;
03731 q >>= n;
03732
03733
const unsigned int wordCount = BitsToWords(n);
03734
if (wordCount <= a.WordCount())
03735 {
03736 r.
reg.
resize(RoundupSize(wordCount));
03737 CopyWords(r.
reg, a.reg, wordCount);
03738 SetWords(r.
reg+wordCount, 0, r.
reg.
size()-wordCount);
03739
if (n % WORD_BITS != 0)
03740 r.
reg[wordCount-1] %= (1 << (n % WORD_BITS));
03741 }
03742
else
03743 {
03744 r.
reg.
resize(RoundupSize(a.WordCount()));
03745 CopyWords(r.
reg, a.reg, r.
reg.
size());
03746 }
03747 r.
sign = POSITIVE;
03748
03749
if (a.IsNegative() && r.
NotZero())
03750 {
03751 --q;
03752 r =
Power2(n) - r;
03753 }
03754 }
03755
03756
Integer Integer::DividedBy(
const Integer &b)
const
03757
{
03758
Integer remainder, quotient;
03759
Integer::Divide(remainder, quotient, *
this, b);
03760
return quotient;
03761 }
03762
03763
Integer Integer::Modulo(
const Integer &b)
const
03764
{
03765
Integer remainder, quotient;
03766
Integer::Divide(remainder, quotient, *
this, b);
03767
return remainder;
03768 }
03769
03770 void Integer::Divide(word &remainder,
Integer "ient,
const Integer ÷nd, word divisor)
03771 {
03772
if (!divisor)
03773
throw Integer::DivideByZero();
03774
03775 assert(divisor);
03776
03777
if ((divisor & (divisor-1)) == 0)
03778 {
03779 quotient = dividend >> (BitPrecision(divisor)-1);
03780 remainder = dividend.
reg[0] & (divisor-1);
03781
return;
03782 }
03783
03784
unsigned int i = dividend.
WordCount();
03785 quotient.
reg.
CleanNew(RoundupSize(i));
03786 remainder = 0;
03787
while (i--)
03788 {
03789 quotient.
reg[i] = DWord(dividend.
reg[i], remainder) / divisor;
03790 remainder = DWord(dividend.
reg[i], remainder) % divisor;
03791 }
03792
03793
if (dividend.
NotNegative())
03794 quotient.
sign = POSITIVE;
03795
else
03796 {
03797 quotient.
sign = NEGATIVE;
03798
if (remainder)
03799 {
03800 --quotient;
03801 remainder = divisor - remainder;
03802 }
03803 }
03804 }
03805
03806
Integer Integer::DividedBy(word b)
const
03807
{
03808 word remainder;
03809
Integer quotient;
03810
Integer::Divide(remainder, quotient, *
this, b);
03811
return quotient;
03812 }
03813
03814 word Integer::Modulo(word divisor)
const
03815
{
03816
if (!divisor)
03817
throw Integer::DivideByZero();
03818
03819 assert(divisor);
03820
03821 word remainder;
03822
03823
if ((divisor & (divisor-1)) == 0)
03824 remainder = reg[0] & (divisor-1);
03825
else
03826 {
03827
unsigned int i =
WordCount();
03828
03829
if (divisor <= 5)
03830 {
03831 DWord sum(0, 0);
03832
while (i--)
03833 sum += reg[i];
03834 remainder = sum % divisor;
03835 }
03836
else
03837 {
03838 remainder = 0;
03839
while (i--)
03840 remainder = DWord(reg[i], remainder) % divisor;
03841 }
03842 }
03843
03844
if (IsNegative() && remainder)
03845 remainder = divisor - remainder;
03846
03847
return remainder;
03848 }
03849
03850
void Integer::Negate()
03851 {
03852
if (!!(*this))
03853 sign = Sign(1-sign);
03854 }
03855
03856
int Integer::PositiveCompare(
const Integer& t)
const
03857
{
03858
unsigned size =
WordCount(), tSize = t.WordCount();
03859
03860
if (size == tSize)
03861
return CryptoPP::Compare(reg, t.reg, size);
03862
else
03863
return size > tSize ? 1 : -1;
03864 }
03865
03866 int Integer::Compare(
const Integer& t)
const
03867
{
03868
if (NotNegative())
03869 {
03870
if (t.NotNegative())
03871
return PositiveCompare(t);
03872
else
03873
return 1;
03874 }
03875
else
03876 {
03877
if (t.NotNegative())
03878
return -1;
03879
else
03880
return -PositiveCompare(t);
03881 }
03882 }
03883
03884 Integer Integer::SquareRoot()
const
03885
{
03886
if (!IsPositive())
03887
return Zero();
03888
03889
03890
Integer x, y =
Power2((
BitCount()+1)/2);
03891 assert(y*y >= *
this);
03892
03893
do
03894 {
03895 x = y;
03896 y = (x + *
this/x) >> 1;
03897 }
while (y<x);
03898
03899
return x;
03900 }
03901
03902 bool Integer::IsSquare()
const
03903
{
03904
Integer r =
SquareRoot();
03905
return *
this == r.
Squared();
03906 }
03907
03908 bool Integer::IsUnit()
const
03909
{
03910
return (
WordCount() == 1) && (reg[0] == 1);
03911 }
03912
03913 Integer Integer::MultiplicativeInverse()
const
03914
{
03915
return IsUnit() ? *
this :
Zero();
03916 }
03917
03918 Integer a_times_b_mod_c(
const Integer &x,
const Integer& y,
const Integer& m)
03919 {
03920
return x*y%m;
03921 }
03922
03923 Integer a_exp_b_mod_c(
const Integer &x,
const Integer& e,
const Integer& m)
03924 {
03925
ModularArithmetic mr(m);
03926
return mr.
Exponentiate(x, e);
03927 }
03928
03929 Integer Integer::Gcd(
const Integer &a,
const Integer &b)
03930 {
03931
return EuclideanDomainOf<Integer>().
Gcd(a, b);
03932 }
03933
03934 Integer Integer::InverseMod(
const Integer &m)
const
03935
{
03936 assert(m.
NotNegative());
03937
03938
if (IsNegative() || *
this>=m)
03939
return (*
this%m).
InverseMod(m);
03940
03941
if (m.
IsEven())
03942 {
03943
if (!m || IsEven())
03944
return Zero();
03945
if (*
this ==
One())
03946
return One();
03947
03948
Integer u = m.
InverseMod(*
this);
03949
return !u ?
Zero() : (m*(*
this-u)+1)/(*this);
03950 }
03951
03952
SecBlock<word> T(m.
reg.
size() * 4);
03953
Integer r((word)0, m.
reg.
size());
03954
unsigned k = AlmostInverse(r.reg, T, reg, reg.
size(), m.
reg, m.
reg.
size());
03955 DivideByPower2Mod(r.reg, r.reg, k, m.
reg, m.
reg.
size());
03956
return r;
03957 }
03958
03959 word
Integer::InverseMod(
const word mod)
const
03960
{
03961 word g0 = mod, g1 = *
this % mod;
03962 word v0 = 0, v1 = 1;
03963 word y;
03964
03965
while (g1)
03966 {
03967
if (g1 == 1)
03968
return v1;
03969 y = g0 / g1;
03970 g0 = g0 % g1;
03971 v0 += y * v1;
03972
03973
if (!g0)
03974
break;
03975
if (g0 == 1)
03976
return mod-v0;
03977 y = g1 / g0;
03978 g1 = g1 % g0;
03979 v1 += y * v0;
03980 }
03981
return 0;
03982 }
03983
03984
03985
03986 ModularArithmetic::ModularArithmetic(
BufferedTransformation &bt)
03987 {
03988
BERSequenceDecoder seq(bt);
03989
OID oid(seq);
03990
if (oid != ASN1::prime_field())
03991 BERDecodeError();
03992 modulus.
BERDecode(seq);
03993 seq.MessageEnd();
03994 result.
reg.
resize(modulus.
reg.
size());
03995 }
03996
03997
void ModularArithmetic::DEREncode(
BufferedTransformation &bt)
const
03998
{
03999
DERSequenceEncoder seq(bt);
04000 ASN1::prime_field().DEREncode(seq);
04001 modulus.
DEREncode(seq);
04002 seq.MessageEnd();
04003 }
04004
04005
void ModularArithmetic::DEREncodeElement(
BufferedTransformation &out,
const Element &a)
const
04006
{
04007 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04008 }
04009
04010
void ModularArithmetic::BERDecodeElement(
BufferedTransformation &in, Element &a)
const
04011
{
04012 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04013 }
04014
04015
const Integer& ModularArithmetic::Half(
const Integer &a)
const
04016
{
04017
if (a.reg.size()==modulus.
reg.
size())
04018 {
04019 CryptoPP::DivideByPower2Mod(result.
reg.
begin(), a.reg, 1, modulus.
reg, a.reg.size());
04020
return result;
04021 }
04022
else
04023
return result1 = (a.IsEven() ? (a >> 1) : ((a+modulus) >> 1));
04024 }
04025
04026
const Integer& ModularArithmetic::Add(
const Integer &a,
const Integer &b)
const
04027
{
04028
if (a.reg.size()==modulus.
reg.
size() && b.reg.size()==modulus.
reg.
size())
04029 {
04030
if (CryptoPP::Add(result.
reg.
begin(), a.reg, b.reg, a.reg.size())
04031 || Compare(result.
reg, modulus.
reg, a.reg.size()) >= 0)
04032 {
04033 CryptoPP::Subtract(result.
reg.
begin(), result.
reg, modulus.
reg, a.reg.size());
04034 }
04035
return result;
04036 }
04037
else
04038 {
04039 result1 = a+b;
04040
if (result1 >= modulus)
04041 result1 -= modulus;
04042
return result1;
04043 }
04044 }
04045
04046
Integer& ModularArithmetic::Accumulate(
Integer &a,
const Integer &b)
const
04047
{
04048
if (a.reg.size()==modulus.
reg.
size() && b.reg.size()==modulus.
reg.
size())
04049 {
04050
if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04051 || Compare(a.reg, modulus.
reg, a.reg.size()) >= 0)
04052 {
04053 CryptoPP::Subtract(a.reg, a.reg, modulus.
reg, a.reg.size());
04054 }
04055 }
04056
else
04057 {
04058 a+=b;
04059
if (a>=modulus)
04060 a-=modulus;
04061 }
04062
04063
return a;
04064 }
04065
04066
const Integer& ModularArithmetic::Subtract(
const Integer &a,
const Integer &b)
const
04067
{
04068
if (a.reg.size()==modulus.
reg.
size() && b.reg.size()==modulus.
reg.
size())
04069 {
04070
if (CryptoPP::Subtract(result.
reg.
begin(), a.reg, b.reg, a.reg.size()))
04071 CryptoPP::Add(result.
reg.
begin(), result.
reg, modulus.
reg, a.reg.size());
04072
return result;
04073 }
04074
else
04075 {
04076 result1 = a-b;
04077
if (result1.
IsNegative())
04078 result1 += modulus;
04079
return result1;
04080 }
04081 }
04082
04083
Integer& ModularArithmetic::Reduce(
Integer &a,
const Integer &b)
const
04084
{
04085
if (a.reg.size()==modulus.
reg.
size() && b.reg.size()==modulus.
reg.
size())
04086 {
04087
if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04088 CryptoPP::Add(a.reg, a.reg, modulus.
reg, a.reg.size());
04089 }
04090
else
04091 {
04092 a-=b;
04093
if (a.IsNegative())
04094 a+=modulus;
04095 }
04096
04097
return a;
04098 }
04099
04100
const Integer& ModularArithmetic::Inverse(
const Integer &a)
const
04101
{
04102
if (!a)
04103
return a;
04104
04105 CopyWords(result.
reg.
begin(), modulus.
reg, modulus.
reg.
size());
04106
if (CryptoPP::Subtract(result.
reg.
begin(), result.
reg, a.reg, a.reg.size()))
04107 Decrement(result.
reg.
begin()+a.reg.size(), 1, modulus.
reg.
size()-a.reg.size());
04108
04109
return result;
04110 }
04111
04112
Integer ModularArithmetic::CascadeExponentiate(
const Integer &x,
const Integer &e1,
const Integer &y,
const Integer &e2)
const
04113
{
04114
if (modulus.
IsOdd())
04115 {
04116
MontgomeryRepresentation dr(modulus);
04117
return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04118 }
04119
else
04120
return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04121 }
04122
04123
void ModularArithmetic::SimultaneousExponentiate(
Integer *results,
const Integer &base,
const Integer *exponents,
unsigned int exponentsCount)
const
04124
{
04125
if (modulus.
IsOdd())
04126 {
04127
MontgomeryRepresentation dr(modulus);
04128 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04129
for (
unsigned int i=0; i<exponentsCount; i++)
04130 results[i] = dr.ConvertOut(results[i]);
04131 }
04132
else
04133
AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04134 }
04135
04136 MontgomeryRepresentation::MontgomeryRepresentation(
const Integer &m)
04137 :
ModularArithmetic(m),
04138 u((word)0, modulus.reg.size()),
04139 workspace(5*modulus.reg.size())
04140 {
04141
if (!modulus.IsOdd())
04142
throw InvalidArgument(
"MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04143
04144 RecursiveInverseModPower2(u.reg, workspace, modulus.reg, modulus.reg.size());
04145 }
04146
04147
const Integer& MontgomeryRepresentation::Multiply(
const Integer &a,
const Integer &b)
const
04148
{
04149 word *
const T = workspace.
begin();
04150 word *
const R = result.
reg.
begin();
04151
const unsigned int N = modulus.
reg.
size();
04152 assert(a.reg.size()<=N && b.reg.size()<=N);
04153
04154 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04155 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04156 MontgomeryReduce(R, T+2*N, T, modulus.
reg, u.
reg, N);
04157
return result;
04158 }
04159
04160
const Integer& MontgomeryRepresentation::Square(
const Integer &a)
const
04161
{
04162 word *
const T = workspace.
begin();
04163 word *
const R = result.
reg.
begin();
04164
const unsigned int N = modulus.
reg.
size();
04165 assert(a.reg.size()<=N);
04166
04167 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04168 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04169 MontgomeryReduce(R, T+2*N, T, modulus.
reg, u.
reg, N);
04170
return result;
04171 }
04172
04173
Integer MontgomeryRepresentation::ConvertOut(
const Integer &a)
const
04174
{
04175 word *
const T = workspace.
begin();
04176 word *
const R = result.
reg.
begin();
04177
const unsigned int N = modulus.
reg.
size();
04178 assert(a.reg.size()<=N);
04179
04180 CopyWords(T, a.reg, a.reg.size());
04181 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04182 MontgomeryReduce(R, T+2*N, T, modulus.
reg, u.
reg, N);
04183
return result;
04184 }
04185
04186
const Integer& MontgomeryRepresentation::MultiplicativeInverse(
const Integer &a)
const
04187
{
04188
04189 word *
const T = workspace.
begin();
04190 word *
const R = result.
reg.
begin();
04191
const unsigned int N = modulus.
reg.
size();
04192 assert(a.reg.size()<=N);
04193
04194 CopyWords(T, a.reg, a.reg.size());
04195 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04196 MontgomeryReduce(R, T+2*N, T, modulus.
reg, u.
reg, N);
04197
unsigned k = AlmostInverse(R, T, R, N, modulus.
reg, N);
04198
04199
04200
04201
if (k>N*WORD_BITS)
04202 DivideByPower2Mod(R, R, k-N*WORD_BITS, modulus.
reg, N);
04203
else
04204 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, modulus.
reg, N);
04205
04206
return result;
04207 }
04208
04209 NAMESPACE_END
04210
04211
#endif