Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

integer.cpp

00001 // integer.cpp - written and placed in the public domain by Wei Dai 00002 // contains public domain code contributed by Alister Lee and Leonard Janke 00003 00004 #include "pch.h" 00005 00006 #ifndef CRYPTOPP_IMPORTS 00007 00008 #include "integer.h" 00009 #include "modarith.h" 00010 #include "nbtheory.h" 00011 #include "asn.h" 00012 #include "oids.h" 00013 #include "words.h" 00014 #include "algparam.h" 00015 #include "pubkey.h" // for P1363_KDF2 00016 #include "sha.h" 00017 00018 #include <iostream> 00019 00020 #ifdef SSE2_INTRINSICS_AVAILABLE 00021 #ifdef __GNUC__ 00022 #include <xmmintrin.h> 00023 #include <signal.h> 00024 #include <setjmp.h> 00025 #ifdef CRYPTOPP_MEMALIGN_AVAILABLE 00026 #include <malloc.h> 00027 #else 00028 #include <stdlib.h> 00029 #endif 00030 #else 00031 #include <emmintrin.h> 00032 #endif 00033 #elif defined(_MSC_VER) && defined(_M_IX86) 00034 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.") 00035 #elif defined(__GNUC__) && defined(__i386__) 00036 #warning "You do not have GCC 3.3 or later, or did not specify -msse2 compiler option, so use of SSE2 intrinsics will be disabled." 00037 #endif 00038 00039 NAMESPACE_BEGIN(CryptoPP) 00040 00041 bool FunctionAssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt) 00042 { 00043 if (valueType != typeid(Integer)) 00044 return false; 00045 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt); 00046 return true; 00047 } 00048 00049 static const char s_RunAtStartup = (AssignIntToInteger = FunctionAssignIntToInteger, 0); 00050 00051 #ifdef SSE2_INTRINSICS_AVAILABLE 00052 template <class T> 00053 CPP_TYPENAME AllocatorBase<T>::pointer AlignedAllocator<T>::allocate(size_type n, const void *) 00054 { 00055 CheckSize(n); 00056 if (n == 0) 00057 return NULL; 00058 if (n >= 4) 00059 { 00060 void *p; 00061 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE 00062 while (!(p = _mm_malloc(sizeof(T)*n, 16))) 00063 #elif defined(CRYPTOPP_MEMALIGN_AVAILABLE) 00064 while (!(p = memalign(16, sizeof(T)*n))) 00065 #elif defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16) 00066 while (!(p = malloc(sizeof(T)*n))) 00067 #else 00068 while (!(p = (byte *)malloc(sizeof(T)*n + 8))) // assume malloc alignment is at least 8 00069 #endif 00070 CallNewHandler(); 00071 00072 #ifdef CRYPTOPP_NO_ALIGNED_ALLOC 00073 assert(m_pBlock == NULL); 00074 m_pBlock = p; 00075 if (!IsAlignedOn(p, 16)) 00076 { 00077 assert(IsAlignedOn(p, 8)); 00078 p = (byte *)p + 8; 00079 } 00080 #endif 00081 00082 assert(IsAlignedOn(p, 16)); 00083 return (T*)p; 00084 } 00085 return new T[n]; 00086 } 00087 00088 template <class T> 00089 void AlignedAllocator<T>::deallocate(void *p, size_type n) 00090 { 00091 memset(p, 0, n*sizeof(T)); 00092 if (n >= 4) 00093 { 00094 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE 00095 _mm_free(p); 00096 #elif defined(CRYPTOPP_NO_ALIGNED_ALLOC) 00097 assert(m_pBlock == p || (byte *)m_pBlock+8 == p); 00098 free(m_pBlock); 00099 m_pBlock = NULL; 00100 #else 00101 free(p); 00102 #endif 00103 } 00104 else 00105 delete [] (T *)p; 00106 } 00107 #endif 00108 00109 static int Compare(const word *A, const word *B, unsigned int N) 00110 { 00111 while (N--) 00112 if (A[N] > B[N]) 00113 return 1; 00114 else if (A[N] < B[N]) 00115 return -1; 00116 00117 return 0; 00118 } 00119 00120 static word Increment(word *A, unsigned int N, word B=1) 00121 { 00122 assert(N); 00123 word t = A[0]; 00124 A[0] = t+B; 00125 if (A[0] >= t) 00126 return 0; 00127 for (unsigned i=1; i<N; i++) 00128 if (++A[i]) 00129 return 0; 00130 return 1; 00131 } 00132 00133 static word Decrement(word *A, unsigned int N, word B=1) 00134 { 00135 assert(N); 00136 word t = A[0]; 00137 A[0] = t-B; 00138 if (A[0] <= t) 00139 return 0; 00140 for (unsigned i=1; i<N; i++) 00141 if (A[i]--) 00142 return 0; 00143 return 1; 00144 } 00145 00146 static void TwosComplement(word *A, unsigned int N) 00147 { 00148 Decrement(A, N); 00149 for (unsigned i=0; i<N; i++) 00150 A[i] = ~A[i]; 00151 } 00152 00153 static word AtomicInverseModPower2(word A) 00154 { 00155 assert(A%2==1); 00156 00157 word R=A%8; 00158 00159 for (unsigned i=3; i<WORD_BITS; i*=2) 00160 R = R*(2-R*A); 00161 00162 assert(R*A==1); 00163 return R; 00164 } 00165 00166 // ******************************************************** 00167 00168 class DWord 00169 { 00170 public: 00171 DWord() {} 00172 00173 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00174 explicit DWord(word low) 00175 { 00176 m_whole = low; 00177 } 00178 #else 00179 explicit DWord(word low) 00180 { 00181 m_halfs.low = low; 00182 m_halfs.high = 0; 00183 } 00184 #endif 00185 00186 DWord(word low, word high) 00187 { 00188 m_halfs.low = low; 00189 m_halfs.high = high; 00190 } 00191 00192 static DWord Multiply(word a, word b) 00193 { 00194 DWord r; 00195 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00196 r.m_whole = (dword)a * b; 00197 #elif defined(__alpha__) 00198 r.m_halfs.low = a*b; __asm__("umulh %1,%2,%0" : "=r" (r.m_halfs.high) : "r" (a), "r" (b)); 00199 #elif defined(__ia64__) 00200 r.m_halfs.low = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (r.m_halfs.high) : "f" (a), "f" (b)); 00201 #elif defined(_ARCH_PPC64) 00202 r.m_halfs.low = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (r.m_halfs.high) : "r" (a), "r" (b) : "cc"); 00203 #elif defined(__x86_64__) 00204 __asm__("mulq %3" : "=d" (r.m_halfs.high), "=a" (r.m_halfs.low) : "a" (a), "rm" (b) : "cc"); 00205 #elif defined(__mips64) 00206 __asm__("dmultu %2,%3" : "=h" (r.m_halfs.high), "=l" (r.m_halfs.low) : "r" (a), "r" (b)); 00207 #elif defined(_M_IX86) 00208 // for testing 00209 word64 t = (word64)a * b; 00210 r.m_halfs.high = ((word32 *)(&t))[1]; 00211 r.m_halfs.low = (word32)t; 00212 #else 00213 #error can not implement DWord 00214 #endif 00215 return r; 00216 } 00217 00218 static DWord MultiplyAndAdd(word a, word b, word c) 00219 { 00220 DWord r = Multiply(a, b); 00221 return r += c; 00222 } 00223 00224 DWord & operator+=(word a) 00225 { 00226 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00227 m_whole = m_whole + a; 00228 #else 00229 m_halfs.low += a; 00230 m_halfs.high += (m_halfs.low < a); 00231 #endif 00232 return *this; 00233 } 00234 00235 DWord operator+(word a) 00236 { 00237 DWord r; 00238 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00239 r.m_whole = m_whole + a; 00240 #else 00241 r.m_halfs.low = m_halfs.low + a; 00242 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a); 00243 #endif 00244 return r; 00245 } 00246 00247 DWord operator-(DWord a) 00248 { 00249 DWord r; 00250 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00251 r.m_whole = m_whole - a.m_whole; 00252 #else 00253 r.m_halfs.low = m_halfs.low - a.m_halfs.low; 00254 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low); 00255 #endif 00256 return r; 00257 } 00258 00259 DWord operator-(word a) 00260 { 00261 DWord r; 00262 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00263 r.m_whole = m_whole - a; 00264 #else 00265 r.m_halfs.low = m_halfs.low - a; 00266 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low); 00267 #endif 00268 return r; 00269 } 00270 00271 // returns quotient, which must fit in a word 00272 word operator/(word divisor); 00273 00274 word operator%(word a); 00275 00276 bool operator!() const 00277 { 00278 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00279 return !m_whole; 00280 #else 00281 return !m_halfs.high && !m_halfs.low; 00282 #endif 00283 } 00284 00285 word GetLowHalf() const {return m_halfs.low;} 00286 word GetHighHalf() const {return m_halfs.high;} 00287 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;} 00288 00289 private: 00290 union 00291 { 00292 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00293 dword m_whole; 00294 #endif 00295 struct 00296 { 00297 #ifdef IS_LITTLE_ENDIAN 00298 word low; 00299 word high; 00300 #else 00301 word high; 00302 word low; 00303 #endif 00304 } m_halfs; 00305 }; 00306 }; 00307 00308 class Word 00309 { 00310 public: 00311 Word() {} 00312 00313 Word(word value) 00314 { 00315 m_whole = value; 00316 } 00317 00318 Word(hword low, hword high) 00319 { 00320 m_whole = low | (word(high) << (WORD_BITS/2)); 00321 } 00322 00323 static Word Multiply(hword a, hword b) 00324 { 00325 Word r; 00326 r.m_whole = (word)a * b; 00327 return r; 00328 } 00329 00330 Word operator-(Word a) 00331 { 00332 Word r; 00333 r.m_whole = m_whole - a.m_whole; 00334 return r; 00335 } 00336 00337 Word operator-(hword a) 00338 { 00339 Word r; 00340 r.m_whole = m_whole - a; 00341 return r; 00342 } 00343 00344 // returns quotient, which must fit in a word 00345 hword operator/(hword divisor) 00346 { 00347 return hword(m_whole / divisor); 00348 } 00349 00350 bool operator!() const 00351 { 00352 return !m_whole; 00353 } 00354 00355 word GetWhole() const {return m_whole;} 00356 hword GetLowHalf() const {return hword(m_whole);} 00357 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));} 00358 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));} 00359 00360 private: 00361 word m_whole; 00362 }; 00363 00364 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A 00365 template <class S, class D> 00366 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL) 00367 { 00368 // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S 00369 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); 00370 00371 // estimate the quotient: do a 2 S by 1 S divide 00372 S Q; 00373 if (S(B1+1) == 0) 00374 Q = A[2]; 00375 else 00376 Q = D(A[1], A[2]) / S(B1+1); 00377 00378 // now subtract Q*B from A 00379 D p = D::Multiply(B0, Q); 00380 D u = (D) A[0] - p.GetLowHalf(); 00381 A[0] = u.GetLowHalf(); 00382 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q); 00383 A[1] = u.GetLowHalf(); 00384 A[2] += u.GetHighHalf(); 00385 00386 // Q <= actual quotient, so fix it 00387 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) 00388 { 00389 u = (D) A[0] - B0; 00390 A[0] = u.GetLowHalf(); 00391 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow(); 00392 A[1] = u.GetLowHalf(); 00393 A[2] += u.GetHighHalf(); 00394 Q++; 00395 assert(Q); // shouldn't overflow 00396 } 00397 00398 return Q; 00399 } 00400 00401 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 00402 template <class S, class D> 00403 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B) 00404 { 00405 if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) 00406 return D(Ah.GetLowHalf(), Ah.GetHighHalf()); 00407 else 00408 { 00409 S Q[2]; 00410 T[0] = Al.GetLowHalf(); 00411 T[1] = Al.GetHighHalf(); 00412 T[2] = Ah.GetLowHalf(); 00413 T[3] = Ah.GetHighHalf(); 00414 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf()); 00415 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf()); 00416 return D(Q[0], Q[1]); 00417 } 00418 } 00419 00420 // returns quotient, which must fit in a word 00421 inline word DWord::operator/(word a) 00422 { 00423 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00424 return word(m_whole / a); 00425 #else 00426 hword r[4]; 00427 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole(); 00428 #endif 00429 } 00430 00431 inline word DWord::operator%(word a) 00432 { 00433 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00434 return word(m_whole % a); 00435 #else 00436 if (a < (word(1) << (WORD_BITS/2))) 00437 { 00438 hword h = hword(a); 00439 word r = m_halfs.high % h; 00440 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h; 00441 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h); 00442 } 00443 else 00444 { 00445 hword r[4]; 00446 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a); 00447 return Word(r[0], r[1]).GetWhole(); 00448 } 00449 #endif 00450 } 00451 00452 // ******************************************************** 00453 00454 class Portable 00455 { 00456 public: 00457 static word Add(word *C, const word *A, const word *B, unsigned int N); 00458 static word Subtract(word *C, const word *A, const word *B, unsigned int N); 00459 00460 static inline void Multiply2(word *C, const word *A, const word *B); 00461 static inline word Multiply2Add(word *C, const word *A, const word *B); 00462 static void Multiply4(word *C, const word *A, const word *B); 00463 static void Multiply8(word *C, const word *A, const word *B); 00464 static inline unsigned int MultiplyRecursionLimit() {return 8;} 00465 00466 static inline void Multiply2Bottom(word *C, const word *A, const word *B); 00467 static void Multiply4Bottom(word *C, const word *A, const word *B); 00468 static void Multiply8Bottom(word *C, const word *A, const word *B); 00469 static inline unsigned int MultiplyBottomRecursionLimit() {return 8;} 00470 00471 static void Square2(word *R, const word *A); 00472 static void Square4(word *R, const word *A); 00473 static void Square8(word *R, const word *A) {assert(false);} 00474 static inline unsigned int SquareRecursionLimit() {return 4;} 00475 }; 00476 00477 word Portable::Add(word *C, const word *A, const word *B, unsigned int N) 00478 { 00479 assert (N%2 == 0); 00480 00481 DWord u(0, 0); 00482 for (unsigned int i = 0; i < N; i+=2) 00483 { 00484 u = DWord(A[i]) + B[i] + u.GetHighHalf(); 00485 C[i] = u.GetLowHalf(); 00486 u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf(); 00487 C[i+1] = u.GetLowHalf(); 00488 } 00489 return u.GetHighHalf(); 00490 } 00491 00492 word Portable::Subtract(word *C, const word *A, const word *B, unsigned int N) 00493 { 00494 assert (N%2 == 0); 00495 00496 DWord u(0, 0); 00497 for (unsigned int i = 0; i < N; i+=2) 00498 { 00499 u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow(); 00500 C[i] = u.GetLowHalf(); 00501 u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow(); 00502 C[i+1] = u.GetLowHalf(); 00503 } 00504 return 0-u.GetHighHalf(); 00505 } 00506 00507 void Portable::Multiply2(word *C, const word *A, const word *B) 00508 { 00509 /* 00510 word s; 00511 dword d; 00512 00513 if (A1 >= A0) 00514 if (B0 >= B1) 00515 { 00516 s = 0; 00517 d = (dword)(A1-A0)*(B0-B1); 00518 } 00519 else 00520 { 00521 s = (A1-A0); 00522 d = (dword)s*(word)(B0-B1); 00523 } 00524 else 00525 if (B0 > B1) 00526 { 00527 s = (B0-B1); 00528 d = (word)(A1-A0)*(dword)s; 00529 } 00530 else 00531 { 00532 s = 0; 00533 d = (dword)(A0-A1)*(B1-B0); 00534 } 00535 */ 00536 // this segment is the branchless equivalent of above 00537 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]}; 00538 unsigned int ai = A[1] < A[0]; 00539 unsigned int bi = B[0] < B[1]; 00540 unsigned int di = ai & bi; 00541 DWord d = DWord::Multiply(D[di], D[di+2]); 00542 D[1] = D[3] = 0; 00543 unsigned int si = ai + !bi; 00544 word s = D[si]; 00545 00546 DWord A0B0 = DWord::Multiply(A[0], B[0]); 00547 C[0] = A0B0.GetLowHalf(); 00548 00549 DWord A1B1 = DWord::Multiply(A[1], B[1]); 00550 DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf(); 00551 C[1] = t.GetLowHalf(); 00552 00553 t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s; 00554 C[2] = t.GetLowHalf(); 00555 C[3] = t.GetHighHalf(); 00556 } 00557 00558 inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B) 00559 { 00560 DWord t = DWord::Multiply(A[0], B[0]); 00561 C[0] = t.GetLowHalf(); 00562 C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0]; 00563 } 00564 00565 word Portable::Multiply2Add(word *C, const word *A, const word *B) 00566 { 00567 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]}; 00568 unsigned int ai = A[1] < A[0]; 00569 unsigned int bi = B[0] < B[1]; 00570 unsigned int di = ai & bi; 00571 DWord d = DWord::Multiply(D[di], D[di+2]); 00572 D[1] = D[3] = 0; 00573 unsigned int si = ai + !bi; 00574 word s = D[si]; 00575 00576 DWord A0B0 = DWord::Multiply(A[0], B[0]); 00577 DWord t = A0B0 + C[0]; 00578 C[0] = t.GetLowHalf(); 00579 00580 DWord A1B1 = DWord::Multiply(A[1], B[1]); 00581 t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1]; 00582 C[1] = t.GetLowHalf(); 00583 00584 t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2]; 00585 C[2] = t.GetLowHalf(); 00586 00587 t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3]; 00588 C[3] = t.GetLowHalf(); 00589 return t.GetHighHalf(); 00590 } 00591 00592 #define MulAcc(x, y) \ 00593 p = DWord::MultiplyAndAdd(A[x], B[y], c); \ 00594 c = p.GetLowHalf(); \ 00595 p = (DWord) d + p.GetHighHalf(); \ 00596 d = p.GetLowHalf(); \ 00597 e += p.GetHighHalf(); 00598 00599 #define SaveMulAcc(s, x, y) \ 00600 R[s] = c; \ 00601 p = DWord::MultiplyAndAdd(A[x], B[y], d); \ 00602 c = p.GetLowHalf(); \ 00603 p = (DWord) e + p.GetHighHalf(); \ 00604 d = p.GetLowHalf(); \ 00605 e = p.GetHighHalf(); 00606 00607 #define SquAcc(x, y) \ 00608 q = DWord::Multiply(A[x], A[y]); \ 00609 p = q + c; \ 00610 c = p.GetLowHalf(); \ 00611 p = (DWord) d + p.GetHighHalf(); \ 00612 d = p.GetLowHalf(); \ 00613 e += p.GetHighHalf(); \ 00614 p = q + c; \ 00615 c = p.GetLowHalf(); \ 00616 p = (DWord) d + p.GetHighHalf(); \ 00617 d = p.GetLowHalf(); \ 00618 e += p.GetHighHalf(); 00619 00620 #define SaveSquAcc(s, x, y) \ 00621 R[s] = c; \ 00622 q = DWord::Multiply(A[x], A[y]); \ 00623 p = q + d; \ 00624 c = p.GetLowHalf(); \ 00625 p = (DWord) e + p.GetHighHalf(); \ 00626 d = p.GetLowHalf(); \ 00627 e = p.GetHighHalf(); \ 00628 p = q + c; \ 00629 c = p.GetLowHalf(); \ 00630 p = (DWord) d + p.GetHighHalf(); \ 00631 d = p.GetLowHalf(); \ 00632 e += p.GetHighHalf(); 00633 00634 void Portable::Multiply4(word *R, const word *A, const word *B) 00635 { 00636 DWord p; 00637 word c, d, e; 00638 00639 p = DWord::Multiply(A[0], B[0]); 00640 R[0] = p.GetLowHalf(); 00641 c = p.GetHighHalf(); 00642 d = e = 0; 00643 00644 MulAcc(0, 1); 00645 MulAcc(1, 0); 00646 00647 SaveMulAcc(1, 2, 0); 00648 MulAcc(1, 1); 00649 MulAcc(0, 2); 00650 00651 SaveMulAcc(2, 0, 3); 00652 MulAcc(1, 2); 00653 MulAcc(2, 1); 00654 MulAcc(3, 0); 00655 00656 SaveMulAcc(3, 3, 1); 00657 MulAcc(2, 2); 00658 MulAcc(1, 3); 00659 00660 SaveMulAcc(4, 2, 3); 00661 MulAcc(3, 2); 00662 00663 R[5] = c; 00664 p = DWord::MultiplyAndAdd(A[3], B[3], d); 00665 R[6] = p.GetLowHalf(); 00666 R[7] = e + p.GetHighHalf(); 00667 } 00668 00669 void Portable::Square2(word *R, const word *A) 00670 { 00671 DWord p, q; 00672 word c, d, e; 00673 00674 p = DWord::Multiply(A[0], A[0]); 00675 R[0] = p.GetLowHalf(); 00676 c = p.GetHighHalf(); 00677 d = e = 0; 00678 00679 SquAcc(0, 1); 00680 00681 R[1] = c; 00682 p = DWord::MultiplyAndAdd(A[1], A[1], d); 00683 R[2] = p.GetLowHalf(); 00684 R[3] = e + p.GetHighHalf(); 00685 } 00686 00687 void Portable::Square4(word *R, const word *A) 00688 { 00689 #ifdef _MSC_VER 00690 // VC60 workaround: MSVC 6.0 has an optimization bug that makes 00691 // (dword)A*B where either A or B has been cast to a dword before 00692 // very expensive. Revisit this function when this 00693 // bug is fixed. 00694 Multiply4(R, A, A); 00695 #else 00696 const word *B = A; 00697 DWord p, q; 00698 word c, d, e; 00699 00700 p = DWord::Multiply(A[0], A[0]); 00701 R[0] = p.GetLowHalf(); 00702 c = p.GetHighHalf(); 00703 d = e = 0; 00704 00705 SquAcc(0, 1); 00706 00707 SaveSquAcc(1, 2, 0); 00708 MulAcc(1, 1); 00709 00710 SaveSquAcc(2, 0, 3); 00711 SquAcc(1, 2); 00712 00713 SaveSquAcc(3, 3, 1); 00714 MulAcc(2, 2); 00715 00716 SaveSquAcc(4, 2, 3); 00717 00718 R[5] = c; 00719 p = DWord::MultiplyAndAdd(A[3], A[3], d); 00720 R[6] = p.GetLowHalf(); 00721 R[7] = e + p.GetHighHalf(); 00722 #endif 00723 } 00724 00725 void Portable::Multiply8(word *R, const word *A, const word *B) 00726 { 00727 DWord p; 00728 word c, d, e; 00729 00730 p = DWord::Multiply(A[0], B[0]); 00731 R[0] = p.GetLowHalf(); 00732 c = p.GetHighHalf(); 00733 d = e = 0; 00734 00735 MulAcc(0, 1); 00736 MulAcc(1, 0); 00737 00738 SaveMulAcc(1, 2, 0); 00739 MulAcc(1, 1); 00740 MulAcc(0, 2); 00741 00742 SaveMulAcc(2, 0, 3); 00743 MulAcc(1, 2); 00744 MulAcc(2, 1); 00745 MulAcc(3, 0); 00746 00747 SaveMulAcc(3, 0, 4); 00748 MulAcc(1, 3); 00749 MulAcc(2, 2); 00750 MulAcc(3, 1); 00751 MulAcc(4, 0); 00752 00753 SaveMulAcc(4, 0, 5); 00754 MulAcc(1, 4); 00755 MulAcc(2, 3); 00756 MulAcc(3, 2); 00757 MulAcc(4, 1); 00758 MulAcc(5, 0); 00759 00760 SaveMulAcc(5, 0, 6); 00761 MulAcc(1, 5); 00762 MulAcc(2, 4); 00763 MulAcc(3, 3); 00764 MulAcc(4, 2); 00765 MulAcc(5, 1); 00766 MulAcc(6, 0); 00767 00768 SaveMulAcc(6, 0, 7); 00769 MulAcc(1, 6); 00770 MulAcc(2, 5); 00771 MulAcc(3, 4); 00772 MulAcc(4, 3); 00773 MulAcc(5, 2); 00774 MulAcc(6, 1); 00775 MulAcc(7, 0); 00776 00777 SaveMulAcc(7, 1, 7); 00778 MulAcc(2, 6); 00779 MulAcc(3, 5); 00780 MulAcc(4, 4); 00781 MulAcc(5, 3); 00782 MulAcc(6, 2); 00783 MulAcc(7, 1); 00784 00785 SaveMulAcc(8, 2, 7); 00786 MulAcc(3, 6); 00787 MulAcc(4, 5); 00788 MulAcc(5, 4); 00789 MulAcc(6, 3); 00790 MulAcc(7, 2); 00791 00792 SaveMulAcc(9, 3, 7); 00793 MulAcc(4, 6); 00794 MulAcc(5, 5); 00795 MulAcc(6, 4); 00796 MulAcc(7, 3); 00797 00798 SaveMulAcc(10, 4, 7); 00799 MulAcc(5, 6); 00800 MulAcc(6, 5); 00801 MulAcc(7, 4); 00802 00803 SaveMulAcc(11, 5, 7); 00804 MulAcc(6, 6); 00805 MulAcc(7, 5); 00806 00807 SaveMulAcc(12, 6, 7); 00808 MulAcc(7, 6); 00809 00810 R[13] = c; 00811 p = DWord::MultiplyAndAdd(A[7], B[7], d); 00812 R[14] = p.GetLowHalf(); 00813 R[15] = e + p.GetHighHalf(); 00814 } 00815 00816 void Portable::Multiply4Bottom(word *R, const word *A, const word *B) 00817 { 00818 DWord p; 00819 word c, d, e; 00820 00821 p = DWord::Multiply(A[0], B[0]); 00822 R[0] = p.GetLowHalf(); 00823 c = p.GetHighHalf(); 00824 d = e = 0; 00825 00826 MulAcc(0, 1); 00827 MulAcc(1, 0); 00828 00829 SaveMulAcc(1, 2, 0); 00830 MulAcc(1, 1); 00831 MulAcc(0, 2); 00832 00833 R[2] = c; 00834 R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0]; 00835 } 00836 00837 void Portable::Multiply8Bottom(word *R, const word *A, const word *B) 00838 { 00839 DWord p; 00840 word c, d, e; 00841 00842 p = DWord::Multiply(A[0], B[0]); 00843 R[0] = p.GetLowHalf(); 00844 c = p.GetHighHalf(); 00845 d = e = 0; 00846 00847 MulAcc(0, 1); 00848 MulAcc(1, 0); 00849 00850 SaveMulAcc(1, 2, 0); 00851 MulAcc(1, 1); 00852 MulAcc(0, 2); 00853 00854 SaveMulAcc(2, 0, 3); 00855 MulAcc(1, 2); 00856 MulAcc(2, 1); 00857 MulAcc(3, 0); 00858 00859 SaveMulAcc(3, 0, 4); 00860 MulAcc(1, 3); 00861 MulAcc(2, 2); 00862 MulAcc(3, 1); 00863 MulAcc(4, 0); 00864 00865 SaveMulAcc(4, 0, 5); 00866 MulAcc(1, 4); 00867 MulAcc(2, 3); 00868 MulAcc(3, 2); 00869 MulAcc(4, 1); 00870 MulAcc(5, 0); 00871 00872 SaveMulAcc(5, 0, 6); 00873 MulAcc(1, 5); 00874 MulAcc(2, 4); 00875 MulAcc(3, 3); 00876 MulAcc(4, 2); 00877 MulAcc(5, 1); 00878 MulAcc(6, 0); 00879 00880 R[6] = c; 00881 R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] + 00882 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0]; 00883 } 00884 00885 #undef MulAcc 00886 #undef SaveMulAcc 00887 #undef SquAcc 00888 #undef SaveSquAcc 00889 00890 #ifdef CRYPTOPP_X86ASM_AVAILABLE 00891 00892 // ************** x86 feature detection *************** 00893 00894 static bool s_sse2Enabled = true; 00895 00896 static void CpuId(word32 input, word32 *output) 00897 { 00898 #ifdef __GNUC__ 00899 __asm__ 00900 ( 00901 // save ebx in case -fPIC is being used 00902 "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx" 00903 : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3]) 00904 : "a" (input) 00905 ); 00906 #else 00907 __asm 00908 { 00909 mov eax, input 00910 cpuid 00911 mov edi, output 00912 mov [edi], eax 00913 mov [edi+4], ebx 00914 mov [edi+8], ecx 00915 mov [edi+12], edx 00916 } 00917 #endif 00918 } 00919 00920 #ifdef SSE2_INTRINSICS_AVAILABLE 00921 #ifndef _MSC_VER 00922 static jmp_buf s_env; 00923 static void SigIllHandler(int) 00924 { 00925 longjmp(s_env, 1); 00926 } 00927 #endif 00928 00929 static bool HasSSE2() 00930 { 00931 if (!s_sse2Enabled) 00932 return false; 00933 00934 word32 cpuid[4]; 00935 CpuId(1, cpuid); 00936 if ((cpuid[3] & (1 << 26)) == 0) 00937 return false; 00938 00939 #ifdef _MSC_VER 00940 __try 00941 { 00942 __asm xorpd xmm0, xmm0 // executing SSE2 instruction 00943 } 00944 __except (1) 00945 { 00946 return false; 00947 } 00948 return true; 00949 #else 00950 typedef void (*SigHandler)(int); 00951 00952 SigHandler oldHandler = signal(SIGILL, SigIllHandler); 00953 if (oldHandler == SIG_ERR) 00954 return false; 00955 00956 bool result = true; 00957 if (setjmp(s_env)) 00958 result = false; 00959 else 00960 __asm __volatile ("xorps %xmm0, %xmm0"); 00961 00962 signal(SIGILL, oldHandler); 00963 return result; 00964 #endif 00965 } 00966 #endif 00967 00968 static bool IsP4() 00969 { 00970 word32 cpuid[4]; 00971 00972 CpuId(0, cpuid); 00973 std::swap(cpuid[2], cpuid[3]); 00974 if (memcmp(cpuid+1, "GenuineIntel", 12) != 0) 00975 return false; 00976 00977 CpuId(1, cpuid); 00978 return ((cpuid[0] >> 8) & 0xf) == 0xf; 00979 } 00980 00981 // ************** Pentium/P4 optimizations *************** 00982 00983 class PentiumOptimized : public Portable 00984 { 00985 public: 00986 static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N); 00987 static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N); 00988 static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B); 00989 static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B); 00990 static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B); 00991 }; 00992 00993 class P4Optimized 00994 { 00995 public: 00996 static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N); 00997 static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N); 00998 #ifdef SSE2_INTRINSICS_AVAILABLE 00999 static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B); 01000 static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B); 01001 static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B); 01002 #endif 01003 }; 01004 01005 typedef word (CRYPTOPP_CDECL * PAddSub)(word *C, const word *A, const word *B, unsigned int N); 01006 typedef void (CRYPTOPP_CDECL * PMul)(word *C, const word *A, const word *B); 01007 01008 static PAddSub s_pAdd, s_pSub; 01009 #ifdef SSE2_INTRINSICS_AVAILABLE 01010 static PMul s_pMul4, s_pMul8, s_pMul8B; 01011 #endif 01012 01013 static void SetPentiumFunctionPointers() 01014 { 01015 if (IsP4()) 01016 { 01017 s_pAdd = &P4Optimized::Add; 01018 s_pSub = &P4Optimized::Subtract; 01019 } 01020 else 01021 { 01022 s_pAdd = &PentiumOptimized::Add; 01023 s_pSub = &PentiumOptimized::Subtract; 01024 } 01025 01026 #ifdef SSE2_INTRINSICS_AVAILABLE 01027 if (HasSSE2()) 01028 { 01029 s_pMul4 = &P4Optimized::Multiply4; 01030 s_pMul8 = &P4Optimized::Multiply8; 01031 s_pMul8B = &P4Optimized::Multiply8Bottom; 01032 } 01033 else 01034 { 01035 s_pMul4 = &PentiumOptimized::Multiply4; 01036 s_pMul8 = &PentiumOptimized::Multiply8; 01037 s_pMul8B = &PentiumOptimized::Multiply8Bottom; 01038 } 01039 #endif 01040 } 01041 01042 static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0); 01043 01044 void DisableSSE2() 01045 { 01046 s_sse2Enabled = false; 01047 SetPentiumFunctionPointers(); 01048 } 01049 01050 class LowLevel : public PentiumOptimized 01051 { 01052 public: 01053 inline static word Add(word *C, const word *A, const word *B, unsigned int N) 01054 {return s_pAdd(C, A, B, N);} 01055 inline static word Subtract(word *C, const word *A, const word *B, unsigned int N) 01056 {return s_pSub(C, A, B, N);} 01057 inline static void Square4(word *R, const word *A) 01058 {Multiply4(R, A, A);} 01059 #ifdef SSE2_INTRINSICS_AVAILABLE 01060 inline static void Multiply4(word *C, const word *A, const word *B) 01061 {s_pMul4(C, A, B);} 01062 inline static void Multiply8(word *C, const word *A, const word *B) 01063 {s_pMul8(C, A, B);} 01064 inline static void Multiply8Bottom(word *C, const word *A, const word *B) 01065 {s_pMul8B(C, A, B);} 01066 #endif 01067 }; 01068 01069 // use some tricks to share assembly code between MSVC and GCC 01070 #ifdef _MSC_VER 01071 #define CRYPTOPP_NAKED __declspec(naked) 01072 #define AS1(x) __asm x 01073 #define AS2(x, y) __asm x, y 01074 #define AddPrologue \ 01075 __asm push ebp \ 01076 __asm push ebx \ 01077 __asm push esi \ 01078 __asm push edi \ 01079 __asm mov ecx, [esp+20] \ 01080 __asm mov edx, [esp+24] \ 01081 __asm mov ebx, [esp+28] \ 01082 __asm mov esi, [esp+32] 01083 #define AddEpilogue \ 01084 __asm pop edi \ 01085 __asm pop esi \ 01086 __asm pop ebx \ 01087 __asm pop ebp \ 01088 __asm ret 01089 #define MulPrologue \ 01090 __asm push ebp \ 01091 __asm push ebx \ 01092 __asm push esi \ 01093 __asm push edi \ 01094 __asm mov ecx, [esp+28] \ 01095 __asm mov esi, [esp+24] \ 01096 __asm push [esp+20] 01097 #define MulEpilogue \ 01098 __asm add esp, 4 \ 01099 __asm pop edi \ 01100 __asm pop esi \ 01101 __asm pop ebx \ 01102 __asm pop ebp \ 01103 __asm ret 01104 #else 01105 #define CRYPTOPP_NAKED 01106 #define AS1(x) #x ";" 01107 #define AS2(x, y) #x ", " #y ";" 01108 #define AddPrologue \ 01109 __asm__ __volatile__ \ 01110 ( \ 01111 "push %%ebx;" /* save this manually, in case of -fPIC */ \ 01112 "mov %2, %%ebx;" \ 01113 ".intel_syntax noprefix;" \ 01114 "push ebp;" 01115 #define AddEpilogue \ 01116 "pop ebp;" \ 01117 ".att_syntax prefix;" \ 01118 "pop %%ebx;" \ 01119 : \ 01120 : "c" (C), "d" (A), "m" (B), "S" (N) \ 01121 : "%edi", "memory", "cc" \ 01122 ); 01123 #define MulPrologue \ 01124 __asm__ __volatile__ \ 01125 ( \ 01126 "push %%ebx;" /* save this manually, in case of -fPIC */ \ 01127 "push %%ebp;" \ 01128 "push %0;" \ 01129 ".intel_syntax noprefix;" 01130 #define MulEpilogue \ 01131 "add esp, 4;" \ 01132 "pop ebp;" \ 01133 "pop ebx;" \ 01134 ".att_syntax prefix;" \ 01135 : \ 01136 : "rm" (Z), "S" (X), "c" (Y) \ 01137 : "%eax", "%edx", "%edi", "memory", "cc" \ 01138 ); 01139 #endif 01140 01141 CRYPTOPP_NAKED word PentiumOptimized::Add(word *C, const word *A, const word *B, unsigned int N) 01142 { 01143 AddPrologue 01144 01145 // now: ebx = B, ecx = C, edx = A, esi = N 01146 AS2( sub ecx, edx) // hold the distance between C & A so we can add this to A to get C 01147 AS2( xor eax, eax) // clear eax 01148 01149 AS2( sub eax, esi) // eax is a negative index from end of B 01150 AS2( lea ebx, [ebx+4*esi]) // ebx is end of B 01151 01152 AS2( sar eax, 1) // unit of eax is now dwords; this also clears the carry flag 01153 AS1( jz loopendAdd) // if no dwords then nothing to do 01154 01155 AS1(loopstartAdd:) 01156 AS2( mov esi,[edx]) // load lower word of A 01157 AS2( mov ebp,[edx+4]) // load higher word of A 01158 01159 AS2( mov edi,[ebx+8*eax]) // load lower word of B 01160 AS2( lea edx,[edx+8]) // advance A and C 01161 01162 AS2( adc esi,edi) // add lower words 01163 AS2( mov edi,[ebx+8*eax+4]) // load higher word of B 01164 01165 AS2( adc ebp,edi) // add higher words 01166 AS1( inc eax) // advance B 01167 01168 AS2( mov [edx+ecx-8],esi) // store lower word result 01169 AS2( mov [edx+ecx-4],ebp) // store higher word result 01170 01171 AS1( jnz loopstartAdd) // loop until eax overflows and becomes zero 01172 01173 AS1(loopendAdd:) 01174 AS2( adc eax, 0) // store carry into eax (return result register) 01175 01176 AddEpilogue 01177 } 01178 01179 CRYPTOPP_NAKED word PentiumOptimized::Subtract(word *C, const word *A, const word *B, unsigned int N) 01180 { 01181 AddPrologue 01182 01183 // now: ebx = B, ecx = C, edx = A, esi = N 01184 AS2( sub ecx, edx) // hold the distance between C & A so we can add this to A to get C 01185 AS2( xor eax, eax) // clear eax 01186 01187 AS2( sub eax, esi) // eax is a negative index from end of B 01188 AS2( lea ebx, [ebx+4*esi]) // ebx is end of B 01189 01190 AS2( sar eax, 1) // unit of eax is now dwords; this also clears the carry flag 01191 AS1( jz loopendSub) // if no dwords then nothing to do 01192 01193 AS1(loopstartSub:) 01194 AS2( mov esi,[edx]) // load lower word of A 01195 AS2( mov ebp,[edx+4]) // load higher word of A 01196 01197 AS2( mov edi,[ebx+8*eax]) // load lower word of B 01198 AS2( lea edx,[edx+8]) // advance A and C 01199 01200 AS2( sbb esi,edi) // subtract lower words 01201 AS2( mov edi,[ebx+8*eax+4]) // load higher word of B 01202 01203 AS2( sbb ebp,edi) // subtract higher words 01204 AS1( inc eax) // advance B 01205 01206 AS2( mov [edx+ecx-8],esi) // store lower word result 01207 AS2( mov [edx+ecx-4],ebp) // store higher word result 01208 01209 AS1( jnz loopstartSub) // loop until eax overflows and becomes zero 01210 01211 AS1(loopendSub:) 01212 AS2( adc eax, 0) // store carry into eax (return result register) 01213 01214 AddEpilogue 01215 } 01216 01217 // On Pentium 4, the adc and sbb instructions are very expensive, so avoid them. 01218 01219 CRYPTOPP_NAKED word P4Optimized::Add(word *C, const word *A, const word *B, unsigned int N) 01220 { 01221 AddPrologue 01222 01223 // now: ebx = B, ecx = C, edx = A, esi = N 01224 AS2( xor eax, eax) 01225 AS1( neg esi) 01226 AS1( jz loopendAddP4) // if no dwords then nothing to do 01227 01228 AS2( mov edi, [edx]) 01229 AS2( mov ebp, [ebx]) 01230 AS1( jmp carry1AddP4) 01231 01232 AS1(loopstartAddP4:) 01233 AS2( mov edi, [edx+8]) 01234 AS2( add ecx, 8) 01235 AS2( add edx, 8) 01236 AS2( mov ebp, [ebx]) 01237 AS2( add edi, eax) 01238 AS1( jc carry1AddP4) 01239 AS2( xor eax, eax) 01240 01241 AS1(carry1AddP4:) 01242 AS2( add edi, ebp) 01243 AS2( mov ebp, 1) 01244 AS2( mov [ecx], edi) 01245 AS2( mov edi, [edx+4]) 01246 AS2( cmovc eax, ebp) 01247 AS2( mov ebp, [ebx+4]) 01248 AS2( add ebx, 8) 01249 AS2( add edi, eax) 01250 AS1( jc carry2AddP4) 01251 AS2( xor eax, eax) 01252 01253 AS1(carry2AddP4:) 01254 AS2( add edi, ebp) 01255 AS2( mov ebp, 1) 01256 AS2( cmovc eax, ebp) 01257 AS2( mov [ecx+4], edi) 01258 AS2( add esi, 2) 01259 AS1( jnz loopstartAddP4) 01260 01261 AS1(loopendAddP4:) 01262 01263 AddEpilogue 01264 } 01265 01266 CRYPTOPP_NAKED word P4Optimized::Subtract(word *C, const word *A, const word *B, unsigned int N) 01267 { 01268 AddPrologue 01269 01270 // now: ebx = B, ecx = C, edx = A, esi = N 01271 AS2( xor eax, eax) 01272 AS1( neg esi) 01273 AS1( jz loopendSubP4) // if no dwords then nothing to do 01274 01275 AS2( mov edi, [edx]) 01276 AS2( mov ebp, [ebx]) 01277 AS1( jmp carry1SubP4) 01278 01279 AS1(loopstartSubP4:) 01280 AS2( mov edi, [edx+8]) 01281 AS2( add edx, 8) 01282 AS2( add ecx, 8) 01283 AS2( mov ebp, [ebx]) 01284 AS2( sub edi, eax) 01285 AS1( jc carry1SubP4) 01286 AS2( xor eax, eax) 01287 01288 AS1(carry1SubP4:) 01289 AS2( sub edi, ebp) 01290 AS2( mov ebp, 1) 01291 AS2( mov [ecx], edi) 01292 AS2( mov edi, [edx+4]) 01293 AS2( cmovc eax, ebp) 01294 AS2( mov ebp, [ebx+4]) 01295 AS2( add ebx, 8) 01296 AS2( sub edi, eax) 01297 AS1( jc carry2SubP4) 01298 AS2( xor eax, eax) 01299 01300 AS1(carry2SubP4:) 01301 AS2( sub edi, ebp) 01302 AS2( mov ebp, 1) 01303 AS2( cmovc eax, ebp) 01304 AS2( mov [ecx+4], edi) 01305 AS2( add esi, 2) 01306 AS1( jnz loopstartSubP4) 01307 01308 AS1(loopendSubP4:) 01309 01310 AddEpilogue 01311 } 01312 01313 // multiply assembly code originally contributed by Leonard Janke 01314 01315 #define MulStartup \ 01316 AS2(xor ebp, ebp) \ 01317 AS2(xor edi, edi) \ 01318 AS2(xor ebx, ebx) 01319 01320 #define MulShiftCarry \ 01321 AS2(mov ebp, edx) \ 01322 AS2(mov edi, ebx) \ 01323 AS2(xor ebx, ebx) 01324 01325 #define MulAccumulateBottom(i,j) \ 01326 AS2(mov eax, [ecx+4*j]) \ 01327 AS2(imul eax, dword ptr [esi+4*i]) \ 01328 AS2(add ebp, eax) 01329 01330 #define MulAccumulate(i,j) \ 01331 AS2(mov eax, [ecx+4*j]) \ 01332 AS1(mul dword ptr [esi+4*i]) \ 01333 AS2(add ebp, eax) \ 01334 AS2(adc edi, edx) \ 01335 AS2(adc bl, bh) 01336 01337 #define MulStoreDigit(i) \ 01338 AS2(mov edx, edi) \ 01339 AS2(mov edi, [esp]) \ 01340 AS2(mov [edi+4*i], ebp) 01341 01342 #define MulLastDiagonal(digits) \ 01343 AS2(mov eax, [ecx+4*(digits-1)]) \ 01344 AS1(mul dword ptr [esi+4*(digits-1)]) \ 01345 AS2(add ebp, eax) \ 01346 AS2(adc edx, edi) \ 01347 AS2(mov edi, [esp]) \ 01348 AS2(mov [edi+4*(2*digits-2)], ebp) \ 01349 AS2(mov [edi+4*(2*digits-1)], edx) 01350 01351 CRYPTOPP_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y) 01352 { 01353 MulPrologue 01354 // now: [esp] = Z, esi = X, ecx = Y 01355 MulStartup 01356 MulAccumulate(0,0) 01357 MulStoreDigit(0) 01358 MulShiftCarry 01359 01360 MulAccumulate(1,0) 01361 MulAccumulate(0,1) 01362 MulStoreDigit(1) 01363 MulShiftCarry 01364 01365 MulAccumulate(2,0) 01366 MulAccumulate(1,1) 01367 MulAccumulate(0,2) 01368 MulStoreDigit(2) 01369 MulShiftCarry 01370 01371 MulAccumulate(3,0) 01372 MulAccumulate(2,1) 01373 MulAccumulate(1,2) 01374 MulAccumulate(0,3) 01375 MulStoreDigit(3) 01376 MulShiftCarry 01377 01378 MulAccumulate(3,1) 01379 MulAccumulate(2,2) 01380 MulAccumulate(1,3) 01381 MulStoreDigit(4) 01382 MulShiftCarry 01383 01384 MulAccumulate(3,2) 01385 MulAccumulate(2,3) 01386 MulStoreDigit(5) 01387 MulShiftCarry 01388 01389 MulLastDiagonal(4) 01390 MulEpilogue 01391 } 01392 01393 CRYPTOPP_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y) 01394 { 01395 MulPrologue 01396 // now: [esp] = Z, esi = X, ecx = Y 01397 MulStartup 01398 MulAccumulate(0,0) 01399 MulStoreDigit(0) 01400 MulShiftCarry 01401 01402 MulAccumulate(1,0) 01403 MulAccumulate(0,1) 01404 MulStoreDigit(1) 01405 MulShiftCarry 01406 01407 MulAccumulate(2,0) 01408 MulAccumulate(1,1) 01409 MulAccumulate(0,2) 01410 MulStoreDigit(2) 01411 MulShiftCarry 01412 01413 MulAccumulate(3,0) 01414 MulAccumulate(2,1) 01415 MulAccumulate(1,2) 01416 MulAccumulate(0,3) 01417 MulStoreDigit(3) 01418 MulShiftCarry 01419 01420 MulAccumulate(4,0) 01421 MulAccumulate(3,1) 01422 MulAccumulate(2,2) 01423 MulAccumulate(1,3) 01424 MulAccumulate(0,4) 01425 MulStoreDigit(4) 01426 MulShiftCarry 01427 01428 MulAccumulate(5,0) 01429 MulAccumulate(4,1) 01430 MulAccumulate(3,2) 01431 MulAccumulate(2,3) 01432 MulAccumulate(1,4) 01433 MulAccumulate(0,5) 01434 MulStoreDigit(5) 01435 MulShiftCarry 01436 01437 MulAccumulate(6,0) 01438 MulAccumulate(5,1) 01439 MulAccumulate(4,2) 01440 MulAccumulate(3,3) 01441 MulAccumulate(2,4) 01442 MulAccumulate(1,5) 01443 MulAccumulate(0,6) 01444 MulStoreDigit(6) 01445 MulShiftCarry 01446 01447 MulAccumulate(7,0) 01448 MulAccumulate(6,1) 01449 MulAccumulate(5,2) 01450 MulAccumulate(4,3) 01451 MulAccumulate(3,4) 01452 MulAccumulate(2,5) 01453 MulAccumulate(1,6) 01454 MulAccumulate(0,7) 01455 MulStoreDigit(7) 01456 MulShiftCarry 01457 01458 MulAccumulate(7,1) 01459 MulAccumulate(6,2) 01460 MulAccumulate(5,3) 01461 MulAccumulate(4,4) 01462 MulAccumulate(3,5) 01463 MulAccumulate(2,6) 01464 MulAccumulate(1,7) 01465 MulStoreDigit(8) 01466 MulShiftCarry 01467 01468 MulAccumulate(7,2) 01469 MulAccumulate(6,3) 01470 MulAccumulate(5,4) 01471 MulAccumulate(4,5) 01472 MulAccumulate(3,6) 01473 MulAccumulate(2,7) 01474 MulStoreDigit(9) 01475 MulShiftCarry 01476 01477 MulAccumulate(7,3) 01478 MulAccumulate(6,4) 01479 MulAccumulate(5,5) 01480 MulAccumulate(4,6) 01481 MulAccumulate(3,7) 01482 MulStoreDigit(10) 01483 MulShiftCarry 01484 01485 MulAccumulate(7,4) 01486 MulAccumulate(6,5) 01487 MulAccumulate(5,6) 01488 MulAccumulate(4,7) 01489 MulStoreDigit(11) 01490 MulShiftCarry 01491 01492 MulAccumulate(7,5) 01493 MulAccumulate(6,6) 01494 MulAccumulate(5,7) 01495 MulStoreDigit(12) 01496 MulShiftCarry 01497 01498 MulAccumulate(7,6) 01499 MulAccumulate(6,7) 01500 MulStoreDigit(13) 01501 MulShiftCarry 01502 01503 MulLastDiagonal(8) 01504 MulEpilogue 01505 } 01506 01507 CRYPTOPP_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, const word* Y) 01508 { 01509 MulPrologue 01510 // now: [esp] = Z, esi = X, ecx = Y 01511 MulStartup 01512 MulAccumulate(0,0) 01513 MulStoreDigit(0) 01514 MulShiftCarry 01515 01516 MulAccumulate(1,0) 01517 MulAccumulate(0,1) 01518 MulStoreDigit(1) 01519 MulShiftCarry 01520 01521 MulAccumulate(2,0) 01522 MulAccumulate(1,1) 01523 MulAccumulate(0,2) 01524 MulStoreDigit(2) 01525 MulShiftCarry 01526 01527 MulAccumulate(3,0) 01528 MulAccumulate(2,1) 01529 MulAccumulate(1,2) 01530 MulAccumulate(0,3) 01531 MulStoreDigit(3) 01532 MulShiftCarry 01533 01534 MulAccumulate(4,0) 01535 MulAccumulate(3,1) 01536 MulAccumulate(2,2) 01537 MulAccumulate(1,3) 01538 MulAccumulate(0,4) 01539 MulStoreDigit(4) 01540 MulShiftCarry 01541 01542 MulAccumulate(5,0) 01543 MulAccumulate(4,1) 01544 MulAccumulate(3,2) 01545 MulAccumulate(2,3) 01546 MulAccumulate(1,4) 01547 MulAccumulate(0,5) 01548 MulStoreDigit(5) 01549 MulShiftCarry 01550 01551 MulAccumulate(6,0) 01552 MulAccumulate(5,1) 01553 MulAccumulate(4,2) 01554 MulAccumulate(3,3) 01555 MulAccumulate(2,4) 01556 MulAccumulate(1,5) 01557 MulAccumulate(0,6) 01558 MulStoreDigit(6) 01559 MulShiftCarry 01560 01561 MulAccumulateBottom(7,0) 01562 MulAccumulateBottom(6,1) 01563 MulAccumulateBottom(5,2) 01564 MulAccumulateBottom(4,3) 01565 MulAccumulateBottom(3,4) 01566 MulAccumulateBottom(2,5) 01567 MulAccumulateBottom(1,6) 01568 MulAccumulateBottom(0,7) 01569 MulStoreDigit(7) 01570 MulEpilogue 01571 } 01572 01573 #undef AS1 01574 #undef AS2 01575 01576 #else // not x86 - no processor specific code at this layer 01577 01578 typedef Portable LowLevel; 01579 01580 #endif 01581 01582 #ifdef SSE2_INTRINSICS_AVAILABLE 01583 01584 #ifdef __GNUC__ 01585 #define CRYPTOPP_FASTCALL 01586 #else 01587 #define CRYPTOPP_FASTCALL __fastcall 01588 #endif 01589 01590 static void CRYPTOPP_FASTCALL P4_Mul(__m128i *C, const __m128i *A, const __m128i *B) 01591 { 01592 __m128i a3210 = _mm_load_si128(A); 01593 __m128i b3210 = _mm_load_si128(B); 01594 01595 __m128i sum; 01596 01597 __m128i z = _mm_setzero_si128(); 01598 __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210); 01599 C[0] = a2b2_a0b0; 01600 01601 __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0)); 01602 __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1)); 01603 __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021); 01604 __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z); 01605 __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z); 01606 C[1] = _mm_add_epi64(a1b0, a0b1); 01607 01608 __m128i a31 = _mm_srli_epi64(a3210, 32); 01609 __m128i b31 = _mm_srli_epi64(b3210, 32); 01610 __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31); 01611 C[6] = a3b3_a1b1; 01612 01613 __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z); 01614 __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2)); 01615 __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012); 01616 __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z); 01617 __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z); 01618 sum = _mm_add_epi64(a1b1, a0b2); 01619 C[2] = _mm_add_epi64(sum, a2b0); 01620 01621 __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1)); 01622 __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3)); 01623 __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012); 01624 __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103); 01625 __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z); 01626 __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z); 01627 __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z); 01628 __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z); 01629 __m128i sum1 = _mm_add_epi64(a3b0, a1b2); 01630 sum = _mm_add_epi64(a2b1, a0b3); 01631 C[3] = _mm_add_epi64(sum, sum1); 01632 01633 __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103); 01634 __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z); 01635 __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z); 01636 __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z); 01637 sum = _mm_add_epi64(a2b2, a3b1); 01638 C[4] = _mm_add_epi64(sum, a1b3); 01639 01640 __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2)); 01641 __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3)); 01642 __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203); 01643 __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z); 01644 __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z); 01645 C[5] = _mm_add_epi64(a3b2, a2b3); 01646 } 01647 01648 void P4Optimized::Multiply4(word *C, const word *A, const word *B) 01649 { 01650 __m128i temp[7]; 01651 const word *w = (word *)temp; 01652 const __m64 *mw = (__m64 *)w; 01653 01654 P4_Mul(temp, (__m128i *)A, (__m128i *)B); 01655 01656 C[0] = w[0]; 01657 01658 __m64 s1, s2; 01659 01660 __m64 w1 = _mm_cvtsi32_si64(w[1]); 01661 __m64 w4 = mw[2]; 01662 __m64 w6 = mw[3]; 01663 __m64 w8 = mw[4]; 01664 __m64 w10 = mw[5]; 01665 __m64 w12 = mw[6]; 01666 __m64 w14 = mw[7]; 01667 __m64 w16 = mw[8]; 01668 __m64 w18 = mw[9]; 01669 __m64 w20 = mw[10]; 01670 __m64 w22 = mw[11]; 01671 __m64 w26 = _mm_cvtsi32_si64(w[26]); 01672 01673 s1 = _mm_add_si64(w1, w4); 01674 C[1] = _mm_cvtsi64_si32(s1); 01675 s1 = _mm_srli_si64(s1, 32); 01676 01677 s2 = _mm_add_si64(w6, w8); 01678 s1 = _mm_add_si64(s1, s2); 01679 C[2] = _mm_cvtsi64_si32(s1); 01680 s1 = _mm_srli_si64(s1, 32); 01681 01682 s2 = _mm_add_si64(w10, w12); 01683 s1 = _mm_add_si64(s1, s2); 01684 C[3] = _mm_cvtsi64_si32(s1); 01685 s1 = _mm_srli_si64(s1, 32); 01686 01687 s2 = _mm_add_si64(w14, w16); 01688 s1 = _mm_add_si64(s1, s2); 01689 C[4] = _mm_cvtsi64_si32(s1); 01690 s1 = _mm_srli_si64(s1, 32); 01691 01692 s2 = _mm_add_si64(w18, w20); 01693 s1 = _mm_add_si64(s1, s2); 01694 C[5] = _mm_cvtsi64_si32(s1); 01695 s1 = _mm_srli_si64(s1, 32); 01696 01697 s2 = _mm_add_si64(w22, w26); 01698 s1 = _mm_add_si64(s1, s2); 01699 C[6] = _mm_cvtsi64_si32(s1); 01700 s1 = _mm_srli_si64(s1, 32); 01701 01702 C[7] = _mm_cvtsi64_si32(s1) + w[27]; 01703 _mm_empty(); 01704 } 01705 01706 void P4Optimized::Multiply8(word *C, const word *A, const word *B) 01707 { 01708 __m128i temp[28]; 01709 const word *w = (word *)temp; 01710 const __m64 *mw = (__m64 *)w; 01711 const word *x = (word *)temp+7*4; 01712 const __m64 *mx = (__m64 *)x; 01713 const word *y = (word *)temp+7*4*2; 01714 const __m64 *my = (__m64 *)y; 01715 const word *z = (word *)temp+7*4*3; 01716 const __m64 *mz = (__m64 *)z; 01717 01718 P4_Mul(temp, (__m128i *)A, (__m128i *)B); 01719 01720 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B); 01721 01722 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1); 01723 01724 P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1); 01725 01726 C[0] = w[0]; 01727 01728 __m64 s1, s2, s3, s4; 01729 01730 __m64 w1 = _mm_cvtsi32_si64(w[1]); 01731 __m64 w4 = mw[2]; 01732 __m64 w6 = mw[3]; 01733 __m64 w8 = mw[4]; 01734 __m64 w10 = mw[5]; 01735 __m64 w12 = mw[6]; 01736 __m64 w14 = mw[7]; 01737 __m64 w16 = mw[8]; 01738 __m64 w18 = mw[9]; 01739 __m64 w20 = mw[10]; 01740 __m64 w22 = mw[11]; 01741 __m64 w26 = _mm_cvtsi32_si64(w[26]); 01742 __m64 w27 = _mm_cvtsi32_si64(w[27]); 01743 01744 __m64 x0 = _mm_cvtsi32_si64(x[0]); 01745 __m64 x1 = _mm_cvtsi32_si64(x[1]); 01746 __m64 x4 = mx[2]; 01747 __m64 x6 = mx[3]; 01748 __m64 x8 = mx[4]; 01749 __m64 x10 = mx[5]; 01750 __m64 x12 = mx[6]; 01751 __m64 x14 = mx[7]; 01752 __m64 x16 = mx[8]; 01753 __m64 x18 = mx[9]; 01754 __m64 x20 = mx[10]; 01755 __m64 x22 = mx[11]; 01756 __m64 x26 = _mm_cvtsi32_si64(x[26]); 01757 __m64 x27 = _mm_cvtsi32_si64(x[27]); 01758 01759 __m64 y0 = _mm_cvtsi32_si64(y[0]); 01760 __m64 y1 = _mm_cvtsi32_si64(y[1]); 01761 __m64 y4 = my[2]; 01762 __m64 y6 = my[3]; 01763 __m64 y8 = my[4]; 01764 __m64 y10 = my[5]; 01765 __m64 y12 = my[6]; 01766 __m64 y14 = my[7]; 01767 __m64 y16 = my[8]; 01768 __m64 y18 = my[9]; 01769 __m64 y20 = my[10]; 01770 __m64 y22 = my[11]; 01771 __m64 y26 = _mm_cvtsi32_si64(y[26]); 01772 __m64 y27 = _mm_cvtsi32_si64(y[27]); 01773 01774 __m64 z0 = _mm_cvtsi32_si64(z[0]); 01775 __m64 z1 = _mm_cvtsi32_si64(z[1]); 01776 __m64 z4 = mz[2]; 01777 __m64 z6 = mz[3]; 01778 __m64 z8 = mz[4]; 01779 __m64 z10 = mz[5]; 01780 __m64 z12 = mz[6]; 01781 __m64 z14 = mz[7]; 01782 __m64 z16 = mz[8]; 01783 __m64 z18 = mz[9]; 01784 __m64 z20 = mz[10]; 01785 __m64 z22 = mz[11]; 01786 __m64 z26 = _mm_cvtsi32_si64(z[26]); 01787 01788 s1 = _mm_add_si64(w1, w4); 01789 C[1] = _mm_cvtsi64_si32(s1); 01790 s1 = _mm_srli_si64(s1, 32); 01791 01792 s2 = _mm_add_si64(w6, w8); 01793 s1 = _mm_add_si64(s1, s2); 01794 C[2] = _mm_cvtsi64_si32(s1); 01795 s1 = _mm_srli_si64(s1, 32); 01796 01797 s2 = _mm_add_si64(w10, w12); 01798 s1 = _mm_add_si64(s1, s2); 01799 C[3] = _mm_cvtsi64_si32(s1); 01800 s1 = _mm_srli_si64(s1, 32); 01801 01802 s3 = _mm_add_si64(x0, y0); 01803 s2 = _mm_add_si64(w14, w16); 01804 s1 = _mm_add_si64(s1, s3); 01805 s1 = _mm_add_si64(s1, s2); 01806 C[4] = _mm_cvtsi64_si32(s1); 01807 s1 = _mm_srli_si64(s1, 32); 01808 01809 s3 = _mm_add_si64(x1, y1); 01810 s4 = _mm_add_si64(x4, y4); 01811 s1 = _mm_add_si64(s1, w18); 01812 s3 = _mm_add_si64(s3, s4); 01813 s1 = _mm_add_si64(s1, w20); 01814 s1 = _mm_add_si64(s1, s3); 01815 C[5] = _mm_cvtsi64_si32(s1); 01816 s1 = _mm_srli_si64(s1, 32); 01817 01818 s3 = _mm_add_si64(x6, y6); 01819 s4 = _mm_add_si64(x8, y8); 01820 s1 = _mm_add_si64(s1, w22); 01821 s3 = _mm_add_si64(s3, s4); 01822 s1 = _mm_add_si64(s1, w26); 01823 s1 = _mm_add_si64(s1, s3); 01824 C[6] = _mm_cvtsi64_si32(s1); 01825 s1 = _mm_srli_si64(s1, 32); 01826 01827 s3 = _mm_add_si64(x10, y10); 01828 s4 = _mm_add_si64(x12, y12); 01829 s1 = _mm_add_si64(s1, w27); 01830 s3 = _mm_add_si64(s3, s4); 01831 s1 = _mm_add_si64(s1, s3); 01832 C[7] = _mm_cvtsi64_si32(s1); 01833 s1 = _mm_srli_si64(s1, 32); 01834 01835 s3 = _mm_add_si64(x14, y14); 01836 s4 = _mm_add_si64(x16, y16); 01837 s1 = _mm_add_si64(s1, z0); 01838 s3 = _mm_add_si64(s3, s4); 01839 s1 = _mm_add_si64(s1, s3); 01840 C[8] = _mm_cvtsi64_si32(s1); 01841 s1 = _mm_srli_si64(s1, 32); 01842 01843 s3 = _mm_add_si64(x18, y18); 01844 s4 = _mm_add_si64(x20, y20); 01845 s1 = _mm_add_si64(s1, z1); 01846 s3 = _mm_add_si64(s3, s4); 01847 s1 = _mm_add_si64(s1, z4); 01848 s1 = _mm_add_si64(s1, s3); 01849 C[9] = _mm_cvtsi64_si32(s1); 01850 s1 = _mm_srli_si64(s1, 32); 01851 01852 s3 = _mm_add_si64(x22, y22); 01853 s4 = _mm_add_si64(x26, y26); 01854 s1 = _mm_add_si64(s1, z6); 01855 s3 = _mm_add_si64(s3, s4); 01856 s1 = _mm_add_si64(s1, z8); 01857 s1 = _mm_add_si64(s1, s3); 01858 C[10] = _mm_cvtsi64_si32(s1); 01859 s1 = _mm_srli_si64(s1, 32); 01860 01861 s3 = _mm_add_si64(x27, y27); 01862 s1 = _mm_add_si64(s1, z10); 01863 s1 = _mm_add_si64(s1, z12); 01864 s1 = _mm_add_si64(s1, s3); 01865 C[11] = _mm_cvtsi64_si32(s1); 01866 s1 = _mm_srli_si64(s1, 32); 01867 01868 s3 = _mm_add_si64(z14, z16); 01869 s1 = _mm_add_si64(s1, s3); 01870 C[12] = _mm_cvtsi64_si32(s1); 01871 s1 = _mm_srli_si64(s1, 32); 01872 01873 s3 = _mm_add_si64(z18, z20); 01874 s1 = _mm_add_si64(s1, s3); 01875 C[13] = _mm_cvtsi64_si32(s1); 01876 s1 = _mm_srli_si64(s1, 32); 01877 01878 s3 = _mm_add_si64(z22, z26); 01879 s1 = _mm_add_si64(s1, s3); 01880 C[14] = _mm_cvtsi64_si32(s1); 01881 s1 = _mm_srli_si64(s1, 32); 01882 01883 C[15] = z[27] + _mm_cvtsi64_si32(s1); 01884 _mm_empty(); 01885 } 01886 01887 void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B) 01888 { 01889 __m128i temp[21]; 01890 const word *w = (word *)temp; 01891 const __m64 *mw = (__m64 *)w; 01892 const word *x = (word *)temp+7*4; 01893 const __m64 *mx = (__m64 *)x; 01894 const word *y = (word *)temp+7*4*2; 01895 const __m64 *my = (__m64 *)y; 01896 01897 P4_Mul(temp, (__m128i *)A, (__m128i *)B); 01898 01899 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B); 01900 01901 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1); 01902 01903 C[0] = w[0]; 01904 01905 __m64 s1, s2, s3, s4; 01906 01907 __m64 w1 = _mm_cvtsi32_si64(w[1]); 01908 __m64 w4 = mw[2]; 01909 __m64 w6 = mw[3]; 01910 __m64 w8 = mw[4]; 01911 __m64 w10 = mw[5]; 01912 __m64 w12 = mw[6]; 01913 __m64 w14 = mw[7]; 01914 __m64 w16 = mw[8]; 01915 __m64 w18 = mw[9]; 01916 __m64 w20 = mw[10]; 01917 __m64 w22 = mw[11]; 01918 __m64 w26 = _mm_cvtsi32_si64(w[26]); 01919 01920 __m64 x0 = _mm_cvtsi32_si64(x[0]); 01921 __m64 x1 = _mm_cvtsi32_si64(x[1]); 01922 __m64 x4 = mx[2]; 01923 __m64 x6 = mx[3]; 01924 __m64 x8 = mx[4]; 01925 01926 __m64 y0 = _mm_cvtsi32_si64(y[0]); 01927 __m64 y1 = _mm_cvtsi32_si64(y[1]); 01928 __m64 y4 = my[2]; 01929 __m64 y6 = my[3]; 01930 __m64 y8 = my[4]; 01931 01932 s1 = _mm_add_si64(w1, w4); 01933 C[1] = _mm_cvtsi64_si32(s1); 01934 s1 = _mm_srli_si64(s1, 32); 01935 01936 s2 = _mm_add_si64(w6, w8); 01937 s1 = _mm_add_si64(s1, s2); 01938 C[2] = _mm_cvtsi64_si32(s1); 01939 s1 = _mm_srli_si64(s1, 32); 01940 01941 s2 = _mm_add_si64(w10, w12); 01942 s1 = _mm_add_si64(s1, s2); 01943 C[3] = _mm_cvtsi64_si32(s1); 01944 s1 = _mm_srli_si64(s1, 32); 01945 01946 s3 = _mm_add_si64(x0, y0); 01947 s2 = _mm_add_si64(w14, w16); 01948 s1 = _mm_add_si64(s1, s3); 01949 s1 = _mm_add_si64(s1, s2); 01950 C[4] = _mm_cvtsi64_si32(s1); 01951 s1 = _mm_srli_si64(s1, 32); 01952 01953 s3 = _mm_add_si64(x1, y1); 01954 s4 = _mm_add_si64(x4, y4); 01955 s1 = _mm_add_si64(s1, w18); 01956 s3 = _mm_add_si64(s3, s4); 01957 s1 = _mm_add_si64(s1, w20); 01958 s1 = _mm_add_si64(s1, s3); 01959 C[5] = _mm_cvtsi64_si32(s1); 01960 s1 = _mm_srli_si64(s1, 32); 01961 01962 s3 = _mm_add_si64(x6, y6); 01963 s4 = _mm_add_si64(x8, y8); 01964 s1 = _mm_add_si64(s1, w22); 01965 s3 = _mm_add_si64(s3, s4); 01966 s1 = _mm_add_si64(s1, w26); 01967 s1 = _mm_add_si64(s1, s3); 01968 C[6] = _mm_cvtsi64_si32(s1); 01969 s1 = _mm_srli_si64(s1, 32); 01970 01971 C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12]; 01972 _mm_empty(); 01973 } 01974 01975 #endif // #ifdef SSE2_INTRINSICS_AVAILABLE 01976 01977 // ******************************************************** 01978 01979 #define A0 A 01980 #define A1 (A+N2) 01981 #define B0 B 01982 #define B1 (B+N2) 01983 01984 #define T0 T 01985 #define T1 (T+N2) 01986 #define T2 (T+N) 01987 #define T3 (T+N+N2) 01988 01989 #define R0 R 01990 #define R1 (R+N2) 01991 #define R2 (R+N) 01992 #define R3 (R+N+N2) 01993 01994 // R[2*N] - result = A*B 01995 // T[2*N] - temporary work space 01996 // A[N] --- multiplier 01997 // B[N] --- multiplicant 01998 01999 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N) 02000 { 02001 assert(N>=2 && N%2==0); 02002 02003 if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8) 02004 LowLevel::Multiply8(R, A, B); 02005 else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4) 02006 LowLevel::Multiply4(R, A, B); 02007 else if (N==2) 02008 LowLevel::Multiply2(R, A, B); 02009 else 02010 { 02011 const unsigned int N2 = N/2; 02012 int carry; 02013 02014 int aComp = Compare(A0, A1, N2); 02015 int bComp = Compare(B0, B1, N2); 02016 02017 switch (2*aComp + aComp + bComp) 02018 { 02019 case -4: 02020 LowLevel::Subtract(R0, A1, A0, N2); 02021 LowLevel::Subtract(R1, B0, B1, N2); 02022 RecursiveMultiply(T0, T2, R0, R1, N2); 02023 LowLevel::Subtract(T1, T1, R0, N2); 02024 carry = -1; 02025 break; 02026 case -2: 02027 LowLevel::Subtract(R0, A1, A0, N2); 02028 LowLevel::Subtract(R1, B0, B1, N2); 02029 RecursiveMultiply(T0, T2, R0, R1, N2); 02030 carry = 0; 02031 break; 02032 case 2: 02033 LowLevel::Subtract(R0, A0, A1, N2); 02034 LowLevel::Subtract(R1, B1, B0, N2); 02035 RecursiveMultiply(T0, T2, R0, R1, N2); 02036 carry = 0; 02037 break; 02038 case 4: 02039 LowLevel::Subtract(R0, A1, A0, N2); 02040 LowLevel::Subtract(R1, B0, B1, N2); 02041 RecursiveMultiply(T0, T2, R0, R1, N2); 02042 LowLevel::Subtract(T1, T1, R1, N2); 02043 carry = -1; 02044 break; 02045 default: 02046 SetWords(T0, 0, N); 02047 carry = 0; 02048 } 02049 02050 RecursiveMultiply(R0, T2, A0, B0, N2); 02051 RecursiveMultiply(R2, T2, A1, B1, N2); 02052 02053 // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1 02054 02055 carry += LowLevel::Add(T0, T0, R0, N); 02056 carry += LowLevel::Add(T0, T0, R2, N); 02057 carry += LowLevel::Add(R1, R1, T0, N); 02058 02059 assert (carry >= 0 && carry <= 2); 02060 Increment(R3, N2, carry); 02061 } 02062 } 02063 02064 // R[2*N] - result = A*A 02065 // T[2*N] - temporary work space 02066 // A[N] --- number to be squared 02067 02068 void RecursiveSquare(word *R, word *T, const word *A, unsigned int N) 02069 { 02070 assert(N && N%2==0); 02071 if (LowLevel::SquareRecursionLimit() >= 8 && N==8) 02072 LowLevel::Square8(R, A); 02073 if (LowLevel::SquareRecursionLimit() >= 4 && N==4) 02074 LowLevel::Square4(R, A); 02075 else if (N==2) 02076 LowLevel::Square2(R, A); 02077 else 02078 { 02079 const unsigned int N2 = N/2; 02080 02081 RecursiveSquare(R0, T2, A0, N2); 02082 RecursiveSquare(R2, T2, A1, N2); 02083 RecursiveMultiply(T0, T2, A0, A1, N2); 02084 02085 word carry = LowLevel::Add(R1, R1, T0, N); 02086 carry += LowLevel::Add(R1, R1, T0, N); 02087 Increment(R3, N2, carry); 02088 } 02089 } 02090 02091 // R[N] - bottom half of A*B 02092 // T[N] - temporary work space 02093 // A[N] - multiplier 02094 // B[N] - multiplicant 02095 02096 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N) 02097 { 02098 assert(N>=2 && N%2==0); 02099 if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8) 02100 LowLevel::Multiply8Bottom(R, A, B); 02101 else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4) 02102 LowLevel::Multiply4Bottom(R, A, B); 02103 else if (N==2) 02104 LowLevel::Multiply2Bottom(R, A, B); 02105 else 02106 { 02107 const unsigned int N2 = N/2; 02108 02109 RecursiveMultiply(R, T, A0, B0, N2); 02110 RecursiveMultiplyBottom(T0, T1, A1, B0, N2); 02111 LowLevel::Add(R1, R1, T0, N2); 02112 RecursiveMultiplyBottom(T0, T1, A0, B1, N2); 02113 LowLevel::Add(R1, R1, T0, N2); 02114 } 02115 } 02116 02117 // R[N] --- upper half of A*B 02118 // T[2*N] - temporary work space 02119 // L[N] --- lower half of A*B 02120 // A[N] --- multiplier 02121 // B[N] --- multiplicant 02122 02123 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N) 02124 { 02125 assert(N>=2 && N%2==0); 02126 02127 if (N==4) 02128 { 02129 LowLevel::Multiply4(T, A, B); 02130 memcpy(R, T+4, 4*WORD_SIZE); 02131 } 02132 else if (N==2) 02133 { 02134 LowLevel::Multiply2(T, A, B); 02135 memcpy(R, T+2, 2*WORD_SIZE); 02136 } 02137 else 02138 { 02139 const unsigned int N2 = N/2; 02140 int carry; 02141 02142 int aComp = Compare(A0, A1, N2); 02143 int bComp = Compare(B0, B1, N2); 02144 02145 switch (2*aComp + aComp + bComp) 02146 { 02147 case -4: 02148 LowLevel::Subtract(R0, A1, A0, N2); 02149 LowLevel::Subtract(R1, B0, B1, N2); 02150 RecursiveMultiply(T0, T2, R0, R1, N2); 02151 LowLevel::Subtract(T1, T1, R0, N2); 02152 carry = -1; 02153 break; 02154 case -2: 02155 LowLevel::Subtract(R0, A1, A0, N2); 02156 LowLevel::Subtract(R1, B0, B1, N2); 02157 RecursiveMultiply(T0, T2, R0, R1, N2); 02158 carry = 0; 02159 break; 02160 case 2: 02161 LowLevel::Subtract(R0, A0, A1, N2); 02162 LowLevel::Subtract(R1, B1, B0, N2); 02163 RecursiveMultiply(T0, T2, R0, R1, N2); 02164 carry = 0; 02165 break; 02166 case 4: 02167 LowLevel::Subtract(R0, A1, A0, N2); 02168 LowLevel::Subtract(R1, B0, B1, N2); 02169 RecursiveMultiply(T0, T2, R0, R1, N2); 02170 LowLevel::Subtract(T1, T1, R1, N2); 02171 carry = -1; 02172 break; 02173 default: 02174 SetWords(T0, 0, N); 02175 carry = 0; 02176 } 02177 02178 RecursiveMultiply(T2, R0, A1, B1, N2); 02179 02180 // now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1 02181 02182 word c2 = LowLevel::Subtract(R0, L+N2, L, N2); 02183 c2 += LowLevel::Subtract(R0, R0, T0, N2); 02184 word t = (Compare(R0, T2, N2) == -1); 02185 02186 carry += t; 02187 carry += Increment(R0, N2, c2+t); 02188 carry += LowLevel::Add(R0, R0, T1, N2); 02189 carry += LowLevel::Add(R0, R0, T3, N2); 02190 assert (carry >= 0 && carry <= 2); 02191 02192 CopyWords(R1, T3, N2); 02193 Increment(R1, N2, carry); 02194 } 02195 } 02196 02197 inline word Add(word *C, const word *A, const word *B, unsigned int N) 02198 { 02199 return LowLevel::Add(C, A, B, N); 02200 } 02201 02202 inline word Subtract(word *C, const word *A, const word *B, unsigned int N) 02203 { 02204 return LowLevel::Subtract(C, A, B, N); 02205 } 02206 02207 inline void Multiply(word *R, word *T, const word *A, const word *B, unsigned int N) 02208 { 02209 RecursiveMultiply(R, T, A, B, N); 02210 } 02211 02212 inline void Square(word *R, word *T, const word *A, unsigned int N) 02213 { 02214 RecursiveSquare(R, T, A, N); 02215 } 02216 02217 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N) 02218 { 02219 RecursiveMultiplyBottom(R, T, A, B, N); 02220 } 02221 02222 inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N) 02223 { 02224 RecursiveMultiplyTop(R, T, L, A, B, N); 02225 } 02226 02227 static word LinearMultiply(word *C, const word *A, word B, unsigned int N) 02228 { 02229 word carry=0; 02230 for(unsigned i=0; i<N; i++) 02231 { 02232 DWord p = DWord::MultiplyAndAdd(A[i], B, carry); 02233 C[i] = p.GetLowHalf(); 02234 carry = p.GetHighHalf(); 02235 } 02236 return carry; 02237 } 02238 02239 // R[NA+NB] - result = A*B 02240 // T[NA+NB] - temporary work space 02241 // A[NA] ---- multiplier 02242 // B[NB] ---- multiplicant 02243 02244 void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB) 02245 { 02246 if (NA == NB) 02247 { 02248 if (A == B) 02249 Square(R, T, A, NA); 02250 else 02251 Multiply(R, T, A, B, NA); 02252 02253 return; 02254 } 02255 02256 if (NA > NB) 02257 { 02258 std::swap(A, B); 02259 std::swap(NA, NB); 02260 } 02261 02262 assert(NB % NA == 0); 02263 assert((NB/NA)%2 == 0); // NB is an even multiple of NA 02264 02265 if (NA==2 && !A[1]) 02266 { 02267 switch (A[0]) 02268 { 02269 case 0: 02270 SetWords(R, 0, NB+2); 02271 return; 02272 case 1: 02273 CopyWords(R, B, NB); 02274 R[NB] = R[NB+1] = 0; 02275 return; 02276 default: 02277 R[NB] = LinearMultiply(R, B, A[0], NB); 02278 R[NB+1] = 0; 02279 return; 02280 } 02281 } 02282 02283 Multiply(R, T, A, B, NA); 02284 CopyWords(T+2*NA, R+NA, NA); 02285 02286 unsigned i; 02287 02288 for (i=2*NA; i<NB; i+=2*NA) 02289 Multiply(T+NA+i, T, A, B+i, NA); 02290 for (i=NA; i<NB; i+=2*NA) 02291 Multiply(R+i, T, A, B+i, NA); 02292 02293 if (Add(R+NA, R+NA, T+2*NA, NB-NA)) 02294 Increment(R+NB, NA); 02295 } 02296 02297 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N) 02298 // T[3*N/2] - temporary work space 02299 // A[N] ----- an odd number as input 02300 02301 void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N) 02302 { 02303 if (N==2) 02304 { 02305 T[0] = AtomicInverseModPower2(A[0]); 02306 T[1] = 0; 02307 LowLevel::Multiply2Bottom(T+2, T, A); 02308 TwosComplement(T+2, 2); 02309 Increment(T+2, 2, 2); 02310 LowLevel::Multiply2Bottom(R, T, T+2); 02311 } 02312 else 02313 { 02314 const unsigned int N2 = N/2; 02315 RecursiveInverseModPower2(R0, T0, A0, N2); 02316 T0[0] = 1; 02317 SetWords(T0+1, 0, N2-1); 02318 MultiplyTop(R1, T1, T0, R0, A0, N2); 02319 MultiplyBottom(T0, T1, R0, A1, N2); 02320 Add(T0, R1, T0, N2); 02321 TwosComplement(T0, N2); 02322 MultiplyBottom(R1, T1, R0, T0, N2); 02323 } 02324 } 02325 02326 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M 02327 // T[3*N] - temporary work space 02328 // X[2*N] - number to be reduced 02329 // M[N] --- modulus 02330 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N) 02331 02332 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, unsigned int N) 02333 { 02334 MultiplyBottom(R, T, X, U, N); 02335 MultiplyTop(T, T+N, X, R, M, N); 02336 word borrow = Subtract(T, X+N, T, N); 02337 // defend against timing attack by doing this Add even when not needed 02338 word carry = Add(T+N, T, M, N); 02339 assert(carry || !borrow); 02340 CopyWords(R, T + (borrow ? N : 0), N); 02341 } 02342 02343 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M 02344 // T[2*N] - temporary work space 02345 // X[2*N] - number to be reduced 02346 // M[N] --- modulus 02347 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2) 02348 // V[N] --- 2**(WORD_BITS*3*N/2) mod M 02349 02350 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, unsigned int N) 02351 { 02352 assert(N%2==0 && N>=4); 02353 02354 #define M0 M 02355 #define M1 (M+N2) 02356 #define V0 V 02357 #define V1 (V+N2) 02358 02359 #define X0 X 02360 #define X1 (X+N2) 02361 #define X2 (X+N) 02362 #define X3 (X+N+N2) 02363 02364 const unsigned int N2 = N/2; 02365 Multiply(T0, T2, V0, X3, N2); 02366 int c2 = Add(T0, T0, X0, N); 02367 MultiplyBottom(T3, T2, T0, U, N2); 02368 MultiplyTop(T2, R, T0, T3, M0, N2); 02369 c2 -= Subtract(T2, T1, T2, N2); 02370 Multiply(T0, R, T3, M1, N2); 02371 c2 -= Subtract(T0, T2, T0, N2); 02372 int c3 = -(int)Subtract(T1, X2, T1, N2); 02373 Multiply(R0, T2, V1, X3, N2); 02374 c3 += Add(R, R, T, N); 02375 02376 if (c2>0) 02377 c3 += Increment(R1, N2); 02378 else if (c2<0) 02379 c3 -= Decrement(R1, N2, -c2); 02380 02381 assert(c3>=-1 && c3<=1); 02382 if (c3>0) 02383 Subtract(R, R, M, N); 02384 else if (c3<0) 02385 Add(R, R, M, N); 02386 02387 #undef M0 02388 #undef M1 02389 #undef V0 02390 #undef V1 02391 02392 #undef X0 02393 #undef X1 02394 #undef X2 02395 #undef X3 02396 } 02397 02398 #undef A0 02399 #undef A1 02400 #undef B0 02401 #undef B1 02402 02403 #undef T0 02404 #undef T1 02405 #undef T2 02406 #undef T3 02407 02408 #undef R0 02409 #undef R1 02410 #undef R2 02411 #undef R3 02412 02413 /* 02414 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A 02415 static word SubatomicDivide(word *A, word B0, word B1) 02416 { 02417 // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word 02418 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); 02419 02420 // estimate the quotient: do a 2 word by 1 word divide 02421 word Q; 02422 if (B1+1 == 0) 02423 Q = A[2]; 02424 else 02425 Q = DWord(A[1], A[2]).DividedBy(B1+1); 02426 02427 // now subtract Q*B from A 02428 DWord p = DWord::Multiply(B0, Q); 02429 DWord u = (DWord) A[0] - p.GetLowHalf(); 02430 A[0] = u.GetLowHalf(); 02431 u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q); 02432 A[1] = u.GetLowHalf(); 02433 A[2] += u.GetHighHalf(); 02434 02435 // Q <= actual quotient, so fix it 02436 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) 02437 { 02438 u = (DWord) A[0] - B0; 02439 A[0] = u.GetLowHalf(); 02440 u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow(); 02441 A[1] = u.GetLowHalf(); 02442 A[2] += u.GetHighHalf(); 02443 Q++; 02444 assert(Q); // shouldn't overflow 02445 } 02446 02447 return Q; 02448 } 02449 02450 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 02451 static inline void AtomicDivide(word *Q, const word *A, const word *B) 02452 { 02453 if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) 02454 { 02455 Q[0] = A[2]; 02456 Q[1] = A[3]; 02457 } 02458 else 02459 { 02460 word T[4]; 02461 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3]; 02462 Q[1] = SubatomicDivide(T+1, B[0], B[1]); 02463 Q[0] = SubatomicDivide(T, B[0], B[1]); 02464 02465 #ifndef NDEBUG 02466 // multiply quotient and divisor and add remainder, make sure it equals dividend 02467 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); 02468 word P[4]; 02469 LowLevel::Multiply2(P, Q, B); 02470 Add(P, P, T, 4); 02471 assert(memcmp(P, A, 4*WORD_SIZE)==0); 02472 #endif 02473 } 02474 } 02475 */ 02476 02477 static inline void AtomicDivide(word *Q, const word *A, const word *B) 02478 { 02479 word T[4]; 02480 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1])); 02481 Q[0] = q.GetLowHalf(); 02482 Q[1] = q.GetHighHalf(); 02483 02484 #ifndef NDEBUG 02485 if (B[0] || B[1]) 02486 { 02487 // multiply quotient and divisor and add remainder, make sure it equals dividend 02488 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); 02489 word P[4]; 02490 Portable::Multiply2(P, Q, B); 02491 Add(P, P, T, 4); 02492 assert(memcmp(P, A, 4*WORD_SIZE)==0); 02493 } 02494 #endif 02495 } 02496 02497 // for use by Divide(), corrects the underestimated quotient {Q1,Q0} 02498 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, unsigned int N) 02499 { 02500 assert(N && N%2==0); 02501 02502 if (Q[1]) 02503 { 02504 T[N] = T[N+1] = 0; 02505 unsigned i; 02506 for (i=0; i<N; i+=4) 02507 LowLevel::Multiply2(T+i, Q, B+i); 02508 for (i=2; i<N; i+=4) 02509 if (LowLevel::Multiply2Add(T+i, Q, B+i)) 02510 T[i+5] += (++T[i+4]==0); 02511 } 02512 else 02513 { 02514 T[N] = LinearMultiply(T, B, Q[0], N); 02515 T[N+1] = 0; 02516 } 02517 02518 word borrow = Subtract(R, R, T, N+2); 02519 assert(!borrow && !R[N+1]); 02520 02521 while (R[N] || Compare(R, B, N) >= 0) 02522 { 02523 R[N] -= Subtract(R, R, B, N); 02524 Q[1] += (++Q[0]==0); 02525 assert(Q[0] || Q[1]); // no overflow 02526 } 02527 } 02528 02529 // R[NB] -------- remainder = A%B 02530 // Q[NA-NB+2] --- quotient = A/B 02531 // T[NA+2*NB+4] - temp work space 02532 // A[NA] -------- dividend 02533 // B[NB] -------- divisor 02534 02535 void Divide(word *R, word *Q, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB) 02536 { 02537 assert(NA && NB && NA%2==0 && NB%2==0); 02538 assert(B[NB-1] || B[NB-2]); 02539 assert(NB <= NA); 02540 02541 // set up temporary work space 02542 word *const TA=T; 02543 word *const TB=T+NA+2; 02544 word *const TP=T+NA+2+NB; 02545 02546 // copy B into TB and normalize it so that TB has highest bit set to 1 02547 unsigned shiftWords = (B[NB-1]==0); 02548 TB[0] = TB[NB-1] = 0; 02549 CopyWords(TB+shiftWords, B, NB-shiftWords); 02550 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]); 02551 assert(shiftBits < WORD_BITS); 02552 ShiftWordsLeftByBits(TB, NB, shiftBits); 02553 02554 // copy A into TA and normalize it 02555 TA[0] = TA[NA] = TA[NA+1] = 0; 02556 CopyWords(TA+shiftWords, A, NA); 02557 ShiftWordsLeftByBits(TA, NA+2, shiftBits); 02558 02559 if (TA[NA+1]==0 && TA[NA] <= 1) 02560 { 02561 Q[NA-NB+1] = Q[NA-NB] = 0; 02562 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0) 02563 { 02564 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB); 02565 ++Q[NA-NB]; 02566 } 02567 } 02568 else 02569 { 02570 NA+=2; 02571 assert(Compare(TA+NA-NB, TB, NB) < 0); 02572 } 02573 02574 word BT[2]; 02575 BT[0] = TB[NB-2] + 1; 02576 BT[1] = TB[NB-1] + (BT[0]==0); 02577 02578 // start reducing TA mod TB, 2 words at a time 02579 for (unsigned i=NA-2; i>=NB; i-=2) 02580 { 02581 AtomicDivide(Q+i-NB, TA+i-2, BT); 02582 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB); 02583 } 02584 02585 // copy TA into R, and denormalize it 02586 CopyWords(R, TA+shiftWords, NB); 02587 ShiftWordsRightByBits(R, NB, shiftBits); 02588 } 02589 02590 static inline unsigned int EvenWordCount(const word *X, unsigned int N) 02591 { 02592 while (N && X[N-2]==0 && X[N-1]==0) 02593 N-=2; 02594 return N; 02595 } 02596 02597 // return k 02598 // R[N] --- result = A^(-1) * 2^k mod M 02599 // T[4*N] - temporary work space 02600 // A[NA] -- number to take inverse of 02601 // M[N] --- modulus 02602 02603 unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA, const word *M, unsigned int N) 02604 { 02605 assert(NA<=N && N && N%2==0); 02606 02607 word *b = T; 02608 word *c = T+N; 02609 word *f = T+2*N; 02610 word *g = T+3*N; 02611 unsigned int bcLen=2, fgLen=EvenWordCount(M, N); 02612 unsigned int k=0, s=0; 02613 02614 SetWords(T, 0, 3*N); 02615 b[0]=1; 02616 CopyWords(f, A, NA); 02617 CopyWords(g, M, N); 02618 02619 while (1) 02620 { 02621 word t=f[0]; 02622 while (!t) 02623 { 02624 if (EvenWordCount(f, fgLen)==0) 02625 { 02626 SetWords(R, 0, N); 02627 return 0; 02628 } 02629 02630 ShiftWordsRightByWords(f, fgLen, 1); 02631 if (c[bcLen-1]) bcLen+=2; 02632 assert(bcLen <= N); 02633 ShiftWordsLeftByWords(c, bcLen, 1); 02634 k+=WORD_BITS; 02635 t=f[0]; 02636 } 02637 02638 unsigned int i=0; 02639 while (t%2 == 0) 02640 { 02641 t>>=1; 02642 i++; 02643 } 02644 k+=i; 02645 02646 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2) 02647 { 02648 if (s%2==0) 02649 CopyWords(R, b, N); 02650 else 02651 Subtract(R, M, b, N); 02652 return k; 02653 } 02654 02655 ShiftWordsRightByBits(f, fgLen, i); 02656 t=ShiftWordsLeftByBits(c, bcLen, i); 02657 if (t) 02658 { 02659 c[bcLen] = t; 02660 bcLen+=2; 02661 assert(bcLen <= N); 02662 } 02663 02664 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0) 02665 fgLen-=2; 02666 02667 if (Compare(f, g, fgLen)==-1) 02668 { 02669 std::swap(f, g); 02670 std::swap(b, c); 02671 s++; 02672 } 02673 02674 Subtract(f, f, g, fgLen); 02675 02676 if (Add(b, b, c, bcLen)) 02677 { 02678 b[bcLen] = 1; 02679 bcLen+=2; 02680 assert(bcLen <= N); 02681 } 02682 } 02683 } 02684 02685 // R[N] - result = A/(2^k) mod M 02686 // A[N] - input 02687 // M[N] - modulus 02688 02689 void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N) 02690 { 02691 CopyWords(R, A, N); 02692 02693 while (k--) 02694 { 02695 if (R[0]%2==0) 02696 ShiftWordsRightByBits(R, N, 1); 02697 else 02698 { 02699 word carry = Add(R, R, M, N); 02700 ShiftWordsRightByBits(R, N, 1); 02701 R[N-1] += carry<<(WORD_BITS-1); 02702 } 02703 } 02704 } 02705 02706 // R[N] - result = A*(2^k) mod M 02707 // A[N] - input 02708 // M[N] - modulus 02709 02710 void MultiplyByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N) 02711 { 02712 CopyWords(R, A, N); 02713 02714 while (k--) 02715 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0) 02716 Subtract(R, R, M, N); 02717 } 02718 02719 // ****************************************************************** 02720 02721 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8}; 02722 02723 static inline unsigned int RoundupSize(unsigned int n) 02724 { 02725 if (n<=8) 02726 return RoundupSizeTable[n]; 02727 else if (n<=16) 02728 return 16; 02729 else if (n<=32) 02730 return 32; 02731 else if (n<=64) 02732 return 64; 02733 else return 1U << BitPrecision(n-1); 02734 } 02735 02736 Integer::Integer() 02737 : reg(2), sign(POSITIVE) 02738 { 02739 reg[0] = reg[1] = 0; 02740 } 02741 02742 Integer::Integer(const Integer& t) 02743 : reg(RoundupSize(t.WordCount())), sign(t.sign) 02744 { 02745 CopyWords(reg, t.reg, reg.size()); 02746 } 02747 02748 Integer::Integer(Sign s, lword value) 02749 : reg(2), sign(s) 02750 { 02751 reg[0] = word(value); 02752 reg[1] = word(SafeRightShift<WORD_BITS>(value)); 02753 } 02754 02755 Integer::Integer(signed long value) 02756 : reg(2) 02757 { 02758 if (value >= 0) 02759 sign = POSITIVE; 02760 else 02761 { 02762 sign = NEGATIVE; 02763 value = -value; 02764 } 02765 reg[0] = word(value); 02766 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value)); 02767 } 02768 02769 Integer::Integer(Sign s, word high, word low) 02770 : reg(2), sign(s) 02771 { 02772 reg[0] = low; 02773 reg[1] = high; 02774 } 02775 02776 bool Integer::IsConvertableToLong() const 02777 { 02778 if (ByteCount() > sizeof(long)) 02779 return false; 02780 02781 unsigned long value = reg[0]; 02782 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]); 02783 02784 if (sign==POSITIVE) 02785 return (signed long)value >= 0; 02786 else 02787 return -(signed long)value < 0; 02788 } 02789 02790 signed long Integer::ConvertToLong() const 02791 { 02792 assert(IsConvertableToLong()); 02793 02794 unsigned long value = reg[0]; 02795 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]); 02796 return sign==POSITIVE ? value : -(signed long)value; 02797 } 02798 02799 Integer::Integer(BufferedTransformation &encodedInteger, unsigned int byteCount, Signedness s) 02800 { 02801 Decode(encodedInteger, byteCount, s); 02802 } 02803 02804 Integer::Integer(const byte *encodedInteger, unsigned int byteCount, Signedness s) 02805 { 02806 Decode(encodedInteger, byteCount, s); 02807 } 02808 02809 Integer::Integer(BufferedTransformation &bt) 02810 { 02811 BERDecode(bt); 02812 } 02813 02814 Integer::Integer(RandomNumberGenerator &rng, unsigned int bitcount) 02815 { 02816 Randomize(rng, bitcount); 02817 } 02818 02819 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) 02820 { 02821 if (!Randomize(rng, min, max, rnType, equiv, mod)) 02822 throw Integer::RandomNumberNotFound(); 02823 } 02824 02825 Integer Integer::Power2(unsigned int e) 02826 { 02827 Integer r((word)0, BitsToWords(e+1)); 02828 r.SetBit(e); 02829 return r; 02830 } 02831 02832 template <long i> 02833 struct NewInteger 02834 { 02835 Integer * operator()() const 02836 { 02837 return new Integer(i); 02838 } 02839 }; 02840 02841 const Integer &Integer::Zero() 02842 { 02843 return Singleton<Integer>().Ref(); 02844 } 02845 02846 const Integer &Integer::One() 02847 { 02848 return Singleton<Integer, NewInteger<1> >().Ref(); 02849 } 02850 02851 const Integer &Integer::Two() 02852 { 02853 return Singleton<Integer, NewInteger<2> >().Ref(); 02854 } 02855 02856 bool Integer::operator!() const 02857 { 02858 return IsNegative() ? false : (reg[0]==0 && WordCount()==0); 02859 } 02860 02861 Integer& Integer::operator=(const Integer& t) 02862 { 02863 if (this != &t) 02864 { 02865 reg.New(RoundupSize(t.WordCount())); 02866 CopyWords(reg, t.reg, reg.size()); 02867 sign = t.sign; 02868 } 02869 return *this; 02870 } 02871 02872 bool Integer::GetBit(unsigned int n) const 02873 { 02874 if (n/WORD_BITS >= reg.size()) 02875 return 0; 02876 else 02877 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1); 02878 } 02879 02880 void Integer::SetBit(unsigned int n, bool value) 02881 { 02882 if (value) 02883 { 02884 reg.CleanGrow(RoundupSize(BitsToWords(n+1))); 02885 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS)); 02886 } 02887 else 02888 { 02889 if (n/WORD_BITS < reg.size()) 02890 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS)); 02891 } 02892 } 02893 02894 byte Integer::GetByte(unsigned int n) const 02895 { 02896 if (n/WORD_SIZE >= reg.size()) 02897 return 0; 02898 else 02899 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8)); 02900 } 02901 02902 void Integer::SetByte(unsigned int n, byte value) 02903 { 02904 reg.CleanGrow(RoundupSize(BytesToWords(n+1))); 02905 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE)); 02906 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE)); 02907 } 02908 02909 unsigned long Integer::GetBits(unsigned int i, unsigned int n) const 02910 { 02911 assert(n <= sizeof(unsigned long)*8); 02912 unsigned long v = 0; 02913 for (unsigned int j=0; j<n; j++) 02914 v |= GetBit(i+j) << j; 02915 return v; 02916 } 02917 02918 Integer Integer::operator-() const 02919 { 02920 Integer result(*this); 02921 result.Negate(); 02922 return result; 02923 } 02924 02925 Integer Integer::AbsoluteValue() const 02926 { 02927 Integer result(*this); 02928 result.sign = POSITIVE; 02929 return result; 02930 } 02931 02932 void Integer::swap(Integer &a) 02933 { 02934 reg.swap(a.reg); 02935 std::swap(sign, a.sign); 02936 } 02937 02938 Integer::Integer(word value, unsigned int length) 02939 : reg(RoundupSize(length)), sign(POSITIVE) 02940 { 02941 reg[0] = value; 02942 SetWords(reg+1, 0, reg.size()-1); 02943 } 02944 02945 template <class T> 02946 static Integer StringToInteger(const T *str) 02947 { 02948 word radix; 02949 // GCC workaround 02950 // std::char_traits doesn't exist in GCC 2.x 02951 // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3 02952 unsigned int length; 02953 for (length = 0; str[length] != 0; length++) {} 02954 02955 Integer v; 02956 02957 if (length == 0) 02958 return v; 02959 02960 switch (str[length-1]) 02961 { 02962 case 'h': 02963 case 'H': 02964 radix=16; 02965 break; 02966 case 'o': 02967 case 'O': 02968 radix=8; 02969 break; 02970 case 'b': 02971 case 'B': 02972 radix=2; 02973 break; 02974 default: 02975 radix=10; 02976 } 02977 02978 if (length > 2 && str[0] == '0' && str[1] == 'x') 02979 radix = 16; 02980 02981 for (unsigned i=0; i<length; i++) 02982 { 02983 word digit; 02984 02985 if (str[i] >= '0' && str[i] <= '9') 02986 digit = str[i] - '0'; 02987 else if (str[i] >= 'A' && str[i] <= 'F') 02988 digit = str[i] - 'A' + 10; 02989 else if (str[i] >= 'a' && str[i] <= 'f') 02990 digit = str[i] - 'a' + 10; 02991 else 02992 digit = radix; 02993 02994 if (digit < radix) 02995 { 02996 v *= radix; 02997 v += digit; 02998 } 02999 } 03000 03001 if (str[0] == '-') 03002 v.Negate(); 03003 03004 return v; 03005 } 03006 03007 Integer::Integer(const char *str) 03008 : reg(2), sign(POSITIVE) 03009 { 03010 *this = StringToInteger(str); 03011 } 03012 03013 Integer::Integer(const wchar_t *str) 03014 : reg(2), sign(POSITIVE) 03015 { 03016 *this = StringToInteger(str); 03017 } 03018 03019 unsigned int Integer::WordCount() const 03020 { 03021 return CountWords(reg, reg.size()); 03022 } 03023 03024 unsigned int Integer::ByteCount() const 03025 { 03026 unsigned wordCount = WordCount(); 03027 if (wordCount) 03028 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]); 03029 else 03030 return 0; 03031 } 03032 03033 unsigned int Integer::BitCount() const 03034 { 03035 unsigned wordCount = WordCount(); 03036 if (wordCount) 03037 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]); 03038 else 03039 return 0; 03040 } 03041 03042 void Integer::Decode(const byte *input, unsigned int inputLen, Signedness s) 03043 { 03044 StringStore store(input, inputLen); 03045 Decode(store, inputLen, s); 03046 } 03047 03048 void Integer::Decode(BufferedTransformation &bt, unsigned int inputLen, Signedness s) 03049 { 03050 assert(bt.MaxRetrievable() >= inputLen); 03051 03052 byte b; 03053 bt.Peek(b); 03054 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE; 03055 03056 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff)) 03057 { 03058 bt.Skip(1); 03059 inputLen--; 03060 bt.Peek(b); 03061 } 03062 03063 reg.CleanNew(RoundupSize(BytesToWords(inputLen))); 03064 03065 for (unsigned int i=inputLen; i > 0; i--) 03066 { 03067 bt.Get(b); 03068 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8; 03069 } 03070 03071 if (sign == NEGATIVE) 03072 { 03073 for (unsigned i=inputLen; i<reg.size()*WORD_SIZE; i++) 03074 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8; 03075 TwosComplement(reg, reg.size()); 03076 } 03077 } 03078 03079 unsigned int Integer::MinEncodedSize(Signedness signedness) const 03080 { 03081 unsigned int outputLen = STDMAX(1U, ByteCount()); 03082 if (signedness == UNSIGNED) 03083 return outputLen; 03084 if (NotNegative() && (GetByte(outputLen-1) & 0x80)) 03085 outputLen++; 03086 if (IsNegative() && *this < -Power2(outputLen*8-1)) 03087 outputLen++; 03088 return outputLen; 03089 } 03090 03091 unsigned int Integer::Encode(byte *output, unsigned int outputLen, Signedness signedness) const 03092 { 03093 ArraySink sink(output, outputLen); 03094 return Encode(sink, outputLen, signedness); 03095 } 03096 03097 unsigned int Integer::Encode(BufferedTransformation &bt, unsigned int outputLen, Signedness signedness) const 03098 { 03099 if (signedness == UNSIGNED || NotNegative()) 03100 { 03101 for (unsigned int i=outputLen; i > 0; i--) 03102 bt.Put(GetByte(i-1)); 03103 } 03104 else 03105 { 03106 // take two's complement of *this 03107 Integer temp = Integer::Power2(8*STDMAX(ByteCount(), outputLen)) + *this; 03108 for (unsigned i=0; i<outputLen; i++) 03109 bt.Put(temp.GetByte(outputLen-i-1)); 03110 } 03111 return outputLen; 03112 } 03113 03114 void Integer::DEREncode(BufferedTransformation &bt) const 03115 { 03116 DERGeneralEncoder enc(bt, INTEGER); 03117 Encode(enc, MinEncodedSize(SIGNED), SIGNED); 03118 enc.MessageEnd(); 03119 } 03120 03121 void Integer::BERDecode(const byte *input, unsigned int len) 03122 { 03123 StringStore store(input, len); 03124 BERDecode(store); 03125 } 03126 03127 void Integer::BERDecode(BufferedTransformation &bt) 03128 { 03129 BERGeneralDecoder dec(bt, INTEGER); 03130 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength()) 03131 BERDecodeError(); 03132 Decode(dec, dec.RemainingLength(), SIGNED); 03133 dec.MessageEnd(); 03134 } 03135 03136 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, unsigned int length) const 03137 { 03138 DERGeneralEncoder enc(bt, OCTET_STRING); 03139 Encode(enc, length); 03140 enc.MessageEnd(); 03141 } 03142 03143 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, unsigned int length) 03144 { 03145 BERGeneralDecoder dec(bt, OCTET_STRING); 03146 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length) 03147 BERDecodeError(); 03148 Decode(dec, length); 03149 dec.MessageEnd(); 03150 } 03151 03152 unsigned int Integer::OpenPGPEncode(byte *output, unsigned int len) const 03153 { 03154 ArraySink sink(output, len); 03155 return OpenPGPEncode(sink); 03156 } 03157 03158 unsigned int Integer::OpenPGPEncode(BufferedTransformation &bt) const 03159 { 03160 word16 bitCount = BitCount(); 03161 bt.PutWord16(bitCount); 03162 return 2 + Encode(bt, BitsToBytes(bitCount)); 03163 } 03164 03165 void Integer::OpenPGPDecode(const byte *input, unsigned int len) 03166 { 03167 StringStore store(input, len); 03168 OpenPGPDecode(store); 03169 } 03170 03171 void Integer::OpenPGPDecode(BufferedTransformation &bt) 03172 { 03173 word16 bitCount; 03174 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount)) 03175 throw OpenPGPDecodeErr(); 03176 Decode(bt, BitsToBytes(bitCount)); 03177 } 03178 03179 void Integer::Randomize(RandomNumberGenerator &rng, unsigned int nbits) 03180 { 03181 const unsigned int nbytes = nbits/8 + 1; 03182 SecByteBlock buf(nbytes); 03183 rng.GenerateBlock(buf, nbytes); 03184 if (nbytes) 03185 buf[0] = (byte)Crop(buf[0], nbits % 8); 03186 Decode(buf, nbytes, UNSIGNED); 03187 } 03188 03189 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max) 03190 { 03191 if (min > max) 03192 throw InvalidArgument("Integer: Min must be no greater than Max"); 03193 03194 Integer range = max - min; 03195 const unsigned int nbits = range.BitCount(); 03196 03197 do 03198 { 03199 Randomize(rng, nbits); 03200 } 03201 while (*this > range); 03202 03203 *this += min; 03204 } 03205 03206 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) 03207 { 03208 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod)); 03209 } 03210 03211 class KDF2_RNG : public RandomNumberGenerator 03212 { 03213 public: 03214 KDF2_RNG(const byte *seed, unsigned int seedSize) 03215 : m_counter(0), m_counterAndSeed(seedSize + 4) 03216 { 03217 memcpy(m_counterAndSeed + 4, seed, seedSize); 03218 } 03219 03220 byte GenerateByte() 03221 { 03222 byte b; 03223 GenerateBlock(&b, 1); 03224 return b; 03225 } 03226 03227 void GenerateBlock(byte *output, unsigned int size) 03228 { 03229 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter); 03230 ++m_counter; 03231 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0); 03232 } 03233 03234 private: 03235 word32 m_counter; 03236 SecByteBlock m_counterAndSeed; 03237 }; 03238 03239 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params) 03240 { 03241 Integer min = params.GetValueWithDefault("Min", Integer::Zero()); 03242 Integer max; 03243 if (!params.GetValue("Max", max)) 03244 { 03245 int bitLength; 03246 if (params.GetIntValue("BitLength", bitLength)) 03247 max = Integer::Power2(bitLength); 03248 else 03249 throw InvalidArgument("Integer: missing Max argument"); 03250 } 03251 if (min > max) 03252 throw InvalidArgument("Integer: Min must be no greater than Max"); 03253 03254 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero()); 03255 Integer mod = params.GetValueWithDefault("Mod", Integer::One()); 03256 03257 if (equiv.IsNegative() || equiv >= mod) 03258 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument"); 03259 03260 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY); 03261 03262 member_ptr<KDF2_RNG> kdf2Rng; 03263 ConstByteArrayParameter seed; 03264 if (params.GetValue("Seed", seed)) 03265 { 03266 ByteQueue bq; 03267 DERSequenceEncoder seq(bq); 03268 min.DEREncode(seq); 03269 max.DEREncode(seq); 03270 equiv.DEREncode(seq); 03271 mod.DEREncode(seq); 03272 DEREncodeUnsigned(seq, rnType); 03273 DEREncodeOctetString(seq, seed.begin(), seed.size()); 03274 seq.MessageEnd(); 03275 03276 SecByteBlock finalSeed(bq.MaxRetrievable()); 03277 bq.Get(finalSeed, finalSeed.size()); 03278 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size())); 03279 } 03280 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng; 03281 03282 switch (rnType) 03283 { 03284 case ANY: 03285 if (mod == One()) 03286 Randomize(rng, min, max); 03287 else 03288 { 03289 Integer min1 = min + (equiv-min)%mod; 03290 if (max < min1) 03291 return false; 03292 Randomize(rng, Zero(), (max - min1) / mod); 03293 *this *= mod; 03294 *this += min1; 03295 } 03296 return true; 03297 03298 case PRIME: 03299 { 03300 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL); 03301 03302 int i; 03303 i = 0; 03304 while (1) 03305 { 03306 if (++i==16) 03307 { 03308 // check if there are any suitable primes in [min, max] 03309 Integer first = min; 03310 if (FirstPrime(first, max, equiv, mod, pSelector)) 03311 { 03312 // if there is only one suitable prime, we're done 03313 *this = first; 03314 if (!FirstPrime(first, max, equiv, mod, pSelector)) 03315 return true; 03316 } 03317 else 03318 return false; 03319 } 03320 03321 Randomize(rng, min, max); 03322 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector)) 03323 return true; 03324 } 03325 } 03326 03327 default: 03328 throw InvalidArgument("Integer: invalid RandomNumberType argument"); 03329 } 03330 } 03331 03332 std::istream& operator>>(std::istream& in, Integer &a) 03333 { 03334 char c; 03335 unsigned int length = 0; 03336 SecBlock<char> str(length + 16); 03337 03338 std::ws(in); 03339 03340 do 03341 { 03342 in.read(&c, 1); 03343 str[length++] = c; 03344 if (length >= str.size()) 03345 str.Grow(length + 16); 03346 } 03347 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.')); 03348 03349 if (in.gcount()) 03350 in.putback(c); 03351 str[length-1] = '\0'; 03352 a = Integer(str); 03353 03354 return in; 03355 } 03356 03357 std::ostream& operator<<(std::ostream& out, const Integer &a) 03358 { 03359 // Get relevant conversion specifications from ostream. 03360 long f = out.flags() & std::ios::basefield; // Get base digits. 03361 int base, block; 03362 char suffix; 03363 switch(f) 03364 { 03365 case std::ios::oct : 03366 base = 8; 03367 block = 8; 03368 suffix = 'o'; 03369 break; 03370 case std::ios::hex : 03371 base = 16; 03372 block = 4; 03373 suffix = 'h'; 03374 break; 03375 default : 03376 base = 10; 03377 block = 3; 03378 suffix = '.'; 03379 } 03380 03381 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1); 03382 Integer temp1=a, temp2; 03383 unsigned i=0; 03384 const char vec[]="0123456789ABCDEF"; 03385 03386 if (a.IsNegative()) 03387 { 03388 out << '-'; 03389 temp1.Negate(); 03390 } 03391 03392 if (!a) 03393 out << '0'; 03394 03395 while (!!temp1) 03396 { 03397 word digit; 03398 Integer::Divide(digit, temp2, temp1, base); 03399 s[i++]=vec[digit]; 03400 temp1=temp2; 03401 } 03402 03403 while (i--) 03404 { 03405 out << s[i]; 03406 // if (i && !(i%block)) 03407 // out << ","; 03408 } 03409 return out << suffix; 03410 } 03411 03412 Integer& Integer::operator++() 03413 { 03414 if (NotNegative()) 03415 { 03416 if (Increment(reg, reg.size())) 03417 { 03418 reg.CleanGrow(2*reg.size()); 03419 reg[reg.size()/2]=1; 03420 } 03421 } 03422 else 03423 { 03424 word borrow = Decrement(reg, reg.size()); 03425 assert(!borrow); 03426 if (WordCount()==0) 03427 *this = Zero(); 03428 } 03429 return *this; 03430 } 03431 03432 Integer& Integer::operator--() 03433 { 03434 if (IsNegative()) 03435 { 03436 if (Increment(reg, reg.size())) 03437 { 03438 reg.CleanGrow(2*reg.size()); 03439 reg[reg.size()/2]=1; 03440 } 03441 } 03442 else 03443 { 03444 if (Decrement(reg, reg.size())) 03445 *this = -One(); 03446 } 03447 return *this; 03448 } 03449 03450 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b) 03451 { 03452 word carry; 03453 if (a.reg.size() == b.reg.size()) 03454 carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); 03455 else if (a.reg.size() > b.reg.size()) 03456 { 03457 carry = Add(sum.reg, a.reg, b.reg, b.reg.size()); 03458 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size()); 03459 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry); 03460 } 03461 else 03462 { 03463 carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); 03464 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size()); 03465 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry); 03466 } 03467 03468 if (carry) 03469 { 03470 sum.reg.CleanGrow(2*sum.reg.size()); 03471 sum.reg[sum.reg.size()/2] = 1; 03472 } 03473 sum.sign = Integer::POSITIVE; 03474 } 03475 03476 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b) 03477 { 03478 unsigned aSize = a.WordCount(); 03479 aSize += aSize%2; 03480 unsigned bSize = b.WordCount(); 03481 bSize += bSize%2; 03482 03483 if (aSize == bSize) 03484 { 03485 if (Compare(a.reg, b.reg, aSize) >= 0) 03486 { 03487 Subtract(diff.reg, a.reg, b.reg, aSize); 03488 diff.sign = Integer::POSITIVE; 03489 } 03490 else 03491 { 03492 Subtract(diff.reg, b.reg, a.reg, aSize); 03493 diff.sign = Integer::NEGATIVE; 03494 } 03495 } 03496 else if (aSize > bSize) 03497 { 03498 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize); 03499 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize); 03500 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow); 03501 assert(!borrow); 03502 diff.sign = Integer::POSITIVE; 03503 } 03504 else 03505 { 03506 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize); 03507 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize); 03508 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow); 03509 assert(!borrow); 03510 diff.sign = Integer::NEGATIVE; 03511 } 03512 } 03513 03514 Integer Integer::Plus(const Integer& b) const 03515 { 03516 Integer sum((word)0, STDMAX(reg.size(), b.reg.size())); 03517 if (NotNegative()) 03518 { 03519 if (b.NotNegative()) 03520 PositiveAdd(sum, *this, b); 03521 else 03522 PositiveSubtract(sum, *this, b); 03523 } 03524 else 03525 { 03526 if (b.NotNegative()) 03527 PositiveSubtract(sum, b, *this); 03528 else 03529 { 03530 PositiveAdd(sum, *this, b); 03531 sum.sign = Integer::NEGATIVE; 03532 } 03533 } 03534 return sum; 03535 } 03536 03537 Integer& Integer::operator+=(const Integer& t) 03538 { 03539 reg.CleanGrow(t.reg.size()); 03540 if (NotNegative()) 03541 { 03542 if (t.NotNegative()) 03543 PositiveAdd(*this, *this, t); 03544 else 03545 PositiveSubtract(*this, *this, t); 03546 } 03547 else 03548 { 03549 if (t.NotNegative()) 03550 PositiveSubtract(*this, t, *this); 03551 else 03552 { 03553 PositiveAdd(*this, *this, t); 03554 sign = Integer::NEGATIVE; 03555 } 03556 } 03557 return *this; 03558 } 03559 03560 Integer Integer::Minus(const Integer& b) const 03561 { 03562 Integer diff((word)0, STDMAX(reg.size(), b.reg.size())); 03563 if (NotNegative()) 03564 { 03565 if (b.NotNegative()) 03566 PositiveSubtract(diff, *this, b); 03567 else 03568 PositiveAdd(diff, *this, b); 03569 } 03570 else 03571 { 03572 if (b.NotNegative()) 03573 { 03574 PositiveAdd(diff, *this, b); 03575 diff.sign = Integer::NEGATIVE; 03576 } 03577 else 03578 PositiveSubtract(diff, b, *this); 03579 } 03580 return diff; 03581 } 03582 03583 Integer& Integer::operator-=(const Integer& t) 03584 { 03585 reg.CleanGrow(t.reg.size()); 03586 if (NotNegative()) 03587 { 03588 if (t.NotNegative()) 03589 PositiveSubtract(*this, *this, t); 03590 else 03591 PositiveAdd(*this, *this, t); 03592 } 03593 else 03594 { 03595 if (t.NotNegative()) 03596 { 03597 PositiveAdd(*this, *this, t); 03598 sign = Integer::NEGATIVE; 03599 } 03600 else 03601 PositiveSubtract(*this, t, *this); 03602 } 03603 return *this; 03604 } 03605 03606 Integer& Integer::operator<<=(unsigned int n) 03607 { 03608 const unsigned int wordCount = WordCount(); 03609 const unsigned int shiftWords = n / WORD_BITS; 03610 const unsigned int shiftBits = n % WORD_BITS; 03611 03612 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n))); 03613 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords); 03614 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits); 03615 return *this; 03616 } 03617 03618 Integer& Integer::operator>>=(unsigned int n) 03619 { 03620 const unsigned int wordCount = WordCount(); 03621 const unsigned int shiftWords = n / WORD_BITS; 03622 const unsigned int shiftBits = n % WORD_BITS; 03623 03624 ShiftWordsRightByWords(reg, wordCount, shiftWords); 03625 if (wordCount > shiftWords) 03626 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits); 03627 if (IsNegative() && WordCount()==0) // avoid -0 03628 *this = Zero(); 03629 return *this; 03630 } 03631 03632 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b) 03633 { 03634 unsigned aSize = RoundupSize(a.WordCount()); 03635 unsigned bSize = RoundupSize(b.WordCount()); 03636 03637 product.reg.CleanNew(RoundupSize(aSize+bSize)); 03638 product.sign = Integer::POSITIVE; 03639 03640 SecAlignedWordBlock workspace(aSize + bSize); 03641 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize); 03642 } 03643 03644 void Multiply(Integer &product, const Integer &a, const Integer &b) 03645 { 03646 PositiveMultiply(product, a, b); 03647 03648 if (a.NotNegative() != b.NotNegative()) 03649 product.Negate(); 03650 } 03651 03652 Integer Integer::Times(const Integer &b) const 03653 { 03654 Integer product; 03655 Multiply(product, *this, b); 03656 return product; 03657 } 03658 03659 /* 03660 void PositiveDivide(Integer &remainder, Integer &quotient, 03661 const Integer &dividend, const Integer &divisor) 03662 { 03663 remainder.reg.CleanNew(divisor.reg.size()); 03664 remainder.sign = Integer::POSITIVE; 03665 quotient.reg.New(0); 03666 quotient.sign = Integer::POSITIVE; 03667 unsigned i=dividend.BitCount(); 03668 while (i--) 03669 { 03670 word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1); 03671 remainder.reg[0] |= dividend[i]; 03672 if (overflow || remainder >= divisor) 03673 { 03674 Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size()); 03675 quotient.SetBit(i); 03676 } 03677 } 03678 } 03679 */ 03680 03681 void PositiveDivide(Integer &remainder, Integer &quotient, 03682 const Integer &a, const Integer &b) 03683 { 03684 unsigned aSize = a.WordCount(); 03685 unsigned bSize = b.WordCount(); 03686 03687 if (!bSize) 03688 throw Integer::DivideByZero(); 03689 03690 if (a.PositiveCompare(b) == -1) 03691 { 03692 remainder = a; 03693 remainder.sign = Integer::POSITIVE; 03694 quotient = Integer::Zero(); 03695 return; 03696 } 03697 03698 aSize += aSize%2; // round up to next even number 03699 bSize += bSize%2; 03700 03701 remainder.reg.CleanNew(RoundupSize(bSize)); 03702 remainder.sign = Integer::POSITIVE; 03703 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2)); 03704 quotient.sign = Integer::POSITIVE; 03705 03706 SecAlignedWordBlock T(aSize+2*bSize+4); 03707 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize); 03708 } 03709 03710 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor) 03711 { 03712 PositiveDivide(remainder, quotient, dividend, divisor); 03713 03714 if (dividend.IsNegative()) 03715 { 03716 quotient.Negate(); 03717 if (remainder.NotZero()) 03718 { 03719 --quotient; 03720 remainder = divisor.AbsoluteValue() - remainder; 03721 } 03722 } 03723 03724 if (divisor.IsNegative()) 03725 quotient.Negate(); 03726 } 03727 03728 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n) 03729 { 03730 q = a; 03731 q >>= n; 03732 03733 const unsigned int wordCount = BitsToWords(n); 03734 if (wordCount <= a.WordCount()) 03735 { 03736 r.reg.resize(RoundupSize(wordCount)); 03737 CopyWords(r.reg, a.reg, wordCount); 03738 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount); 03739 if (n % WORD_BITS != 0) 03740 r.reg[wordCount-1] %= (1 << (n % WORD_BITS)); 03741 } 03742 else 03743 { 03744 r.reg.resize(RoundupSize(a.WordCount())); 03745 CopyWords(r.reg, a.reg, r.reg.size()); 03746 } 03747 r.sign = POSITIVE; 03748 03749 if (a.IsNegative() && r.NotZero()) 03750 { 03751 --q; 03752 r = Power2(n) - r; 03753 } 03754 } 03755 03756 Integer Integer::DividedBy(const Integer &b) const 03757 { 03758 Integer remainder, quotient; 03759 Integer::Divide(remainder, quotient, *this, b); 03760 return quotient; 03761 } 03762 03763 Integer Integer::Modulo(const Integer &b) const 03764 { 03765 Integer remainder, quotient; 03766 Integer::Divide(remainder, quotient, *this, b); 03767 return remainder; 03768 } 03769 03770 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor) 03771 { 03772 if (!divisor) 03773 throw Integer::DivideByZero(); 03774 03775 assert(divisor); 03776 03777 if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 03778 { 03779 quotient = dividend >> (BitPrecision(divisor)-1); 03780 remainder = dividend.reg[0] & (divisor-1); 03781 return; 03782 } 03783 03784 unsigned int i = dividend.WordCount(); 03785 quotient.reg.CleanNew(RoundupSize(i)); 03786 remainder = 0; 03787 while (i--) 03788 { 03789 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor; 03790 remainder = DWord(dividend.reg[i], remainder) % divisor; 03791 } 03792 03793 if (dividend.NotNegative()) 03794 quotient.sign = POSITIVE; 03795 else 03796 { 03797 quotient.sign = NEGATIVE; 03798 if (remainder) 03799 { 03800 --quotient; 03801 remainder = divisor - remainder; 03802 } 03803 } 03804 } 03805 03806 Integer Integer::DividedBy(word b) const 03807 { 03808 word remainder; 03809 Integer quotient; 03810 Integer::Divide(remainder, quotient, *this, b); 03811 return quotient; 03812 } 03813 03814 word Integer::Modulo(word divisor) const 03815 { 03816 if (!divisor) 03817 throw Integer::DivideByZero(); 03818 03819 assert(divisor); 03820 03821 word remainder; 03822 03823 if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 03824 remainder = reg[0] & (divisor-1); 03825 else 03826 { 03827 unsigned int i = WordCount(); 03828 03829 if (divisor <= 5) 03830 { 03831 DWord sum(0, 0); 03832 while (i--) 03833 sum += reg[i]; 03834 remainder = sum % divisor; 03835 } 03836 else 03837 { 03838 remainder = 0; 03839 while (i--) 03840 remainder = DWord(reg[i], remainder) % divisor; 03841 } 03842 } 03843 03844 if (IsNegative() && remainder) 03845 remainder = divisor - remainder; 03846 03847 return remainder; 03848 } 03849 03850 void Integer::Negate() 03851 { 03852 if (!!(*this)) // don't flip sign if *this==0 03853 sign = Sign(1-sign); 03854 } 03855 03856 int Integer::PositiveCompare(const Integer& t) const 03857 { 03858 unsigned size = WordCount(), tSize = t.WordCount(); 03859 03860 if (size == tSize) 03861 return CryptoPP::Compare(reg, t.reg, size); 03862 else 03863 return size > tSize ? 1 : -1; 03864 } 03865 03866 int Integer::Compare(const Integer& t) const 03867 { 03868 if (NotNegative()) 03869 { 03870 if (t.NotNegative()) 03871 return PositiveCompare(t); 03872 else 03873 return 1; 03874 } 03875 else 03876 { 03877 if (t.NotNegative()) 03878 return -1; 03879 else 03880 return -PositiveCompare(t); 03881 } 03882 } 03883 03884 Integer Integer::SquareRoot() const 03885 { 03886 if (!IsPositive()) 03887 return Zero(); 03888 03889 // overestimate square root 03890 Integer x, y = Power2((BitCount()+1)/2); 03891 assert(y*y >= *this); 03892 03893 do 03894 { 03895 x = y; 03896 y = (x + *this/x) >> 1; 03897 } while (y<x); 03898 03899 return x; 03900 } 03901 03902 bool Integer::IsSquare() const 03903 { 03904 Integer r = SquareRoot(); 03905 return *this == r.Squared(); 03906 } 03907 03908 bool Integer::IsUnit() const 03909 { 03910 return (WordCount() == 1) && (reg[0] == 1); 03911 } 03912 03913 Integer Integer::MultiplicativeInverse() const 03914 { 03915 return IsUnit() ? *this : Zero(); 03916 } 03917 03918 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m) 03919 { 03920 return x*y%m; 03921 } 03922 03923 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m) 03924 { 03925 ModularArithmetic mr(m); 03926 return mr.Exponentiate(x, e); 03927 } 03928 03929 Integer Integer::Gcd(const Integer &a, const Integer &b) 03930 { 03931 return EuclideanDomainOf<Integer>().Gcd(a, b); 03932 } 03933 03934 Integer Integer::InverseMod(const Integer &m) const 03935 { 03936 assert(m.NotNegative()); 03937 03938 if (IsNegative() || *this>=m) 03939 return (*this%m).InverseMod(m); 03940 03941 if (m.IsEven()) 03942 { 03943 if (!m || IsEven()) 03944 return Zero(); // no inverse 03945 if (*this == One()) 03946 return One(); 03947 03948 Integer u = m.InverseMod(*this); 03949 return !u ? Zero() : (m*(*this-u)+1)/(*this); 03950 } 03951 03952 SecBlock<word> T(m.reg.size() * 4); 03953 Integer r((word)0, m.reg.size()); 03954 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size()); 03955 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size()); 03956 return r; 03957 } 03958 03959 word Integer::InverseMod(const word mod) const 03960 { 03961 word g0 = mod, g1 = *this % mod; 03962 word v0 = 0, v1 = 1; 03963 word y; 03964 03965 while (g1) 03966 { 03967 if (g1 == 1) 03968 return v1; 03969 y = g0 / g1; 03970 g0 = g0 % g1; 03971 v0 += y * v1; 03972 03973 if (!g0) 03974 break; 03975 if (g0 == 1) 03976 return mod-v0; 03977 y = g1 / g0; 03978 g1 = g1 % g0; 03979 v1 += y * v0; 03980 } 03981 return 0; 03982 } 03983 03984 // ******************************************************** 03985 03986 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt) 03987 { 03988 BERSequenceDecoder seq(bt); 03989 OID oid(seq); 03990 if (oid != ASN1::prime_field()) 03991 BERDecodeError(); 03992 modulus.BERDecode(seq); 03993 seq.MessageEnd(); 03994 result.reg.resize(modulus.reg.size()); 03995 } 03996 03997 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const 03998 { 03999 DERSequenceEncoder seq(bt); 04000 ASN1::prime_field().DEREncode(seq); 04001 modulus.DEREncode(seq); 04002 seq.MessageEnd(); 04003 } 04004 04005 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const 04006 { 04007 a.DEREncodeAsOctetString(out, MaxElementByteLength()); 04008 } 04009 04010 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const 04011 { 04012 a.BERDecodeAsOctetString(in, MaxElementByteLength()); 04013 } 04014 04015 const Integer& ModularArithmetic::Half(const Integer &a) const 04016 { 04017 if (a.reg.size()==modulus.reg.size()) 04018 { 04019 CryptoPP::DivideByPower2Mod(result.reg.begin(), a.reg, 1, modulus.reg, a.reg.size()); 04020 return result; 04021 } 04022 else 04023 return result1 = (a.IsEven() ? (a >> 1) : ((a+modulus) >> 1)); 04024 } 04025 04026 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const 04027 { 04028 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size()) 04029 { 04030 if (CryptoPP::Add(result.reg.begin(), a.reg, b.reg, a.reg.size()) 04031 || Compare(result.reg, modulus.reg, a.reg.size()) >= 0) 04032 { 04033 CryptoPP::Subtract(result.reg.begin(), result.reg, modulus.reg, a.reg.size()); 04034 } 04035 return result; 04036 } 04037 else 04038 { 04039 result1 = a+b; 04040 if (result1 >= modulus) 04041 result1 -= modulus; 04042 return result1; 04043 } 04044 } 04045 04046 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const 04047 { 04048 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size()) 04049 { 04050 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size()) 04051 || Compare(a.reg, modulus.reg, a.reg.size()) >= 0) 04052 { 04053 CryptoPP::Subtract(a.reg, a.reg, modulus.reg, a.reg.size()); 04054 } 04055 } 04056 else 04057 { 04058 a+=b; 04059 if (a>=modulus) 04060 a-=modulus; 04061 } 04062 04063 return a; 04064 } 04065 04066 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const 04067 { 04068 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size()) 04069 { 04070 if (CryptoPP::Subtract(result.reg.begin(), a.reg, b.reg, a.reg.size())) 04071 CryptoPP::Add(result.reg.begin(), result.reg, modulus.reg, a.reg.size()); 04072 return result; 04073 } 04074 else 04075 { 04076 result1 = a-b; 04077 if (result1.IsNegative()) 04078 result1 += modulus; 04079 return result1; 04080 } 04081 } 04082 04083 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const 04084 { 04085 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size()) 04086 { 04087 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size())) 04088 CryptoPP::Add(a.reg, a.reg, modulus.reg, a.reg.size()); 04089 } 04090 else 04091 { 04092 a-=b; 04093 if (a.IsNegative()) 04094 a+=modulus; 04095 } 04096 04097 return a; 04098 } 04099 04100 const Integer& ModularArithmetic::Inverse(const Integer &a) const 04101 { 04102 if (!a) 04103 return a; 04104 04105 CopyWords(result.reg.begin(), modulus.reg, modulus.reg.size()); 04106 if (CryptoPP::Subtract(result.reg.begin(), result.reg, a.reg, a.reg.size())) 04107 Decrement(result.reg.begin()+a.reg.size(), 1, modulus.reg.size()-a.reg.size()); 04108 04109 return result; 04110 } 04111 04112 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const 04113 { 04114 if (modulus.IsOdd()) 04115 { 04116 MontgomeryRepresentation dr(modulus); 04117 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2)); 04118 } 04119 else 04120 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2); 04121 } 04122 04123 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const 04124 { 04125 if (modulus.IsOdd()) 04126 { 04127 MontgomeryRepresentation dr(modulus); 04128 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount); 04129 for (unsigned int i=0; i<exponentsCount; i++) 04130 results[i] = dr.ConvertOut(results[i]); 04131 } 04132 else 04133 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount); 04134 } 04135 04136 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd 04137 : ModularArithmetic(m), 04138 u((word)0, modulus.reg.size()), 04139 workspace(5*modulus.reg.size()) 04140 { 04141 if (!modulus.IsOdd()) 04142 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus"); 04143 04144 RecursiveInverseModPower2(u.reg, workspace, modulus.reg, modulus.reg.size()); 04145 } 04146 04147 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const 04148 { 04149 word *const T = workspace.begin(); 04150 word *const R = result.reg.begin(); 04151 const unsigned int N = modulus.reg.size(); 04152 assert(a.reg.size()<=N && b.reg.size()<=N); 04153 04154 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size()); 04155 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size()); 04156 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N); 04157 return result; 04158 } 04159 04160 const Integer& MontgomeryRepresentation::Square(const Integer &a) const 04161 { 04162 word *const T = workspace.begin(); 04163 word *const R = result.reg.begin(); 04164 const unsigned int N = modulus.reg.size(); 04165 assert(a.reg.size()<=N); 04166 04167 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size()); 04168 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size()); 04169 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N); 04170 return result; 04171 } 04172 04173 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const 04174 { 04175 word *const T = workspace.begin(); 04176 word *const R = result.reg.begin(); 04177 const unsigned int N = modulus.reg.size(); 04178 assert(a.reg.size()<=N); 04179 04180 CopyWords(T, a.reg, a.reg.size()); 04181 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); 04182 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N); 04183 return result; 04184 } 04185 04186 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const 04187 { 04188 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus; 04189 word *const T = workspace.begin(); 04190 word *const R = result.reg.begin(); 04191 const unsigned int N = modulus.reg.size(); 04192 assert(a.reg.size()<=N); 04193 04194 CopyWords(T, a.reg, a.reg.size()); 04195 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); 04196 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N); 04197 unsigned k = AlmostInverse(R, T, R, N, modulus.reg, N); 04198 04199 // cout << "k=" << k << " N*32=" << 32*N << endl; 04200 04201 if (k>N*WORD_BITS) 04202 DivideByPower2Mod(R, R, k-N*WORD_BITS, modulus.reg, N); 04203 else 04204 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, modulus.reg, N); 04205 04206 return result; 04207 } 04208 04209 NAMESPACE_END 04210 04211 #endif

Generated on Wed Jul 21 19:15:28 2004 for Crypto++ by doxygen 1.3.7-20040704