diff --git a/src/share/classes/java/math/BigInteger.java b/src/share/classes/java/math/BigInteger.java --- a/src/share/classes/java/math/BigInteger.java +++ b/src/share/classes/java/math/BigInteger.java @@ -93,6 +93,7 @@ * @see BigDecimal * @author Josh Bloch * @author Michael McCloskey + * @author Alan Eliasen * @since JDK1.1 */ @@ -173,6 +174,38 @@ */ final static long LONG_MASK = 0xffffffffL; + /** + * The threshold value for using Karatsuba multiplication. If the number + * of ints in both mag arrays are greater than this number, then + * Karatsuba multiplication will be used. This value is found + * experimentally to work well. + */ + private static final int KARATSUBA_THRESHOLD = 50; + + /** + * The threshold value for using 3-way Toom-Cook multiplication. + * If the number of ints in both mag arrays are greater than this number, + * then Toom-Cook multiplication will be used. This value is found + * experimentally to work well. + */ + private static final int TOOM_COOK_THRESHOLD = 75; + + /** + * The threshold value for using Karatsuba squaring. If the number + * of ints in the number are larger than this value, + * Karatsuba squaring will be used. This value is found + * experimentally to work well. + */ + private static final int KARATSUBA_SQUARE_THRESHOLD = 90; + + /** + * The threshold value for using Toom-Cook squaring. If the number + * of ints in the number are larger than this value, + * Karatsuba squaring will be used. This value is found + * experimentally to work well. + */ + private static final int TOOM_COOK_SQUARE_THRESHOLD = 140; + //Constructors /** @@ -533,7 +566,8 @@ if (bitLength < 2) throw new ArithmeticException("bitLength < 2"); // The cutoff of 95 was chosen empirically for best performance - prime = (bitLength < 95 ? smallPrime(bitLength, certainty, rnd) + prime = (bitLength < SMALL_PRIME_THRESHOLD + ? smallPrime(bitLength, certainty, rnd) : largePrime(bitLength, certainty, rnd)); signum = 1; mag = prime.mag; @@ -1025,6 +1059,11 @@ private static final BigInteger TWO = valueOf(2); /** + * The BigInteger constant -1. (Not exported.) + */ + private static final BigInteger NEGATIVE_ONE = valueOf(-1); + + /** * The BigInteger constant ten. * * @since 1.5 @@ -1166,10 +1205,21 @@ if (val.signum == 0 || signum == 0) return ZERO; - int[] result = multiplyToLen(mag, mag.length, - val.mag, val.mag.length, null); - result = trustedStripLeadingZeroInts(result); - return new BigInteger(result, signum == val.signum ? 1 : -1); + int xlen = mag.length; + int ylen = val.mag.length; + + if ((xlen < KARATSUBA_THRESHOLD) || (ylen < KARATSUBA_THRESHOLD)) + { + int[] result = multiplyToLen(mag, xlen, + val.mag, ylen, null); + result = trustedStripLeadingZeroInts(result); + return new BigInteger(result, signum == val.signum ? 1 : -1); + } + else + if ((xlen < TOOM_COOK_THRESHOLD) && (ylen < TOOM_COOK_THRESHOLD)) + return multiplyKaratsuba(this, val); + else + return multiplyToomCook3(this, val); } /** @@ -1249,6 +1299,280 @@ } /** + * Multiplies two BigIntegers using the Karatsuba multiplication + * algorithm. This is a recursive divide-and-conquer algorithm which is + * more efficient for large numbers than what is commonly called the + * "grade-school" algorithm used in multiplyToLen. If the numbers to be + * multiplied have length n, the "grade-school" algorithm has an + * asymptotic complexity of O(n^2). In contrast, the Karatsuba algorithm + * has complexity of O(n^(log2(3))), or O(n^1.585). It achieves this + * increased performance by doing 3 multiplies instead of 4 when + * evaluating the product. As it has some overhead, should be used when + * both numbers are larger than a certain threshold (found + * experimentally). + * + */ + private static BigInteger multiplyKaratsuba(BigInteger x, BigInteger y) + { + int xlen = x.mag.length; + int ylen = y.mag.length; + + // The number of ints in each half of the number. + int half = (Math.max(xlen, ylen)+1) / 2; + + // xl and yl are the lower halves of x and y respectively, + // xh and yh are the upper halves. + BigInteger xl = x.getLower(half); + BigInteger xh = x.getUpper(half); + BigInteger yl = y.getLower(half); + BigInteger yh = y.getUpper(half); + + BigInteger p1 = xh.multiply(yh); // p1 = xh*yh + BigInteger p2 = xl.multiply(yl); // p2 = xl*yl + + // The following is p3=(xh+xl)*(yh+yl) + BigInteger p3 = xh.add(xl).multiply(yh.add(yl)); + + // p1 * 2^(32*2*half) + (p3 - p1 - p2) * 2^(32*half) + p2 + BigInteger result = p1.shiftLeft(32*half).add(p3.subtract(p1).subtract(p2)).shiftLeft(32*half).add(p2); + + if (x.signum * y.signum == -1) + return result.negate(); + else + return result; + } + + /** + * Multiplies two BigIntegers using a 3-way Toom-Cook multiplication + * algorithm. This is a recursive divide-and-conquer algorithm which is + * more efficient for large numbers than what is commonly called the + * "grade-school" algorithm used in multiplyToLen. If the numbers to be + * multiplied have length n, the "grade-school" algorithm has an + * asymptotic complexity of O(n^2). In contrast, 3-way Toom-Cook has a + * complexity of about O(n^1.465). It achieves this increased asymptotic + * performance by breaking each number into three parts and by doing 5 + * multiplies instead of 9 when evaluating the product. Due to overhead + * (additions, shifts, and one division) in the Toom-Cook algorithm, it + * should only be used when both numbers are larger than a certain + * threshold (found experimentally). This threshold is generally larger + * than that for Karatsuba multiplication, so this algorithm is generally + * only used when numbers become significantly larger. + * + * The algorithm used is the "optimal" 3-way Toom-Cook algorithm outlined + * by Marco Bodrato. + * + * See: http://bodrato.it/papers/#WAIFI2007 + * http://bodrato.it/toom-cook/ + * + * "Towards Optimal Toom-Cook Multiplication for Univariate and + * Multivariate Polynomials in Characteristic 2 and 0." by Marco BODRATO; + * In C.Carlet and B.Sunar, Eds., "WAIFI'07 proceedings", p. 116-133, + * LNCS #4547. Springer, Madrid, Spain, June 21-22, 2007. + * + */ + private static BigInteger multiplyToomCook3(BigInteger a, BigInteger b) + { + int alen = a.mag.length; + int blen = b.mag.length; + + int largest = Math.max(alen, blen); + + // k is the size (in ints) of the lower-order slices. + int k = (largest+2)/3; // Equal to ceil(largest/3) + + // r is the size (in ints) of the highest-order slice. + int r = largest - 2*k; + + // Obtain slices of the numbers. a2 and b2 are the most significant + // bits of the number, and a0 and b0 the least significant. + BigInteger a0, a1, a2, b0, b1, b2; + a2 = a.getToomSlice(k, r, 0, largest); + a1 = a.getToomSlice(k, r, 1, largest); + a0 = a.getToomSlice(k, r, 2, largest); + b2 = b.getToomSlice(k, r, 0, largest); + b1 = b.getToomSlice(k, r, 1, largest); + b0 = b.getToomSlice(k, r, 2, largest); + + BigInteger v0, v1, v2, vm1, vinf, t1, t2, tm1, da1, db1; + + v0 = a0.multiply(b0); + da1 = a2.add(a0); + db1 = b2.add(b0); + vm1 = da1.subtract(a1).multiply(db1.subtract(b1)); + da1 = da1.add(a1); + db1 = db1.add(b1); + v1 = da1.multiply(db1); + v2 = da1.add(a2).shiftLeft(1).subtract(a0).multiply( + db1.add(b2).shiftLeft(1).subtract(b0)); + vinf = a2.multiply(b2); + + /* The algorithm requires two divisions by 2 and one by 3. + All divisions are known to be exact, that is, they do not produce + remainders, and all results are positive. The divisions by 2 are + implemented as right shifts which are relatively efficient, leaving + only an exact division by 3, which is done by a specialized + linear-time algorithm. */ + t2 = v2.subtract(vm1).exactDivideBy3(); + tm1 = v1.subtract(vm1).shiftRight(1); + t1 = v1.subtract(v0); + t2 = t2.subtract(t1).shiftRight(1); + t1 = t1.subtract(tm1).subtract(vinf); + t2 = t2.subtract(vinf.shiftLeft(1)); + tm1 = tm1.subtract(t2); + + // Number of bits to shift left. + int ss = k*32; + + BigInteger result = vinf.shiftLeft(ss).add(t2).shiftLeft(ss).add(t1).shiftLeft(ss).add(tm1).shiftLeft(ss).add(v0); + + if (a.signum * b.signum == -1) + return result.negate(); + else + return result; + } + + + /** Returns a slice of a BigInteger for use in Toom-Cook multiplication. + @param lowerSize The size of the lower-order bit slices. + @param upperSize The size of the higher-order bit slices. + @param slice The index of which slice is requested, which must be a + number from 0 to size-1. Slice 0 is the highest-order bits, + and slice size-1 are the lowest-order bits. + Slice 0 may be of different size than the other slices. */ + private BigInteger getToomSlice(int lowerSize, int upperSize, int slice, int fullsize) + { + int start, end, sliceSize, len, offset; + + len = mag.length; + offset = fullsize - len; + + if (slice == 0) + { + start = 0 - offset; + end = upperSize - 1 - offset; + } + else + { + start = upperSize + (slice-1)*lowerSize - offset; + end = start + lowerSize - 1; + } + + if (start < 0) + start = 0; + if (end < 0) + return ZERO; + + sliceSize = (end-start) + 1; + + if (sliceSize <= 0) + return ZERO; + if (start==0 && sliceSize >= len) + return this.abs(); + + int intSlice[] = new int[sliceSize]; + System.arraycopy(mag, start, intSlice, 0, sliceSize); + + return new BigInteger(trustedStripLeadingZeroInts(intSlice), 1); + } + + /** Does an exact division (that is, the remainder is known to be zero) + of the specified number by 3. This is used in Toom-Cook + multiplication. This is an efficient algorithm that runs in linear + time. If the argument is not exactly divisible by 3, results are + undefined. */ + private BigInteger exactDivideBy3() + { + int len = mag.length; + int[] result = new int[len]; + long w, q, carry; + carry = 0L; + for (int i=len-1; i>=0; i--) + { + w = (mag[i] & LONG_MASK) - carry; + carry = 0L; + + // 0xAAAAAAAB is the modular inverse of 3 (mod 2^32). Thus, + // the effect of this is to divide by 3 (mod 2^32). + // This is much faster than division on most architectures. + q = (w * 0xAAAAAAABL) & LONG_MASK; + result[i] = (int) q; + + // Now check the carry. The carry may be 0, 1, or 2 after these + // two checks. The second check can of course be eliminated if + // the first fails. + if (q >= 0x55555556L) + { + carry++; + if (q >= 0xAAAAAAABL) + carry++; + } + } + result = trustedStripLeadingZeroInts(result); + return new BigInteger(result, signum); + } + + /** Returns a slice of a BigInteger for use in Karatsuba multiplication. + @param size The approximate size of each slice, in number of ints. + @param numSlices The number of pieces to slice the number into. + @param slice The index of which slice is requested, which must be a + number from 0 to size-1. Slice 0 is the highest-order bits, + and slice size-1 are the lowest-order bits. + Slice 0 may be of different size than the other slices. */ + public BigInteger getSlice(int size, int numSlices, int slice) + { + int len = mag.length; + int end = (len-1) - (numSlices - slice - 1) * size; + int start = end - size + 1; + if (slice == 0 || start < 0) + start = 0; // Expand slice 0 to contain remaining bits + + int slicelen = (end-start) + 1; + if (slicelen <= 0) + return ZERO; + if (start==0 && slicelen >= len) + return this; + + int intSlice[] = new int[slicelen]; + System.arraycopy(mag, start, intSlice, 0, slicelen); + + return new BigInteger(trustedStripLeadingZeroInts(intSlice), 1); + } + + /** + * Returns a new BigInteger representing n lower ints of the number. + * This is used by Karatsuba multiplication and squaring. + */ + private BigInteger getLower(int n) { + int len = mag.length; + + if (len <= n) + return this; + + int lowerInts[] = new int[n]; + System.arraycopy(mag, len-n, lowerInts, 0, n); + + return new BigInteger(trustedStripLeadingZeroInts(lowerInts), 1); + } + + /** + * Returns a new BigInteger representing mag.length-n upper + * ints of the number. This is used by Karatsuba multiplication and + * squaring. + */ + private BigInteger getUpper(int n) { + int len = mag.length; + + if (len <= n) + return ZERO; + + int upperLen = len - n; + int upperInts[] = new int[upperLen]; + System.arraycopy(mag, 0, upperInts, 0, upperLen); + + return new BigInteger(trustedStripLeadingZeroInts(upperInts), 1); + } + + /** * Returns a BigInteger whose value is {@code (this2)}. * * @return {@code this2} @@ -1256,8 +1580,18 @@ private BigInteger square() { if (signum == 0) return ZERO; - int[] z = squareToLen(mag, mag.length, null); - return new BigInteger(trustedStripLeadingZeroInts(z), 1); + int len = mag.length; + + if (len < KARATSUBA_SQUARE_THRESHOLD) + { + int[] z = squareToLen(mag, len, null); + return new BigInteger(trustedStripLeadingZeroInts(z), 1); + } + else + if (len < TOOM_COOK_SQUARE_THRESHOLD) + return squareKaratsuba(); + else + return squareToomCook3(); } /** @@ -1328,6 +1662,81 @@ } /** + * Squares a BigInteger using the Karatsuba squaring algorithm. It should + * be used when both numbers are larger than a certain threshold (found + * experimentally). It is a recursive divide-and-conquer algorithm that + * has better asymptotic performance than the algorithm used in + * squareToLen. + */ + private BigInteger squareKaratsuba() + { + int half = (mag.length+1) / 2; + + BigInteger xl = getLower(half); + BigInteger xh = getUpper(half); + + BigInteger xhs = xh.square(); // xhs = xh^2 + BigInteger xls = xl.square(); // xls = xl^2 + + // xh^2 << 64 + (((xl+xh)^2 - (xh^2 + xl^2)) << 32) + xl^2 + return xhs.shiftLeft(half*32).add(xl.add(xh).square().subtract(xhs.add(xls))).shiftLeft(half*32).add(xls); + } + + /** + * Squares a BigInteger using the 3-way Toom-Cook squaring algorithm. It + * should be used when both numbers are larger than a certain threshold + * (found experimentally). It is a recursive divide-and-conquer algorithm + * that has better asymptotic performance than the algorithm used in + * squareToLen or squareKaratsuba. + */ + private BigInteger squareToomCook3() + { + int len = mag.length; + + // k is the size (in ints) of the lower-order slices. + int k = (len+2)/3; // Equal to ceil(largest/3) + + // r is the size (in ints) of the highest-order slice. + int r = len - 2*k; + + // Obtain slices of the numbers. a2 is the most significant + // bits of the number, and a0 the least significant. + BigInteger a0, a1, a2; + a2 = getToomSlice(k, r, 0, len); + a1 = getToomSlice(k, r, 1, len); + a0 = getToomSlice(k, r, 2, len); + BigInteger v0, v1, v2, vm1, vinf, t1, t2, tm1, da1; + + v0 = a0.square(); + da1 = a2.add(a0); + vm1 = da1.subtract(a1).square(); + da1 = da1.add(a1); + v1 = da1.square(); + vinf = a2.square(); + v2 = da1.add(a2).shiftLeft(1).subtract(a0).square(); + + /* The algorithm requires two divisions by 2 and one by 3. + All divisions are known to be exact, that is, they do not produce + remainders, and all results are positive. The divisions by 2 are + implemented as right shifts which are relatively efficient, leaving + only a division by 3. + The division by 3 is done by an optimized algorithm for this case. + */ + t2 = v2.subtract(vm1).exactDivideBy3(); + tm1 = v1.subtract(vm1).shiftRight(1); + t1 = v1.subtract(v0); + t2 = t2.subtract(t1).shiftRight(1); + t1 = t1.subtract(tm1).subtract(vinf); + t2 = t2.subtract(vinf.shiftLeft(1)); + tm1 = tm1.subtract(t2); + + // Number of bits to shift left. + int ss = k*32; + + return vinf.shiftLeft(ss).add(t2).shiftLeft(ss).add(t1).shiftLeft(ss).add(tm1).shiftLeft(ss).add(v0); + } + + /** * Returns a BigInteger whose value is {@code (this / val)}. * * @param val value by which this BigInteger is to be divided.