RFR 8017540: Improve multi-threaded contention behavior of BigInteger.toString() radix conversion cache

Peter Levart peter.levart at gmail.com
Thu Jun 27 08:51:45 UTC 2013


Hi,

Expanding on the idea of building a single growing linked list of 
values and treating the array just as an index for quick access, which 
can be re-created at any time without synchronization or volatile 
publication, here's the attempt:


     private static class Node {
         final BigInteger value;
         Node next;
         Node(BigInteger value) { this.value = value; }
     }

     private static final Node[] powerCache;
     private static final Node[][] powerCacheIndex;
     private static final int POWER_CACHE_LINE_CHUNK = 16;

     static {
         powerCache = new Node[Character.MAX_RADIX + 1];
         for (int i = Character.MIN_RADIX; i <= Character.MAX_RADIX; i++) {
             powerCache[i] = new Node(BigInteger.valueOf(i));
         }
         powerCacheIndex = new Node[Character.MAX_RADIX + 1][];
     }

     private static BigInteger getRadixConversionCache(int radix, int exponent) {
         Node[] cacheLine = powerCacheIndex[radix];
         if (cacheLine != null && exponent < cacheLine.length) { // cache line is long enough
             Node node = cacheLine[exponent];
             if (node != null) {
                 return node.value;
             }
             return fillCacheLine(cacheLine, powerCache[radix], exponent);
         }
         else { // we need to extend / create cache line
             cacheLine = new Node[(exponent / POWER_CACHE_LINE_CHUNK + 1) * POWER_CACHE_LINE_CHUNK];
             BigInteger result = fillCacheLine(cacheLine, powerCache[radix], exponent);
             powerCacheIndex[radix] = cacheLine; // install new line
             return result;
         }
     }

     private static BigInteger fillCacheLine(Node[] cacheLine, Node node0, int exponent) {
         Node node = cacheLine[0] = node0;
         for (int i = 1; i <= exponent; i++) {
             Node nextNode;
             synchronized (node) {
                 nextNode = node.next;
                 if (nextNode == null) {
                     nextNode = new Node(node.value.square());
                     node.next = nextNode;
                 }
             }
             cacheLine[i] = node = nextNode;
         }
         return node.value;
     }
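
As a side note, here's a minimal standalone sketch (not part of the 
proposed patch; the class name is made up for illustration and only the 
public BigInteger API is used) showing the invariant the chain 
maintains: the node at index i of a cache line holds radix^(2^i), i.e. 
each entry is the square of the previous one, starting from radix 
itself at index 0:

     import java.math.BigInteger;

     public class RadixCacheInvariantDemo {
         public static void main(String[] args) {
             int radix = 10; // any radix in [Character.MIN_RADIX, Character.MAX_RADIX]
             int maxExponent = 5;
             // index 0 corresponds to radix^(2^0) = radix, as set up in the static initializer
             BigInteger value = BigInteger.valueOf(radix);
             for (int i = 0; i <= maxExponent; i++) {
                 // this is what fillCacheLine would store at cacheLine[i].value
                 System.out.println("cacheLine[" + i + "].value = radix^(2^" + i + ") = " + value);
                 value = value.multiply(value); // square, like node.value.square()
             }
         }
     }

Since these values are immutable and fully determined by the radix, any 
thread can rebuild the index array from the chain without coordination.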



What do you think?

Regards, Peter


On 06/27/2013 09:27 AM, Peter Levart wrote:
> On 06/27/2013 08:37 AM, Peter Levart wrote:
>> Hi,
>>
>> This version seems correct. Maybe just replace the double volatile 
>> read at the length re-check with a single one:
>>
>>
>>     private static BigInteger getRadixConversionCache(int radix, int exponent) {
>>         BigInteger[] cacheLine = powerCache[radix]; // volatile read
>>         if (exponent < cacheLine.length)
>>             return cacheLine[exponent];
>>
>>         int oldLength = cacheLine.length;
>>         cacheLine = Arrays.copyOf(cacheLine, exponent + 1);
>>         for (int i = oldLength; i <= exponent; i++)
>>             cacheLine[i] = cacheLine[i - 1].pow(2);
>>
>>         BigInteger[][] pc = powerCache; // volatile read again
>>         if (exponent >= pc[radix].length) {
>>             pc = pc.clone();
>>             pc[radix] = cacheLine;
>>             powerCache = pc; // volatile write, publish
>>         }
>>         return cacheLine[exponent];
>>     }
>>
>>
>> I like this, since it tries to avoid overwriting larger cacheLines 
>> with smaller ones when nonexistent exponents are requested 
>> concurrently for the same radix. This is good, since computation for 
>> larger exponents takes quadratically longer (as Alan Eliasen points 
>> out), so the chance of concurrent threads stomping on each other 
>> increases. Re-reading and cloning the powerCache reference at the end 
>> of the computation also takes care of cacheLines for other radixes 
>> that might have expanded while the computation was taking place. The 
>> only remaining overhead is that concurrent threads may uselessly 
>> compute the same results at the same time. To avoid this, locking and 
>> waiting would have to be introduced, which would *complicate things*.
>>
>> Regards, Peter
>
> On the other hand, it doesn't complicate things too much. So if this 
> extra CPU time proves to be a problem, here's a variation of the above 
> code which prevents multiple threads from calculating the same result 
> at the same time, by serializing the computation with precise granularity:
>
>     private static class Node {
>         final BigInteger value;
>         Node next;
>         Node(BigInteger value) { this.value = value; }
>     }
>
>     private static volatile Node[][] powerCache;
>
>     static {
>         powerCache = new Node[Character.MAX_RADIX + 1][];
>         for (int i = Character.MIN_RADIX; i <= Character.MAX_RADIX; i++) {
>             powerCache[i] = new Node[]{new Node(BigInteger.valueOf(i))};
>         }
>     }
>
>     private static BigInteger getRadixConversionCache(int radix, int exponent) {
>         Node[] cacheLine = powerCache[radix]; // volatile read
>         if (exponent < cacheLine.length)
>             return cacheLine[exponent].value;
>
>         int oldLength = cacheLine.length;
>         cacheLine = Arrays.copyOf(cacheLine, exponent + 1);
>         Node prevNode = cacheLine[oldLength - 1];
>         for (int i = oldLength; i <= exponent; i++) {
>             Node node;
>             synchronized (prevNode) {
>                 node = prevNode.next;
>                 if (node == null) {
>                     node = new Node(prevNode.value.pow(2));
>                     prevNode.next = node;
>                 }
>             }
>             cacheLine[i] = prevNode = node;
>         }
>
>         Node[][] pc = powerCache; // volatile read again
>         if (exponent >= pc[radix].length) {
>             pc = pc.clone();
>             pc[radix] = cacheLine;
>             powerCache = pc; // volatile write, publish
>         }
>         return cacheLine[exponent].value;
>     }
>
>
> This variation differs only in a subtlety. It wraps each BigInteger 
> in a Node, which has a link to the next node in the chain. Computation 
> of the next node is serialized using the previous node as a lock. So 
> although Nodes can end up in several arrays (which are used as an index 
> for quick access), there is only a single growing chain of Nodes per 
> radix and each Node is computed by a single thread.
>
> Regards, Peter
>
>>
>> On 06/26/2013 08:13 PM, Brian Burkhalter wrote:
>>> So do we have consensus on this version?
>>>
>>> Thanks for the lively "conversation."
>>>
>>> Brian
>>>
>>> On Jun 26, 2013, at 12:05 AM, Aleksey Shipilev wrote:
>>>
>>>> Yes, like that.
>>>>
>>>> -Aleksey
>>>>
>>>> On 26.06.2013, at 10:53, Dmitry Nadezhin 
>>>> <dmitry.nadezhin at gmail.com> wrote:
>>>>
>>>>>> We could check for the existing cacheLine.length right before 
>>>>>> installing
>>>>> the new one
>>>>>
>>>>> Do you mean something like this ?
>>>>>
>>>>> BigInteger getRadixConversionCache(int radix, int exponent) {
>>>>>     BigInteger[] cacheLine = powerCache[radix]; // volatile read
>>>>>     if (exponent < cacheLine.length)
>>>>>         return cacheLine[exponent];
>>>>>
>>>>>     int oldLength = cacheLine.length;
>>>>>     cacheLine = Arrays.copyOf(cacheLine, exponent + 1);
>>>>>     for (int i = oldLength; i < exponent + 1; i++)
>>>>>         cacheLine[i] = cacheLine[i - 1].square();
>>>>>
>>>>>     if (exponent >= powerCache[radix].length) { // volatile read again
>>>>>         BigInteger[][] pc = Arrays.copyOf(powerCache, powerCache.length);
>>>>>         pc[radix] = cacheLine;
>>>>>         powerCache = pc; // volatile write, publish
>>>>>     }
>>>>>     return cacheLine[exponent];
>>>>> }
>>
>



