RFR: 8352075: Perf regression accessing fields [v23]
Johan Sjölen
jsjolen at openjdk.org
Tue Jun 10 12:52:45 UTC 2025
On Mon, 9 Jun 2025 15:36:14 GMT, Radim Vansa <rvansa at openjdk.org> wrote:
>> To integrate hotspot changes, you need two reviewers and people 'requesting changes' to withdraw their requests. Thank goodness the bots prevented this from being integrated. You need to wait for all the comments to be resolved.
>> This is a P3 bug so you have more time to get this integrated for JDK 25. I posted the schedule in the issue. The process is that this change would be integrated into the main repository (destined for JDK 26 and then slash-backported to JDK 25 a couple days later if testing is clean).
>> My tier 1-7 testing passes with the dynamic CDS patch above.
>
> @coleenp Can't find the comment to reply... I've replaced all `_r.next_uint()` with just `next_uint()`, it's a bikeshed argument.
Hi @rvansa ,
What about this type of API for dealing with the compressed table? We do the 8 byte accesses as unsigned chars (important so that they're 0-extended and not sign extended) and write the compressed KV down in little-endian. We use bitwise OR to not squash whatever was there before. Comparing GCC x64 and PPC (power), it looks good.
```c++
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
// For this hard-coded example we use 2 byte keys
// and 3 byte values -- for an element_bytes of 5
uint32_t key_mask = (1 << 16) - 1;
uint32_t value_mask = (1 << 24) - 1;
uint32_t value_shift = 16;
uint32_t element_bytes = 5;
uint64_t* kv_array;
uint32_t unpack_key(uint64_t kv) {
return kv & key_mask;
}
uint32_t unpack_value(uint64_t kv) { return (kv >> value_shift) & value_mask; }
uint64_t pack_kv(uint64_t k, uint64_t v) {
return k | (v << value_shift);
}
uint32_t align_down(uint32_t x, uint32_t align) { return x & -align; }
uint32_t align_up(uint32_t x, uint32_t align) {
return (x + (align - 1)) & -align;
}
uint64_t u64s_required(int n) {
uint32_t bytes_required = element_bytes * n;
return align_up(bytes_required, 8) / 8;
}
uint64_t read_u8(uint8_t* p) {
uint64_t result = 0;
result |= ((uint64_t)*(p + 0)) << (0 * 8);
result |= ((uint64_t)*(p + 1)) << (1 * 8);
result |= ((uint64_t)*(p + 2)) << (2 * 8);
result |= ((uint64_t)*(p + 3)) << (3 * 8);
result |= ((uint64_t)*(p + 4)) << (4 * 8);
result |= ((uint64_t)*(p + 5)) << (5 * 8);
result |= ((uint64_t)*(p + 6)) << (6 * 8);
result |= ((uint64_t)*(p + 7)) << (7 * 8);
return result;
}
void write_u8(uint8_t* p, uint64_t u8) {
p[0] |= u8 & 0xFF;
p[1] |= (u8 >> 8*1) & 0xFF;
p[2] |= (u8 >> 8*2) & 0xFF;
p[3] |= (u8 >> 8*3) & 0xFF;
p[4] |= (u8 >> 8*4) & 0xFF;
p[5] |= (u8 >> 8*5) & 0xFF;
p[6] |= (u8 >> 8*6) & 0xFF;
p[7] |= (u8 >> 8*7) & 0xFF;
}
uint64_t read(int n) {
uint32_t byte_index = element_bytes * n;
return read_u8(&reinterpret_cast<uint8_t*>(kv_array)[byte_index]);
}
void fill(int n, uint64_t kv) {
uint32_t byte_index = element_bytes * n;
write_u8(&reinterpret_cast<uint8_t*>(kv_array)[byte_index], kv);
return;
}
int main() {
int num_elts = 65536;
uint64_t sz = u64s_required(num_elts);
kv_array = (uint64_t*)malloc(sz * 8);
for (int i = 0; i < sz; i++) {
kv_array[i] = 0;
}
for (int i = 0; i < num_elts; i++) {
uint64_t kv = pack_kv(i, 2 * i);
fill(i, kv);
}
for (int i = 0; i < num_elts; i++) {
uint64_t kv = read(i);
uint32_t k = unpack_key(kv);
uint32_t v = unpack_value(kv);
printf("K: %d, V: %d\n", k, v);
}
}
-------------
PR Comment: https://git.openjdk.org/jdk/pull/24847#issuecomment-2959079093
More information about the hotspot-dev
mailing list