RFR: 8353500: [s390x] Intrinsify Unsafe::setMemory [v4]
Amit Kumar
amitkumar at openjdk.org
Tue Apr 29 14:47:51 UTC 2025
On Tue, 29 Apr 2025 09:02:45 GMT, Martin Doerr <mdoerr at openjdk.org> wrote:
> How is the behavior of mvc specified when hitting a signal (SIGSEGV or SIGBUS)? Will all Bytes before that place be written? I guess that is the reason why the C++ code uses the atomic version. Did you check which instructions are generated by the C++ compiler?
I couldn't find any such information in the book, but I will keep looking. @RealLucy, can you help? :-)
I initially looked at the code generated by the C++ compiler. It contains some further optimizations, but those come with additional checks. In the end, though, you can see the `mvc` instruction being used for the unaligned case:
(gdb) disassemble Copy::fill_to_memory_atomic(void*, unsigned long, unsigned char)
Dump of assembler code for function _ZN4Copy21fill_to_memory_atomicEPvmh:
0x000003fffc7b7358 <+0>: lgr %r0,%r2
0x000003fffc7b735c <+4>: ogr %r0,%r3
0x000003fffc7b7360 <+8>: lgr %r1,%r4
0x000003fffc7b7364 <+12>: risbgz %r5,%r0,61,63,0
0x000003fffc7b736a <+18>: jne 0x3fffc7b73aa <_ZN4Copy21fill_to_memory_atomicEPvmh+82>
0x000003fffc7b736e <+22>: cgijne %r4,0,0x3fffc7b756e <_ZN4Copy21fill_to_memory_atomicEPvmh+534>
0x000003fffc7b7374 <+28>: lghi %r4,0
0x000003fffc7b7378 <+32>: cgije %r3,0,0x3fffc7b7598 <_ZN4Copy21fill_to_memory_atomicEPvmh+576>
0x000003fffc7b737e <+38>: lgr %r1,%r3
0x000003fffc7b7382 <+42>: aghi %r1,-1
0x000003fffc7b7386 <+46>: aghi %r3,7
0x000003fffc7b738a <+50>: srlg %r1,%r1,3
0x000003fffc7b7390 <+56>: aghi %r1,1
0x000003fffc7b7394 <+60>: clgijnh %r3,7,0x3fffc7b7590 <_ZN4Copy21fill_to_memory_atomicEPvmh+568>
0x000003fffc7b739a <+66>: stg %r4,0(%r5,%r2)
0x000003fffc7b73a0 <+72>: aghi %r5,8
0x000003fffc7b73a4 <+76>: brctg %r1,0x3fffc7b739a <_ZN4Copy21fill_to_memory_atomicEPvmh+66>
0x000003fffc7b73a8 <+80>: br %r14
0x000003fffc7b73aa <+82>: risbgz %r4,%r0,62,63,0
0x000003fffc7b73b0 <+88>: ldgr %f0,%r11
0x000003fffc7b73b4 <+92>: ldgr %f1,%r7
0x000003fffc7b73b8 <+96>: ldgr %f6,%r8
0x000003fffc7b73bc <+100>: ldgr %f4,%r9
0x000003fffc7b73c0 <+104>: ldgr %f2,%r10
0x000003fffc7b73c4 <+108>: lgr %r11,%r15
0x000003fffc7b73c8 <+112>: jne 0x3fffc7b747c <_ZN4Copy21fill_to_memory_atomicEPvmh+292>
0x000003fffc7b73cc <+116>: cgijne %r1,0,0x3fffc7b759a <_ZN4Copy21fill_to_memory_atomicEPvmh+578>
0x000003fffc7b73d2 <+122>: lhi %r5,0
0x000003fffc7b73d6 <+126>: cgije %r3,0,0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b73dc <+132>: lghi %r0,-4
0x000003fffc7b73e0 <+136>: lgr %r1,%r3
0x000003fffc7b73e4 <+140>: aghi %r1,-1
0x000003fffc7b73e8 <+144>: clgrjh %r3,%r0,0x3fffc7b7626 <_ZN4Copy21fill_to_memory_atomicEPvmh+718>
0x000003fffc7b73ee <+150>: clgijnh %r1,103,0x3fffc7b7626 <_ZN4Copy21fill_to_memory_atomicEPvmh+718>
0x000003fffc7b73f4 <+156>: srlg %r0,%r1,2
0x000003fffc7b73fa <+162>: risbgz %r3,%r2,63,63,62
0x000003fffc7b7400 <+168>: aghi %r0,1
0x000003fffc7b7404 <+172>: cgije %r3,0,0x3fffc7b760e <_ZN4Copy21fill_to_memory_atomicEPvmh+694>
0x000003fffc7b740a <+178>: st %r5,0(%r2)
0x000003fffc7b740e <+182>: lghi %r8,4
0x000003fffc7b7412 <+186>: sgr %r0,%r3
0x000003fffc7b7416 <+190>: sllg %r3,%r3,2
0x000003fffc7b741c <+196>: lghi %r9,0
0x000003fffc7b7420 <+200>: risbg %r9,%r5,0,31,32
0x000003fffc7b7426 <+206>: lr %r9,%r5
0x000003fffc7b7428 <+208>: srlg %r1,%r0,1
0x000003fffc7b742e <+214>: la %r3,0(%r3,%r2)
0x000003fffc7b7432 <+218>: cgije %r1,0,0x3fffc7b7606 <_ZN4Copy21fill_to_memory_atomicEPvmh+686>
0x000003fffc7b7438 <+224>: sllg %r10,%r4,3
0x000003fffc7b743e <+230>: aghi %r4,1
0x000003fffc7b7442 <+234>: stg %r9,0(%r10,%r3)
0x000003fffc7b7448 <+240>: brctg %r1,0x3fffc7b7438 <_ZN4Copy21fill_to_memory_atomicEPvmh+224>
0x000003fffc7b744c <+244>: risbgz %r3,%r0,0,62,0
0x000003fffc7b7452 <+250>: sllg %r1,%r3,2
0x000003fffc7b7458 <+256>: agr %r1,%r8
0x000003fffc7b745c <+260>: cgrje %r0,%r3,0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b7462 <+266>: st %r5,0(%r1,%r2)
0x000003fffc7b7466 <+270>: lgdr %r11,%f0
0x000003fffc7b746a <+274>: lgdr %r10,%f2
0x000003fffc7b746e <+278>: lgdr %r9,%f4
0x000003fffc7b7472 <+282>: lgdr %r8,%f6
0x000003fffc7b7476 <+286>: lgdr %r7,%f1
0x000003fffc7b747a <+290>: br %r14
0x000003fffc7b747c <+292>: risbgz %r4,%r0,63,63,0
0x000003fffc7b7482 <+298>: lr %r5,%r1
0x000003fffc7b7484 <+300>: jne 0x3fffc7b75ae <_ZN4Copy21fill_to_memory_atomicEPvmh+598>
0x000003fffc7b7488 <+304>: lr %r0,%r1
0x000003fffc7b748a <+306>: sll %r0,8
0x000003fffc7b748e <+310>: ar %r0,%r1
0x000003fffc7b7490 <+312>: lr %r5,%r0
0x000003fffc7b7492 <+314>: cgije %r3,0,0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b7498 <+320>: lgr %r1,%r3
0x000003fffc7b749c <+324>: aghi %r1,-1
0x000003fffc7b74a0 <+328>: clgijnh %r1,39,0x3fffc7b7674 <_ZN4Copy21fill_to_memory_atomicEPvmh+796>
0x000003fffc7b74a6 <+334>: cgije %r3,-1,0x3fffc7b7674 <_ZN4Copy21fill_to_memory_atomicEPvmh+796>
0x000003fffc7b74ac <+340>: srlg %r5,%r2,1
0x000003fffc7b74b2 <+346>: srlg %r9,%r1,1
0x000003fffc7b74b8 <+352>: lcgr %r5,%r5
0x000003fffc7b74bc <+356>: aghi %r9,1
0x000003fffc7b74c0 <+360>: risbgz %r5,%r5,62,63,0
0x000003fffc7b74c6 <+366>: je 0x3fffc7b766c <_ZN4Copy21fill_to_memory_atomicEPvmh+788>
0x000003fffc7b74ca <+370>: sth %r0,0(%r2)
0x000003fffc7b74ce <+374>: cgije %r5,1,0x3fffc7b7664 <_ZN4Copy21fill_to_memory_atomicEPvmh+780>
0x000003fffc7b74d4 <+380>: sth %r0,2(%r2)
0x000003fffc7b74d8 <+384>: cgijne %r5,3,0x3fffc7b7616 <_ZN4Copy21fill_to_memory_atomicEPvmh+702>
0x000003fffc7b74de <+390>: sth %r0,4(%r2)
0x000003fffc7b74e2 <+394>: lghi %r7,6
0x000003fffc7b74e6 <+398>: lghi %r8,0
0x000003fffc7b74ea <+402>: risbg %r8,%r0,0,15,48
0x000003fffc7b74f0 <+408>: sgr %r9,%r5
0x000003fffc7b74f4 <+412>: sllg %r5,%r5,1
0x000003fffc7b74fa <+418>: risbg %r8,%r0,16,31,32
0x000003fffc7b7500 <+424>: srlg %r1,%r9,2
0x000003fffc7b7506 <+430>: la %r5,0(%r5,%r2)
0x000003fffc7b750a <+434>: risbg %r8,%r0,32,47,16
0x000003fffc7b7510 <+440>: risbg %r8,%r0,48,63,0
0x000003fffc7b7516 <+446>: cgije %r1,0,0x3fffc7b761e <_ZN4Copy21fill_to_memory_atomicEPvmh+710>
0x000003fffc7b751c <+452>: sllg %r10,%r4,3
0x000003fffc7b7522 <+458>: aghi %r4,1
0x000003fffc7b7526 <+462>: stg %r8,0(%r10,%r5)
0x000003fffc7b752c <+468>: brctg %r1,0x3fffc7b751c <_ZN4Copy21fill_to_memory_atomicEPvmh+452>
0x000003fffc7b7530 <+472>: risbgz %r4,%r9,0,61,0
0x000003fffc7b7536 <+478>: sllg %r1,%r4,1
0x000003fffc7b753c <+484>: agr %r1,%r7
0x000003fffc7b7540 <+488>: cgrje %r9,%r4,0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b7546 <+494>: sth %r0,0(%r1,%r2)
0x000003fffc7b754a <+498>: lgr %r4,%r1
0x000003fffc7b754e <+502>: aghi %r4,2
0x000003fffc7b7552 <+506>: clgrjnl %r4,%r3,0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b7558 <+512>: sth %r0,2(%r1,%r2)
0x000003fffc7b755c <+516>: la %r4,2(%r4)
0x000003fffc7b7560 <+520>: clgrjnl %r4,%r3,0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b7566 <+526>: sth %r0,4(%r1,%r2)
0x000003fffc7b756a <+530>: j 0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b756e <+534>: sllg %r4,%r4,8
0x000003fffc7b7574 <+540>: agr %r4,%r1
0x000003fffc7b7578 <+544>: sllg %r1,%r4,16
0x000003fffc7b757e <+550>: agr %r4,%r1
0x000003fffc7b7582 <+554>: sllg %r1,%r4,32
0x000003fffc7b7588 <+560>: agr %r4,%r1
0x000003fffc7b758c <+564>: j 0x3fffc7b7378 <_ZN4Copy21fill_to_memory_atomicEPvmh+32>
0x000003fffc7b7590 <+568>: lghi %r1,1
0x000003fffc7b7594 <+572>: j 0x3fffc7b739a <_ZN4Copy21fill_to_memory_atomicEPvmh+66>
0x000003fffc7b7598 <+576>: br %r14
0x000003fffc7b759a <+578>: lr %r5,%r1
0x000003fffc7b759c <+580>: sll %r5,8
0x000003fffc7b75a0 <+584>: ar %r5,%r1
0x000003fffc7b75a2 <+586>: lr %r1,%r5
0x000003fffc7b75a4 <+588>: sll %r1,16
0x000003fffc7b75a8 <+592>: ar %r5,%r1
0x000003fffc7b75aa <+594>: j 0x3fffc7b73d6 <_ZN4Copy21fill_to_memory_atomicEPvmh+126>
0x000003fffc7b75ae <+598>: cgije %r3,0,0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b75b4 <+604>: lgr %r4,%r2
0x000003fffc7b75b8 <+608>: cgije %r3,1,0x3fffc7b76b2 <_ZN4Copy21fill_to_memory_atomicEPvmh+858>
0x000003fffc7b75be <+614>: aghi %r3,-2
0x000003fffc7b75c2 <+618>: srlg %r2,%r3,8
0x000003fffc7b75c8 <+624>: cgije %r2,0,0x3fffc7b75e6 <_ZN4Copy21fill_to_memory_atomicEPvmh+654>
0x000003fffc7b75ce <+630>: pfd 2,1024(%r4)
0x000003fffc7b75d4 <+636>: stc %r5,0(%r4)
0x000003fffc7b75d8 <+640>: mvc 1(255,%r4),0(%r4)
0x000003fffc7b75de <+646>: la %r4,256(%r4)
0x000003fffc7b75e2 <+650>: brctg %r2,0x3fffc7b75ce <_ZN4Copy21fill_to_memory_atomicEPvmh+630>
0x000003fffc7b75e6 <+654>: stc %r1,0(%r4)
0x000003fffc7b75ea <+658>: exrl %r3,0x3fffc7b76ba <_ZN4Copy21fill_to_memory_atomicEPvmh+866>
0x000003fffc7b75f0 <+664>: lgdr %r11,%f0
0x000003fffc7b75f4 <+668>: lgdr %r10,%f2
0x000003fffc7b75f8 <+672>: lgdr %r9,%f4
0x000003fffc7b75fc <+676>: lgdr %r8,%f6
0x000003fffc7b7600 <+680>: lgdr %r7,%f1
0x000003fffc7b7604 <+684>: br %r14
0x000003fffc7b7606 <+686>: lghi %r1,1
0x000003fffc7b760a <+690>: j 0x3fffc7b7438 <_ZN4Copy21fill_to_memory_atomicEPvmh+224>
0x000003fffc7b760e <+694>: lghi %r8,0
0x000003fffc7b7612 <+698>: j 0x3fffc7b7412 <_ZN4Copy21fill_to_memory_atomicEPvmh+186>
0x000003fffc7b7616 <+702>: lghi %r7,4
0x000003fffc7b761a <+706>: j 0x3fffc7b74e6 <_ZN4Copy21fill_to_memory_atomicEPvmh+398>
0x000003fffc7b761e <+710>: lghi %r1,1
0x000003fffc7b7622 <+714>: j 0x3fffc7b751c <_ZN4Copy21fill_to_memory_atomicEPvmh+452>
0x000003fffc7b7626 <+718>: lgr %r1,%r3
0x000003fffc7b762a <+722>: aghi %r1,-1
0x000003fffc7b762e <+726>: lgr %r0,%r3
0x000003fffc7b7632 <+730>: aghi %r0,3
0x000003fffc7b7636 <+734>: srlg %r1,%r1,2
0x000003fffc7b763c <+740>: aghi %r1,1
0x000003fffc7b7640 <+744>: clgijnh %r0,3,0x3fffc7b765c <_ZN4Copy21fill_to_memory_atomicEPvmh+772>
0x000003fffc7b7646 <+750>: cgije %r3,0,0x3fffc7b765c <_ZN4Copy21fill_to_memory_atomicEPvmh+772>
0x000003fffc7b764c <+756>: st %r5,0(%r4,%r2)
0x000003fffc7b7650 <+760>: aghi %r4,4
0x000003fffc7b7654 <+764>: brctg %r1,0x3fffc7b764c <_ZN4Copy21fill_to_memory_atomicEPvmh+756>
0x000003fffc7b7658 <+768>: j 0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b765c <+772>: lghi %r1,1
0x000003fffc7b7660 <+776>: j 0x3fffc7b764c <_ZN4Copy21fill_to_memory_atomicEPvmh+756>
0x000003fffc7b7664 <+780>: lghi %r7,2
0x000003fffc7b7668 <+784>: j 0x3fffc7b74e6 <_ZN4Copy21fill_to_memory_atomicEPvmh+398>
0x000003fffc7b766c <+788>: lghi %r7,0
0x000003fffc7b7670 <+792>: j 0x3fffc7b74e6 <_ZN4Copy21fill_to_memory_atomicEPvmh+398>
0x000003fffc7b7674 <+796>: lgr %r1,%r3
0x000003fffc7b7678 <+800>: aghi %r1,-1
0x000003fffc7b767c <+804>: lgr %r0,%r3
0x000003fffc7b7680 <+808>: aghi %r0,1
0x000003fffc7b7684 <+812>: srlg %r1,%r1,1
0x000003fffc7b768a <+818>: aghi %r1,1
0x000003fffc7b768e <+822>: clgijnh %r0,1,0x3fffc7b76aa <_ZN4Copy21fill_to_memory_atomicEPvmh+850>
0x000003fffc7b7694 <+828>: cgije %r3,0,0x3fffc7b76aa <_ZN4Copy21fill_to_memory_atomicEPvmh+850>
0x000003fffc7b769a <+834>: sth %r5,0(%r4,%r2)
0x000003fffc7b769e <+838>: aghi %r4,2
0x000003fffc7b76a2 <+842>: brctg %r1,0x3fffc7b769a <_ZN4Copy21fill_to_memory_atomicEPvmh+834>
0x000003fffc7b76a6 <+846>: j 0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b76aa <+850>: lghi %r1,1
0x000003fffc7b76ae <+854>: j 0x3fffc7b769a <_ZN4Copy21fill_to_memory_atomicEPvmh+834>
0x000003fffc7b76b2 <+858>: stc %r1,0(%r2)
0x000003fffc7b76b6 <+862>: j 0x3fffc7b7466 <_ZN4Copy21fill_to_memory_atomicEPvmh+270>
0x000003fffc7b76ba <+866>: mvc 1(1,%r4),0(%r4)
-------------
PR Comment: https://git.openjdk.org/jdk/pull/24480#issuecomment-2839202077
More information about the hotspot-compiler-dev
mailing list