RFR: 8359965: Enable paired pushp and popp instruction usage for APX enabled CPUs

Volodymyr Paprotski vpaprotski at openjdk.org
Wed Jul 2 17:30:51 UTC 2025


On Tue, 1 Jul 2025 06:45:58 GMT, Jatin Bhateja <jbhateja at openjdk.org> wrote:

> For a cleaner interface, I think we can also maintain a RAII style APXPushPopPairTracker ...

Using the suggested code as a base, Vamsi and I tinkered with the idea some more! Here is what we came up with. This also tracks the correct order of registers being pushed/poped.. (haven't compiled it, so might have some syntax bugs).

@dholmes-ora would you mind sharing your opinion? We seem to be making things more complicated, but hopefully in a good way?

Also included a sample usage in a stub.


#define __ _masm->

class PushPopTracker {
   private:
      int _counter;
      MacroAssembler *_masm;
      const int REGS = 32; // Increase as needed
      int regs[REGS];
   public:
      PushPopTracker(MacroAssembler *_masm) : _counter(0), _masm(_masm) {}
      ~PushPopTracker() {
         assert(_counter == 0, "Push/pop pair mismatch");
      }

      void push(Register reg) {
         assert(_counter<REGS, "Push/pop overflow");
         regs[_counter++] = reg.encoding();
         if (VM_Version::supports_apx_f()) {
            __ pushp(reg);
         } else {
            __ push(reg);
         }
      }
      void pop(Register reg) {
         assert(_counter>0, "Push/pop underflow");
         assert(regs[_counter] == reg.encoding(), "Push/pop pair mismatch: %d != %d", regs[_counter], reg.encoding());
         _counter--;
         if (VM_Version::supports_apx_f()) {
            __ popp(reg);
         } else {
            __ pop(reg);
         }
      }
}

address StubGenerator::generate_intpoly_montgomeryMult_P256() {
  __ align(CodeEntryAlignment);
  /*...*/
  address start = __ pc();
  __ enter();
  PushPopTracker s(_masm);
  s.push(r12); //1
  s.push(r13); //2
  s.push(r14); //3
  #ifdef _WIN64
  s.push(rsi); //4
  s.push(rdi); //5
  #endif
  s.push(rbp); //6
  __ movq(rbp, rsp);
  __ andq(rsp, -32);
  __ subptr(rsp, 32);
  // Register Map
  const Register aLimbs  = c_rarg0; // c_rarg0: rdi | rcx
  const Register bLimbs  = rsi;     // c_rarg1: rsi | rdx
  const Register rLimbs  = r8;      // c_rarg2: rdx | r8
  const Register tmp1    = r9;
  const Register tmp2    = r10;
  /*...*/
  __ movq(rsp, rbp);
  s.pop(rbp); //5
  #ifdef _WIN64
  s.pop(rdi); //4
  s.pop(rsi); //3
  #endif
  s.pop(r14); //2
  s.pop(r13); //1
  s.pop(r12); //0
  __ leave();
  __ ret(0);
  return start;
}

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/25889#discussion_r2180606586


More information about the hotspot-compiler-dev mailing list