diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -309,8 +309,10 @@
   // and the caller wants to perform that instruction's operation on an
   // address that has displacement Offset. Return the opcode of a suitable
   // instruction (which might be Opcode itself) or 0 if no such instruction
-  // exists.
-  unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
+  // exists. MI is optional; when given, its physical register operands may be
+  // examined (e.g. whether a VR32/VR64 reg was assigned an FP or a vector reg).
+  unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset,
+                              const MachineInstr *MI = nullptr) const;
 
   // Return true if Opcode has a mapping in 12 <-> 20 bit displacements.
   bool hasDisplacementPairInsn(unsigned Opcode) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1632,7 +1632,8 @@
 }
 
 unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
-                                              int64_t Offset) const {
+                                              int64_t Offset,
+                                              const MachineInstr *MI) const {
   const MCInstrDesc &MCID = get(Opcode);
   int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset);
   if (isUInt<12>(Offset) && isUInt<12>(Offset2)) {
@@ -1654,6 +1655,24 @@
     // Check whether Opcode allows signed 20-bit displacements.
     if (MCID.TSFlags & SystemZII::Has20BitOffset)
       return Opcode;
+
+    // If the VR32/VR64 operand was allocated to an FP reg, use the FP opcode.
+    if (MI && MI->getOperand(0).isReg()) {
+      Register Reg = MI->getOperand(0).getReg();
+      if (Reg.isPhysical() && SystemZMC::getFirstReg(Reg) < 16) {
+        switch (Opcode) {
+        case SystemZ::VL32:
+          return SystemZ::LEY;
+        case SystemZ::VST32:
+          return SystemZ::STEY;
+        case SystemZ::VL64:
+          return SystemZ::LDY;
+        case SystemZ::VST64:
+          return SystemZ::STDY;
+        default: break;
+        }
+      }
+    }
   }
   return 0;
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -321,7 +321,7 @@
   // See if the offset is in range, or if an equivalent instruction that
   // accepts the offset exists.
   unsigned Opcode = MI->getOpcode();
-  unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset);
+  unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset, &*MI);
   if (OpcodeForOffset) {
     if (OpcodeForOffset == SystemZ::LE &&
         MF.getSubtarget().hasVector()) {
diff --git a/llvm/test/CodeGen/SystemZ/elim-frame-index-VR.ll b/llvm/test/CodeGen/SystemZ/elim-frame-index-VR.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/elim-frame-index-VR.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+;
+; Test that a spill/reload of a VR32/VR64 reg uses the FP opcode supporting
+; 20-bit displacement, without an extra LAY, if needed and possible.
+
+define void @f1(i32 %arg, ...) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: lay
+; CHECK: stdy %f0, 4400(%r15)
+bb:
+  %i = alloca [4096 x i8]
+  ret void
+}
+
+define void @f2(float %Arg) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: lay
+; CHECK: stey %f0, 4172(%r15)
+bb:
+  %i = alloca [1000 x float]
+  %i2 = getelementptr inbounds [1000 x float], [1000 x float]* %i, i64 0, i64 999
+  br i1 undef, label %bb3, label %bb2
+
+bb2:
+  store float %Arg , float* %i2
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+define void @f3(double* %Dst) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: lay
+; CHECK: ldy %f0, 4168(%r15)
+bb:
+  %i = alloca [500 x double]
+  br i1 undef, label %bb3, label %bb2
+
+bb2:
+  %i12 = getelementptr inbounds [500 x double], [500 x double]* %i, i64 0, i64 499
+  %i13 = load double, double* %i12
+  %i14 = fdiv double %i13, 0.000000e+00
+  store double %i14, double* %Dst
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+define void @f4(float* %Dst) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: lay
+; CHECK: ley %f0, 4172(%r15)
+bb:
+  %i = alloca [1000 x float]
+  br i1 undef, label %bb3, label %bb2
+
+bb2:
+  %i12 = getelementptr inbounds [1000 x float], [1000 x float]* %i, i64 0, i64 999
+  %i13 = load float, float* %i12
+  %i14 = fdiv float %i13, 0.000000e+00
+  store float %i14, float* %Dst
+  br label %bb3
+
+bb3:
+  ret void
+}