Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -638,6 +638,9 @@
   MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode) const;
+  MachineBasicBlock *emitFPScalarImm(MachineInstr &MI,
+                                     MachineBasicBlock *MBB,
+                                     unsigned BitWidth) const;
 
   const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
 };
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -577,9 +577,47 @@
   return false;
 }
 
+/// Return true if the bit pattern of \p Imm, treated as a \p BitWidth-bit
+/// element, is a mask accepted by isRxSBGMask and so can be built with VGM.
+/// On success \p Start and \p End hold the reported bit numbers, rebased so
+/// that 0 denotes the most significant bit of the element.
+static bool analyzeFPImm(const APFloat &Imm, unsigned BitWidth, unsigned &Start,
+                         unsigned &End, const SystemZInstrInfo *TII) {
+  // Only f32 and f64 have a VGM-based pseudo, and a wider BitWidth would
+  // make the 64 - BitWidth adjustment below wrap around.
+  if (BitWidth != 32 && BitWidth != 64)
+    return false;
+
+  APInt IntImm = Imm.bitcastToAPInt();
+  if (IntImm.getActiveBits() > 64)
+    return false;
+
+  // See if this immediate could be generated with VGM.
+  bool Success = TII->isRxSBGMask(IntImm.getZExtValue(), BitWidth, Start, End);
+  if (!Success)
+    return false;
+  // isRxSBGMask returns the bit numbers for a full 64-bit value,
+  // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to
+  // bit numbers for a BitWidth-bit value, so that 0 denotes
+  // 1 << (BitWidth-1).
+  Start -= 64 - BitWidth;
+  End -= 64 - BitWidth;
+  return true;
+}
+
 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
-  return Imm.isZero() || Imm.isNegZero();
+  if (Imm.isZero() || Imm.isNegZero())
+    return true;
+
+  // Any other immediate is legal only if VGM can generate its bit pattern.
+  // NOTE(review): VGM looks like a vector-facility instruction (the new test
+  // runs with -mcpu=z13), yet Subtarget.hasVector() is not checked here --
+  // confirm the pseudos cannot be selected on targets without it.
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  unsigned Start, End;
+  return analyzeFPImm(Imm, VT.getSizeInBits(), Start, End, TII);
 }
 
 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
@@ -7172,6 +7210,37 @@
   return MBB;
 }
 
+/// Expand an FP32ScalarImmPseudo / FP64ScalarImmPseudo: materialize the FP
+/// immediate with VGMF (BitWidth == 32) or VGMG (BitWidth == 64) into a
+/// 128-bit vector register and copy its subreg_h32 / subreg_h64 part to the
+/// destination register.
+MachineBasicBlock *SystemZTargetLowering::emitFPScalarImm(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BitWidth) const {
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+
+  APFloat Imm = MI.getOperand(1).getFPImm()->getValueAPF();
+  assert(!Imm.isZero() && !Imm.isNegZero() && "Expected non-zero FP immediate");
+  unsigned Start, End;
+  bool Success = analyzeFPImm(Imm, BitWidth, Start, End, TII);
+  assert(Success && "Can't build FP immediate.");
+  (void)Success; // Silence unused-variable warnings in NDEBUG builds.
+  unsigned V128Reg = MRI->createVirtualRegister(&SystemZ::VF128BitRegClass);
+  unsigned Opcode = (BitWidth == 32 ? SystemZ::VGMF : SystemZ::VGMG);
+  DebugLoc DL = MI.getDebugLoc();
+  BuildMI(*MBB, MI, DL, TII->get(Opcode), V128Reg)
+    .addImm(Start)
+    .addImm(End);
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned SubRegIdx = (BitWidth == 32 ? SystemZ::subreg_h32
+                                       : SystemZ::subreg_h64);
+  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
+    .addReg(V128Reg, RegState::Kill, SubRegIdx);
+  MI.eraseFromParent();
+  return MBB;
+}
+
 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *MBB) const {
   switch (MI.getOpcode()) {
@@ -7436,6 +7505,11 @@
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, MBB);
 
+  case SystemZ::FP32ScalarImmPseudo:
+    return emitFPScalarImm(MI, MBB, 32);
+  case SystemZ::FP64ScalarImmPseudo:
+    return emitFPScalarImm(MI, MBB, 64);
+
   default:
     llvm_unreachable("Unexpected instr type to insert");
   }
Index: lib/Target/SystemZ/SystemZInstrFP.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrFP.td
+++ lib/Target/SystemZ/SystemZInstrFP.td
@@ -41,6 +41,15 @@
   def LZXR : InherentRRE<"lzxr", 0xB376, FP128, fpimm0>;
 }
 
+// Load scalar floating-point immediate with a VGM.
+let isAsCheapAsAMove = 1, isMoveImm = 1, usesCustomInserter = 1,
+    hasNoSchedulingInfo = 1 in {
+  def FP32ScalarImmPseudo : Pseudo<(outs FP32:$R1), (ins FP32:$Imm),
+                                   [(set FP32:$R1, (fpimm:$Imm))]>;
+  def FP64ScalarImmPseudo : Pseudo<(outs FP64:$R1), (ins FP64:$Imm),
+                                   [(set FP64:$R1, (fpimm:$Imm))]>;
+}
+
 // Moves between two floating-point registers.
 def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>;
 def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>;
Index: test/CodeGen/SystemZ/args-07.ll
===================================================================
--- test/CodeGen/SystemZ/args-07.ll
+++ test/CodeGen/SystemZ/args-07.ll
@@ -29,13 +29,11 @@
 define { double, double, double, double } @f3() {
 ; CHECK-LABEL: f3:
 ; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-; CHECK: ldeb %f0, 0([[TMP]])
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-; CHECK: ldeb %f2, 0([[TMP]])
-; CHECK: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: ldeb %f4, 0([[TMP]])
 ; CHECK: larl [[TMP:%r[0-5]]], .LCPI
 ; CHECK: ldeb %f6, 0([[TMP]])
+; CHECK: vgmg %v0, 2, 11
+; CHECK: vgmg %v2, 1, 1
 ; CHECK: br %r14
   ret { double, double, double, double }
       { double 1.0, double 2.0, double 3.0, double 4.0 }
Index: test/CodeGen/SystemZ/fp-const-12.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/fp-const-12.ll
@@ -0,0 +1,63 @@
+; Test loads of FP constants with VGM.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define double @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgmg %v0, 2, 11
+  ret double 1.0
+}
+
+define double @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgmg %v0, 1, 1
+  ret double 2.0
+}
+
+define double @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgmg %v0, 0, 1
+  ret double -2.0
+}
+
+define double @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgmg %v0, 2, 10
+  ret double 0.5
+}
+
+define double @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vgmg %v0, 2, 9
+  ret double 0.125
+}
+
+define float @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgmf %v0, 2, 8
+  ret float 1.0
+}
+
+define float @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgmf %v0, 1, 1
+  ret float 2.0
+}
+
+define float @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vgmf %v0, 0, 1
+  ret float -2.0
+}
+
+define float @f9() {
+; CHECK-LABEL: f9:
+; CHECK: vgmf %v0, 2, 7
+  ret float 0.5
+}
+
+define float @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vgmf %v0, 2, 6
+  ret float 0.125
+}
Index: test/CodeGen/SystemZ/subregliveness-02.ll
===================================================================
--- test/CodeGen/SystemZ/subregliveness-02.ll
+++ test/CodeGen/SystemZ/subregliveness-02.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -systemz-subreg-liveness < %s | FileCheck %s
 
 ; Check for successful compilation.
-; CHECK: meeb %f0, 0(%r1)
+; CHECK: meebr %f1, %f0
 
 target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
 target triple = "s390x-ibm-linux"
Index: test/CodeGen/SystemZ/swifterror.ll
===================================================================
--- test/CodeGen/SystemZ/swifterror.ll
+++ test/CodeGen/SystemZ/swifterror.ll
@@ -71,7 +71,7 @@
 ; CHECK: lghi %r9, 0
 ; CHECK: brasl %r14, foo
 ; CHECK: cgijlh %r9, 0,
-; CHECK: ceb %f0,
+; CHECK: cebr %f0,
 ; CHECK: jnh
 ; Access part of the error object and save it to error_ref
 ; CHECK: lb %r[[REG2:[0-9]+]], 8(%r9)
@@ -157,7 +157,7 @@
 ; CHECK: lghi %r2, 16
 ; CHECK: brasl %r14, malloc
 ; CHECK: mvi 8(%r2), 1
-; CHECK: ceb %f8,
+; CHECK: cebr %f8,
 ; CHECK: jnh
 ; CHECK: lgr %r9, %r2
 ; CHECK: br %r14
Index: test/CodeGen/SystemZ/tdc-03.ll
===================================================================
--- test/CodeGen/SystemZ/tdc-03.ll
+++ test/CodeGen/SystemZ/tdc-03.ll
@@ -34,7 +34,7 @@
 define i32 @f3(float %x) {
 ; CHECK-LABEL: f3
 ; CHECK-NOT: tceb
-; CHECK: ceb %f0, 0(%r{{[0-9]+}})
+; CHECK: cebr %f0, %f{{[0-9]+}}
 ; CHECK-NOT: tceb
   %res = fcmp ult float %x, 0x7ff0000000000000
   %xres = zext i1 %res to i32
@@ -55,7 +55,7 @@
 define i32 @f5(float %x) {
 ; CHECK-LABEL: f5
 ; CHECK-NOT: tceb
-; CHECK: ceb %f0, 0(%r{{[0-9]+}})
+; CHECK: cebr %f0, %f{{[0-9]+}}
 ; CHECK-NOT: tceb
   %res = fcmp ult float %x, 0x3810000000000000
   %xres = zext i1 %res to i32
@@ -77,7 +77,7 @@
 ; CHECK-LABEL: f7
 ; CHECK-NOT: tceb
 ; CHECK: lpdfr [[REG:%f[0-9]+]], %f0
-; CHECK: ceb [[REG]], 0(%r{{[0-9]+}})
+; CHECK: cebr [[REG]], %f{{[0-9]+}}
 ; CHECK-NOT: tceb
   %y = call float @llvm.fabs.f32(float %x)
   %res = fcmp ugt float %y, 0x3810000000000000
@@ -90,7 +90,7 @@
 ; CHECK-LABEL: f8
 ; CHECK-NOT: tceb
 ; CHECK: lpdfr [[REG:%f[0-9]+]], %f0
-; CHECK: ceb [[REG]], 0(%r{{[0-9]+}})
+; CHECK: cebr [[REG]], %f{{[0-9]+}}
 ; CHECK-NOT: tceb
   %y = call float @llvm.fabs.f32(float %x)
   %res = fcmp ult float %y, 0x3ff0000000000000