Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -414,7 +414,8 @@ MachineInstr **KilledDef = nullptr) const; void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const; - bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const; + bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III, + bool PostRA) const; /// getRegNumForOperand - some operands use different numbering schemes /// for the same registers. For example, a VSX instruction may have any of Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2319,7 +2319,7 @@ Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 || Opc == PPC::RLWINM || Opc == PPC::RLWINMo || Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; - if (!instrHasImmForm(MI, III) && !ConvertibleImmForm) + if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm) return nullptr; // Don't convert or %X, %Y, %Y since that's just a register move. @@ -2421,7 +2421,7 @@ *KilledDef = DefMI; ImmInstrInfo III; - bool HasImmForm = instrHasImmForm(MI, III); + bool HasImmForm = instrHasImmForm(MI, III, PostRA); // If this is a reg+reg instruction that has a reg+imm form, // and one of the operands is produced by an add-immediate, // try to convert it. @@ -2644,8 +2644,13 @@ return false; } +/// Returns true if \p Reg is in the VF register class (PPC::VFRCRegClass). +static bool isVFReg(unsigned Reg) { + return PPC::VFRCRegClass.contains(Reg); +} + bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, - ImmInstrInfo &III) const { + ImmInstrInfo &III, bool PostRA) const { unsigned Opc = MI.getOpcode(); // The vast majority of the instructions would need their operand 2 replaced // with an immediate when switching to the reg+imm form. 
A marked exception @@ -2946,13 +2951,20 @@ case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break; } break; - // Power9 only. + // Power9 and up only. For some of these, the X-Form version has access to all + // 64 VSR's whereas the D-Form only has access to the VR's. We replace those + // with pseudo-ops pre-ra and for post-ra, we check that the register loaded + // into or stored from is in the VF register class. case PPC::LXVX: case PPC::LXSSPX: case PPC::LXSDX: case PPC::STXVX: case PPC::STXSSPX: case PPC::STXSDX: + case PPC::XFLOADf32: + case PPC::XFLOADf64: + case PPC::XFSTOREf32: + case PPC::XFSTOREf64: if (!Subtarget.hasP9Vector()) return false; III.SignedImm = true; @@ -2962,6 +2974,7 @@ III.IsSummingOperands = true; III.ImmOpNo = 1; III.OpNoForForwarding = 2; + III.ImmMustBeMultipleOf = 4; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LXVX: @@ -2969,24 +2982,56 @@ III.ImmMustBeMultipleOf = 16; break; case PPC::LXSSPX: - III.ImmOpcode = PPC::LXSSP; - III.ImmMustBeMultipleOf = 4; + if (PostRA) { + if (isVFReg(MI.getOperand(0).getReg())) + III.ImmOpcode = PPC::LXSSP; + else + III.ImmOpcode = PPC::LFS; + break; + } + LLVM_FALLTHROUGH; + case PPC::XFLOADf32: + III.ImmOpcode = PPC::DFLOADf32; break; case PPC::LXSDX: - III.ImmOpcode = PPC::LXSD; - III.ImmMustBeMultipleOf = 4; + if (PostRA) { + if (isVFReg(MI.getOperand(0).getReg())) + III.ImmOpcode = PPC::LXSD; + else + III.ImmOpcode = PPC::LFD; + break; + } + LLVM_FALLTHROUGH; + case PPC::XFLOADf64: + III.ImmOpcode = PPC::DFLOADf64; break; case PPC::STXVX: III.ImmOpcode = PPC::STXV; III.ImmMustBeMultipleOf = 16; break; case PPC::STXSSPX: - III.ImmOpcode = PPC::STXSSP; - III.ImmMustBeMultipleOf = 4; + if (PostRA) { + if (isVFReg(MI.getOperand(0).getReg())) + III.ImmOpcode = PPC::STXSSP; + else + III.ImmOpcode = PPC::STFS; + break; + } + LLVM_FALLTHROUGH; + case PPC::XFSTOREf32: + III.ImmOpcode = PPC::DFSTOREf32; break; case PPC::STXSDX: - III.ImmOpcode = PPC::STXSD; - III.ImmMustBeMultipleOf = 
4; + if (PostRA) { + if (isVFReg(MI.getOperand(0).getReg())) + III.ImmOpcode = PPC::STXSD; + else + III.ImmOpcode = PPC::STFD; + break; + } + LLVM_FALLTHROUGH; + case PPC::XFSTOREf64: + III.ImmOpcode = PPC::DFSTOREf64; break; } break; Index: test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir =================================================================== --- test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir +++ test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir @@ -3265,15 +3265,15 @@ %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 100 %7 = LXSDX %0, killed %6, implicit $rm :: (load 8 from %ir.arrayidx, !tbaa !12) - ; CHECK: LXSD 100, %0 - ; CHECK-LATE: lxsd 0, 100(3) + ; CHECK: DFLOADf64 100, %0 + ; CHECK-LATE: lfd 0, 100(3) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -120 %12 = LXSDX %0, killed %11, implicit $rm :: (load 8 from %ir.arrayidx3, !tbaa !12) - ; CHECK: LXSD -120, %0 - ; CHECK-LATE: lxsd 1, -120(3) + ; CHECK: DFLOADf64 -120, %0 + ; CHECK-LATE: lfd 1, -120(3) %13 = XSADDDP killed %7, killed %12, implicit $rm $f1 = COPY %13 BLR8 implicit $lr8, implicit $rm, implicit $f1 @@ -3338,15 +3338,15 @@ %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 96 %7 = LXSSPX %0, killed %6 :: (load 4 from %ir.arrayidx, !tbaa !14) - ; CHECK: LXSSP 96, %0 - ; CHECK-LATE: lxssp 0, 96(3) + ; CHECK: DFLOADf32 96, %0 + ; CHECK-LATE: lfs 0, 96(3) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -92 %12 = LXSSPX %0, killed %11 :: (load 4 from %ir.arrayidx3, !tbaa !14) - ; CHECK: LXSSP -92, %0 - ; CHECK-LATE: lxssp 1, -92(3) + ; CHECK: DFLOADf32 -92, %0 + ; CHECK-LATE: lfs 1, -92(3) %13 = XSADDSP killed %7, killed %12 $f1 = COPY %13 BLR8 implicit $lr8, implicit $rm, implicit $f1 @@ -6031,8 +6031,8 @@ %0 = COPY $x3 %3 = LI8 444 STXSSPX %1, %0, killed %3 :: (store 4 into %ir.arrayidx, !tbaa !14) - ; CHECK: STXSSP %1, 444, %0 - ; CHECK-LATE: stxssp 1, 444(3) + ; CHECK: DFSTOREf32 %1, 444, %0 + ; CHECK-LATE: stfs 1, 444(3) 
BLR8 implicit $lr8, implicit $rm ... @@ -6083,8 +6083,8 @@ %0 = COPY $x3 %3 = LI8 4 STXSDX %1, %0, killed %3, implicit $rm :: (store 8 into %ir.arrayidx, !tbaa !12) - ; CHECK: STXSD %1, 4, %0 - ; CHECK-LATE: stxsd 1, 4(3) + ; CHECK: DFSTOREf64 %1, 4, %0 + ; CHECK-LATE: stfd 1, 4(3) BLR8 implicit $lr8, implicit $rm ...