diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -258,6 +258,51 @@ TOCSaves[MI] = Keep; } +// This function checks if registers ultimately feeding a splat or swap are both +// the same register and are defined by the same instruction. When optimizing a +// splat/swap to a splat/copy, using TargetRegisterInfo::lookThruCopyLike and +// comparing the register numbers to determine the feed registers for the splat +// can lead to a misoptimization if both operands feed from the same physical +// register and the register has been clobbered since feeding one of the +// operands. +static bool areFeedRegsSame(const MachineInstr *DefMI, + const MachineRegisterInfo *MRI) { + assert(DefMI->getOpcode() == PPC::XXPERMDI && + "areFeedRegsSame called with invalid opcode"); + + const MachineInstr *MIs[2]; + unsigned FeedRegs[2]; + + for (int I = 0; I < 2; ++I) { + const MachineInstr *MI; + + unsigned SrcReg = DefMI->getOperand(I+1).getReg(); + + // logic from TargetRegisterInfo::lookThruCopyLike: + do { + MI = MRI->getVRegDef(SrcReg); + if (!MI->isCopyLike()) + break; + + unsigned CopySrcReg; + + if (MI->isCopy()) + CopySrcReg = MI->getOperand(1).getReg(); + else { + assert(MI->isSubregToReg() && "Bad opcode for areFeedRegsSame"); + CopySrcReg = MI->getOperand(2).getReg(); + } + + SrcReg = CopySrcReg; + } while (Register::isVirtualRegister(SrcReg)); + + MIs[I] = MI; + FeedRegs[I] = SrcReg; + } + + return FeedRegs[0] == FeedRegs[1] && MIs[0] == MIs[1]; +} + // Perform peephole optimizations. bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; @@ -381,12 +426,9 @@ // can replace it with a copy. 
if (DefOpc == PPC::XXPERMDI) { unsigned FeedImmed = DefMI->getOperand(3).getImm(); - unsigned FeedReg1 = - TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); - unsigned FeedReg2 = - TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); + bool FeedRegsSame = areFeedRegsSame(DefMI, MRI); - if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) { + if ((FeedImmed == 0 || FeedImmed == 3) && FeedRegsSame) { LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat " "to splat/copy: "); LLVM_DEBUG(MI.dump()); @@ -401,7 +443,7 @@ // the splat to splat the other value from the swap's input // parameter. else if ((Immed == 0 || Immed == 3) - && FeedImmed == 2 && FeedReg1 == FeedReg2) { + && FeedImmed == 2 && FeedRegsSame) { LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: "); LLVM_DEBUG(MI.dump()); MI.getOperand(1).setReg(DefMI->getOperand(1).getReg()); @@ -412,7 +454,7 @@ // If this is a swap fed by a swap, we can replace it // with a copy from the first swap's input. 
- else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) { + else if (Immed == 2 && FeedImmed == 2 && FeedRegsSame) { LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: "); LLVM_DEBUG(MI.dump()); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll @@ -0,0 +1,44 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \ +; RUN: | FileCheck --check-prefix=CHECK-LE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mattr=+vsx < %s \ +; RUN: | FileCheck --check-prefix=CHECK-BE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s \ +; RUN: | FileCheck --check-prefix=CHECK-P9LE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mcpu=pwr9 < %s \ +; RUN: | FileCheck --check-prefix=CHECK-P9BE %s + +define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind { + %added = fadd <2 x double> %x, %y + %call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone + %res1 = extractelement <2 x double> %call, i32 0 + %res2 = extractelement <2 x double> %call, i32 1 + %ret = fsub double %res1, %res2 + ret double %ret + +; CHECK-LE-LABEL: splat_swap: +; CHECK-LE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-LE-NEXT: xxswapd [[XREG2]], [[XREG1]] +; CHECK-LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]] +; CHECK-LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +; +; CHECK-BE-LABEL: splat_swap: +; CHECK-BE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-BE-NEXT: xxswapd [[XREG2]], [[XREG1]] +; CHECK-BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]] +; CHECK-BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +; +; CHECK-P9LE-LABEL: splat_swap: +; CHECK-P9LE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-P9LE: 
xxswapd [[XREG2]], [[XREG1]] +; CHECK-P9LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]] +; CHECK-P9LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +; +; CHECK-P9BE-LABEL: splat_swap: +; CHECK-P9BE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-P9BE: xxswapd [[XREG2]], [[XREG1]] +; CHECK-P9BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]] +; CHECK-P9BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +} + +declare <2 x double> @llvm.rint.v2f64(<2 x double>) + diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -468,12 +468,13 @@ ; PC64LE-NEXT: lfs 1, .LCPI8_3@toc@l(3) ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 96 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -509,10 +510,11 @@ ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1715,12 +1717,13 @@ ; PC64LE-NEXT: lfd 1, .LCPI33_3@toc@l(3) ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte 
Folded Reload ; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 96 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -1756,10 +1759,11 @@ ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2113,11 +2117,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI38_2@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2150,9 +2155,10 @@ ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2477,11 +2483,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI43_2@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2511,9 
+2518,10 @@ ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2828,11 +2836,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI48_2@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2862,9 +2871,10 @@ ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3179,11 +3189,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI53_2@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3213,9 +3224,10 @@ ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; 
PC64LE9-NEXT: mtlr 0 @@ -3530,11 +3542,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI58_2@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3564,9 +3577,10 @@ ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3881,11 +3895,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI63_2@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3915,9 +3930,10 @@ ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4232,11 +4248,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI68_2@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; 
PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4266,9 +4283,10 @@ ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4583,11 +4601,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI73_2@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4617,9 +4636,10 @@ ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4934,11 +4954,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI78_2@toc@l(3) ; PC64LE-NEXT: bl rint ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4968,9 +4989,10 @@ ; PC64LE9-NEXT: bl rint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; 
PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5285,11 +5307,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5319,9 +5342,10 @@ ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5672,11 +5696,12 @@ ; PC64LE-NEXT: lfs 1, .LCPI88_4@toc@l(3) ; PC64LE-NEXT: bl fmax ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5712,9 +5737,10 @@ ; PC64LE9-NEXT: bl fmax ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -6084,11 +6110,12 @@ ; PC64LE-NEXT: lfs 1, .LCPI93_4@toc@l(3) ; PC64LE-NEXT: bl fmin ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 
63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -6124,9 +6151,10 @@ ; PC64LE9-NEXT: bl fmin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0