diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -380,13 +380,23 @@ // If this is a splat or a swap fed by another splat, we // can replace it with a copy. if (DefOpc == PPC::XXPERMDI) { - unsigned FeedImmed = DefMI->getOperand(3).getImm(); - unsigned FeedReg1 = - TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); - unsigned FeedReg2 = - TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); + unsigned DefReg1 = DefMI->getOperand(1).getReg(); + unsigned DefReg2 = DefMI->getOperand(2).getReg(); + unsigned DefImmed = DefMI->getOperand(3).getImm(); + + // If the two inputs are not the same register, check to see if + // they originate from the same virtual register after only + // copy-like instructions. + if (DefReg1 != DefReg2) { + unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI); + unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI); + + if (FeedReg1 != FeedReg2 || + Register::isPhysicalRegister(FeedReg1)) + break; + } - if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) { + if (DefImmed == 0 || DefImmed == 3) { LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat " "to splat/copy: "); LLVM_DEBUG(MI.dump()); @@ -400,19 +410,18 @@ // If this is a splat fed by a swap, we can simplify modify // the splat to splat the other value from the swap's input // parameter. - else if ((Immed == 0 || Immed == 3) - && FeedImmed == 2 && FeedReg1 == FeedReg2) { + else if ((Immed == 0 || Immed == 3) && DefImmed == 2) { LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: "); LLVM_DEBUG(MI.dump()); - MI.getOperand(1).setReg(DefMI->getOperand(1).getReg()); - MI.getOperand(2).setReg(DefMI->getOperand(2).getReg()); + MI.getOperand(1).setReg(DefReg1); + MI.getOperand(2).setReg(DefReg2); MI.getOperand(3).setImm(3 - Immed); Simplified = true; } // If this is a swap fed by a swap, we can replace it // with a copy from the first swap's input. - else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) { + else if (Immed == 2 && DefImmed == 2) { LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: "); LLVM_DEBUG(MI.dump()); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll @@ -0,0 +1,44 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \ +; RUN: | FileCheck --check-prefix=CHECK-LE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mattr=+vsx < %s \ +; RUN: | FileCheck --check-prefix=CHECK-BE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s \ +; RUN: | FileCheck --check-prefix=CHECK-P9LE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mcpu=pwr9 < %s \ +; RUN: | FileCheck --check-prefix=CHECK-P9BE %s + +define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind { + %added = fadd <2 x double> %x, %y + %call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone + %res1 = extractelement <2 x double> %call, i32 0 + %res2 = extractelement <2 x double> %call, i32 1 + %ret = fsub double %res1, %res2 + ret double %ret + +; CHECK-LE-LABEL: splat_swap: +; CHECK-LE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-LE-NEXT: xxswapd [[XREG2]], [[XREG1]] +; CHECK-LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]] +; CHECK-LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +; +; CHECK-BE-LABEL: splat_swap: +; CHECK-BE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-BE-NEXT: xxswapd [[XREG2]], [[XREG1]] +; CHECK-BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]] +; CHECK-BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +; +; CHECK-P9LE-LABEL: splat_swap: +; CHECK-P9LE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-P9LE: xxswapd [[XREG2]], [[XREG1]] +; CHECK-P9LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]] +; CHECK-P9LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +; +; CHECK-P9BE-LABEL: splat_swap: +; CHECK-P9BE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-P9BE: xxswapd [[XREG2]], [[XREG1]] +; CHECK-P9BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]] +; CHECK-P9BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +} + +declare <2 x double> @llvm.rint.v2f64(<2 x double>) + diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -468,12 +468,13 @@ ; PC64LE-NEXT: lfs 1, .LCPI8_3@toc@l(3) ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 96 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -509,10 +510,11 @@ ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1715,12 +1717,13 @@ ; PC64LE-NEXT: lfd 1, .LCPI33_3@toc@l(3) ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 96 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -1756,10 +1759,11 @@ ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2113,11 +2117,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI38_2@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2150,9 +2155,10 @@ ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2477,11 +2483,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI43_2@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2511,9 +2518,10 @@ ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2828,11 +2836,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI48_2@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2862,9 +2871,10 @@ ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3179,11 +3189,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI53_2@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3213,9 +3224,10 @@ ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3530,11 +3542,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI58_2@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3564,9 +3577,10 @@ ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3881,11 +3895,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI63_2@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3915,9 +3930,10 @@ ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4232,11 +4248,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI68_2@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4266,9 +4283,10 @@ ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4583,11 +4601,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI73_2@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4617,9 +4636,10 @@ ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4934,11 +4954,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI78_2@toc@l(3) ; PC64LE-NEXT: bl rint ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4968,9 +4989,10 @@ ; PC64LE9-NEXT: bl rint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5285,11 +5307,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5319,9 +5342,10 @@ ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5672,11 +5696,12 @@ ; PC64LE-NEXT: lfs 1, .LCPI88_4@toc@l(3) ; PC64LE-NEXT: bl fmax ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5712,9 +5737,10 @@ ; PC64LE9-NEXT: bl fmax ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -6084,11 +6110,12 @@ ; PC64LE-NEXT: lfs 1, .LCPI93_4@toc@l(3) ; PC64LE-NEXT: bl fmin ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -6124,9 +6151,10 @@ ; PC64LE9-NEXT: bl fmin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0