diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -386,6 +386,10 @@ unsigned FeedReg2 = TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); + if (!Register::isVirtualRegister(FeedReg1) || + !Register::isVirtualRegister(FeedReg2)) + break; + if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) { LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat " "to splat/copy: "); diff --git a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll --- a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll +++ b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll @@ -396,7 +396,9 @@ define void @swap_store00(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) { ; CHECK-P8-LABEL: swap_store00: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: stxvd2x v2, 0, r7 +; CHECK-P8-NEXT: xxswapd v2, v2 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store00: @@ -423,7 +425,9 @@ define void @swap_store01(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) { ; CHECK-P8-LABEL: swap_store01: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: stxvd2x v3, 0, r7 +; CHECK-P8-NEXT: xxswapd v2, v3 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store01: @@ -666,7 +670,9 @@ define void @swap_store40(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) { ; CHECK-P8-LABEL: swap_store40: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: stxvd2x v2, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store40: @@ -693,7 +699,9 @@ define void @swap_store41(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) { ; CHECK-P8-LABEL: swap_store41: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: stxvd2x v3, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store41: diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll @@ -0,0 +1,44 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \ +; RUN: | FileCheck --check-prefix=CHECK-LE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mattr=+vsx < %s \ +; RUN: | FileCheck --check-prefix=CHECK-BE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s \ +; RUN: | FileCheck --check-prefix=CHECK-P9LE %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mcpu=pwr9 < %s \ +; RUN: | FileCheck --check-prefix=CHECK-P9BE %s + +define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind { + %added = fadd <2 x double> %x, %y + %call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone + %res1 = extractelement <2 x double> %call, i32 0 + %res2 = extractelement <2 x double> %call, i32 1 + %ret = fsub double %res1, %res2 + ret double %ret + +; CHECK-LE-LABEL: splat_swap: +; CHECK-LE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-LE-NEXT: xxswapd [[XREG2]], [[XREG1]] +; CHECK-LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]] +; CHECK-LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +; +; CHECK-BE-LABEL: splat_swap: +; CHECK-BE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-BE-NEXT: xxswapd [[XREG2]], [[XREG1]] +; CHECK-BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]] +; CHECK-BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +; +; CHECK-P9LE-LABEL: splat_swap: +; CHECK-P9LE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-P9LE: xxswapd [[XREG2]], [[XREG1]] +; CHECK-P9LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]] +; CHECK-P9LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +; +; CHECK-P9BE-LABEL: splat_swap: +; CHECK-P9BE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]] +; CHECK-P9BE: xxswapd [[XREG2]], [[XREG1]] +; CHECK-P9BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]] +; CHECK-P9BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}} +} + +declare <2 x double> @llvm.rint.v2f64(<2 x double>) + diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -468,12 +468,13 @@ ; PC64LE-NEXT: lfs 1, .LCPI8_3@toc@l(3) ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 96 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -509,10 +510,11 @@ ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1715,12 +1717,13 @@ ; PC64LE-NEXT: lfd 1, .LCPI33_3@toc@l(3) ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 96 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -1756,10 +1759,11 @@ ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2113,11 +2117,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI38_2@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2150,9 +2155,10 @@ ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2477,11 +2483,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI43_2@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2511,9 +2518,10 @@ ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2828,11 +2836,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI48_2@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2862,9 +2871,10 @@ ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3179,11 +3189,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI53_2@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3213,9 +3224,10 @@ ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3530,11 +3542,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI58_2@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3564,9 +3577,10 @@ ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3881,11 +3895,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI63_2@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3915,9 +3930,10 @@ ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4232,11 +4248,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI68_2@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4266,9 +4283,10 @@ ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4583,11 +4601,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI73_2@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4617,9 +4636,10 @@ ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4934,11 +4954,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI78_2@toc@l(3) ; PC64LE-NEXT: bl rint ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4968,9 +4989,10 @@ ; PC64LE9-NEXT: bl rint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5285,11 +5307,12 @@ ; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5319,9 +5342,10 @@ ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5672,11 +5696,12 @@ ; PC64LE-NEXT: lfs 1, .LCPI88_4@toc@l(3) ; PC64LE-NEXT: bl fmax ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5712,9 +5737,10 @@ ; PC64LE9-NEXT: bl fmax ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -6084,11 +6110,12 @@ ; PC64LE-NEXT: lfs 1, .LCPI93_4@toc@l(3) ; PC64LE-NEXT: bl fmin ; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 ; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -6124,9 +6151,10 @@ ; PC64LE9-NEXT: bl fmin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0