diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -380,6 +380,74 @@
   }
 }
 
+// Check if this instruction is a doubleword swap.
+// Looking for XXPERMDI Result, A, A, 2
+static bool isDoublewordSwap(MachineInstr *PermInstr) {
+  if (PermInstr->getOpcode() != PPC::XXPERMDI)
+    return false;
+
+  // Operands 1 and 2 are the two source vectors; operand 3 is the immediate.
+  MachineOperand &PermInput1 = PermInstr->getOperand(1);
+  MachineOperand &PermInput2 = PermInstr->getOperand(2);
+  unsigned PermType = PermInstr->getOperand(3).getImm();
+  return PermInput1.getReg() == PermInput2.getReg() && PermType == 2;
+}
+
+// Opcodes whose result doubleword i depends only on doubleword i of the two
+// register sources (operands 1 and 2) and is bit-identical when those sources
+// are exchanged. MCInstrDesc::isCommutable() must not be used for this: it only
+// states that operands may be exchanged, not how doublewords of the inputs map
+// to doublewords of the result, so it cannot prove doubleword[0] ==
+// doubleword[1] for Op(A, swap(A)). Extend this list only with opcodes for
+// which both properties hold.
+static bool isDoublewordWiseCommutativeOp(unsigned Opcode) {
+  switch (Opcode) {
+  case PPC::VADDUDM:
+  case PPC::VMAXSD:
+  case PPC::VMAXUD:
+  case PPC::VMINSD:
+  case PPC::VMINUD:
+  case PPC::XXLAND:
+  case PPC::XXLOR:
+  case PPC::XXLXOR:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Can we prove that this machine operand is a symmetrical vector.
+// Will return true only if it is known that:
+// doubleword[0] == doubleword[1] for Operand.
+// This is proven when Operand is defined by Op(A, swap(A)) or Op(swap(A), A),
+// where swap is a doubleword swap and Op is doubleword-wise and commutative.
+static bool isVectorSymmetrical(MachineOperand &VecInput,
+                                MachineRegisterInfo *MRI) {
+  if (!VecInput.isReg())
+    return false;
+
+  MachineInstr *DefVecReg = getVRegDefOrNull(&VecInput, MRI);
+  if (!DefVecReg)
+    return false;
+
+  if (!isDoublewordWiseCommutativeOp(DefVecReg->getOpcode()))
+    return false;
+
+  MachineOperand &OpInput1 = DefVecReg->getOperand(1);
+  MachineOperand &OpInput2 = DefVecReg->getOperand(2);
+  if (!OpInput1.isReg() || !OpInput2.isReg())
+    return false;
+
+  // True when Swapped is defined by a doubleword swap of the register in Other.
+  auto IsSwapOf = [MRI](MachineOperand &Swapped, MachineOperand &Other) {
+    MachineInstr *SwapDef = getVRegDefOrNull(&Swapped, MRI);
+    return SwapDef && isDoublewordSwap(SwapDef) &&
+           SwapDef->getOperand(1).getReg() == Other.getReg();
+  };
+
+  return IsSwapOf(OpInput1, OpInput2) || IsSwapOf(OpInput2, OpInput1);
+}
+
 // Perform peephole optimizations.
 bool PPCMIPeephole::simplifyCode() {
   bool Simplified = false;
@@ -1064,6 +1132,19 @@
         Simplified = true;
         break;
       }
+      case PPC::MFVSRLD:
+        // It is more efficient to use MFVSRD instead of MFVSRLD in cases where
+        // it is known that the two doublewords of the vector are identical.
+        MachineOperand &VecInput = MI.getOperand(1);
+        if (!isVectorSymmetrical(VecInput, MRI))
+          break;
+
+        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::MFVSRD),
+                MI.getOperand(0).getReg())
+            .addReg(VecInput.getReg(), 0, PPC::sub_64);
+        ToErase = &MI;
+        Simplified = true;
+        break;
       }
     }
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-mfvsrld-removal.ll b/llvm/test/CodeGen/PowerPC/ppc64-mfvsrld-removal.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-mfvsrld-removal.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
+
+; Function Attrs: nounwind
+define dso_local i64 @getVecSplit(i32 noundef signext %a, <2 x i64> noundef %Va, <2 x i64> noundef %Vb) local_unnamed_addr #0 {
+; PWR9LE-LABEL: getVecSplit:
+; PWR9LE:       # %bb.0: # %entry
+; PWR9LE-NEXT:    xxswapd v4, v2
+; PWR9LE-NEXT:    cmplwi r3, 0
+; PWR9LE-NEXT:    vaddudm v2, v4, v2
+; PWR9LE-NEXT:    beq cr0, .LBB0_2
+; PWR9LE-NEXT:  # %bb.1: # %if.then
+; PWR9LE-NEXT:    mfvsrd r3, v2
+; PWR9LE-NEXT:    blr
+; PWR9LE-NEXT:  .LBB0_2: # %if.else
+; PWR9LE-NEXT:    mflr r0
+; PWR9LE-NEXT:    std r0, 16(r1)
+; PWR9LE-NEXT:    stdu r1, -32(r1)
+; PWR9LE-NEXT:    vaddudm v2, v2, v3
+; PWR9LE-NEXT:    bl callee
+; PWR9LE-NEXT:    nop
+; PWR9LE-NEXT:    addi r3, r3, 42
+; PWR9LE-NEXT:    addi r1, r1, 32
+; PWR9LE-NEXT:    ld r0, 16(r1)
+; PWR9LE-NEXT:    mtlr r0
+; PWR9LE-NEXT:    blr
+;
+; PWR9BE-LABEL: getVecSplit:
+; PWR9BE:       # %bb.0: # %entry
+; PWR9BE-NEXT:    xxswapd v4, v2
+; PWR9BE-NEXT:    cmplwi r3, 0
+; PWR9BE-NEXT:    vaddudm v2, v4, v2
+; PWR9BE-NEXT:    beq cr0, .LBB0_2
+; PWR9BE-NEXT:  # %bb.1: # %if.then
+; PWR9BE-NEXT:    mfvsrd r3, v2
+; PWR9BE-NEXT:    blr
+; PWR9BE-NEXT:  .LBB0_2: # %if.else
+; PWR9BE-NEXT:    mflr r0
+; PWR9BE-NEXT:    std r0, 16(r1)
+; PWR9BE-NEXT:    stdu r1, -112(r1)
+; PWR9BE-NEXT:    vaddudm v2, v2, v3
+; PWR9BE-NEXT:    bl callee
+; PWR9BE-NEXT:    nop
+; PWR9BE-NEXT:    addi r3, r3, 42
+; PWR9BE-NEXT:    addi r1, r1, 112
+; PWR9BE-NEXT:    ld r0, 16(r1)
+; PWR9BE-NEXT:    mtlr r0
+; PWR9BE-NEXT:    blr
+entry:
+  %vecins2 = shufflevector <2 x i64> %Va, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+  %tobool.not = icmp eq i32 %a, 0
+  %add4 = add <2 x i64> %vecins2, %Va
+  br i1 %tobool.not, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %vecext3 = extractelement <2 x i64> %add4, i64 0
+  br label %cleanup
+
+if.else:                                          ; preds = %entry
+  %add5 = add <2 x i64> %add4, %Vb
+  %call = tail call i64 @callee(<2 x i64> noundef %add5)
+  %add6 = add nsw i64 %call, 42
+  br label %cleanup
+
+cleanup:                                          ; preds = %if.else, %if.then
+  %retval.0 = phi i64 [ %vecext3, %if.then ], [ %add6, %if.else ]
+  ret i64 %retval.0
+}
+
+declare i64 @callee(<2 x i64> noundef) local_unnamed_addr #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
@@ -122,7 +122,7 @@
 ; CHECK-P10-NEXT:    xvcmpeqdp v2, vs1, vs0
 ; CHECK-P10-NEXT:    xxswapd v3, v2
 ; CHECK-P10-NEXT:    xxland vs0, v2, v3
-; CHECK-P10-NEXT:    mfvsrld r3, vs0
+; CHECK-P10-NEXT:    mffprd r3, f0
 ; CHECK-P10-NEXT:    andi.
r3, r3, 1 ; CHECK-P10-NEXT: bc 4, gt, .LBB5_2 ; CHECK-P10-NEXT: # %bb.1: # %bb8 diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll --- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll @@ -1245,7 +1245,7 @@ ; PWR9LE: # %bb.0: # %entry ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v2i64: @@ -1259,7 +1259,7 @@ ; PWR10LE: # %bb.0: # %entry ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v2i64: @@ -1279,7 +1279,7 @@ ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v4i64: @@ -1295,7 +1295,7 @@ ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v4i64: @@ -1318,7 +1318,7 @@ ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v8i64: @@ -1338,7 +1338,7 @@ ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v8i64: @@ -1367,7 +1367,7 @@ ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i64: @@ -1395,7 +1395,7 @@ ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, 
v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i64: @@ -1468,7 +1468,7 @@ ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i8tov16i64_sign: @@ -1560,7 +1560,7 @@ ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i8tov16i64_sign: @@ -1667,7 +1667,7 @@ ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i8tov16i64_zero: @@ -1745,7 +1745,7 @@ ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i8tov16i64_zero: diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-and.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-and.ll --- a/llvm/test/CodeGen/PowerPC/vector-reduce-and.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-and.ll @@ -212,7 +212,7 @@ ; PWR9LE: # %bb.0: # %entry ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: xxland vs0, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v2i64: @@ -226,7 +226,7 @@ ; PWR10LE: # %bb.0: # %entry ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: xxland vs0, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, vs0 +; PWR10LE-NEXT: mffprd r3, f0 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v2i64: @@ -246,7 +246,7 @@ ; PWR9LE-NEXT: xxland vs0, v2, v3 ; PWR9LE-NEXT: xxswapd v2, vs0 ; PWR9LE-NEXT: xxland vs0, vs0, v2 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v4i64: @@ -285,7 +285,7 @@ ; PWR9LE-NEXT: 
xxland vs0, vs1, vs0 ; PWR9LE-NEXT: xxswapd v2, vs0 ; PWR9LE-NEXT: xxland vs0, vs0, v2 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v8i64: @@ -334,7 +334,7 @@ ; PWR9LE-NEXT: xxland vs0, vs0, vs2 ; PWR9LE-NEXT: xxswapd v2, vs0 ; PWR9LE-NEXT: xxland vs0, vs0, v2 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i64: diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-or.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-or.ll --- a/llvm/test/CodeGen/PowerPC/vector-reduce-or.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-or.ll @@ -212,7 +212,7 @@ ; PWR9LE: # %bb.0: # %entry ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: xxlor vs0, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v2i64: @@ -226,7 +226,7 @@ ; PWR10LE: # %bb.0: # %entry ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: xxlor vs0, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, vs0 +; PWR10LE-NEXT: mffprd r3, f0 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v2i64: @@ -246,7 +246,7 @@ ; PWR9LE-NEXT: xxlor vs0, v2, v3 ; PWR9LE-NEXT: xxswapd v2, vs0 ; PWR9LE-NEXT: xxlor vs0, vs0, v2 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v4i64: @@ -262,7 +262,7 @@ ; PWR10LE-NEXT: xxlor vs0, v2, v3 ; PWR10LE-NEXT: xxswapd v2, vs0 ; PWR10LE-NEXT: xxlor vs0, vs0, v2 -; PWR10LE-NEXT: mfvsrld r3, vs0 +; PWR10LE-NEXT: mffprd r3, f0 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v4i64: @@ -285,7 +285,7 @@ ; PWR9LE-NEXT: xxlor vs0, vs1, vs0 ; PWR9LE-NEXT: xxswapd v2, vs0 ; PWR9LE-NEXT: xxlor vs0, vs0, v2 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v8i64: @@ -305,7 +305,7 @@ ; PWR10LE-NEXT: xxlor vs0, vs1, vs0 ; PWR10LE-NEXT: xxswapd v2, vs0 ; PWR10LE-NEXT: xxlor vs0, vs0, v2 -; PWR10LE-NEXT: mfvsrld r3, vs0 +; PWR10LE-NEXT: mffprd r3, f0 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v8i64: @@ -334,7 
+334,7 @@ ; PWR9LE-NEXT: xxlor vs0, vs0, vs2 ; PWR9LE-NEXT: xxswapd v2, vs0 ; PWR9LE-NEXT: xxlor vs0, vs0, v2 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i64: @@ -362,7 +362,7 @@ ; PWR10LE-NEXT: xxlor vs0, vs0, vs2 ; PWR10LE-NEXT: xxswapd v2, vs0 ; PWR10LE-NEXT: xxlor vs0, vs0, v2 -; PWR10LE-NEXT: mfvsrld r3, vs0 +; PWR10LE-NEXT: mffprd r3, f0 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i64: diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-smax.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-smax.ll --- a/llvm/test/CodeGen/PowerPC/vector-reduce-smax.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-smax.ll @@ -616,7 +616,7 @@ ; PWR9LE: # %bb.0: # %entry ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vmaxsd v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v2i64: @@ -630,7 +630,7 @@ ; PWR10LE: # %bb.0: # %entry ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vmaxsd v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v2i64: @@ -650,7 +650,7 @@ ; PWR9LE-NEXT: vmaxsd v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vmaxsd v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v4i64: @@ -666,7 +666,7 @@ ; PWR10LE-NEXT: vmaxsd v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vmaxsd v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v4i64: @@ -689,7 +689,7 @@ ; PWR9LE-NEXT: vmaxsd v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vmaxsd v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v8i64: @@ -709,7 +709,7 @@ ; PWR10LE-NEXT: vmaxsd v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vmaxsd v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: 
v8i64: @@ -738,7 +738,7 @@ ; PWR9LE-NEXT: vmaxsd v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vmaxsd v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i64: @@ -766,7 +766,7 @@ ; PWR10LE-NEXT: vmaxsd v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vmaxsd v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i64: diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-smin.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-smin.ll --- a/llvm/test/CodeGen/PowerPC/vector-reduce-smin.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-smin.ll @@ -616,7 +616,7 @@ ; PWR9LE: # %bb.0: # %entry ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vminsd v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v2i64: @@ -630,7 +630,7 @@ ; PWR10LE: # %bb.0: # %entry ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vminsd v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v2i64: @@ -650,7 +650,7 @@ ; PWR9LE-NEXT: vminsd v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vminsd v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v4i64: @@ -666,7 +666,7 @@ ; PWR10LE-NEXT: vminsd v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vminsd v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v4i64: @@ -689,7 +689,7 @@ ; PWR9LE-NEXT: vminsd v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vminsd v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v8i64: @@ -709,7 +709,7 @@ ; PWR10LE-NEXT: vminsd v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vminsd v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; 
PWR10BE-LABEL: v8i64: @@ -738,7 +738,7 @@ ; PWR9LE-NEXT: vminsd v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vminsd v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i64: @@ -766,7 +766,7 @@ ; PWR10LE-NEXT: vminsd v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vminsd v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i64: diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-umax.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-umax.ll --- a/llvm/test/CodeGen/PowerPC/vector-reduce-umax.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-umax.ll @@ -616,7 +616,7 @@ ; PWR9LE: # %bb.0: # %entry ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vmaxud v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v2i64: @@ -630,7 +630,7 @@ ; PWR10LE: # %bb.0: # %entry ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vmaxud v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v2i64: @@ -650,7 +650,7 @@ ; PWR9LE-NEXT: vmaxud v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vmaxud v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v4i64: @@ -666,7 +666,7 @@ ; PWR10LE-NEXT: vmaxud v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vmaxud v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v4i64: @@ -689,7 +689,7 @@ ; PWR9LE-NEXT: vmaxud v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vmaxud v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v8i64: @@ -709,7 +709,7 @@ ; PWR10LE-NEXT: vmaxud v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vmaxud v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: 
blr ; ; PWR10BE-LABEL: v8i64: @@ -738,7 +738,7 @@ ; PWR9LE-NEXT: vmaxud v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vmaxud v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i64: @@ -766,7 +766,7 @@ ; PWR10LE-NEXT: vmaxud v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vmaxud v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i64: diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-umin.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-umin.ll --- a/llvm/test/CodeGen/PowerPC/vector-reduce-umin.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-umin.ll @@ -616,7 +616,7 @@ ; PWR9LE: # %bb.0: # %entry ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vminud v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v2i64: @@ -630,7 +630,7 @@ ; PWR10LE: # %bb.0: # %entry ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vminud v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v2i64: @@ -650,7 +650,7 @@ ; PWR9LE-NEXT: vminud v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vminud v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v4i64: @@ -666,7 +666,7 @@ ; PWR10LE-NEXT: vminud v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vminud v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v4i64: @@ -689,7 +689,7 @@ ; PWR9LE-NEXT: vminud v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vminud v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v8i64: @@ -709,7 +709,7 @@ ; PWR10LE-NEXT: vminud v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vminud v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; 
PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v8i64: @@ -738,7 +738,7 @@ ; PWR9LE-NEXT: vminud v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vminud v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i64: @@ -766,7 +766,7 @@ ; PWR10LE-NEXT: vminud v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vminud v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i64: diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-xor.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-xor.ll --- a/llvm/test/CodeGen/PowerPC/vector-reduce-xor.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-xor.ll @@ -212,7 +212,7 @@ ; PWR9LE: # %bb.0: # %entry ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: xxlxor vs0, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v2i64: @@ -226,7 +226,7 @@ ; PWR10LE: # %bb.0: # %entry ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: xxlxor vs0, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, vs0 +; PWR10LE-NEXT: mffprd r3, f0 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v2i64: @@ -246,7 +246,7 @@ ; PWR9LE-NEXT: xxlxor vs0, v2, v3 ; PWR9LE-NEXT: xxswapd v2, vs0 ; PWR9LE-NEXT: xxlxor vs0, vs0, v2 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v4i64: @@ -262,7 +262,7 @@ ; PWR10LE-NEXT: xxlxor vs0, v2, v3 ; PWR10LE-NEXT: xxswapd v2, vs0 ; PWR10LE-NEXT: xxlxor vs0, vs0, v2 -; PWR10LE-NEXT: mfvsrld r3, vs0 +; PWR10LE-NEXT: mffprd r3, f0 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v4i64: @@ -285,7 +285,7 @@ ; PWR9LE-NEXT: xxlxor vs0, vs1, vs0 ; PWR9LE-NEXT: xxswapd v2, vs0 ; PWR9LE-NEXT: xxlxor vs0, vs0, v2 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v8i64: @@ -305,7 +305,7 @@ ; PWR10LE-NEXT: xxlxor vs0, vs1, vs0 ; PWR10LE-NEXT: xxswapd v2, vs0 ; PWR10LE-NEXT: xxlxor vs0, vs0, v2 -; PWR10LE-NEXT: mfvsrld r3, vs0 
+; PWR10LE-NEXT: mffprd r3, f0 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v8i64: @@ -334,7 +334,7 @@ ; PWR9LE-NEXT: xxlxor vs0, vs0, vs2 ; PWR9LE-NEXT: xxswapd v2, vs0 ; PWR9LE-NEXT: xxlxor vs0, vs0, v2 -; PWR9LE-NEXT: mfvsrld r3, vs0 +; PWR9LE-NEXT: mffprd r3, f0 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i64: @@ -362,7 +362,7 @@ ; PWR10LE-NEXT: xxlxor vs0, vs0, vs2 ; PWR10LE-NEXT: xxswapd v2, vs0 ; PWR10LE-NEXT: xxlxor vs0, vs0, v2 -; PWR10LE-NEXT: mfvsrld r3, vs0 +; PWR10LE-NEXT: mffprd r3, f0 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i64: