Index: lib/Target/PowerPC/PPCMIPeephole.cpp
===================================================================
--- lib/Target/PowerPC/PPCMIPeephole.cpp
+++ lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -375,6 +375,54 @@
             MI.getOperand(2).setImm(NewElem);
           }
         }
+
+        // Splat is fed by a SWAP: XXPERMDI %VA, %VA, 2
+        // The swap exchanges the two doublewords of %VA, so splatting element
+        // N of its result is the same as splatting the element half a vector
+        // further along (modulo the element count) of %VA itself.
+        if (DefOpcode == PPC::XXPERMDI) {
+          // The splats disagree on operand order: XXSPLTW takes the source
+          // register before the element index, VSPLTB/VSPLTH take the element
+          // index before the source register.
+          const bool IsWordSplat = MI.getOpcode() == PPC::XXSPLTW;
+          const unsigned ImmOpNo = IsWordSplat ? 2 : 1;
+          const unsigned RegOpNo = IsWordSplat ? 1 : 2;
+          unsigned SwapRes = DefMI->getOperand(0).getReg();
+          unsigned SwapOp1 = DefMI->getOperand(1).getReg();
+          unsigned SwapOp2 = DefMI->getOperand(2).getReg();
+          unsigned SwapImm = DefMI->getOperand(3).getImm();
+          if (SwapOp1 == SwapOp2 && SwapImm == 2 &&
+              MI.getOperand(ImmOpNo).isImm() &&
+              MI.getOperand(RegOpNo).isReg()) {
+            unsigned SplatImm = MI.getOperand(ImmOpNo).getImm();
+            unsigned NewElem = 0;
+            // Compute the new ID to use for the splat.
+            if (MI.getOpcode() == PPC::VSPLTB)
+              NewElem = (SplatImm + 8) & 0xF;
+            else if (MI.getOpcode() == PPC::VSPLTH)
+              NewElem = (SplatImm + 4) & 0x7;
+            else if (IsWordSplat)
+              NewElem = (SplatImm + 2) & 0x3;
+            else
+              llvm_unreachable("Unknown splat opcode.");
+
+            // The swap only becomes dead if this splat was its sole user.
+            if (MRI->hasOneNonDBGUse(SwapRes)) {
+              DEBUG(dbgs() << "Removing redundant swap: ");
+              DEBUG(DefMI->dump());
+              ToErase = DefMI;
+            }
+            Simplified = true;
+            DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
+                  " to " << NewElem << " in instruction: ");
+            DEBUG(MI.dump());
+            MI.getOperand(ImmOpNo).setImm(NewElem);
+            // NOTE(review): SwapOp1 is used as-is; confirm its register class
+            // is acceptable for this splat's source operand.
+            MI.getOperand(RegOpNo).setReg(SwapOp1);
+          }
+        }
+
         break;
       }
       case PPC::XVCVDPSP: {
Index: test/CodeGen/PowerPC/ppc64-peephole-swap.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/ppc64-peephole-swap.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR8
+
+; Function Attrs: norecurse nounwind readnone
+define <16 x i8> @splat_8_plus(<16 x i8> %v, i8 signext %c) local_unnamed_addr {
+entry:
+  %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
+  %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
+  %add = add <16 x i8> %splat.splat.i, %v
+  ret <16 x i8> %add
+; CHECK-LABEL: splat_8_plus
+; CHECK-NOT: xxswapd
+; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_8_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <16 x i8> @splat_u8_plus(<16 x i8> %v, i8 zeroext %c) local_unnamed_addr {
+entry:
+  %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
+  %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
+  %add = add <16 x i8> %splat.splat.i, %v
+  ret <16 x i8> %add
+; CHECK-LABEL: splat_u8_plus
+; CHECK-NOT: xxswapd
+; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_u8_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <8 x i16> @splat_16_plus(<8 x i16> %v, i16 signext %c) local_unnamed_addr {
+entry:
+  %0 = shl i16 %c, 8
+  %conv.i = ashr exact i16 %0, 8
+  %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
+  %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
+  %add = add <8 x i16> %splat.splat.i, %v
+  ret <8 x i16> %add
+; CHECK-LABEL: splat_16_plus
+; CHECK-NOT: xxswapd
+; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_16_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <8 x i16> @splat_u16_plus(<8 x i16> %v, i16 zeroext %c) local_unnamed_addr {
+entry:
+  %0 = shl i16 %c, 8
+  %conv.i = ashr exact i16 %0, 8
+  %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
+  %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
+  %add = add <8 x i16> %splat.splat.i, %v
+  ret <8 x i16> %add
+; CHECK-LABEL: splat_u16_plus
+; CHECK-NOT: xxswapd
+; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_u16_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @splat_32_plus(<4 x i32> %v, i32 signext %c) local_unnamed_addr {
+entry:
+  %sext = shl i32 %c, 24
+  %conv.i = ashr exact i32 %sext, 24
+  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
+  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
+  %add = add <4 x i32> %splat.splat.i, %v
+  ret <4 x i32> %add
+; CHECK-LABEL: splat_32_plus
+; CHECK-NOT: xxswapd
+; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_32_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
+; CHECK-PWR8: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @splat_u32_plus(<4 x i32> %v, i32 zeroext %c) local_unnamed_addr {
+entry:
+  %sext = shl i32 %c, 24
+  %conv.i = ashr exact i32 %sext, 24
+  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
+  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
+  %add = add <4 x i32> %splat.splat.i, %v
+  ret <4 x i32> %add
+; CHECK-LABEL: splat_u32_plus
+; CHECK-NOT: xxswapd
+; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_u32_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
+; CHECK-PWR8: blr
+}
+