# Patch: let InstCombine drop a shufflevector that feeds a vector.reduce.* call.
#
# What the InstCombineCalls.cpp hunk does:
#   * Adds a case group for vector_reduce_{add,mul,xor,umax,umin,smax,smin,
#     fmax,fmin,fadd,fmul}. The case directly above now ends in
#     LLVM_FALLTHROUGH, so it enters this group too. NOTE(review): that case
#     label is outside the hunk; the updated reduce.or / reduce.and
#     expectations suggest it is the or/and case - confirm.
#   * The reduced vector is operand 1 for fadd/fmul and operand 0 otherwise.
#     fadd/fmul are only transformed when the call has the reassoc flag.
#   * If the operand is a fixed-width, single-source shuffle whose mask uses
#     every lane exactly once, the call's operand is replaced by the shuffle
#     source. The case returns nullptr after the replaceUse call.
#
# Review changes relative to the copy this was reconstructed from:
#   * Restored template arguments that extraction had stripped:
#     ArrayRef<int>, FixedVectorType, cast<ShuffleVectorInst>.
#   * isa<> replaces a dyn_cast whose result (FVTy) was never used.
#   * UsedIndeces renamed to UsedIndices.
#   * Added an Idx >= Sz bail-out before UsedIndices.test(Idx): a mask lane
#     >= Sz selects from the second (undef) operand and would index past the
#     end of the bit vector.
#
# Known gaps - fix before applying:
#   * The constant shuffle masks at the end of the shufflevector lines in the
#     .ll hunks are missing from this copy and are left as found.
#   * Line breaks and leading whitespace were reconstructed from the hunk
#     headers; regenerate the patch or apply with whitespace tolerance.

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
@@ -1983,6 +1984,47 @@
       replaceInstUsesWith(CI, Res);
       return eraseInstFromFunction(CI);
     }
+    LLVM_FALLTHROUGH;
+  }
+  case Intrinsic::vector_reduce_add:
+  case Intrinsic::vector_reduce_mul:
+  case Intrinsic::vector_reduce_xor:
+  case Intrinsic::vector_reduce_umax:
+  case Intrinsic::vector_reduce_umin:
+  case Intrinsic::vector_reduce_smax:
+  case Intrinsic::vector_reduce_smin:
+  case Intrinsic::vector_reduce_fmax:
+  case Intrinsic::vector_reduce_fmin:
+  case Intrinsic::vector_reduce_fadd:
+  case Intrinsic::vector_reduce_fmul: {
+    bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
+                              IID != Intrinsic::vector_reduce_fmul) ||
+                             II->hasAllowReassoc();
+    const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
+                             IID == Intrinsic::vector_reduce_fmul)
+                                ? 1
+                                : 0;
+    Value *Arg = II->getArgOperand(ArgIdx);
+    Value *V;
+    ArrayRef<int> Mask;
+    if (isa<FixedVectorType>(Arg->getType()))
+      if (CanBeReassociated &&
+          match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) &&
+          cast<ShuffleVectorInst>(Arg)->isSingleSource()) {
+        int Sz = Mask.size();
+        SmallBitVector UsedIndices(Sz);
+        for (int Idx : Mask) {
+          if (Idx == UndefMaskElem || Idx >= Sz || UsedIndices.test(Idx))
+            break;
+          UsedIndices.set(Idx);
+        }
+        // Drop the shuffle only when the mask is a pure permutation of V's
+        // lanes: every lane used exactly once, none undef or out of range.
+        if (UsedIndices.all()) {
+          replaceUse(II->getOperandUse(ArgIdx), V);
+          return nullptr;
+        }
+      }
     break;
   }
   default: {
diff --git a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
--- a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
@@ -13,8 +13,7 @@
 
 define i32 @reduce_or(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_or(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32>
@@ -24,8 +23,7 @@
 
 define i32 @reduce_and(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_and(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32>
@@ -35,8 +33,7 @@
 
 define i32 @reduce_xor(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_xor(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32>
@@ -46,8 +43,7 @@
 
 define i32 @reduce_umax(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_umax(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32>
@@ -57,8 +53,7 @@
 
 define i32 @reduce_umin(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_umin(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32>
@@ -68,8 +63,7 @@
 
 define i32 @reduce_smax(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_smax(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32>
@@ -79,8 +73,7 @@
 
 define i32 @reduce_smin(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_smin(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32>
@@ -90,8 +83,7 @@
 
 define float @reduce_fmax(<4 x float> %x) {
 ; CHECK-LABEL: @reduce_fmax(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret float [[RES]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32>
@@ -101,8 +93,7 @@
 
 define float @reduce_fmin(<4 x float> %x) {
 ; CHECK-LABEL: @reduce_fmin(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret float [[RES]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32>
@@ -112,8 +103,7 @@
 
 define float @reduce_fadd(float %a, <4 x float> %x) {
 ; CHECK-LABEL: @reduce_fadd(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret float [[RES]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32>
@@ -123,8 +113,7 @@
 
 define float @reduce_fmul(float %a, <4 x float> %x) {
 ; CHECK-LABEL: @reduce_fmul(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret float [[RES]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32>