diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2060,6 +2060,46 @@
     }
     break;
   }
+  case Intrinsic::experimental_vector_reverse: {
+    Value *BO0, *BO1, *X, *Y;
+    Value *Vec = II->getArgOperand(0);
+    if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
+      auto *OldBinOp = cast<BinaryOperator>(Vec);
+      if (match(BO0, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+                         m_Value(X)))) {
+        // rev(binop rev(X), rev(Y)) --> binop X, Y
+        if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+                           m_Value(Y))))
+          return replaceInstUsesWith(CI,
+                                     BinaryOperator::CreateWithCopiedFlags(
+                                         OldBinOp->getOpcode(), X, Y, OldBinOp,
+                                         OldBinOp->getName(), II));
+        // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
+        if (isSplatValue(BO1))
+          return replaceInstUsesWith(CI,
+                                     BinaryOperator::CreateWithCopiedFlags(
+                                         OldBinOp->getOpcode(), X, BO1,
+                                         OldBinOp, OldBinOp->getName(), II));
+      }
+      // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
+      if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+                         m_Value(Y))) &&
+          isSplatValue(BO0))
+        return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
+                                           OldBinOp->getOpcode(), BO0, Y,
+                                           OldBinOp, OldBinOp->getName(), II));
+    }
+    // rev(unop rev(X)) --> unop X
+    if (match(Vec, m_OneUse(m_UnOp(
+                       m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+                           m_Value(X)))))) {
+      auto *OldUnOp = cast<UnaryOperator>(Vec);
+      auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
+          OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II);
+      return replaceInstUsesWith(CI, NewUnOp);
+    }
+    break;
+  }
   case Intrinsic::vector_reduce_or:
   case Intrinsic::vector_reduce_and: {
     // Canonicalize logical or/and reductions:
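Aside (not part of the patch): the folds above are sound because vector.reverse is a pure lane permutation, and an elementwise binop applied to two identically permuted operands just produces the permuted result, so the outer reverse undoes the inner ones. Below is a minimal illustrative sketch, not taken from the patch; the function name @reverse_binop_demo and the fixed-width <4 x i32> type are made up for readability, and running `opt -instcombine -S` over it should leave a single add of %x and %y.

; Worked example: x = <1, 2, 3, 4>, y = <10, 20, 30, 40>
;   rev(x) + rev(y) = <4, 3, 2, 1> + <40, 30, 20, 10> = <44, 33, 22, 11>
;   rev(<44, 33, 22, 11>) = <11, 22, 33, 44> = x + y
define <4 x i32> @reverse_binop_demo(<4 x i32> %x, <4 x i32> %y) {
  %rev.x = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %x)
  %rev.y = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %y)
  %add = add <4 x i32> %rev.x, %rev.y
  %rev.add = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %add)
  ret <4 x i32> %rev.add
}

declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)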
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Test that the reverse is eliminated if the output and all the inputs
+; of the instruction are calls to reverse.
+
+define <vscale x 4 x i32> @binop_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_reverse(
+; CHECK-NEXT:    [[ADD1:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD1]]
+;
+  %reva = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %revb = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+  %add = add <vscale x 4 x i32> %reva, %revb
+  %revadd = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %add)
+  ret <vscale x 4 x i32> %revadd
+}
+
+define <vscale x 4 x i32> @binop_reverse_splat_RHS(<vscale x 4 x i32> %a, i32 %b) {
+; CHECK-LABEL: @binop_reverse_splat_RHS(
+; CHECK-NEXT:    [[SPLAT_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[UDIV1:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[SPLAT]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[UDIV1]]
+;
+  %reva = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %splat_insert = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %splat_insert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %udiv = udiv <vscale x 4 x i32> %reva, %splat
+  %revadd = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %udiv)
+  ret <vscale x 4 x i32> %revadd
+}
+
+define <vscale x 4 x i32> @binop_reverse_splat_LHS(<vscale x 4 x i32> %a, i32 %b) {
+; CHECK-LABEL: @binop_reverse_splat_LHS(
+; CHECK-NEXT:    [[SPLAT_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[UDIV1:%.*]] = udiv <vscale x 4 x i32> [[SPLAT]], [[A:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[UDIV1]]
+;
+  %reva = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %splat_insert = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %splat_insert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %udiv = udiv <vscale x 4 x i32> %splat, %reva
+  %revadd = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %udiv)
+  ret <vscale x 4 x i32> %revadd
+}
+
+define <vscale x 4 x float> @unop_reverse(<vscale x 4 x float> %a) {
+; CHECK-LABEL: @unop_reverse(
+; CHECK-NEXT:    [[NEG1:%.*]] = fneg fast <vscale x 4 x float> [[A:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[NEG1]]
+;
+  %reva = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %neg = fneg fast <vscale x 4 x float> %reva
+  %revneg = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %neg)
+  ret <vscale x 4 x float> %revneg
+}
+
+declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
+
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
@@ -19,11 +19,9 @@
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[REVERSE6:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
 ; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0nxv4f64(<vscale x 4 x double>* nonnull %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
-; CHECK-NEXT: %[[REVERSE7:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[WIDEMSKLOAD]])
-; CHECK-NEXT: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[REVERSE7]]
-; CHECK-NEXT: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
+; CHECK-NEXT: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[WIDEMSKLOAD]]
 ; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
-; CHECK: call void @llvm.masked.store.nxv4f64.p0nxv4f64(<vscale x 4 x double> %[[REVERSE8]], <vscale x 4 x double>* %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
+; CHECK: call void @llvm.masked.store.nxv4f64.p0nxv4f64(<vscale x 4 x double> %[[FADD]], <vscale x 4 x double>* %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
 
 entry:
   %cmp7 = icmp sgt i64 %N, 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -42,17 +42,15 @@
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[TMP10]] to <vscale x 8 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, <vscale x 8 x double>* [[TMP11]], align 8, !alias.scope !0
-; CHECK-NEXT:    [[REVERSE:%.*]] = call <vscale x 8 x double> @llvm.experimental.vector.reverse.nxv8f64(<vscale x 8 x double> [[WIDE_LOAD]])
-; CHECK-NEXT:    [[TMP12:%.*]] = fadd <vscale x 8 x double> [[REVERSE]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP5]]
-; CHECK-NEXT:    [[REVERSE6:%.*]] = call <vscale x 8 x double> @llvm.experimental.vector.reverse.nxv8f64(<vscale x 8 x double> [[TMP12]])
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[DOTNEG7:%.*]] = mul i32 [[TMP14]], -8
 ; CHECK-NEXT:    [[TMP15:%.*]] = or i32 [[DOTNEG7]], 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = bitcast double* [[TMP17]] to <vscale x 8 x double>*
-; CHECK-NEXT:    store <vscale x 8 x double> [[REVERSE6]], <vscale x 8 x double>* [[TMP18]], align 8, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    store <vscale x 8 x double> [[TMP13]], <vscale x 8 x double>* [[TMP18]], align 8, !alias.scope !3, !noalias !0
 ; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[TMP19]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
@@ -134,17 +132,15 @@
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[TMP6]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <vscale x 8 x i64>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, <vscale x 8 x i64>* [[TMP11]], align 8, !alias.scope !9
-; CHECK-NEXT:    [[REVERSE:%.*]] = call <vscale x 8 x i64> @llvm.experimental.vector.reverse.nxv8i64(<vscale x 8 x i64> [[WIDE_LOAD]])
-; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 8 x i64> [[REVERSE]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP5]]
-; CHECK-NEXT:    [[REVERSE6:%.*]] = call <vscale x 8 x i64> @llvm.experimental.vector.reverse.nxv8i64(<vscale x 8 x i64> [[TMP12]])
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[DOTNEG7:%.*]] = mul i32 [[TMP14]], -8
 ; CHECK-NEXT:    [[TMP15:%.*]] = or i32 [[DOTNEG7]], 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[TMP13]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i64* [[TMP17]] to <vscale x 8 x i64>*
-; CHECK-NEXT:    store <vscale x 8 x i64> [[REVERSE6]], <vscale x 8 x i64>* [[TMP18]], align 8, !alias.scope !12, !noalias !9
+; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP13]], <vscale x 8 x i64>* [[TMP18]], align 8, !alias.scope !12, !noalias !9
 ; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[TMP19]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]