diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1682,6 +1682,35 @@
     return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
   }
 
+  auto createBinOpReverse = [&](Value *X, Value *Y) {
+    Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
+    if (auto *BO = dyn_cast<BinaryOperator>(V))
+      BO->copyIRFlags(&Inst);
+    Module *M = Inst.getModule();
+    Function *F = Intrinsic::getDeclaration(
+        M, Intrinsic::experimental_vector_reverse, V->getType());
+    return CallInst::Create(F, V);
+  };
+
+  // NOTE: Reverse shuffles don't require the speculative execution protection
+  // below because they don't affect which lanes take part in the computation.
+
+  Value *V1, *V2;
+  if (match(LHS, m_VecReverse(m_Value(V1)))) {
+    // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
+    if (match(RHS, m_VecReverse(m_Value(V2))) &&
+        (LHS->hasOneUse() || RHS->hasOneUse() ||
+         (LHS == RHS && LHS->hasNUses(2))))
+      return createBinOpReverse(V1, V2);
+
+    // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
+    if (LHS->hasOneUse() && isSplatValue(RHS))
+      return createBinOpReverse(V1, RHS);
+  }
+  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
+  else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
+    return createBinOpReverse(LHS, V2);
+
   // It may not be safe to reorder shuffles and things like div, urem, etc.
   // because we may trap when executing those ops on unknown vector elements.
   // See PR20059.
@@ -1697,7 +1726,6 @@
 
   // If both arguments of the binary operation are shuffles that use the same
   // mask and shuffle within a single vector, move the shuffle after the binop.
-  Value *V1, *V2;
   if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
       match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
       V1->getType() == V2->getType() &&
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -7,14 +7,13 @@
 define <vscale x 4 x i32> @binop_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
   %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
-  %add = add <vscale x 4 x i32> %a.rev, %b.rev
+  %add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
   ret <vscale x 4 x i32> %add
 }
 
@@ -22,9 +21,9 @@
 define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_1(
 ; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add <vscale x 4 x i32> [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -37,10 +36,10 @@
 ; %b.rev has multiple uses
 define <vscale x 4 x i32> @binop_reverse_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_2(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -71,21 +70,35 @@
 ; %a.rev used as both operands
 define <vscale x 4 x i32> @binop_reverse_4(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: @binop_reverse_4(
+; CHECK-NEXT:    [[MUL1:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[A]]
+; CHECK-NEXT:    [[MUL:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[MUL1]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
+;
+  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %mul = mul <vscale x 4 x i32> %a.rev, %a.rev
+  ret <vscale x 4 x i32> %mul
+}
+
+; %a.rev used as both operands along with a third use
+define <vscale x 4 x i32> @binop_reverse_5(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: @binop_reverse_5(
 ; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
+; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
 ; CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A_REV]], [[A_REV]]
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  call void @use_nxv4i32(<vscale x 4 x i32> %a.rev)
   %mul = mul <vscale x 4 x i32> %a.rev, %a.rev
   ret <vscale x 4 x i32> %mul
 }
 
 define <vscale x 4 x i32> @binop_reverse_splat_RHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @binop_reverse_splat_RHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[A_REV]], [[B_SPLAT]]
+; CHECK-NEXT:    [[DIV1:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[B_SPLAT]]
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[DIV1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -115,10 +128,10 @@
 define <vscale x 4 x i32> @binop_reverse_splat_LHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @binop_reverse_splat_LHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[B_SPLAT]], [[A_REV]]
+; CHECK-NEXT:    [[DIV1:%.*]] = udiv <vscale x 4 x i32> [[B_SPLAT]], [[A:%.*]]
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[DIV1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -36,15 +36,15 @@
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <vscale x 8 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, <vscale x 8 x double>* [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP14:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP16:%.*]] = shl i32 [[TMP15]], 3
 ; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 1, [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 [[TMP18]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP19]] to <vscale x 8 x double>*
-; CHECK-NEXT:    store <vscale x 8 x double> [[TMP14]], <vscale x 8 x double>* [[TMP20]], align 8
+; CHECK-NEXT:    store <vscale x 8 x double> [[TMP13]], <vscale x 8 x double>* [[TMP20]], align 8
 ; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP22:%.*]] = shl i64 [[TMP21]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
@@ -130,15 +130,15 @@
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i64* [[TMP17]] to <vscale x 8 x i64>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, <vscale x 8 x i64>* [[TMP18]], align 8
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP22:%.*]] = shl i32 [[TMP21]], 3
 ; CHECK-NEXT:    [[TMP23:%.*]] = sub i32 1, [[TMP22]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, i64* [[TMP19]], i64 [[TMP24]]
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, i64* [[TMP20]], i64 [[TMP24]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = bitcast i64* [[TMP25]] to <vscale x 8 x i64>*
-; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP20]], <vscale x 8 x i64>* [[TMP26]], align 8
+; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP19]], <vscale x 8 x i64>* [[TMP26]], align 8
 ; CHECK-NEXT:    [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP28:%.*]] = shl i64 [[TMP27]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]]
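
The new fold can be exercised on a standalone module. The IR below is a hand-written sketch in the style of the vector-reverse.ll tests above (the function name @example and the result names are illustrative, not taken from the patch); feeding it to `opt -passes=instcombine -S` should produce one binop on the unreversed operands followed by a single reverse, with copyIRFlags in createBinOpReverse carrying the nsw flag onto the new add:

; Input: reverse both operands, then add them.
define <vscale x 4 x i32> @example(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
  %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
  %add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
  ret <vscale x 4 x i32> %add
}

declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)

; Expected output: Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)), two reverses become one.
;   %add1 = add nsw <vscale x 4 x i32> %a, %b
;   %add = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %add1)
;   ret <vscale x 4 x i32> %add

The use-count guards are what make this profitable: with each reverse single-use (or LHS == RHS with exactly two uses, as in binop_reverse_4), the rewrite nets fewer reverses, whereas if both input reverses had other users the transform would merely add a third reverse, so it is skipped.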