diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1683,6 +1683,35 @@
     return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
   }
 
+  auto createBinOpReverse = [&](Value *X, Value *Y) {
+    Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
+    if (auto *BO = dyn_cast<BinaryOperator>(V))
+      BO->copyIRFlags(&Inst);
+    Module *M = Inst.getModule();
+    Function *F = Intrinsic::getDeclaration(
+        M, Intrinsic::experimental_vector_reverse, V->getType());
+    return CallInst::Create(F, V);
+  };
+
+  // NOTE: Reverse shuffles don't require the speculative execution protection
+  // below because they don't affect which lanes take part in the computation.
+
+  Value *V1, *V2;
+  if (match(LHS, m_VecReverse(m_Value(V1)))) {
+    // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
+    if (match(RHS, m_VecReverse(m_Value(V2))) &&
+        (LHS->hasOneUse() || RHS->hasOneUse() ||
+         (LHS == RHS && LHS->hasNUses(2))))
+      return createBinOpReverse(V1, V2);
+
+    // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
+    if (LHS->hasOneUse() && isSplatValue(RHS))
+      return createBinOpReverse(V1, RHS);
+  }
+  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
+  else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
+    return createBinOpReverse(LHS, V2);
+
   // It may not be safe to reorder shuffles and things like div, urem, etc.
   // because we may trap when executing those ops on unknown vector elements.
   // See PR20059.
@@ -1698,7 +1727,6 @@
 
   // If both arguments of the binary operation are shuffles that use the same
   // mask and shuffle within a single vector, move the shuffle after the binop.
-  Value *V1, *V2;
   if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
       match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
       V1->getType() == V2->getType() &&
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -7,14 +7,13 @@
 
 define <vscale x 4 x i32> @binop_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
   %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
-  %add = add <vscale x 4 x i32> %a.rev, %b.rev
+  %add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
   ret <vscale x 4 x i32> %add
 }
 
@@ -22,9 +21,9 @@
 define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_1(
 ; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add <vscale x 4 x i32> [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -37,10 +36,10 @@
 ; %b.rev has multiple uses
 define <vscale x 4 x i32> @binop_reverse_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_2(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -71,21 +70,35 @@
 ; %a.rev used as both operands
 define <vscale x 4 x i32> @binop_reverse_4(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: @binop_reverse_4(
+; CHECK-NEXT:    [[MUL1:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[A]]
+; CHECK-NEXT:    [[MUL:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[MUL1]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
+;
+  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %mul = mul <vscale x 4 x i32> %a.rev, %a.rev
+  ret <vscale x 4 x i32> %mul
+}
+
+; %a.rev used as both operands along with a third use
+define <vscale x 4 x i32> @binop_reverse_5(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: @binop_reverse_5(
 ; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
+; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
 ; CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A_REV]], [[A_REV]]
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  call void @use_nxv4i32(<vscale x 4 x i32> %a.rev)
   %mul = mul <vscale x 4 x i32> %a.rev, %a.rev
   ret <vscale x 4 x i32> %mul
 }
 
 define <vscale x 4 x i32> @binop_reverse_splat_RHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @binop_reverse_splat_RHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[A_REV]], [[B_SPLAT]]
+; CHECK-NEXT:    [[DIV1:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[B_SPLAT]]
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[DIV1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -115,10 +128,10 @@
 
 define <vscale x 4 x i32> @binop_reverse_splat_LHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @binop_reverse_splat_LHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[B_SPLAT]], [[A_REV]]
+; CHECK-NEXT:    [[DIV1:%.*]] = udiv <vscale x 4 x i32> [[B_SPLAT]], [[A:%.*]]
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[DIV1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -35,19 +35,19 @@
 ; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP10]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP14:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP16:%.*]] = shl i32 [[TMP15]], 3
-; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 1, [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 [[TMP18]]
-; CHECK-NEXT:    store <vscale x 8 x double> [[TMP14]], ptr [[TMP19]], align 8
-; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP22:%.*]] = shl i64 [[TMP21]], 3
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
-; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP15:%.*]] = shl i32 [[TMP14]], 3
+; CHECK-NEXT:    [[TMP16:%.*]] = sub i32 1, [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 [[TMP17]]
+; CHECK-NEXT:    store <vscale x 8 x double> [[TMP12]], ptr [[TMP18]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[TMP19]], 3
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -62,8 +62,8 @@
 ; CHECK-NEXT:    [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_08]] = add nsw i64 [[I_08_IN]], -1
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP24]], 1.000000e+00
+; CHECK-NEXT:    [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP22]], 1.000000e+00
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
 ; CHECK-NEXT:    store double [[ADD]], ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
@@ -127,19 +127,19 @@
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP17]], align 8
+; CHECK-NEXT:    [[TMP18:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP22:%.*]] = shl i32 [[TMP21]], 3
-; CHECK-NEXT:    [[TMP23:%.*]] = sub i32 1, [[TMP22]]
-; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i64 [[TMP24]]
-; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP20]], ptr [[TMP25]], align 8
-; CHECK-NEXT:    [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP28:%.*]] = shl i64 [[TMP27]], 3
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]]
-; CHECK-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP21:%.*]] = shl i32 [[TMP20]], 3
+; CHECK-NEXT:    [[TMP22:%.*]] = sub i32 1, [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i64 [[TMP23]]
+; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP18]], ptr [[TMP24]], align 8
+; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP26:%.*]] = shl i64 [[TMP25]], 3
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]]
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -154,8 +154,8 @@
 ; CHECK-NEXT:    [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_09]] = add nsw i64 [[I_09_IN]], -1
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]]
-; CHECK-NEXT:    [[TMP30:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP30]], 1
+; CHECK-NEXT:    [[TMP28:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP28]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]]
 ; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1
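
Note for reviewers (illustration only, not part of the patch): the new fold can be exercised in isolation by running `opt -passes=instcombine -S` over a small hand-written input such as the sketch below; the function and value names here are invented for the example.

define <vscale x 4 x i32> @reverse_add_example(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
  ; Both operands are single-use reverses, so with this patch InstCombine should
  ; emit one add followed by a single reverse instead of two reverses and an add.
  %x.rev = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %x)
  %y.rev = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %y)
  %add = add <vscale x 4 x i32> %x.rev, %y.rev
  ret <vscale x 4 x i32> %add
}

declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)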