diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -6091,6 +6092,31 @@
   const CmpInst::Predicate Pred = Cmp.getPredicate();
   Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
   Value *V1, *V2;
+
+  auto createCmpReverse = [&](CmpInst::Predicate Pred, Value *X, Value *Y) {
+    Value *V = Builder.CreateCmp(Pred, X, Y, Cmp.getName());
+    if (auto *I = dyn_cast<Instruction>(V))
+      I->copyIRFlags(&Cmp);
+    Module *M = Cmp.getModule();
+    Function *F = Intrinsic::getDeclaration(
+        M, Intrinsic::experimental_vector_reverse, V->getType());
+    return CallInst::Create(F, V);
+  };
+
+  if (match(LHS, m_VecReverse(m_Value(V1)))) {
+    // cmp Pred, rev(V1), rev(V2) --> rev(cmp Pred, V1, V2)
+    if (match(RHS, m_VecReverse(m_Value(V2))) &&
+        (LHS->hasOneUse() || RHS->hasOneUse()))
+      return createCmpReverse(Pred, V1, V2);
+
+    // cmp Pred, rev(V1), RHSSplat --> rev(cmp Pred, V1, RHSSplat)
+    if (LHS->hasOneUse() && isSplatValue(RHS))
+      return createCmpReverse(Pred, V1, RHS);
+  }
+  // cmp Pred, LHSSplat, rev(V2) --> rev(cmp Pred, LHSSplat, V2)
+  else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
+    return createCmpReverse(Pred, LHS, V2);
+
   ArrayRef<int> M;
   if (!match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(M))))
     return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -173,9 +173,8 @@
 define <vscale x 4 x i1> @icmp_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @icmp_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT: ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -188,9 +187,9 @@
 define <vscale x 4 x i1> @icmp_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @icmp_reverse_1(
 ; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT: call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT: ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -203,10 +202,10 @@
 ; %b.rev has multiple uses
 define <vscale x 4 x i1> @icmp_reverse_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @icmp_reverse_2(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT: call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A:%.*]], [[B]]
+; CHECK-NEXT: [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT: ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -236,10 +235,10 @@
 define <vscale x 4 x i1> @icmp_reverse_splat_RHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @icmp_reverse_splat_RHS(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <vscale x 4 x i32> [[A_REV]], [[B_SPLAT]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <vscale x 4 x i32> [[B_SPLAT]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT: ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -269,10 +268,10 @@
 define <vscale x 4 x i1> @icmp_reverse_splat_LHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @icmp_reverse_splat_LHS(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[B_SPLAT]], [[A_REV]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <vscale x 4 x i32> [[B_SPLAT]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP1]])
 ; CHECK-NEXT: ret <vscale x 4 x i1> [[CMP]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -656,15 +655,12 @@
 define <vscale x 4 x i1> @reverse_fcmp_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: @reverse_fcmp_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
-; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> [[B:%.*]])
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <vscale x 4 x float> [[A_REV]], [[B_REV]]
-; CHECK-NEXT: [[CMP_REV:%.*]] = tail call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> [[CMP]])
-; CHECK-NEXT: ret <vscale x 4 x i1> [[CMP_REV]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <vscale x 4 x i1> [[CMP1]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
   %b.rev = tail call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %b)
-  %cmp = fcmp olt <vscale x 4 x float> %a.rev, %b.rev
+  %cmp = fcmp fast olt <vscale x 4 x float> %a.rev, %b.rev
   %cmp.rev = tail call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %cmp)
   ret <vscale x 4 x i1> %cmp.rev
 }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
@@ -10,7 +10,7 @@
 ; The test checks if the mask is being correctly created, reverted and used
-; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -S \
+; RUN: opt -passes=loop-vectorize,dce -mtriple aarch64-linux-gnu -S \
 ; RUN:   -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -20,9 +20,11 @@
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[REVERSE6:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
 ; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
-; CHECK-NEXT: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[WIDEMSKLOAD]]
+; CHECK: %[[REVERSE7:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[WIDEMSKLOAD]])
+; CHECK: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[REVERSE7]]
+; CHECK: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
 ; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
-; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[FADD]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
+; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[REVERSE8]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
 entry:
   %cmp7 = icmp sgt i64 %N, 0
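For illustration only, not part of the patch: a minimal standalone IR sketch of the new fold, using a hypothetical function @example and the same scalable types as the tests above. Running it through `opt -passes=instcombine -S` should now move the compare onto the unreversed operands and emit a single reverse of the i1 result, matching the updated @icmp_reverse check lines.

; Hypothetical input, assuming the intrinsic's pre-rename (experimental) name used in this patch.
define <vscale x 4 x i1> @example(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %a.rev = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
  %b.rev = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
  ; Expected after this patch:
  ;   %cmp1 = icmp eq <vscale x 4 x i32> %a, %b
  ;   %cmp  = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %cmp1)
  %cmp = icmp eq <vscale x 4 x i32> %a.rev, %b.rev
  ret <vscale x 4 x i1> %cmp
}

declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)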