Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -21,12 +21,16 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Vectorize.h" @@ -90,6 +94,7 @@ // run loop was changed to dispatch on opcode. bool vectorizeLoadInsert(Instruction &I); bool widenSubvectorLoad(Instruction &I); + bool foldReverseLoop(Instruction &I); ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0, ExtractElementInst *Ext1, unsigned PreferredExtractIndex) const; @@ -331,6 +336,41 @@ return true; } +// Fold (reverse (icmp (reverse x), c)) -> (icmp x, c), if c is a splat. 
+// TODO: (reverse (unaryop (reverse x))) -> (unaryop x)
+// TODO: (reverse (binop (reverse x), c)) -> (binop x, c)
+// TODO: (reverse (binop (reverse x), (reverse y))) -> (binop x, y)
+bool VectorCombine::foldReverseLoop(Instruction &I) {
+  // Only reached from the ShuffleVector opcode case, so the cast is safe.
+  auto *Rev = cast<ShuffleVectorInst>(&I);
+  if (!Rev->isReverse())
+    return false;
+
+  Instruction *Cmp;
+  if (!match(Rev->getOperand(0), m_OneUse(m_Instruction(Cmp))) ||
+      !isa<CmpInst>(Cmp))
+    return false;
+
+  Value *InnerIst;
+  Constant *C;
+  CmpInst::Predicate Pred;
+  if (!match(Cmp, m_Cmp(Pred, m_OneUse(m_Value(InnerIst)), m_Constant(C))))
+    return false;
+
+  // Only a splat is unchanged by lane reversal, so C can be reused as-is.
+  if (!C->getSplatValue())
+    return false;
+
+  auto *InnerRev = dyn_cast<ShuffleVectorInst>(InnerIst);
+  if (!InnerRev || !InnerRev->isReverse())
+    return false;
+
+  // The two reverses cancel: compare the un-reversed source directly.
+  Value *NewI = Builder.CreateCmp(Pred, InnerRev->getOperand(0), C);
+  replaceValue(I, *NewI);
+  return true;
+}
+
 /// Determine which, if any, of the inputs should be replaced by a shuffle
 /// followed by extract from a different index.
ExtractElementInst *VectorCombine::getShuffleExtract( @@ -1725,6 +1765,7 @@ break; case Instruction::ShuffleVector: MadeChange |= widenSubvectorLoad(I); + MadeChange |= foldReverseLoop(I); break; case Instruction::Load: MadeChange |= scalarizeLoadExtract(I); Index: llvm/test/Transforms/VectorCombine/reverse-loop.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/VectorCombine/reverse-loop.ll @@ -0,0 +1,141 @@ +; RUN: opt -S -passes=vector-combine < %s | FileCheck %s + +define <32 x i1> @icmp32_8_eq(<32 x i8> %0) { +; CHECK-LABEL: @icmp32_8_eq +; CHECK-NEXT: [[T0:%.*]] = icmp eq <32 x i8> %0, +; CHECK-NEXT: ret <32 x i1> [[T0]] + %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32> + %t2 = icmp eq <32 x i8> %t1, + %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> + ret <32 x i1> %t3 +} + +define <8 x i1> @icmp8_8_eq(<8 x i8> %0) { +; CHECK-LABEL: @icmp8_8_eq +; CHECK-NEXT: [[T0:%.*]] = icmp eq <8 x i8> %0, +; CHECK-NEXT: ret <8 x i1> [[T0]] + %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> + %t2 = icmp eq <8 x i8> %t1, + %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> + ret <8 x i1> %t3 +} + +define <32 x i1> @icmp32_8_neq(<32 x i8> %0) { +; CHECK-LABEL: @icmp32_8_neq +; CHECK-NEXT: [[T0:%.*]] = icmp ne <32 x i8> %0, +; CHECK-NEXT: ret <32 x i1> [[T0]] + %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32> + %t2 = icmp ne <32 x i8> %t1, + %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> + ret <32 x i1> %t3 +} + +define <8 x i1> @icmp8_8_neq(<8 x i8> %0) { +; CHECK-LABEL: @icmp8_8_neq +; CHECK-NEXT: [[T0:%.*]] = icmp ne <8 x i8> %0, +; CHECK-NEXT: ret <8 x i1> [[T0]] + %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> + %t2 = icmp ne <8 x i8> %t1, + %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> + ret <8 x i1> %t3 +} + +define <32 x i1> @icmp32_8_ult(<32 x i8> %0) { +; CHECK-LABEL: @icmp32_8_ult +; CHECK-NEXT: 
[[T0:%.*]] = icmp ult <32 x i8> %0, +; CHECK-NEXT: ret <32 x i1> [[T0]] + %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32> + %t2 = icmp ult <32 x i8> %t1, + %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> + ret <32 x i1> %t3 +} + +define <8 x i1> @icmp8_8_ult(<8 x i8> %0) { +; CHECK-LABEL: @icmp8_8_ult +; CHECK-NEXT: [[T0:%.*]] = icmp ult <8 x i8> %0, +; CHECK-NEXT: ret <8 x i1> [[T0]] + %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> + %t2 = icmp ult <8 x i8> %t1, + %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> + ret <8 x i1> %t3 +} + +define <32 x i1> @fcmp32_8_oeq(<32 x float> %0) { +; CHECK-LABEL: @fcmp32_8_oeq +; CHECK-NEXT: [[T0:%.*]] = fcmp oeq <32 x float> %0, +; CHECK-NEXT: ret <32 x i1> [[T0]] + %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32> + %t2 = fcmp oeq <32 x float> %t1, + %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> + ret <32 x i1> %t3 +} + +define <8 x i1> @fcmp8_8_oeq(<8 x float> %0) { +; CHECK-LABEL: @fcmp8_8_oeq +; CHECK-NEXT: [[T0:%.*]] = fcmp oeq <8 x float> %0, +; CHECK-NEXT: ret <8 x i1> [[T0]] + %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> + %t2 = fcmp oeq <8 x float> %t1, + %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> + ret <8 x i1> %t3 +} + +define <32 x i1> @fcmp32_8_olt(<32 x float> %0) { +; CHECK-LABEL: @fcmp32_8_olt +; CHECK-NEXT: [[T0:%.*]] = fcmp olt <32 x float> %0, +; CHECK-NEXT: ret <32 x i1> [[T0]] + %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32> + %t2 = fcmp olt <32 x float> %t1, + %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> + ret <32 x i1> %t3 +} + +define <8 x i1> @fcmp8_8_olt(<8 x float> %0) { +; CHECK-LABEL: @fcmp8_8_olt +; CHECK-NEXT: [[T0:%.*]] = fcmp olt <8 x float> %0, +; CHECK-NEXT: ret <8 x i1> [[T0]] + %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> + %t2 = fcmp olt <8 x float> %t1, + %t3 = shufflevector <8 x i1> %t2, <8 x 
i1> poison, <8 x i32> + ret <8 x i1> %t3 +} + +define <32 x i1> @fcmp32_8_uno(<32 x float> %0) { +; CHECK-LABEL: @fcmp32_8_uno +; CHECK-NEXT: [[T0:%.*]] = fcmp uno <32 x float> %0, +; CHECK-NEXT: ret <32 x i1> [[T0]] + %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32> + %t2 = fcmp uno <32 x float> %t1, + %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> + ret <32 x i1> %t3 +} + +define <8 x i1> @fcmp8_8_uno(<8 x float> %0) { +; CHECK-LABEL: @fcmp8_8_uno +; CHECK-NEXT: [[T0:%.*]] = fcmp uno <8 x float> %0, +; CHECK-NEXT: ret <8 x i1> [[T0]] + %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> + %t2 = fcmp uno <8 x float> %t1, + %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> + ret <8 x i1> %t3 +} + +define <32 x i1> @fcmp32_8_ult(<32 x float> %0) { +; CHECK-LABEL: @fcmp32_8_ult +; CHECK-NEXT: [[T0:%.*]] = fcmp ult <32 x float> %0, +; CHECK-NEXT: ret <32 x i1> [[T0]] + %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32> + %t2 = fcmp ult <32 x float> %t1, + %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> + ret <32 x i1> %t3 +} + +define <8 x i1> @fcmp8_8_ult(<8 x float> %0) { +; CHECK-LABEL: @fcmp8_8_ult +; CHECK-NEXT: [[T0:%.*]] = fcmp ult <8 x float> %0, +; CHECK-NEXT: ret <8 x i1> [[T0]] + %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> + %t2 = fcmp ult <8 x float> %t1, + %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> + ret <8 x i1> %t3 +}