Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2713,6 +2713,54 @@
   return new ShuffleVectorInst(X, Y, NewMask);
 }
 
+// Fold away a pair of reverse shuffles surrounding a one-use compare:
+//   (reverse (cmp (reverse x), y)) -> (cmp x, (reverse y))
+//   (reverse (cmp y, (reverse x))) -> (cmp (reverse y), x)
+// The two reverses of x cancel; at most one new reverse (of y) is created,
+// so the transform never increases the instruction count.
+static Instruction *foldDoubleReverse(ShuffleVectorInst &SVI,
+                                      InstCombiner::BuilderTy &Builder) {
+  if (!SVI.isReverse())
+    return nullptr;
+  ArrayRef<int> OuterMask = SVI.getShuffleMask();
+
+  Instruction *Arith;
+  if (!match(SVI.getOperand(0), m_OneUse(m_Instruction(Arith))))
+    return nullptr;
+
+  if (CmpInst *CI = dyn_cast<CmpInst>(Arith)) {
+    Value *LHS = CI->getOperand(0), *RHS = CI->getOperand(1);
+    CmpInst::Predicate Pred = CI->getPredicate();
+
+    // Find the compare operand that is a one-use reverse of x; the other
+    // operand is y.
+    bool IsLHSReverse = false;
+    Value *Y = RHS;
+    ShuffleVectorInst *RevX = dyn_cast<ShuffleVectorInst>(LHS);
+    if (RevX && RevX->hasOneUse() && RevX->isReverse()) {
+      IsLHSReverse = true;
+    } else {
+      RevX = dyn_cast<ShuffleVectorInst>(RHS);
+      if (!RevX || !RevX->hasOneUse() || !RevX->isReverse())
+        return nullptr;
+      Y = LHS;
+    }
+    Value *X = RevX->getOperand(0);
+
+    // Build (reverse y). SVI.isReverse() guarantees the outer mask does not
+    // change the vector length, so it is also a valid reverse mask for y.
+    // Constant operands are reversed at compile time.
+    Value *RevY;
+    Constant *Poisons = PoisonValue::get(RevX->getType());
+    if (Constant *C = dyn_cast<Constant>(Y))
+      RevY = ConstantExpr::getShuffleVector(C, Poisons, OuterMask);
+    else
+      RevY = Builder.CreateShuffleVector(Y, Poisons, OuterMask);
+
+    // Keep the operands on their original sides of the predicate.
+    Value *V1 = IsLHSReverse ? X : RevY;
+    Value *V2 = IsLHSReverse ? RevY : X;
+
+    CmpInst *NewCI = CmpInst::Create(CI->getOpcode(), Pred, V1, V2);
+    NewCI->copyIRFlags(CI); // Preserve fast-math flags on fcmp.
+    return NewCI;
+  }
+  // TODO: (reverse (unaryop (reverse x))) -> (unaryop x)
+  // TODO: (reverse (binop (reverse x), y)) -> (binop x, (reverse y))
+  return nullptr;
+}
+
 // Splatting the first element of the result of a BinOp, where any of the
 // BinOp's operands are the result of a first element splat can be simplified to
 // splatting the first element of the result of the BinOp
@@ -2841,6 +2889,9 @@
   if (Instruction *I = foldCastShuffle(SVI, Builder))
     return I;
 
+  if (Instruction *I = foldDoubleReverse(SVI, Builder))
+    return I;
+
   APInt UndefElts(VWidth, 0);
   APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
   if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
Index: llvm/test/Transforms/InstCombine/vector-double-reverse.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/vector-double-reverse.ll
@@ -0,0 +1,244 @@
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; NOTE(review): the shuffle masks and cmp constants in this file were
+; reconstructed after extraction garbling - confirm against the original patch.
+
+define <32 x i1> @icmp32_8_eq(<32 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq
+; CHECK-NEXT: [[T0:%.*]] = icmp eq <32 x i8> %0, zeroinitializer
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp eq <32 x i8> %t1, zeroinitializer
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_eq(<8 x i8> %0) {
+; CHECK-LABEL: @icmp8_8_eq
+; CHECK-NEXT: [[T0:%.*]] = icmp eq <8 x i8> %0, zeroinitializer
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp eq <8 x i8> %t1, zeroinitializer
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @icmp32_8_neq(<32 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_neq
+; CHECK-NEXT: [[T0:%.*]] = icmp ne <32 x i8> %0, zeroinitializer
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp ne <32 x i8> %t1, zeroinitializer
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_neq(<8 x i8> %0) {
+; CHECK-LABEL: @icmp8_8_neq
+; CHECK-NEXT: [[T0:%.*]] = icmp ne <8 x i8> %0, zeroinitializer
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp ne <8 x i8> %t1, zeroinitializer
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @icmp32_8_ult(<32 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_ult
+; CHECK-NEXT: [[T0:%.*]] = icmp ult <32 x i8> %0, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp ult <32 x i8> %t1, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_ult(<8 x i8> %0) {
+; CHECK-LABEL: @icmp8_8_ult
+; CHECK-NEXT: [[T0:%.*]] = icmp ult <8 x i8> %0, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp ult <8 x i8> %t1, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @fcmp32_8_oeq(<32 x float> %0) {
+; CHECK-LABEL: @fcmp32_8_oeq
+; CHECK-NEXT: [[T0:%.*]] = fcmp oeq <32 x float> %0, zeroinitializer
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = fcmp oeq <32 x float> %t1, zeroinitializer
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @fcmp8_8_oeq(<8 x float> %0) {
+; CHECK-LABEL: @fcmp8_8_oeq
+; CHECK-NEXT: [[T0:%.*]] = fcmp oeq <8 x float> %0, zeroinitializer
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = fcmp oeq <8 x float> %t1, zeroinitializer
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @fcmp32_8_olt(<32 x float> %0) {
+; CHECK-LABEL: @fcmp32_8_olt
+; CHECK-NEXT: [[T0:%.*]] = fcmp olt <32 x float> %0, zeroinitializer
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = fcmp olt <32 x float> %t1, zeroinitializer
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @fcmp8_8_olt(<8 x float> %0) {
+; CHECK-LABEL: @fcmp8_8_olt
+; CHECK-NEXT: [[T0:%.*]] = fcmp olt <8 x float> %0, zeroinitializer
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = fcmp olt <8 x float> %t1, zeroinitializer
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @fcmp32_8_uge(<32 x float> %0) {
+; CHECK-LABEL: @fcmp32_8_uge
+; CHECK-NEXT: [[T0:%.*]] = fcmp uge <32 x float> %0, zeroinitializer
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = fcmp uge <32 x float> %t1, zeroinitializer
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @fcmp8_8_uge(<8 x float> %0) {
+; CHECK-LABEL: @fcmp8_8_uge
+; CHECK-NEXT: [[T0:%.*]] = fcmp uge <8 x float> %0, zeroinitializer
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = fcmp uge <8 x float> %t1, zeroinitializer
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @fcmp32_8_ult(<32 x float> %0) {
+; CHECK-LABEL: @fcmp32_8_ult
+; CHECK-NEXT: [[T0:%.*]] = fcmp ult <32 x float> %0, zeroinitializer
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = fcmp ult <32 x float> %t1, zeroinitializer
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @fcmp8_8_ult(<8 x float> %0) {
+; CHECK-LABEL: @fcmp8_8_ult
+; CHECK-NEXT: [[T0:%.*]] = fcmp ult <8 x float> %0, zeroinitializer
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = fcmp ult <8 x float> %t1, zeroinitializer
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+
+define <32 x i1> @icmp32_8_eq_constrev(<32 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq_constrev
+; CHECK-NEXT: [[T0:%.*]] = icmp eq <32 x i8> %0, <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp eq <32 x i8> %t1, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_eq_constrev(<8 x i8> %0) {
+; CHECK-LABEL: @icmp8_8_eq_constrev
+; CHECK-NEXT: [[T0:%.*]] = icmp eq <8 x i8> %0, <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp eq <8 x i8> %t1, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+
+define <32 x i1> @icmp32_8_ult_nonconst_lhs_rev(<32 x i8> %0, <32 x i8> %1) {
+; CHECK-LABEL: @icmp32_8_ult_nonconst_lhs_rev
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <32 x i8> %1, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[T1:%.*]] = icmp ugt <32 x i8> [[T0]], %0
+; CHECK-NEXT: ret <32 x i1> [[T1]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp ult <32 x i8> %t1, %1
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_ult_nonconst_lhs_rev(<8 x i8> %0, <8 x i8> %1) {
+; CHECK-LABEL: @icmp8_8_ult_nonconst_lhs_rev
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[T1:%.*]] = icmp ugt <8 x i8> [[T0]], %0
+; CHECK-NEXT: ret <8 x i1> [[T1]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp ult <8 x i8> %t1, %1
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @icmp32_8_ult_nonconst_rhs_rev(<32 x i8> %0, <32 x i8> %1) {
+; CHECK-LABEL: @icmp32_8_ult_nonconst_rhs_rev
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <32 x i8> %1, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[T1:%.*]] = icmp ult <32 x i8> [[T0]], %0
+; CHECK-NEXT: ret <32 x i1> [[T1]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp ult <32 x i8> %1, %t1
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_ult_nonconst_rhs_rev(<8 x i8> %0, <8 x i8> %1) {
+; CHECK-LABEL: @icmp8_8_ult_nonconst_rhs_rev
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[T1:%.*]] = icmp ult <8 x i8> [[T0]], %0
+; CHECK-NEXT: ret <8 x i1> [[T1]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp ult <8 x i8> %1, %t1
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+; Negative test: the inner shuffle is not a reverse, so no fold.
+define <8 x i1> @icmp32_8_eq_negative_non_reverse1(<8 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq_negative_non_reverse1
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-NEXT: [[T1:%.*]] = icmp eq <8 x i8> [[T0]], zeroinitializer
+; CHECK-NEXT: [[T2:%.*]] = shufflevector <8 x i1> [[T1]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i1> [[T2]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  %t2 = icmp eq <8 x i8> %t1, zeroinitializer
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i1> %t3
+}
+
+; Negative test: the outer shuffle is not a reverse, so no fold.
+define <8 x i1> @icmp32_8_eq_negative_non_reverse2(<8 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq_negative_non_reverse2
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[T1:%.*]] = icmp eq <8 x i8> [[T0]], zeroinitializer
+; CHECK-NEXT: [[T2:%.*]] = shufflevector <8 x i1> [[T1]], <8 x i1> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-NEXT: ret <8 x i1> [[T2]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp eq <8 x i8> %t1, zeroinitializer
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i1> %t3
+}
+
+; Negative test: length-changing shuffles are never reverses, so no fold.
+define <8 x i1> @icmp32_8_eq_negative_length_changing(<8 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq_negative_length_changing
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %0, <8 x i8> poison, <10 x i32> <i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[T1:%.*]] = icmp eq <10 x i8> [[T0]], zeroinitializer
+; CHECK-NEXT: [[T2:%.*]] = shufflevector <10 x i1> [[T1]], <10 x i1> poison, <8 x i32> <i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
+; CHECK-NEXT: ret <8 x i1> [[T2]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <10 x i32> <i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %t2 = icmp eq <10 x i8> %t1, zeroinitializer
+  %t3 = shufflevector <10 x i1> %t2, <10 x i1> poison, <8 x i32> <i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
+  ret <8 x i1> %t3
+}
+
Index: llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -36,25 +36,21 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -3
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8
-; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -4
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 -3
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP8:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 -3
-; CHECK-NEXT:    [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP11]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
+; CHECK-NEXT:    [[TMP8:%.*]] = fcmp une <4 x double> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP11]], i32 8, <4 x i1> [[TMP8]], <4 x double> poison)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 -4
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i64 -3
-; CHECK-NEXT:    [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP14]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
+; CHECK-NEXT:    [[TMP9:%.*]] = fcmp une <4 x double> [[WIDE_LOAD1]], zeroinitializer
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP14]], i32 8, <4 x i1> [[TMP9]], <4 x double> poison)
 ; CHECK-NEXT:    [[TMP16:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT:    [[TMP17:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP16]], ptr [[TMP11]], i32 8, <4 x i1> [[REVERSE3]])
-; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP17]], ptr [[TMP14]], i32 8, <4 x i1> [[REVERSE5]])
+; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP16]], ptr [[TMP11]], i32 8, <4 x i1> [[TMP8]])
+; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP17]], ptr [[TMP14]], i32 8, <4 x i1> [[TMP9]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]