Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2713,6 +2713,55 @@
   return new ShuffleVectorInst(X, Y, NewMask);
 }
 
+/// Fold a vector reverse of a compare that has a reversed operand by moving
+/// the reverse onto the compare's other operand:
+///   (reverse (cmp (reverse x), y)) -> (cmp x, (reverse y))
+///   (reverse (cmp y, (reverse x))) -> (cmp (reverse y), x)
+/// The compare and the inner reverse must each have a single use. Returns the
+/// replacement compare, or nullptr if \p SVI does not match.
+static Instruction *foldDoubleReverse(ShuffleVectorInst &SVI,
+                                      InstCombiner::BuilderTy &Builder) {
+  if (!SVI.isReverse())
+    return nullptr;
+
+  Instruction *Arith;
+  if (!match(SVI.getOperand(0), m_OneUse(m_Instruction(Arith))))
+    return nullptr;
+
+  if (auto *CI = dyn_cast<CmpInst>(Arith)) {
+    Value *LHS = CI->getOperand(0), *RHS = CI->getOperand(1);
+
+    // Returns V as a shuffle when it is a single-use vector reverse.
+    auto MatchOneUseReverse = [](Value *V) -> ShuffleVectorInst * {
+      auto *Shuf = dyn_cast<ShuffleVectorInst>(V);
+      return Shuf && Shuf->hasOneUse() && Shuf->isReverse() ? Shuf : nullptr;
+    };
+
+    Value *NewLHS, *NewRHS;
+    if (ShuffleVectorInst *RevX = MatchOneUseReverse(LHS)) {
+      // (reverse (cmp (reverse x), y)) -> (cmp x, (reverse y))
+      NewLHS = RevX->getOperand(0);
+      NewRHS = Builder.CreateVectorReverse(RHS);
+    } else if (ShuffleVectorInst *RevX = MatchOneUseReverse(RHS)) {
+      // (reverse (cmp y, (reverse x))) -> (cmp (reverse y), x)
+      NewLHS = Builder.CreateVectorReverse(LHS);
+      NewRHS = RevX->getOperand(0);
+    } else {
+      return nullptr;
+    }
+
+    Instruction *NewCmp =
+        CmpInst::Create(CI->getOpcode(), CI->getPredicate(), NewLHS, NewRHS);
+    // CmpInst::Create starts from default flags; carry over the original
+    // compare's IR flags (fast-math flags on fcmp) so the fold keeps them.
+    NewCmp->copyIRFlags(CI);
+    return NewCmp;
+  }
+  // TODO: (reverse (unaryop (reverse x))) -> (unaryop x)
+  // TODO: (reverse (binop (reverse x), y)) -> (binop x, (reverse y))
+  return nullptr;
+}
+
 // Splatting the first element of the result of a BinOp, where any of the
 // BinOp's operands are the result of a first element splat can be simplified to
 // splatting the first element of the result of the BinOp
@@ -2840,6 +2889,9 @@
   if (Instruction *I = foldCastShuffle(SVI, Builder))
     return I;
 
+  if (Instruction *I = foldDoubleReverse(SVI, Builder))
+    return I;
+
   APInt UndefElts(VWidth, 0);
   APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
   if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
Index: llvm/test/Transforms/InstCombine/vector-double-reverse.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/vector-double-reverse.ll
@@ -0,0 +1,244 @@
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+define <32 x i1> @icmp32_8_eq(<32 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq
+; CHECK-NEXT: [[T0:%.*]] = icmp eq <32 x i8> %0,
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32>
+  %t2 = icmp eq <32 x i8> %t1,
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_eq(<8 x i8> %0) {
+; CHECK-LABEL: @icmp8_8_eq
+; CHECK-NEXT: [[T0:%.*]] = icmp eq <8 x i8> %0,
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+  %t2 = icmp eq <8 x i8> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @icmp32_8_neq(<32 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_neq
+; CHECK-NEXT: [[T0:%.*]] = icmp ne <32 x i8> %0,
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32>
+  %t2 = icmp ne <32 x i8> %t1,
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_neq(<8 x i8> %0) {
+; CHECK-LABEL: @icmp8_8_neq
+; CHECK-NEXT: [[T0:%.*]] = icmp ne <8 x i8> %0,
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+  %t2 = icmp ne <8 x i8> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @icmp32_8_ult(<32 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_ult
+; CHECK-NEXT: [[T0:%.*]] = icmp ult <32 x i8> %0,
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32>
+  %t2 = icmp ult <32 x i8> %t1,
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_ult(<8 x i8> %0) {
+; CHECK-LABEL: @icmp8_8_ult
+; CHECK-NEXT: [[T0:%.*]] = icmp ult <8 x i8> %0,
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+  %t2 = icmp ult <8 x i8> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @fcmp32_8_oeq(<32 x float> %0) {
+; CHECK-LABEL: @fcmp32_8_oeq
+; CHECK-NEXT: [[T0:%.*]] = fcmp oeq <32 x float> %0,
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32>
+  %t2 = fcmp oeq <32 x float> %t1,
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @fcmp8_8_oeq(<8 x float> %0) {
+; CHECK-LABEL: @fcmp8_8_oeq
+; CHECK-NEXT: [[T0:%.*]] = fcmp oeq <8 x float> %0,
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32>
+  %t2 = fcmp oeq <8 x float> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @fcmp32_8_olt(<32 x float> %0) {
+; CHECK-LABEL: @fcmp32_8_olt
+; CHECK-NEXT: [[T0:%.*]] = fcmp olt <32 x float> %0,
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32>
+  %t2 = fcmp olt <32 x float> %t1,
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @fcmp8_8_olt(<8 x float> %0) {
+; CHECK-LABEL: @fcmp8_8_olt
+; CHECK-NEXT: [[T0:%.*]] = fcmp olt <8 x float> %0,
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32>
+  %t2 = fcmp olt <8 x float> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @fcmp32_8_uge(<32 x float> %0) {
+; CHECK-LABEL: @fcmp32_8_uge
+; CHECK-NEXT: [[T0:%.*]] = fcmp uge <32 x float> %0,
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32>
+  %t2 = fcmp uge <32 x float> %t1,
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @fcmp8_8_uge(<8 x float> %0) {
+; CHECK-LABEL: @fcmp8_8_uge
+; CHECK-NEXT: [[T0:%.*]] = fcmp uge <8 x float> %0,
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32>
+  %t2 = fcmp uge <8 x float> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @fcmp32_8_ult(<32 x float> %0) {
+; CHECK-LABEL: @fcmp32_8_ult
+; CHECK-NEXT: [[T0:%.*]] = fcmp ult <32 x float> %0,
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x float> %0, <32 x float> poison, <32 x i32>
+  %t2 = fcmp ult <32 x float> %t1,
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @fcmp8_8_ult(<8 x float> %0) {
+; CHECK-LABEL: @fcmp8_8_ult
+; CHECK-NEXT: [[T0:%.*]] = fcmp ult <8 x float> %0,
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32>
+  %t2 = fcmp ult <8 x float> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+
+define <32 x i1> @icmp32_8_eq_constrev(<32 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq_constrev
+; CHECK-NEXT: [[T0:%.*]] = icmp eq <32 x i8> %0,
+; CHECK-NEXT: ret <32 x i1> [[T0]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32>
+  %t2 = icmp eq <32 x i8> %t1,
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_eq_constrev(<8 x i8> %0) {
+; CHECK-LABEL: @icmp8_8_eq_constrev
+; CHECK-NEXT: [[T0:%.*]] = icmp eq <8 x i8> %0,
+; CHECK-NEXT: ret <8 x i1> [[T0]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+  %t2 = icmp eq <8 x i8> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+
+define <32 x i1> @icmp32_8_ult_nonconst_lhs_rev(<32 x i8> %0, <32 x i8> %1) {
+; CHECK-LABEL: @icmp32_8_ult_nonconst_lhs_rev
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <32 x i8> %1, <32 x i8> poison, <32 x i32>
+; CHECK-NEXT: [[T1:%.*]] = icmp ugt <32 x i8> [[T0]], %0
+; CHECK-NEXT: ret <32 x i1> [[T1]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32>
+  %t2 = icmp ult <32 x i8> %t1, %1
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_ult_nonconst_lhs_rev(<8 x i8> %0, <8 x i8> %1) {
+; CHECK-LABEL: @icmp8_8_ult_nonconst_lhs_rev
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[T1:%.*]] = icmp ugt <8 x i8> [[T0]], %0
+; CHECK-NEXT: ret <8 x i1> [[T1]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+  %t2 = icmp ult <8 x i8> %t1, %1
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <32 x i1> @icmp32_8_ult_nonconst_rhs_rev(<32 x i8> %0, <32 x i8> %1) {
+; CHECK-LABEL: @icmp32_8_ult_nonconst_rhs_rev
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <32 x i8> %1, <32 x i8> poison, <32 x i32>
+; CHECK-NEXT: [[T1:%.*]] = icmp ult <32 x i8> [[T0]], %0
+; CHECK-NEXT: ret <32 x i1> [[T1]]
+  %t1 = shufflevector <32 x i8> %0, <32 x i8> poison, <32 x i32>
+  %t2 = icmp ult <32 x i8> %1, %t1
+  %t3 = shufflevector <32 x i1> %t2, <32 x i1> poison, <32 x i32>
+  ret <32 x i1> %t3
+}
+
+define <8 x i1> @icmp8_8_ult_nonconst_rhs_rev(<8 x i8> %0, <8 x i8> %1) {
+; CHECK-LABEL: @icmp8_8_ult_nonconst_rhs_rev
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[T1:%.*]] = icmp ult <8 x i8> [[T0]], %0
+; CHECK-NEXT: ret <8 x i1> [[T1]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+  %t2 = icmp ult <8 x i8> %1, %t1
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <8 x i1> @icmp32_8_eq_negative_non_reverse1(<8 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq_negative_non_reverse1
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[T1:%.*]] = icmp eq <8 x i8> [[T0]],
+; CHECK-NEXT: [[T2:%.*]] = shufflevector <8 x i1> [[T1]], <8 x i1> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x i1> [[T2]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+  %t2 = icmp eq <8 x i8> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <8 x i1> @icmp32_8_eq_negative_non_reverse2(<8 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq_negative_non_reverse2
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[T1:%.*]] = icmp eq <8 x i8> [[T0]],
+; CHECK-NEXT: [[T2:%.*]] = shufflevector <8 x i1> [[T1]], <8 x i1> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x i1> [[T2]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32>
+  %t2 = icmp eq <8 x i8> %t1,
+  %t3 = shufflevector <8 x i1> %t2, <8 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
+define <8 x i1> @icmp32_8_eq_negative_length_changing(<8 x i8> %0) {
+; CHECK-LABEL: @icmp32_8_eq_negative_length_changing
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <8 x i8> %0, <8 x i8> poison, <10 x i32>
+; CHECK-NEXT: [[T1:%.*]] = icmp eq <10 x i8> [[T0]],
+; CHECK-NEXT: [[T2:%.*]] = shufflevector <10 x i1> [[T1]], <10 x i1> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x i1> [[T2]]
+  %t1 = shufflevector <8 x i8> %0, <8 x i8> poison, <10 x i32>
+  %t2 = icmp eq <10 x i8> %t1,
+  %t3 = shufflevector <10 x i1> %t2, <10 x i1> poison, <8 x i32>
+  ret <8 x i1> %t3
+}
+
Index: llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -36,23 +36,19 @@
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8
-; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -7
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32>
-; CHECK-NEXT: [[TMP5:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[TMP7]], i64 -3
-; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> poison, <4 x i32>
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP7]], i64 -7
-; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32>
-; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP5]], i64 -3
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp une <4 x double> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP6]], i32 8, <4 x i1> [[TMP7]], <4 x double> poison)
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[TMP5]], i64 -7
+; CHECK-NEXT: [[TMP9:%.*]] = fcmp une <4 x double> [[WIDE_LOAD1]], zeroinitializer
+; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[TMP9]], <4 x double> poison)
 ; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]],
 ; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]],
-; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP10]], ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]])
-; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP11]], ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]])
+; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP10]], ptr [[TMP6]], i32 8, <4 x i1> [[TMP7]])
+; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP11]], ptr [[TMP8]], i32 8, <4 x i1> [[TMP9]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]