Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -947,6 +947,42 @@
   return new ZExtInst(Select, SI.getType());
 }
 
+/// Try to transform a vector select with a constant condition vector into a
+/// shuffle for easier combining with other shuffles and insert/extract.
+static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) {
+  Value *CondVal = SI.getCondition();
+  Constant *CondC;
+  if (!CondVal->getType()->isVectorTy() || !match(CondVal, m_Constant(CondC)))
+    return nullptr;
+
+  unsigned NumElts = CondVal->getType()->getVectorNumElements();
+  SmallVector<Constant *, 16> Mask;
+  Mask.reserve(NumElts);
+  Type *Int32Ty = Type::getInt32Ty(CondVal->getContext());
+  for (unsigned i = 0; i != NumElts; ++i) {
+    Constant *Elt = CondC->getAggregateElement(i);
+    if (!Elt)
+      return nullptr;
+
+    if (Elt->isOneValue()) {
+      // If the select condition element is true, choose from the 1st vector.
+      Mask.push_back(ConstantInt::get(Int32Ty, i));
+    } else if (Elt->isNullValue()) {
+      // If the select condition element is false, choose from the 2nd vector.
+      Mask.push_back(ConstantInt::get(Int32Ty, i + NumElts));
+    } else if (isa<UndefValue>(Elt)) {
+      // If the select condition element is undef, the shuffle mask is undef.
+      Mask.push_back(UndefValue::get(Int32Ty));
+    } else {
+      // Bail out on a constant expression.
+      return nullptr;
+    }
+  }
+
+  return new ShuffleVectorInst(SI.getTrueValue(), SI.getFalseValue(),
+                               ConstantVector::get(Mask));
+}
+
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -957,6 +993,9 @@
           SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC))
     return replaceInstUsesWith(SI, V);
 
+  if (Instruction *I = canonicalizeSelectToShuffle(SI))
+    return I;
+
   if (SelType->getScalarType()->isIntegerTy(1) &&
       TrueVal->getType() == CondVal->getType()) {
     if (match(TrueVal, m_One())) {
Index: llvm/trunk/test/Transforms/InstCombine/blend_x86.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/blend_x86.ll +++ llvm/trunk/test/Transforms/InstCombine/blend_x86.ll @@ -1,9 +1,10 @@ ; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) { -; CHECK-LABEL: @constant_blendvpd -; CHECK-NEXT: %1 = select <2 x i1> , <2 x double> %ab, <2 x double> %xy -; CHECK-NEXT: ret <2 x double> %1 +; CHECK-LABEL: @constant_blendvpd( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %ab, <2 x double> %xy, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> ) ret <2 x double> %1 } @@ -23,9 +24,10 @@ } define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) { -; CHECK-LABEL: @constant_blendvps -; CHECK-NEXT: %1 = select <4 x i1> , <4 x float> %abcd, <4 x float> %xyzw -; CHECK-NEXT: ret <4 x float> %1 +; CHECK-LABEL: @constant_blendvps( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %abcd, <4 x float> %xyzw, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> ) ret <4 x float> %1 } @@ -45,9 
+47,10 @@ } define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) { -; CHECK-LABEL: @constant_pblendvb -; CHECK-NEXT: %1 = select <16 x i1> , <16 x i8> %abcd, <16 x i8> %xyzw -; CHECK-NEXT: ret <16 x i8> %1 +; CHECK-LABEL: @constant_pblendvb( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %abcd, <16 x i8> %xyzw, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> ) ret <16 x i8> %1 } @@ -67,9 +70,10 @@ } define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { -; CHECK-LABEL: @constant_blendvpd_avx -; CHECK-NEXT: %1 = select <4 x i1> , <4 x double> %ab, <4 x double> %xy -; CHECK-NEXT: ret <4 x double> %1 +; CHECK-LABEL: @constant_blendvpd_avx( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %ab, <4 x double> %xy, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP1]] +; %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> ) ret <4 x double> %1 } @@ -89,9 +93,10 @@ } define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) { -; CHECK-LABEL: @constant_blendvps_avx -; CHECK-NEXT: %1 = select <8 x i1> , <8 x float> %abcd, <8 x float> %xyzw -; CHECK-NEXT: ret <8 x float> %1 +; CHECK-LABEL: @constant_blendvps_avx( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %abcd, <8 x float> %xyzw, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> ) ret <8 x float> %1 } @@ -111,9 +116,10 @@ } define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) { -; CHECK-LABEL: @constant_pblendvb_avx2 -; CHECK-NEXT: %1 = select <32 x i1> , <32 x i8> %abcd, <32 x i8> %xyzw -; CHECK-NEXT: ret <32 x i8> %1 +; CHECK-LABEL: @constant_pblendvb_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %abcd, <32 x i8> %xyzw, <32 x i32> +; CHECK-NEXT: 
ret <32 x i8> [[TMP1]] +; %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @vec_sel_consts( -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> , <4 x i32> %a, <4 x i32> %b +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %and1 = and <4 x i32> %a, @@ -379,11 +379,9 @@ ret <4 x i32> %or } -; The select condition constant is always derived from the first operand of the 'or'. - define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) { ; CHECK-LABEL: @vec_sel_consts_weird( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> , <3 x i129> %b, <3 x i129> %a +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i129> %b, <3 x i129> %a, <3 x i32> ; CHECK-NEXT: ret <3 x i129> [[TMP1]] ; %and1 = and <3 x i129> %a, Index: llvm/trunk/test/Transforms/InstCombine/select.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/select.ll +++ llvm/trunk/test/Transforms/InstCombine/select.ll @@ -1765,3 +1765,36 @@ ret <2 x i32> %x.xor } +; Make sure that undef elements of the select condition are translated into undef elements of the shuffle mask. + +define <4 x i32> @canonicalize_to_shuffle(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: @canonicalize_to_shuffle( +; CHECK-NEXT: [[SEL:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[SEL]] +; + %sel = select <4 x i1> , <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %sel +} + +; Don't die or try if the condition mask is a constant expression or contains a constant expression. 
+ +@g = global i32 0 + +define <4 x i32> @cannot_canonicalize_to_shuffle1(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: @cannot_canonicalize_to_shuffle1( +; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> bitcast (i4 ptrtoint (i32* @g to i4) to <4 x i1>), <4 x i32> %a, <4 x i32> %b +; CHECK-NEXT: ret <4 x i32> [[SEL]] +; + %sel = select <4 x i1> bitcast (i4 ptrtoint (i32* @g to i4) to <4 x i1>), <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %sel +} + +define <4 x i32> @cannot_canonicalize_to_shuffle2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: @cannot_canonicalize_to_shuffle2( +; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> , <4 x i32> %a, <4 x i32> %b +; CHECK-NEXT: ret <4 x i32> [[SEL]] +; + %sel = select <4 x i1> , <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %sel +} + Index: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll +++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -216,7 +216,7 @@ ; CHECK-LABEL: @test_select( ; CHECK-NEXT: [[A0:%.*]] = insertelement <4 x float> undef, float %f, i32 0 ; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> [[A0]], float 3.000000e+00, i32 3 -; CHECK-NEXT: [[RET:%.*]] = select <4 x i1> , <4 x float> [[A3]], <4 x float> +; CHECK-NEXT: [[RET:%.*]] = shufflevector <4 x float> [[A3]], <4 x float> , <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RET]] ; %a0 = insertelement <4 x float> undef, float %f, i32 0 @@ -231,10 +231,12 @@ ret <4 x float> %ret } -; Check that instcombine doesn't wrongly fold the select statement into a ret <2 x i64> %v +; Check that instcombine doesn't wrongly fold away the select completely. +; TODO: Should this be an insertelement rather than a shuffle? 
+ define <2 x i64> @PR24922(<2 x i64> %v) { ; CHECK-LABEL: @PR24922( -; CHECK-NEXT: [[RESULT:%.*]] = select <2 x i1> , <2 x i64> %v, <2 x i64> +; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <2 x i64> %v, <2 x i64> , <2 x i32> ; CHECK-NEXT: ret <2 x i64> [[RESULT]] ; %result = select <2 x i1> bitcast (<4 x i32> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer