diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -959,8 +959,7 @@
       return nullptr;
 
     // If vectors, verify that they have the same number of elements.
-    if (SrcTy && cast<FixedVectorType>(SrcTy)->getNumElements() !=
-                     cast<FixedVectorType>(DestTy)->getNumElements())
+    if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
       return nullptr;
   }
 
@@ -1515,8 +1514,7 @@
 }
 
 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
-  // FIXME: some of this is likely fine for scalable vectors
-  if (!isa<FixedVectorType>(Inst.getType()))
+  if (!isa<VectorType>(Inst.getType()))
     return nullptr;
 
   BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
@@ -1605,13 +1603,16 @@
   // intends to move shuffles closer to other shuffles and binops closer to
   // other binops, so they can be folded. It may also enable demanded elements
   // transforms.
-  unsigned NumElts = cast<FixedVectorType>(Inst.getType())->getNumElements();
   Constant *C;
-  if (match(&Inst,
+  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
+  if (InstVTy &&
+      match(&Inst,
             m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
-                      m_Constant(C))) && !isa<ConstantExpr>(C) &&
-      cast<FixedVectorType>(V1->getType())->getNumElements() <= NumElts) {
-    assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
+                      m_Constant(C))) &&
+      !isa<ConstantExpr>(C) &&
+      cast<FixedVectorType>(V1->getType())->getNumElements() <=
+          InstVTy->getNumElements()) {
+    assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
            "Shuffle should not change scalar type");
 
     // Find constant NewC that has property:
@@ -1626,6 +1627,7 @@
     UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
     SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
     bool MayChange = true;
+    unsigned NumElts = InstVTy->getNumElements();
     for (unsigned I = 0; I < NumElts; ++I) {
       Constant *CElt = C->getAggregateElement(I);
       if (ShMask[I] >= 0) {
@@ -2379,9 +2381,9 @@
              DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
     };
     if (GEP.getNumOperands() == 3 &&
-        ((GEPEltType->isArrayTy() && SrcEltType->isVectorTy() &&
+        ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
           areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
-         (GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
+         (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
           areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
 
       // Create a new GEP here, as using `setOperand()` followed by
diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
--- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -1,17 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 define i1 @f(i1 %x) {
 ; CHECK-LABEL: @f(
-; CHECK: ret i1 false
-	%b = and i1 %x, icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
-	ret i1 %b
+; CHECK-NEXT:    ret i1 false
+;
+  %b = and i1 %x, icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
+  ret i1 %b
 }
 
 define i32 @g(i32 %x) {
 ; CHECK-LABEL: @g(
-; CHECK: ret i32 %x
-	%b = add i32 %x, zext (i1 icmp eq (i8* inttoptr (i32 1000000 to i8*), i8* inttoptr (i32 2000000 to i8*)) to i32)
-	ret i32 %b
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %b = add i32 %x, zext (i1 icmp eq (i8* inttoptr (i32 1000000 to i8*), i8* inttoptr (i32 2000000 to i8*)) to i32)
+  ret i32 %b
+}
+
+define i32 @h(i1 %A, i32 %B) {
+; CHECK-LABEL: @h(
+; CHECK-NEXT:  EntryBlock:
+; CHECK-NEXT:    [[B_OP:%.*]] = add i32 [[B:%.*]], 2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[A:%.*]], i32 3, i32 [[B_OP]]
+; CHECK-NEXT:    ret i32 [[OP]]
+;
+EntryBlock:
+  %cf = select i1 %A, i32 1, i32 %B
+  %op = add i32 2, %cf
+  ret i32 %op
+}
+
+define <4 x float> @h1(i1 %A, <4 x i32> %B) {
+; CHECK-LABEL: @h1(
+; CHECK-NEXT:  EntryBlock:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+; CHECK-NEXT:    [[BC:%.*]] = select i1 [[A:%.*]], <4 x float> <float 0x36A0000000000000, float 0x36B0000000000000, float 0x36B8000000000000, float 0x36C0000000000000>, <4 x float> [[TMP0]]
+; CHECK-NEXT:    ret <4 x float> [[BC]]
+;
+EntryBlock:
+  %cf = select i1 %A, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> %B
+  %bc = bitcast <4 x i32> %cf to <4 x float>
+  ret <4 x float> %bc
+}
+
+define <vscale x 4 x float> @h2(i1 %A, <vscale x 4 x i32> %B) {
+; CHECK-LABEL: @h2(
+; CHECK-NEXT:  EntryBlock:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 4 x i32> [[B:%.*]] to <vscale x 4 x float>
+; CHECK-NEXT:    [[BC:%.*]] = select i1 [[A:%.*]], <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[TMP0]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[BC]]
+;
+EntryBlock:
+  %cf = select i1 %A, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %B
+  %bc = bitcast <vscale x 4 x i32> %cf to <vscale x 4 x float>
+  ret <vscale x 4 x float> %bc
+}
+
+define <vscale x 2 x i64> @h3(i1 %A, <vscale x 4 x i32> %B) {
+; CHECK-LABEL: @h3(
+; CHECK-NEXT:  EntryBlock:
+; CHECK-NEXT:    [[CF:%.*]] = select i1 [[A:%.*]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[B:%.*]]
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <vscale x 4 x i32> [[CF]] to <vscale x 2 x i64>
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[BC]]
+;
+EntryBlock:
+  %cf = select i1 %A, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %B
+  %bc = bitcast <vscale x 4 x i32> %cf to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %bc
+}
diff --git a/llvm/test/Transforms/InstCombine/vec-binop-select.ll b/llvm/test/Transforms/InstCombine/vec-binop-select.ll
--- a/llvm/test/Transforms/InstCombine/vec-binop-select.ll
+++ b/llvm/test/Transforms/InstCombine/vec-binop-select.ll
@@ -14,6 +14,18 @@
   ret <4 x i32> %r
 }
 
+define <vscale x 4 x i32> @vscaleand(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: @vscaleand(
+; CHECK-NEXT:    [[R:%.*]] = and <vscale x 4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <vscale x 4 x i32> [[R]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[S]]
+;
+  %sel1 = shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %sel2 = shufflevector <vscale x 4 x i32> %y, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %r = and <vscale x 4 x i32> %sel1, %sel2
+  ret <vscale x 4 x i32> %r
+}
+
 define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @or(
 ; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[X:%.*]], [[Y:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1468,6 +1468,20 @@
   ret <4 x i32> %r
 }
 
+define <vscale x 4 x i32> @vsplat_assoc_add(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: @vsplat_assoc_add(
+; CHECK-NEXT:    [[TMP1:%.*]] = add <vscale x 4 x i32> [[X:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 317426, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = add <vscale x 4 x i32> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+
+  %splatx = shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %a = add <vscale x 4 x i32> %y, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 317426, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
+  %r = add <vscale x 4 x i32> %splatx, %a
+  ret <vscale x 4 x i32> %r
+}
+
 ; Undefs in splat mask are replaced with defined splat index
 
 define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) {