Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4238,6 +4238,61 @@
   return true;
 }
 
+/// Return true if we can extend/trunc the type of the specified value in
+/// instcombine. A value qualifies when it is a constant, or an instruction
+/// from the supported set below whose relevant operands qualify as well.
+static bool canEvaluateInDifferentType(Value *V) {
+  // Constants can always be evaluated in a different type.
+  if (isa<Constant>(V))
+    return true;
+
+  auto *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return false;
+  // Instcombine doesn't change the types of something that has multiple uses.
+  // SLP may remove extractelement, so we will continue checking if all the
+  // uses are the vector operands of extractelement insts.
+  if (!I->hasOneUse() && llvm::any_of(I->users(), [I](User *U) {
+        return !isa<ExtractElementInst>(U) || U->getOperand(0) != I;
+      }))
+    return false;
+  switch (I->getOpcode()) {
+  // We can always change the types of a cast.
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+    return true;
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    // These operators can all arbitrarily be extended/trunced if their inputs
+    // can.
+    return canEvaluateInDifferentType(I->getOperand(0)) &&
+           canEvaluateInDifferentType(I->getOperand(1));
+  case Instruction::Select:
+    // We can change a select if we can change both true/false operands.
+    return canEvaluateInDifferentType(I->getOperand(1)) &&
+           canEvaluateInDifferentType(I->getOperand(2));
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.
+    PHINode *PN = cast<PHINode>(I);
+    for (Value *IncValue : PN->incoming_values())
+      if (!canEvaluateInDifferentType(IncValue))
+        return false;
+    return true;
+  }
+  case Instruction::ExtractElement:
+    // We can change an extractelement if we can change its vector operand.
+    return canEvaluateInDifferentType(I->getOperand(0));
+  default:
+    break;
+  }
+  return false;
+}
+
 void BoUpSLP::computeMinimumValueSizes() {
   // If there are no external uses, the expression tree must be rooted by a
   // store. We can't demote in-memory values, so there is nothing to do here.
@@ -4281,15 +4336,16 @@
   SmallVector<Value *, 32> ToDemote;
   SmallVector<Value *, 4> Roots;
   for (auto *Root : TreeRoot) {
-    // Do not include top zext/sext/trunc operations to those to be demoted, it
-    // produces noise cast<vect>, trunc <vect>, exctract <vect>, cast <extract>
-    // sequence.
+    // Do not include top zext/sext/trunc operations to those to be demoted if
+    // the sources of these cast IRs cannot be extended, it produces noise
+    // cast<vect>, trunc <vect>, extract <vect>, cast <extract> sequence.
     if (isa<Constant>(Root))
       continue;
     auto *I = dyn_cast<Instruction>(Root);
     if (!I || !I->hasOneUse() || !Expr.count(I))
       return;
-    if (isa<ZExtInst>(I) || isa<SExtInst>(I))
+    if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+        !canEvaluateInDifferentType(I->getOperand(0)))
       continue;
     if (auto *TI = dyn_cast<TruncInst>(I)) {
       Roots.push_back(TI->getOperand(0));
Index: test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -slp-vectorizer -instcombine < %s | FileCheck %s
+
+target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+declare void @foo(i64, i64, i64, i64)
+
+define void @test(<4 x i16> %a, <4 x i16> %b, i64* %p) {
+; Make sure types of sub and its sources are not extended.
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> [[Z0]], [[Z1]]
+entry:
+  %z0 = zext <4 x i16> %a to <4 x i32>
+  %z1 = zext <4 x i16> %b to <4 x i32>
+  %sub0 = sub <4 x i32> %z0, %z1
+  %e0 = extractelement <4 x i32> %sub0, i32 0
+  %s0 = sext i32 %e0 to i64
+  %gep0 = getelementptr inbounds i64, i64* %p, i64 %s0
+  %load0 = load i64, i64* %gep0
+  %e1 = extractelement <4 x i32> %sub0, i32 1
+  %s1 = sext i32 %e1 to i64
+  %gep1 = getelementptr inbounds i64, i64* %p, i64 %s1
+  %load1 = load i64, i64* %gep1
+  %e2 = extractelement <4 x i32> %sub0, i32 2
+  %s2 = sext i32 %e2 to i64
+  %gep2 = getelementptr inbounds i64, i64* %p, i64 %s2
+  %load2 = load i64, i64* %gep2
+  %e3 = extractelement <4 x i32> %sub0, i32 3
+  %s3 = sext i32 %e3 to i64
+  %gep3 = getelementptr inbounds i64, i64* %p, i64 %s3
+  %load3 = load i64, i64* %gep3
+  call void @foo(i64 %load0, i64 %load1, i64 %load2, i64 %load3)
+  ret void
+}