Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4301,24 +4301,9 @@
   // additional roots that require investigating in Roots.
   SmallVector ToDemote;
   SmallVector Roots;
-  for (auto *Root : TreeRoot) {
-    // Do not include top zext/sext/trunc operations to those to be demoted, it
-    // produces noise cast, trunc , exctract , cast
-    // sequence.
-    if (isa(Root))
-      continue;
-    auto *I = dyn_cast(Root);
-    if (!I || !I->hasOneUse() || !Expr.count(I))
-      return;
-    if (isa(I) || isa(I))
-      continue;
-    if (auto *TI = dyn_cast(I)) {
-      Roots.push_back(TI->getOperand(0));
-      continue;
-    }
+  for (auto *Root : TreeRoot)
     if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
       return;
-  }
 
   // The maximum bit width required to represent all the values that can be
   // demoted without loss of precision. It would be safe to truncate the roots
@@ -4347,7 +4332,11 @@
   // We start by looking at each entry that can be demoted. We compute the
   // maximum bit width required to store the scalar by using ValueTracking to
   // compute the number of high-order bits we can truncate.
-  if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType())) {
+  if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
+      llvm::all_of(TreeRoot, [](Value *R) {
+        assert(R->hasOneUse() && "Root should have only one use!");
+        return isa(*R->user_back());
+      })) {
     MaxBitWidth = 8u;
 
     // Determine if the sign bit of all the roots is known to be zero. If not,
Index: test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
===================================================================
--- test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
+++ test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
@@ -9,9 +9,9 @@
 ; Make sure types of sub and its sources are not extended.
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i64>
-; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i64>
-; CHECK-NEXT: [[SUB0:%.*]] = sub nsw <4 x i64> [[Z0]], [[Z1]]
+; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
+; CHECK-NEXT: [[SUB0:%.*]] = sub nsw <4 x i32> [[Z0]], [[Z1]]
 ;
 entry:
   %z0 = zext <4 x i16> %a to <4 x i32>