Index: llvm/lib/Analysis/VectorUtils.cpp =================================================================== --- llvm/lib/Analysis/VectorUtils.cpp +++ llvm/lib/Analysis/VectorUtils.cpp @@ -530,9 +530,34 @@ continue; Visited.insert(Val); - // Non-instructions terminate a chain successfully. - if (!isa(Val)) + // There are some cases where Trunc's live bitwidth + // gets erroneously propagated to its operands which + // are linked to Arguments or ConstantInts. To be + // conservative, we need to take the size of Argument + // or ConstantInt into consideration when computing + // Leader's DemandedBits. + if (!isa(Val)) { + uint64_t BitSize = 0; + if (const auto *CI = dyn_cast(Val)) { + APInt CIValue = CI->getValue(); + if (CI->isNegative()) + BitSize = CIValue.getMinSignedBits(); + else + BitSize = CIValue.getActiveBits(); + } else { + BitSize = Val->getType()->getScalarSizeInBits(); + } + if (BitSize == 0) + continue; + // Bail out for same reason below. + if (BitSize > 64) + return MapVector(); + uint64_t V = APInt::getAllOnesValue(BitSize).getZExtValue(); + DBits[Leader] |= V; + LLVM_DEBUG(dbgs() << "\t Value's dbits: " << Twine::utohexstr(V) << '\n'); continue; + } + Instruction *I = cast(Val); // If we encounter a type that is larger than 64 bits, we can't represent Index: llvm/test/Transforms/LoopVectorize/avoid-truncate-icmp-operands.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/avoid-truncate-icmp-operands.ll @@ -0,0 +1,37 @@ +; RUN: opt -loop-vectorize -S < %s | FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@a = dso_local local_unnamed_addr global i64 0, align 8 +@b = dso_local local_unnamed_addr global i16 0, align 4 + +define dso_local void @myFunc(i64 %d, i64 %e) { +; CHECK: pred.store.continue2: +; CHECK-NEXT: %{{[0-9]+}} = icmp ult <2 x i64> %broadcast.splat{{[0-9]*}}, 
%broadcast.splat{{[0-9]*}} +for.body29.lr.ph: + br label %for.body29 + +for.cond25.for.cond.cleanup28_crit_edge: ; preds = %for.inc + ret void + +for.body29: ; preds = %for.inc, %for.body29.lr.ph + %n.078 = phi i16 [ undef, %for.body29.lr.ph ], [ %add34, %for.inc ] + br i1 undef, label %for.inc, label %if.then + +if.then: ; preds = %for.body29 + %conv31 = zext i8 undef to i64 + store i64 %conv31, i64* @a, align 8 + %cmp.i = icmp ult i64 %e, %d + %.sroa.speculated = select i1 %cmp.i, i64 %d, i64 %e + %conv32 = trunc i64 %.sroa.speculated to i16 + store i16 %conv32, i16* @b, align 4 + br label %for.inc + +for.inc: ; preds = %if.then, %for.body29 + %add34 = add nsw i16 %n.078, 2 + %cmp27 = icmp slt i16 %add34, 16 + br i1 %cmp27, label %for.body29, label %for.cond25.for.cond.cleanup28_crit_edge, !llvm.loop !6 +} + +!6 = distinct !{!6, !7} +!7 = !{!"llvm.loop.vectorize.enable", i1 true} Index: llvm/test/Transforms/LoopVectorize/avoid-truncate-remainder-operands.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/avoid-truncate-remainder-operands.ll @@ -0,0 +1,53 @@ +; RUN: opt -loop-vectorize -S < %s | FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@b = dso_local local_unnamed_addr global i8 0, align 4 + +define dso_local void @myFunc(i64 %h) { +; CHECK: vector.body: +; CHECK: %{{[0-9]+}} = srem <2 x i64> %broadcast.splat{{[0-9]*}}, +; CHECK-NEXT: %{{[0-9]+}} = trunc <2 x i64> %{{[0-9]+}} to <2 x i8> + +for.body19.lr.ph: + br label %for.body19 + +for.cond16.for.cond.cleanup18_crit_edge: ; preds = %for.inc + ret void + +for.body19: ; preds = %for.inc, %for.body19.lr.ph + %o.075 = phi i32 [ undef, %for.body19.lr.ph ], [ %add, %for.inc ] + br i1 undef, label %if.end, label %if.then + +if.then: ; preds = %for.body19 + %rem = srem i64 %h, 30181 + %conv20 = trunc i64 %rem to i8 + store i8 %conv20, i8* undef, align 1 + 
br label %if.end + +if.end: ; preds = %if.then, %for.body19 + br i1 undef, label %for.inc, label %if.then27 + +if.then27: ; preds = %if.end + br i1 undef, label %cond.false, label %cond.true + +cond.true: ; preds = %if.then27 + %conv37 = sext i8 undef to i64 + br label %cond.end + +cond.false: ; preds = %if.then27 + %0 = load i64, i64* undef, align 8 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + store i8 undef, i8* @b, align 4 + br label %for.inc + +for.inc: ; preds = %cond.end, %if.end + %add = add nuw nsw i32 %o.075, 2 + %cmp17 = icmp ult i32 %add, 15 + br i1 %cmp17, label %for.body19, label %for.cond16.for.cond.cleanup18_crit_edge, !llvm.loop !0 +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.enable", i1 true} Index: llvm/test/Transforms/LoopVectorize/avoid-truncate-shift-operands.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/avoid-truncate-shift-operands.ll @@ -0,0 +1,42 @@ +; RUN: opt -loop-vectorize -S < %s | FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@a = dso_local local_unnamed_addr global i8 0, align 4 + +define dso_local void @myFunc(i32 %e, i32 %f) { +; CHECK: vector.body: +; CHECK: %{{[0-9]+}} = lshr <2 x i32> %broadcast.splat{{[0-9]*}}, +; CHECK-NEXT: %{{[0-9]+}} = trunc <2 x i32> %{{[0-9]+}} to <2 x i8> + +for.cond1.preheader: + %conv3 = and i32 %e, 255 + br label %for.body6.lr.ph + +for.body6.lr.ph: ; preds = %for.cond1.preheader + br label %for.body6 + +for.cond1.for.cond.cleanup5_crit_edge: ; preds = %for.inc + ret void + +for.body6: ; preds = %for.inc, %for.body6.lr.ph + %h.018 = phi i8 [ 0, %for.body6.lr.ph ], [ %add, %for.inc ] + br i1 undef, label %for.inc, label %if.then + +if.then: ; preds = %for.body6 + %0 = lshr i32 %f, 18 + %conv7 = trunc i32 %0 to i8 + store i8 %conv7, i8* @a, align 4 + store i64 0, i64* undef, align 8 + br label 
%for.inc + +for.inc: ; preds = %if.then, %for.body6 + %add = add i8 %h.018, 3 + %conv = zext i8 %add to i32 + %cmp4 = icmp ugt i32 %conv3, %conv + br i1 %cmp4, label %for.body6, label %for.cond1.for.cond.cleanup5_crit_edge, !llvm.loop !5 +} + +!5 = distinct !{!5, !6, !7} +!6 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} +!7 = !{!"llvm.loop.vectorize.enable", i1 true}