Index: llvm/lib/Analysis/DemandedBits.cpp
===================================================================
--- llvm/lib/Analysis/DemandedBits.cpp
+++ llvm/lib/Analysis/DemandedBits.cpp
@@ -285,7 +285,14 @@
     AB = AOut;
     break;
   case Instruction::Trunc:
-    AB = AOut.zext(BitWidth);
+    // We cannot blindly say that the operand of a Trunc has the same live
+    // bits as the Trunc instruction itself. To be conservative, only
+    // propagate the narrowed live bits through a whitelist of operand
+    // instructions (bitwise logic, add, sub) that are truncation friendly.
+    if (auto *I = dyn_cast<Instruction>(Val))
+      if (I->isBitwiseLogicOp() || I->getOpcode() == Instruction::Add ||
+          I->getOpcode() == Instruction::Sub)
+        AB = AOut.zext(BitWidth);
     break;
   case Instruction::ZExt:
     AB = AOut.trunc(BitWidth);
Index: llvm/test/Transforms/LoopVectorize/avoid-truncate-icmp-operands.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/avoid-truncate-icmp-operands.ll
@@ -0,0 +1,39 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@a = dso_local local_unnamed_addr global i64 0, align 8
+@b = dso_local local_unnamed_addr global i16 0, align 4
+
+; The select result is truncated to i16, but the icmp that feeds the select
+; must keep comparing full-width values: expect the compare on <2 x i64>.
+define dso_local void @myFunc(i64 %d, i64 %e) {
+; CHECK: pred.store.continue2:
+; CHECK-NEXT: %{{[0-9]+}} = icmp ult <2 x i64> %broadcast.splat{{[0-9]*}}, %broadcast.splat{{[0-9]*}}
+for.body29.lr.ph:
+  br label %for.body29
+
+for.cond25.for.cond.cleanup28_crit_edge:          ; preds = %for.inc
+  ret void
+
+for.body29:                                       ; preds = %for.inc, %for.body29.lr.ph
+  %n.078 = phi i16 [ undef, %for.body29.lr.ph ], [ %add34, %for.inc ]
+  br i1 undef, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body29
+  %conv31 = zext i8 undef to i64
+  store i64 %conv31, i64* @a, align 8
+  %cmp.i = icmp ult i64 %e, %d
+  %.sroa.speculated = select i1 %cmp.i, i64 %d, i64 %e
+  %conv32 = trunc i64 %.sroa.speculated to i16
+  store i16 %conv32, i16* @b, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body29
+  %add34 = add nsw i16 %n.078, 2
+  %cmp27 = icmp slt i16 %add34, 16
+  br i1 %cmp27, label %for.body29, label %for.cond25.for.cond.cleanup28_crit_edge, !llvm.loop !6
+}
+
+!6 = distinct !{!6, !7}
+!7 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/avoid-truncate-remainder-operands.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/avoid-truncate-remainder-operands.ll
@@ -0,0 +1,55 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@b = dso_local local_unnamed_addr global i8 0, align 4
+
+; The srem result is truncated to i8; the remainder itself must still be
+; computed at full width: expect srem on <2 x i64>, then a trunc to <2 x i8>.
+define dso_local void @myFunc(i64 %h) {
+; CHECK: vector.body:
+; CHECK: %{{[0-9]+}} = srem <2 x i64> %broadcast.splat{{[0-9]*}},
+; CHECK-NEXT: %{{[0-9]+}} = trunc <2 x i64> %{{[0-9]+}} to <2 x i8>
+
+for.body19.lr.ph:
+  br label %for.body19
+
+for.cond16.for.cond.cleanup18_crit_edge:          ; preds = %for.inc
+  ret void
+
+for.body19:                                       ; preds = %for.inc, %for.body19.lr.ph
+  %o.075 = phi i32 [ undef, %for.body19.lr.ph ], [ %add, %for.inc ]
+  br i1 undef, label %if.end, label %if.then
+
+if.then:                                          ; preds = %for.body19
+  %rem = srem i64 %h, 30181
+  %conv20 = trunc i64 %rem to i8
+  store i8 %conv20, i8* undef, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body19
+  br i1 undef, label %for.inc, label %if.then27
+
+if.then27:                                        ; preds = %if.end
+  br i1 undef, label %cond.false, label %cond.true
+
+cond.true:                                        ; preds = %if.then27
+  %conv37 = sext i8 undef to i64
+  br label %cond.end
+
+cond.false:                                       ; preds = %if.then27
+  %0 = load i64, i64* undef, align 8
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  store i8 undef, i8* @b, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %cond.end, %if.end
+  %add = add nuw nsw i32 %o.075, 2
+  %cmp17 = icmp ult i32 %add, 15
+  br i1 %cmp17, label %for.body19, label %for.cond16.for.cond.cleanup18_crit_edge, !llvm.loop !0
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/avoid-truncate-shift-operands.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/avoid-truncate-shift-operands.ll
@@ -0,0 +1,44 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@a = dso_local local_unnamed_addr global i8 0, align 4
+
+; The lshr result is truncated to i8; the shift itself must still be done at
+; full width: expect lshr on <2 x i32>, then a trunc to <2 x i8>.
+define dso_local void @myFunc(i32 %e, i32 %f) {
+; CHECK: vector.body:
+; CHECK: %{{[0-9]+}} = lshr <2 x i32> %broadcast.splat{{[0-9]*}},
+; CHECK-NEXT: %{{[0-9]+}} = trunc <2 x i32> %{{[0-9]+}} to <2 x i8>
+
+for.cond1.preheader:
+  %conv3 = and i32 %e, 255
+  br label %for.body6.lr.ph
+
+for.body6.lr.ph:                                  ; preds = %for.cond1.preheader
+  br label %for.body6
+
+for.cond1.for.cond.cleanup5_crit_edge:            ; preds = %for.inc
+  ret void
+
+for.body6:                                        ; preds = %for.inc, %for.body6.lr.ph
+  %h.018 = phi i8 [ 0, %for.body6.lr.ph ], [ %add, %for.inc ]
+  br i1 undef, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body6
+  %0 = lshr i32 %f, 18
+  %conv7 = trunc i32 %0 to i8
+  store i8 %conv7, i8* @a, align 4
+  store i64 0, i64* undef, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body6
+  %add = add i8 %h.018, 3
+  %conv = zext i8 %add to i32
+  %cmp4 = icmp ugt i32 %conv3, %conv
+  br i1 %cmp4, label %for.body6, label %for.cond1.for.cond.cleanup5_crit_edge, !llvm.loop !5
+}
+
+!5 = distinct !{!5, !6, !7}
+!6 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
+!7 = !{!"llvm.loop.vectorize.enable", i1 true}