Index: llvm/lib/Analysis/DemandedBits.cpp
===================================================================
--- llvm/lib/Analysis/DemandedBits.cpp
+++ llvm/lib/Analysis/DemandedBits.cpp
@@ -285,7 +285,14 @@
     AB = AOut;
     break;
   case Instruction::Trunc:
-    AB = AOut.zext(BitWidth);
+    // The operand of a Trunc does not necessarily have the same live bits
+    // as the Trunc itself. To be conservative, only propagate the demanded
+    // bits when the operand is one of a small set of instructions that are
+    // known to be truncation-friendly (bitwise logic ops, add and sub).
+    if (auto *I = dyn_cast<Instruction>(Val))
+      if (I->isBitwiseLogicOp() || I->getOpcode() == Instruction::Add ||
+          I->getOpcode() == Instruction::Sub)
+        AB = AOut.zext(BitWidth);
     break;
   case Instruction::ZExt:
     AB = AOut.trunc(BitWidth);
Index: llvm/test/Transforms/LoopVectorize/avoid-truncate-icmp-operands.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/avoid-truncate-icmp-operands.ll
@@ -0,0 +1,37 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@a = dso_local local_unnamed_addr global i64 0, align 8
+@b = dso_local local_unnamed_addr global i16 0, align 4
+
+define dso_local void @myFunc(i64 %d, i64 %e) {
+; CHECK:        pred.store.continue2:
+; CHECK-NEXT:      %{{[0-9]+}} = icmp ult <2 x i64> %broadcast.splat{{[0-9]*}}, %broadcast.splat{{[0-9]*}}
+for.body29.lr.ph:
+  br label %for.body29
+
+for.cond25.for.cond.cleanup28_crit_edge:          ; preds = %for.inc
+  ret void
+
+for.body29:                                       ; preds = %for.inc, %for.body29.lr.ph
+  %n.078 = phi i16 [ undef, %for.body29.lr.ph ], [ %add34, %for.inc ]
+  br i1 undef, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body29
+  %conv31 = zext i8 undef to i64
+  store i64 %conv31, i64* @a, align 8
+  %cmp.i = icmp ult i64 %e, %d  ; 64-bit unsigned compare; the test expects it to stay <2 x i64> when vectorized
+  %.sroa.speculated = select i1 %cmp.i, i64 %d, i64 %e  ; selects the unsigned max of %d and %e
+  %conv32 = trunc i64 %.sroa.speculated to i16  ; only the low 16 bits of the select result are stored
+  store i16 %conv32, i16* @b, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body29
+  %add34 = add nsw i16 %n.078, 2
+  %cmp27 = icmp slt i16 %add34, 16
+  br i1 %cmp27, label %for.body29, label %for.cond25.for.cond.cleanup28_crit_edge, !llvm.loop !6  ; !6 sets llvm.loop.vectorize.enable
+}
+
+!6 = distinct !{!6, !7}
+!7 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/avoid-truncate-remainder-operands.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/avoid-truncate-remainder-operands.ll
@@ -0,0 +1,53 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@b = dso_local local_unnamed_addr global i8 0, align 4
+
+define dso_local void @myFunc(i64 %h) {
+; CHECK:      vector.body:
+; CHECK:          %{{[0-9]+}} = srem <2 x i64> %broadcast.splat{{[0-9]*}}, <i64 30181, i64 30181>
+; CHECK-NEXT:     %{{[0-9]+}} = trunc <2 x i64> %{{[0-9]+}} to <2 x i8>
+
+for.body19.lr.ph:
+  br label %for.body19
+
+for.cond16.for.cond.cleanup18_crit_edge:          ; preds = %for.inc
+  ret void
+
+for.body19:                                       ; preds = %for.inc, %for.body19.lr.ph
+  %o.075 = phi i32 [ undef, %for.body19.lr.ph ], [ %add, %for.inc ]
+  br i1 undef, label %if.end, label %if.then
+
+if.then:                                          ; preds = %for.body19
+  %rem = srem i64 %h, 30181  ; 64-bit signed remainder; the test expects it to stay <2 x i64> when vectorized
+  %conv20 = trunc i64 %rem to i8  ; narrowing to i8 is expected only after the full-width srem
+  store i8 %conv20, i8* undef, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body19
+  br i1 undef, label %for.inc, label %if.then27
+
+if.then27:                                        ; preds = %if.end
+  br i1 undef, label %cond.false, label %cond.true
+
+cond.true:                                        ; preds = %if.then27
+  %conv37 = sext i8 undef to i64
+  br label %cond.end
+
+cond.false:                                       ; preds = %if.then27
+  %0 = load i64, i64* undef, align 8
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  store i8 undef, i8* @b, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %cond.end, %if.end
+  %add = add nuw nsw i32 %o.075, 2
+  %cmp17 = icmp ult i32 %add, 15
+  br i1 %cmp17, label %for.body19, label %for.cond16.for.cond.cleanup18_crit_edge, !llvm.loop !0  ; !0 sets llvm.loop.vectorize.enable
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/avoid-truncate-shift-operands.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/avoid-truncate-shift-operands.ll
@@ -0,0 +1,42 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@a = dso_local local_unnamed_addr global i8 0, align 4
+
+define dso_local void @myFunc(i32 %e, i32 %f) {
+; CHECK:      vector.body:
+; CHECK:         %{{[0-9]+}} = lshr <2 x i32> %broadcast.splat{{[0-9]*}}, <i32 18, i32 18>
+; CHECK-NEXT:    %{{[0-9]+}} = trunc <2 x i32> %{{[0-9]+}} to <2 x i8>
+
+for.cond1.preheader:
+  %conv3 = and i32 %e, 255  ; low 8 bits of %e; compared against the induction value in %cmp4
+  br label %for.body6.lr.ph
+
+for.body6.lr.ph:                                  ; preds = %for.cond1.preheader
+  br label %for.body6
+
+for.cond1.for.cond.cleanup5_crit_edge:            ; preds = %for.inc
+  ret void
+
+for.body6:                                        ; preds = %for.inc, %for.body6.lr.ph
+  %h.018 = phi i8 [ 0, %for.body6.lr.ph ], [ %add, %for.inc ]
+  br i1 undef, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body6
+  %0 = lshr i32 %f, 18  ; moves bits 18..25 of %f into the low byte; the test expects this shift to stay <2 x i32>
+  %conv7 = trunc i32 %0 to i8  ; keeps the low 8 bits of the shifted value
+  store i8 %conv7, i8* @a, align 4
+  store i64 0, i64* undef, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body6
+  %add = add i8 %h.018, 3
+  %conv = zext i8 %add to i32
+  %cmp4 = icmp ugt i32 %conv3, %conv
+  br i1 %cmp4, label %for.body6, label %for.cond1.for.cond.cleanup5_crit_edge, !llvm.loop !5  ; !5 sets vectorize.enable and vectorize.predicate.enable
+}
+
+!5 = distinct !{!5, !6, !7}
+!6 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
+!7 = !{!"llvm.loop.vectorize.enable", i1 true}