diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp --- a/llvm/lib/Analysis/DemandedBits.cpp +++ b/llvm/lib/Analysis/DemandedBits.cpp @@ -191,6 +191,8 @@ AB = determineLiveOperandBitsSub(OperandNo, AOut, Known, Known2); } break; + case Instruction::UDiv: + case Instruction::SDiv: case Instruction::Mul: // Find the highest live output bit. We don't need any more input // bits than that (adds, and thus subtracts, ripple only to the diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll --- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll +++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll @@ -38,7 +38,7 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = tail call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 [[AND]]) ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[PSRC_ADDR_0_LCSSA]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison) -; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[SUM_0_LCSSA]]