diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2304,15 +2304,16 @@
   /// Some opcodes may create immediate undefined behavior when used with some
   /// values (integer division-by-zero for example). Therefore, these operations
   /// are not generally safe to move around or change.
-  bool isSafeToSpeculativelyExecute(unsigned Opcode) const {
-    switch (Opcode) {
+  bool isSafeToSpeculativelyExecute(const SDNode *N) const {
+    switch (N->getOpcode()) {
     case ISD::SDIV:
     case ISD::SREM:
     case ISD::SDIVREM:
-    case ISD::UDIV:
+      return false;
     case ISD::UREM:
     case ISD::UDIVREM:
-      return false;
+    case ISD::UDIV:
+      return isKnownNeverZero(N->getOperand(1));
     default:
       return true;
     }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2385,10 +2385,10 @@
     return SDValue();
 
   // We can't hoist div/rem because of immediate UB (not speculatable).
-  unsigned Opcode = N->getOpcode();
-  if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+  if (!DAG.isSafeToSpeculativelyExecute(N))
     return SDValue();
 
+  unsigned Opcode = N->getOpcode();
   EVT VT = N->getValueType(0);
   SDValue Cond = N1.getOperand(0);
   SDValue TVal = N1.getOperand(1);
@@ -25175,7 +25175,7 @@
       TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
       Scalar.getOperand(0).getValueType() == VecEltVT &&
       Scalar.getOperand(1).getValueType() == VecEltVT &&
-      DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
+      DAG.isSafeToSpeculativelyExecute(N) && hasOperation(Opcode, VT)) {
     // Match an extract element and get a shuffle mask equivalent.
     SmallVector<int> ShufMask(VT.getVectorNumElements(), -1);
@@ -25707,7 +25707,7 @@
   // same types of operations that are in the original sequence. We do have to
   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
   // though. This code is adapted from the identical transform in instcombine.
-  if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
+  if (DAG.isSafeToSpeculativelyExecute(N)) {
     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
@@ -1231,13 +1231,29 @@
 }
 
 define <vscale x 8 x i32> @vdivu_vi_mask_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %mask) {
-; CHECK-LABEL: vdivu_vi_mask_nxv8i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmerge.vim v12, v12, 7, v0
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
-; CHECK-NEXT:    ret
+; RV32-LABEL: vdivu_vi_mask_nxv8i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 149797
+; RV32-NEXT:    addi a0, a0, -1755
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vmulhu.vx v12, v8, a0
+; RV32-NEXT:    vsub.vv v16, v8, v12
+; RV32-NEXT:    vsrl.vi v16, v16, 1
+; RV32-NEXT:    vadd.vv v12, v16, v12
+; RV32-NEXT:    vsrl.vi v8, v12, 2, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vdivu_vi_mask_nxv8i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, 149797
+; RV64-NEXT:    addiw a0, a0, -1755
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vmulhu.vx v12, v8, a0
+; RV64-NEXT:    vsub.vv v16, v8, v12
+; RV64-NEXT:    vsrl.vi v16, v16, 1
+; RV64-NEXT:    vadd.vv v12, v16, v12
+; RV64-NEXT:    vsrl.vi v8, v12, 2, v0.t
+; RV64-NEXT:    ret
   %head1 = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
   %one = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
   %head2 = insertelement <vscale x 8 x i32> poison, i32 7, i32 0
diff --git a/llvm/test/CodeGen/X86/divrem-by-select.ll b/llvm/test/CodeGen/X86/divrem-by-select.ll
--- a/llvm/test/CodeGen/X86/divrem-by-select.ll
+++ b/llvm/test/CodeGen/X86/divrem-by-select.ll
@@ -26,20 +26,16 @@
 ; CHECK-X64-V4:       # %bb.0:
 ; CHECK-X64-V4-NEXT:    vpsllq $63, %xmm0, %xmm0
 ; CHECK-X64-V4-NEXT:    vpmovq2m %xmm0, %k1
-; CHECK-X64-V4-NEXT:    vpbroadcastq {{.*#+}} xmm0 = [1,1]
-; CHECK-X64-V4-NEXT:    vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
-; CHECK-X64-V4-NEXT:    vpextrq $1, %xmm0, %rcx
-; CHECK-X64-V4-NEXT:    vpextrq $1, %xmm1, %rax
-; CHECK-X64-V4-NEXT:    xorl %edx, %edx
-; CHECK-X64-V4-NEXT:    divq %rcx
-; CHECK-X64-V4-NEXT:    movq %rax, %rcx
-; CHECK-X64-V4-NEXT:    vmovq %xmm0, %rsi
-; CHECK-X64-V4-NEXT:    vmovq %xmm1, %rax
-; CHECK-X64-V4-NEXT:    xorl %edx, %edx
-; CHECK-X64-V4-NEXT:    divq %rsi
+; CHECK-X64-V4-NEXT:    vpextrq $1, %xmm1, %rdx
+; CHECK-X64-V4-NEXT:    movabsq $3353953467947191203, %rax # imm = 0x2E8BA2E8BA2E8BA3
+; CHECK-X64-V4-NEXT:    mulxq %rax, %rcx, %rcx
 ; CHECK-X64-V4-NEXT:    vmovq %rcx, %xmm0
-; CHECK-X64-V4-NEXT:    vmovq %rax, %xmm1
-; CHECK-X64-V4-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-X64-V4-NEXT:    vmovq %xmm1, %rdx
+; CHECK-X64-V4-NEXT:    mulxq %rax, %rax, %rax
+; CHECK-X64-V4-NEXT:    vmovq %rax, %xmm2
+; CHECK-X64-V4-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; CHECK-X64-V4-NEXT:    vpsrlq $1, %xmm0, %xmm1 {%k1}
+; CHECK-X64-V4-NEXT:    vmovdqa %xmm1, %xmm0
 ; CHECK-X64-V4-NEXT:    retq
   %d = select <2 x i1> %c, <2 x i64> <i64 11, i64 11>, <2 x i64> <i64 1, i64 1>
   %r = udiv <2 x i64> %x, %d
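
The effect of the SelectionDAG.h change is easiest to see on a udiv whose divisor is a select between two non-zero splat constants, as in the tests above: isKnownNeverZero should succeed on such a divisor, the node is then reported as speculatable, and DAGCombiner is free to fold the division through the select, after which each arm lowers to a multiply-high sequence instead of a real divide. Below is a minimal illustrative sketch of such an input (hypothetical function name and constants, not one of the tests in this patch):

; Sketch only: both select arms are non-zero, so the divisor is expected to be
; known never-zero and the udiv may be speculated/folded through the select.
; If either arm could be zero, isSafeToSpeculativelyExecute still returns false.
define <2 x i64> @udiv_by_select_sketch(<2 x i1> %c, <2 x i64> %x) {
  %d = select <2 x i1> %c, <2 x i64> <i64 11, i64 11>, <2 x i64> <i64 3, i64 3>
  %r = udiv <2 x i64> %x, %d
  ret <2 x i64> %r
}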