diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -2356,6 +2356,19 @@ } } + /// Check if the provided node is safe to speculatively execute given its + /// current arguments. So, while the `udiv` opcode is not safe to + /// speculatively execute, a given `udiv` node may be if the denominator is + /// known nonzero. + bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const { + switch (N->getOpcode()) { + case ISD::UDIV: + return isKnownNeverZero(N->getOperand(1)); + default: + return isSafeToSpeculativelyExecute(N->getOpcode()); + } + } + SDValue makeStateFunctionCall(unsigned LibFunc, SDValue Ptr, SDValue InChain, const SDLoc &DLoc); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2434,11 +2434,12 @@ if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse()) return SDValue(); - // We can't hoist div/rem because of immediate UB (not speculatable). - unsigned Opcode = N->getOpcode(); - if (!DAG.isSafeToSpeculativelyExecute(Opcode)) + // We can't hoist all instructions because of immediate UB (not speculatable). + // For example, div/rem by zero.
+ if (!DAG.isSafeToSpeculativelyExecuteNode(N)) return SDValue(); + unsigned Opcode = N->getOpcode(); EVT VT = N->getValueType(0); SDValue Cond = N1.getOperand(0); SDValue TVal = N1.getOperand(1); diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll @@ -1231,13 +1231,29 @@ } define @vdivu_vi_mask_nxv8i32( %va, %mask) { -; CHECK-LABEL: vdivu_vi_mask_nxv8i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.i v12, 1 -; CHECK-NEXT: vmerge.vim v12, v12, 7, v0 -; CHECK-NEXT: vdivu.vv v8, v8, v12 -; CHECK-NEXT: ret +; RV32-LABEL: vdivu_vi_mask_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 149797 +; RV32-NEXT: addi a0, a0, -1755 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vmulhu.vx v12, v8, a0 +; RV32-NEXT: vsub.vv v16, v8, v12 +; RV32-NEXT: vsrl.vi v16, v16, 1 +; RV32-NEXT: vadd.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v8, v12, 2, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vdivu_vi_mask_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, 149797 +; RV64-NEXT: addiw a0, a0, -1755 +; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV64-NEXT: vmulhu.vx v12, v8, a0 +; RV64-NEXT: vsub.vv v16, v8, v12 +; RV64-NEXT: vsrl.vi v16, v16, 1 +; RV64-NEXT: vadd.vv v12, v16, v12 +; RV64-NEXT: vsrl.vi v8, v12, 2, v0.t +; RV64-NEXT: ret %head1 = insertelement poison, i32 1, i32 0 %one = shufflevector %head1, poison, zeroinitializer %head2 = insertelement poison, i32 7, i32 0 diff --git a/llvm/test/CodeGen/X86/divrem-by-select.ll b/llvm/test/CodeGen/X86/divrem-by-select.ll --- a/llvm/test/CodeGen/X86/divrem-by-select.ll +++ b/llvm/test/CodeGen/X86/divrem-by-select.ll @@ -6,7 +6,8 @@ ; CHECK-X64-V3-LABEL: udiv_identity_const: ; CHECK-X64-V3: # %bb.0: ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 
= [1,1] +; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax @@ -26,20 +27,16 @@ ; CHECK-X64-V4: # %bb.0: ; CHECK-X64-V4-NEXT: vpsllq $63, %xmm0, %xmm0 ; CHECK-X64-V4-NEXT: vpmovq2m %xmm0, %k1 -; CHECK-X64-V4-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1] -; CHECK-X64-V4-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} -; CHECK-X64-V4-NEXT: vpextrq $1, %xmm0, %rcx -; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rax -; CHECK-X64-V4-NEXT: xorl %edx, %edx -; CHECK-X64-V4-NEXT: divq %rcx -; CHECK-X64-V4-NEXT: movq %rax, %rcx -; CHECK-X64-V4-NEXT: vmovq %xmm0, %rsi -; CHECK-X64-V4-NEXT: vmovq %xmm1, %rax -; CHECK-X64-V4-NEXT: xorl %edx, %edx -; CHECK-X64-V4-NEXT: divq %rsi +; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rdx +; CHECK-X64-V4-NEXT: movabsq $3353953467947191203, %rax # imm = 0x2E8BA2E8BA2E8BA3 +; CHECK-X64-V4-NEXT: mulxq %rax, %rcx, %rcx ; CHECK-X64-V4-NEXT: vmovq %rcx, %xmm0 -; CHECK-X64-V4-NEXT: vmovq %rax, %xmm1 -; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-X64-V4-NEXT: vmovq %xmm1, %rdx +; CHECK-X64-V4-NEXT: mulxq %rax, %rax, %rax +; CHECK-X64-V4-NEXT: vmovq %rax, %xmm2 +; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; CHECK-X64-V4-NEXT: vpsrlq $1, %xmm0, %xmm1 {%k1} +; CHECK-X64-V4-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-X64-V4-NEXT: retq %d = select <2 x i1> %c, <2 x i64> , <2 x i64> %r = udiv <2 x i64> %x, %d @@ -51,7 +48,8 @@ ; CHECK-X64-V3-LABEL: udiv_identity_const_todo_getter_nonzero: ; CHECK-X64-V3: # %bb.0: ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq 
$1, %xmm1, %rax @@ -101,7 +99,8 @@ ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 ; CHECK-X64-V3-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 ; CHECK-X64-V3-NEXT: vpsubq %xmm3, %xmm2, %xmm2 -; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm3 = [1,1] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm3 = [1,1] +; CHECK-X64-V3-NEXT: # xmm3 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax @@ -189,7 +188,8 @@ ; CHECK-X64-V3-LABEL: udiv_indentity_partial_zero: ; CHECK-X64-V3: # %bb.0: ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax @@ -233,7 +233,8 @@ ; CHECK-X64-V3-LABEL: urem_identity_const: ; CHECK-X64-V3: # %bb.0: ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [11,11] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [11,11] +; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax @@ -277,7 +278,8 @@ ; CHECK-X64-V3-LABEL: sdiv_identity_const: ; CHECK-X64-V3: # %bb.0: ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax @@ -321,7 +323,8 @@ ; CHECK-X64-V3-LABEL: sdiv_identity_const_todo_better_nonzero: ; CHECK-X64-V3: # %bb.0: ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 -; 
CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax @@ -365,7 +368,8 @@ ; CHECK-X64-V3-LABEL: srem_identity_const: ; CHECK-X64-V3: # %bb.0: ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [11,11] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [11,11] +; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax @@ -409,7 +413,8 @@ ; CHECK-X64-V3-LABEL: udivrem_identity_const: ; CHECK-X64-V3: # %bb.0: ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax @@ -465,7 +470,8 @@ ; CHECK-X64-V3-LABEL: sdivrem_identity_const: ; CHECK-X64-V3: # %bb.0: ; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1] +; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0] ; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax