Index: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h @@ -740,6 +740,9 @@ return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops); } + /// Return true if the result of this operation is always undefined. + bool isUndef(unsigned Opcode, ArrayRef<SDValue> Ops); + /// Return an UNDEF node. UNDEF does not have a useful SDLoc. SDValue getUNDEF(EVT VT) { return getNode(ISD::UNDEF, SDLoc(), VT); Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2524,15 +2524,7 @@ EVT VT = N->getValueType(0); SDLoc DL(N); - // X / undef -> undef - // X % undef -> undef - if (N1.isUndef()) - return N1; - - // X / 0 --> undef - // X % 0 --> undef - // We don't need to preserve faults! - if (isNullConstantOrNullSplatConstant(N1)) + if (DAG.isUndef(N->getOpcode(), {N0, N1})) return DAG.getUNDEF(VT); // undef / X -> 0 Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3695,12 +3695,6 @@ if (Cst1->isOpaque() || Cst2->isOpaque()) return SDValue(); - // Division/remainder with a zero divisor is undefined behavior. 
- if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV || - Opcode == ISD::SREM || Opcode == ISD::UREM) && - Cst2->isNullValue()) - return getUNDEF(VT); - std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), Cst2->getAPIntValue()); if (!Folded.second) @@ -3728,6 +3722,30 @@ GA->getOffset() + uint64_t(Offset)); } +bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { + switch (Opcode) { + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: { + // If a divisor is zero/undef or any element of a divisor vector is + // zero/undef, the whole op is undef. + assert(Ops.size() == 2 && "Div/rem should have 2 operands"); + SDValue Divisor = Ops[1]; + if (Divisor.isUndef() || isNullConstant(Divisor)) + return true; + + return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && + any_of(Divisor->op_values(), + [](SDValue V) { return V.isUndef() || isNullConstant(V); }); + // TODO: Handle signed overflow. + } + // TODO: Handle oversized shifts. + default: + return false; + } +} + SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *Cst1, SDNode *Cst2) { @@ -3737,6 +3755,9 @@ if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); + if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)})) + return getUNDEF(VT); + // Handle the case of two scalars. if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { @@ -3804,6 +3825,9 @@ if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); + if (isUndef(Opcode, Ops)) + return getUNDEF(VT); + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? 
if (!VT.isVector()) return SDValue(); Index: llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll +++ llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll @@ -152,7 +152,6 @@ define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: sdiv0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = ; CHECK-NEXT: retq %zero = and <4 x i32> %x, %some_ones = or <4 x i32> %zero, @@ -163,7 +162,6 @@ define <4 x i32> @udiv0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: udiv0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = ; CHECK-NEXT: retq %div = udiv <4 x i32> , ret <4 x i32> %div @@ -172,7 +170,6 @@ define <4 x i32> @urem0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: urem0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = ; CHECK-NEXT: retq %zero = and <4 x i32> %x, %some_ones = or <4 x i32> %zero, @@ -183,8 +180,6 @@ define <4 x i32> @srem0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: srem0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movl $-2, %eax -; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: retq %rem = srem <4 x i32> , ret <4 x i32> %rem Index: llvm/trunk/test/CodeGen/X86/vec_sdiv_to_shift.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_sdiv_to_shift.ll +++ llvm/trunk/test/CodeGen/X86/vec_sdiv_to_shift.ll @@ -184,27 +184,15 @@ ret <16 x i16> %a0 } -; TODO: The div-by-0 lanes are folded away, so we use scalar ops. Would it be better to keep this in the vector unit? +; Div-by-0 in any lane is UB. 
define <4 x i32> @sdiv_non_splat(<4 x i32> %x) { ; SSE-LABEL: sdiv_non_splat: ; SSE: # BB#0: -; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $31, %ecx -; SSE-NEXT: addl %eax, %ecx -; SSE-NEXT: sarl %ecx -; SSE-NEXT: movd %ecx, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sdiv_non_splat: ; AVX: # BB#0: -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: shrl $31, %ecx -; AVX-NEXT: addl %eax, %ecx -; AVX-NEXT: sarl %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 ; AVX-NEXT: retq %y = sdiv <4 x i32> %x, ret <4 x i32> %y