Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1717,12 +1717,12 @@ EVT VT = Sel.getValueType(); SDLoc DL(Sel); SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1); - assert((isConstantOrConstantVector(NewCT) || + assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) || isConstantFPBuildVectorOrConstantFP(NewCT)) && "Failed to constant fold a binop with constant operands"); SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1); - assert((isConstantOrConstantVector(NewCF) || + assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) || isConstantFPBuildVectorOrConstantFP(NewCF)) && "Failed to constant fold a binop with constant operands"); @@ -2417,6 +2417,9 @@ if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -2482,9 +2485,6 @@ if (SDValue DivRem = useDivRem(N)) return DivRem; - if (SDValue V = simplifyDivRem(N, DAG)) - return V; - return SDValue(); } @@ -2508,6 +2508,9 @@ N0C, N1C)) return Folded; + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -2553,9 +2556,6 @@ if (SDValue DivRem = useDivRem(N)) return DivRem; - if (SDValue V = simplifyDivRem(N, DAG)) - return V; - return SDValue(); } @@ -2575,6 +2575,9 @@ if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) return Folded; + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -2629,9 +2632,6 @@ if (SDValue DivRem = useDivRem(N)) return DivRem.getValue(1); - if (SDValue V = simplifyDivRem(N, DAG)) - return V; - return SDValue(); } Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
=================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3657,6 +3657,12 @@ if (Cst1->isOpaque() || Cst2->isOpaque()) return SDValue(); + // Division/remainder with a zero divisor is undefined behavior. + if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV || + Opcode == ISD::SREM || Opcode == ISD::UREM) && + Cst2->isNullValue()) + return getUNDEF(VT); + std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), Cst2->getAPIntValue()); if (!Folded.second) Index: test/CodeGen/X86/div-rem-simplify.ll =================================================================== --- test/CodeGen/X86/div-rem-simplify.ll +++ test/CodeGen/X86/div-rem-simplify.ll @@ -69,3 +69,81 @@ ret <4 x i32> %div } +; Make sure we handle undef before we try to fold constants from the select with the 0. +; These used to assert because we can't fold div/rem-by-0 into APInt. + +define i32 @sel_urem0(i1 %cond) { +; CHECK-LABEL: sel_urem0: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, i32 23, i32 234 + %rem = urem i32 %sel, 0 + ret i32 %rem +} + +define i32 @sel_srem0(i1 %cond) { +; CHECK-LABEL: sel_srem0: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, i32 23, i32 234 + %rem = srem i32 %sel, 0 + ret i32 %rem +} + +define i32 @sel_udiv0(i1 %cond) { +; CHECK-LABEL: sel_udiv0: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, i32 23, i32 234 + %div = udiv i32 %sel, 0 + ret i32 %div +} + +define i32 @sel_sdiv0(i1 %cond) { +; CHECK-LABEL: sel_sdiv0: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, i32 23, i32 234 + %div = sdiv i32 %sel, 0 + ret i32 %div +} + +; Make sure we handle undef before we try to fold constants from the select with the vector 0. +; These used to assert because we can't fold div/rem-by-0 into APInt. 
+ +define <4 x i32> @sel_urem0_vec(i1 %cond) { +; CHECK-LABEL: sel_urem0_vec: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, <4 x i32> <i32 -1, i32 0, i32 1, i32 2>, <4 x i32> <i32 11, i32 12, i32 13, i32 14> + %rem = urem <4 x i32> %sel, zeroinitializer + ret <4 x i32> %rem +} + +define <4 x i32> @sel_srem0_vec(i1 %cond) { +; CHECK-LABEL: sel_srem0_vec: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, <4 x i32> <i32 -1, i32 0, i32 1, i32 2>, <4 x i32> <i32 11, i32 12, i32 13, i32 14> + %rem = srem <4 x i32> %sel, zeroinitializer + ret <4 x i32> %rem +} + +define <4 x i32> @sel_udiv0_vec(i1 %cond) { +; CHECK-LABEL: sel_udiv0_vec: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, <4 x i32> <i32 -1, i32 0, i32 1, i32 2>, <4 x i32> <i32 11, i32 12, i32 13, i32 14> + %div = udiv <4 x i32> %sel, zeroinitializer + ret <4 x i32> %div +} + +define <4 x i32> @sel_sdiv0_vec(i1 %cond) { +; CHECK-LABEL: sel_sdiv0_vec: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, <4 x i32> <i32 -1, i32 0, i32 1, i32 2>, <4 x i32> <i32 11, i32 12, i32 13, i32 14> + %div = sdiv <4 x i32> %sel, zeroinitializer + ret <4 x i32> %div +} + Index: test/CodeGen/X86/pr30693.ll =================================================================== --- test/CodeGen/X86/pr30693.ll +++ test/CodeGen/X86/pr30693.ll @@ -11,70 +11,51 @@ define void @_Z3foov() local_unnamed_addr #0 { ; CHECK-LABEL: _Z3foov: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movslq {{.*}}(%rip), %rax -; CHECK-NEXT: movzwl {{.*}}(%rip), %esi -; CHECK-NEXT: imull %eax, %esi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1 -; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [36611,36611,36611,36611,36611,36611,36611,36611,36611,36611,36611,36611,36611,36611,36611,36611] +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2 +; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_7: # %for.cond.cleanup477.loopexit -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; 
CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 -; CHECK-NEXT: vmovdqu %ymm2, (%rax) -; CHECK-NEXT: .LBB0_1: # %vector.ph1520 +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: vmovups %ymm3, (%rax) +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: jne .LBB0_9 +; CHECK-NEXT: .LBB0_2: # %vector.body1512.prol.loopexit ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_3 Depth 2 -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: idivl %edi -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: idivq %rdi -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jne .LBB0_8 -; CHECK-NEXT: # BB#2: # %vector.body1512.prol.loopexit -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 -; CHECK-NEXT: vpbroadcastw %xmm0, %xmm3 +; CHECK-NEXT: vpbroadcastw %xmm0, %ymm4 +; CHECK-NEXT: vpbroadcastw %xmm0, %xmm5 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %for.cond74.loopexit.us -; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vmovdqu %xmm0, (%rax) -; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: vmovups %xmm1, (%rax) +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_3 ; CHECK-NEXT: # BB#4: # %for.cond337.preheader.lr.ph -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: vmovups %ymm1, (%rax) -; CHECK-NEXT: vmovdqu %ymm2, (%rax) -; CHECK-NEXT: vmovdqu %xmm3, (%rax) +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: vmovups %ymm2, (%rax) +; CHECK-NEXT: vmovdqu %ymm4, (%rax) +; CHECK-NEXT: vmovdqu %xmm5, (%rax) ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jg .LBB0_9 ; CHECK-NEXT: # BB#5: # %for.cond385.preheader -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jle .LBB0_10 -; CHECK-NEXT: # BB#6: # 
%for.cond399.preheader.lr.ph.us.1 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: jle .LBB0_6 +; CHECK-NEXT: # BB#8: # %for.cond399.preheader.lr.ph.us.1 +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jle .LBB0_7 -; CHECK-NEXT: .LBB0_9: # %for.cond337.preheader.us.preheader -; CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: .LBB0_8: # %vector.body1512.prol.preheader -; CHECK-NEXT: imull %ecx, %esi -; CHECK-NEXT: imull %eax, %esi -; CHECK-NEXT: imull %ecx, %esi -; CHECK-NEXT: imull %eax, %esi -; CHECK-NEXT: addl $36611, %esi # imm = 0x8F03 -; CHECK-NEXT: vmovd %esi, %xmm0 -; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 +; CHECK-NEXT: .LBB0_9: # %vector.body1512.prol.preheader ; CHECK-NEXT: vmovdqu %ymm0, (%rax) ; CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: .LBB0_10: # %for.cond392.preheader.preheader +; CHECK-NEXT: .LBB0_6: # %for.cond392.preheader.preheader ; CHECK-NEXT: vmovdqu %ymm0, (%rax) ; CHECK-NEXT: vmovdqu %ymm0, (%rax) ; CHECK-NEXT: vmovdqu %ymm0, (%rax)