Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1717,12 +1717,12 @@ EVT VT = Sel.getValueType(); SDLoc DL(Sel); SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1); - assert((isConstantOrConstantVector(NewCT) || + assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) || isConstantFPBuildVectorOrConstantFP(NewCT)) && "Failed to constant fold a binop with constant operands"); SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1); - assert((isConstantOrConstantVector(NewCF) || + assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) || isConstantFPBuildVectorOrConstantFP(NewCF)) && "Failed to constant fold a binop with constant operands"); @@ -2417,6 +2417,9 @@ if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -2482,9 +2485,6 @@ if (SDValue DivRem = useDivRem(N)) return DivRem; - if (SDValue V = simplifyDivRem(N, DAG)) - return V; - return SDValue(); } @@ -2508,6 +2508,9 @@ N0C, N1C)) return Folded; + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -2553,9 +2556,6 @@ if (SDValue DivRem = useDivRem(N)) return DivRem; - if (SDValue V = simplifyDivRem(N, DAG)) - return V; - return SDValue(); } @@ -2575,6 +2575,9 @@ if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) return Folded; + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -2629,9 +2632,6 @@ if (SDValue DivRem = useDivRem(N)) return DivRem.getValue(1); - if (SDValue V = simplifyDivRem(N, DAG)) - return V; - return SDValue(); } Index: 
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3657,6 +3657,12 @@ if (Cst1->isOpaque() || Cst2->isOpaque()) return SDValue(); + // Division/remainder with a zero divisor is undefined behavior. + if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV || + Opcode == ISD::SREM || Opcode == ISD::UREM) && + Cst2->isNullValue()) + return getUNDEF(VT); + std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), Cst2->getAPIntValue()); if (!Folded.second) Index: llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll +++ llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll @@ -69,3 +69,81 @@ ret <4 x i32> %div } +; Make sure we handle undef before we try to fold constants from the select with the 0. +; These used to assert because we can't fold div/rem-by-0 into APInt. + +define i32 @sel_urem0(i1 %cond) { +; CHECK-LABEL: sel_urem0: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, i32 23, i32 234 + %rem = urem i32 %sel, 0 + ret i32 %rem +} + +define i32 @sel_srem0(i1 %cond) { +; CHECK-LABEL: sel_srem0: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, i32 23, i32 234 + %rem = srem i32 %sel, 0 + ret i32 %rem +} + +define i32 @sel_udiv0(i1 %cond) { +; CHECK-LABEL: sel_udiv0: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, i32 23, i32 234 + %div = udiv i32 %sel, 0 + ret i32 %div +} + +define i32 @sel_sdiv0(i1 %cond) { +; CHECK-LABEL: sel_sdiv0: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, i32 23, i32 234 + %div = sdiv i32 %sel, 0 + ret i32 %div +} + +; Make sure we handle undef before we try to fold constants from the select with the vector 0. +; These used to assert because we can't fold div/rem-by-0 into APInt. 
+ +define <4 x i32> @sel_urem0_vec(i1 %cond) { +; CHECK-LABEL: sel_urem0_vec: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, <4 x i32> , <4 x i32> + %rem = urem <4 x i32> %sel, zeroinitializer + ret <4 x i32> %rem +} + +define <4 x i32> @sel_srem0_vec(i1 %cond) { +; CHECK-LABEL: sel_srem0_vec: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, <4 x i32> , <4 x i32> + %rem = srem <4 x i32> %sel, zeroinitializer + ret <4 x i32> %rem +} + +define <4 x i32> @sel_udiv0_vec(i1 %cond) { +; CHECK-LABEL: sel_udiv0_vec: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, <4 x i32> , <4 x i32> + %div = udiv <4 x i32> %sel, zeroinitializer + ret <4 x i32> %div +} + +define <4 x i32> @sel_sdiv0_vec(i1 %cond) { +; CHECK-LABEL: sel_sdiv0_vec: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %sel = select i1 %cond, <4 x i32> , <4 x i32> + %div = sdiv <4 x i32> %sel, zeroinitializer + ret <4 x i32> %div +} + Index: llvm/trunk/test/CodeGen/X86/pr30693.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr30693.ll +++ llvm/trunk/test/CodeGen/X86/pr30693.ll @@ -1,213 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; PR30693 -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -@var_35 = external local_unnamed_addr global i32, align 4 -@var_14 = external local_unnamed_addr global i16, align 2 - -; Function Attrs: uwtable -define void @_Z3foov() local_unnamed_addr #0 { -; CHECK-LABEL: _Z3foov: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: movslq {{.*}}(%rip), %rax -; CHECK-NEXT: movzwl {{.*}}(%rip), %esi -; CHECK-NEXT: imull %eax, %esi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1 -; CHECK-NEXT: jmp .LBB0_1 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # %for.cond.cleanup477.loopexit -; CHECK-NEXT: # in Loop: 
Header=BB0_1 Depth=1 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 -; CHECK-NEXT: vmovdqu %ymm2, (%rax) -; CHECK-NEXT: .LBB0_1: # %vector.ph1520 -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_3 Depth 2 -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: idivl %edi -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: idivq %rdi -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jne .LBB0_8 -; CHECK-NEXT: # BB#2: # %vector.body1512.prol.loopexit -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 -; CHECK-NEXT: vpbroadcastw %xmm0, %xmm3 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_3: # %for.cond74.loopexit.us -; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vmovdqu %xmm0, (%rax) -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: jne .LBB0_3 -; CHECK-NEXT: # BB#4: # %for.cond337.preheader.lr.ph -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: vmovups %ymm1, (%rax) -; CHECK-NEXT: vmovdqu %ymm2, (%rax) -; CHECK-NEXT: vmovdqu %xmm3, (%rax) -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jg .LBB0_9 -; CHECK-NEXT: # BB#5: # %for.cond385.preheader -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jle .LBB0_10 -; CHECK-NEXT: # BB#6: # %for.cond399.preheader.lr.ph.us.1 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jle .LBB0_7 -; CHECK-NEXT: .LBB0_9: # %for.cond337.preheader.us.preheader -; CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: .LBB0_8: # %vector.body1512.prol.preheader -; CHECK-NEXT: imull %ecx, %esi -; CHECK-NEXT: imull %eax, %esi -; CHECK-NEXT: imull %ecx, %esi -; CHECK-NEXT: imull %eax, %esi -; CHECK-NEXT: addl $36611, %esi # imm = 0x8F03 -; CHECK-NEXT: vmovd %esi, %xmm0 -; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 -; 
CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: .LBB0_10: # %for.cond392.preheader.preheader -; CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: vmovdqu %ymm0, (%rax) -; CHECK-NEXT: vmovdqu %ymm0, (%rax) -entry: - %0 = load i32, i32* @var_35, align 4 - %1 = load i16, i16* @var_14, align 2 - %conv34 = zext i16 %1 to i64 - %conv37 = ashr exact i64 undef, 32 - %sub316 = add i16 undef, -7198 - %cmp339981 = icmp sgt i32 undef, 0 - %cmp401989 = icmp sgt i32 undef, 0 - %cmp443994 = icmp sgt i32 undef, 0 - %lcmp.mod = icmp eq i64 undef, 0 - %broadcast.splat1461 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer - %broadcast.splat1357 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer - %broadcast.splat1435 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer - %broadcast.splat1409 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer - br label %for.cond11.preheader - -for.cond11.preheader: ; preds = %for.cond.cleanup477.loopexit, %entry - %div = sdiv i32 0, 0 - %mul31 = mul nsw i32 %div, %0 - %conv32 = sext i32 %mul31 to i64 - %div40 = sdiv i64 0, 0 - %div41 = sdiv i32 0, 0 - %conv42 = sext i32 %div41 to i64 - %mul43 = mul nsw i64 %conv32, %conv34 - %mul44 = mul i64 %mul43, %div40 - %mul45 = mul i64 %mul44, %conv37 - %mul46 = mul i64 %mul45, %conv42 - %add48 = add nsw i64 %mul46, 36611 - %conv49 = trunc i64 %add48 to i16 - br label %vector.ph1520 - -vector.ph1520: ; preds = %for.cond11.preheader - %broadcast.splatinsert1531 = insertelement <32 x i16> undef, i16 %conv49, i32 0 - %broadcast.splat1532 = shufflevector <32 x i16> %broadcast.splatinsert1531, <32 x i16> undef, <32 x i32> zeroinitializer - br i1 %lcmp.mod, label %vector.body1512.prol.loopexit, label %vector.body1512.prol.preheader - -vector.body1512.prol.preheader: ; preds = %vector.ph1520 - store <32 x i16> %broadcast.splat1532, <32 
x i16>* undef, align 8, !tbaa !1 - unreachable - -vector.body1512.prol.loopexit: ; preds = %vector.ph1520 - %add318 = add i16 %sub316, 0 - %2 = insertelement <16 x i16> undef, i16 %add318, i32 7 - %3 = insertelement <16 x i16> %2, i16 %add318, i32 8 - %4 = insertelement <16 x i16> %3, i16 %add318, i32 9 - %5 = insertelement <16 x i16> %4, i16 %add318, i32 10 - %6 = insertelement <16 x i16> %5, i16 %add318, i32 11 - %7 = insertelement <16 x i16> %6, i16 %add318, i32 12 - %8 = insertelement <16 x i16> %7, i16 %add318, i32 13 - %9 = insertelement <16 x i16> %8, i16 %add318, i32 14 - %10 = insertelement <16 x i16> undef, i16 %add318, i32 7 - %11 = insertelement <16 x i16> %10, i16 %add318, i32 8 - %12 = insertelement <16 x i16> %11, i16 %add318, i32 9 - %13 = insertelement <16 x i16> %12, i16 %add318, i32 10 - %14 = insertelement <16 x i16> %13, i16 %add318, i32 11 - %15 = insertelement <16 x i16> %14, i16 %add318, i32 12 - %16 = insertelement <16 x i16> %15, i16 %add318, i32 13 - %17 = insertelement <16 x i16> %16, i16 %add318, i32 14 - %18 = insertelement <16 x i16> %17, i16 %add318, i32 15 - %19 = insertelement <8 x i16> undef, i16 %add318, i32 7 - br label %for.cond74.loopexit.us - -for.cond337.preheader.lr.ph: ; preds = %for.cond130.preheader.loopexit - br i1 %cmp339981, label %for.cond337.preheader.us.preheader, label %for.cond.cleanup335 - -for.cond337.preheader.us.preheader: ; preds = %for.cond337.preheader.lr.ph - store <32 x i16> %broadcast.splat1461, <32 x i16>* undef, align 4, !tbaa !1 - unreachable - -for.cond74.loopexit.us: ; preds = %for.cond74.loopexit.us, %vector.body1512.prol.loopexit - store <8 x i16> zeroinitializer, <8 x i16>* undef, align 2, !tbaa !1 - %cmp76.us = icmp slt i64 undef, undef - br i1 %cmp76.us, label %for.cond74.loopexit.us, label %for.cond130.preheader.loopexit - -for.cond130.preheader.loopexit: ; preds = %for.cond74.loopexit.us - store <16 x i16> zeroinitializer, <16 x i16>* undef, align 2, !tbaa !1 - store <16 x i16> %18, <16 x 
i16>* undef, align 2, !tbaa !1 - store <8 x i16> %19, <8 x i16>* undef, align 2, !tbaa !1 - br label %for.cond337.preheader.lr.ph - -for.cond.cleanup335: ; preds = %for.cond337.preheader.lr.ph - br label %for.cond380.preheader - -for.cond380.preheader: ; preds = %for.cond.cleanup335 - br label %for.cond385.preheader - -for.cond.cleanup378.loopexit: ; preds = %for.cond.cleanup388 - br label %for.cond481.preheader - -for.cond385.preheader: ; preds = %for.cond380.preheader - br i1 %cmp443994, label %for.cond392.preheader.us.preheader, label %for.cond392.preheader.preheader - -for.cond392.preheader.preheader: ; preds = %for.cond385.preheader - store <32 x i16> %broadcast.splat1435, <32 x i16>* undef, align 4, !tbaa !1 - store <32 x i16> %broadcast.splat1409, <32 x i16>* undef, align 4, !tbaa !1 - unreachable - -for.cond392.preheader.us.preheader: ; preds = %for.cond385.preheader - br label %for.cond399.preheader.lr.ph.us.1 - -for.cond.cleanup388: ; preds = %for.cond399.preheader.lr.ph.us.1 - br label %for.cond.cleanup378.loopexit - -for.cond481.preheader: ; preds = %for.cond.cleanup486, %for.cond.cleanup378.loopexit - br label %for.cond.cleanup486 - -for.cond.cleanup477.loopexit: ; preds = %for.cond.cleanup486 - store <8 x i32> , <8 x i32>* undef, align 4, !tbaa !5 - br label %for.cond11.preheader - -for.cond.cleanup486: ; preds = %for.cond481.preheader - br i1 undef, label %for.cond481.preheader, label %for.cond.cleanup477.loopexit - -for.cond399.preheader.lr.ph.us.1: ; preds = %for.cond392.preheader.us.preheader - br i1 %cmp401989, label %for.cond399.preheader.us.us.1.preheader, label %for.cond.cleanup388 - -for.cond399.preheader.us.us.1.preheader: ; preds = %for.cond399.preheader.lr.ph.us.1 - store <32 x i16> %broadcast.splat1357, <32 x i16>* undef, align 4, !tbaa !1 - unreachable -} - -attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.ident = !{!0} - -!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git ef66d4d58b9a2c6b3d31bbaf3ed2a70a9754a137) (http://llvm.org/git/llvm.git 5e661621191d6133a12effa103bfb2cbbdbb35ad)"} -!1 = !{!2, !2, i64 0} -!2 = !{!"short", !3, i64 0} -!3 = !{!"omnipotent char", !4, i64 0} -!4 = !{!"Simple C++ TBAA"} -!5 = !{!6, !6, i64 0} -!6 = !{!"int", !3, i64 0}