Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -19383,13 +19383,26 @@ bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) && !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1)); - // Special case: Use min/max operations for unsigned compares. We only want - // to do this for unsigned compares if we need to flip signs or if it allows - // use to avoid an invert. + // Special case: Use min/max operations for unsigned compares. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (ISD::isUnsignedIntSetCC(Cond) && (FlipSigns || ISD::isTrueWhenEqual(Cond)) && TLI.isOperationLegal(ISD::UMIN, VT)) { + // If we have a constant operand, increment/decrement it and change the + // condition to avoid an invert. + // TODO: This could be extended to handle a non-splat constant by checking + // that each element of the constant is not the max/null value. + APInt C; + if (Cond == ISD::SETUGT && isConstantSplat(Op1, C) && !C.isMaxValue()) { + // X > C --> X >= (C+1) --> X == umax(X, C+1) + Op1 = DAG.getConstant(C + 1, dl, VT); + Cond = ISD::SETUGE; + } + if (Cond == ISD::SETULT && isConstantSplat(Op1, C) && !C.isNullValue()) { + // X < C --> X <= (C-1) --> X == umin(X, C-1) + Op1 = DAG.getConstant(C - 1, dl, VT); + Cond = ISD::SETULE; + } bool Invert = false; unsigned Opc; switch (Cond) { Index: llvm/trunk/test/CodeGen/X86/sat-add.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sat-add.ll +++ llvm/trunk/test/CodeGen/X86/sat-add.ll @@ -526,11 +526,9 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42] ; SSE41-NEXT: paddd %xmm0, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253] -; SSE41-NEXT: pminud %xmm0, %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4294967254,4294967254,4294967254,4294967254] +; SSE41-NEXT: pmaxud %xmm0, %xmm2 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm2, %xmm0 ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq %a = add <4 x i32> %x, Index: llvm/trunk/test/CodeGen/X86/vec_minmax_match.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_minmax_match.ll +++ llvm/trunk/test/CodeGen/X86/vec_minmax_match.ll @@ -223,12 +223,11 @@ ; CHECK-LABEL: wrong_pred_for_smin_with_not: ; CHECK: # %bb.0: ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm3 -; CHECK-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [4294967291,4294967291,4294967291,4294967291] -; CHECK-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm1 +; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm2 +; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [4294967291,4294967291,4294967291,4294967291] +; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 ; CHECK-NEXT: retq %not_x = xor <4 x i32> %x, %cmp = icmp ugt <4 x i32> %x, Index: llvm/trunk/test/CodeGen/X86/vec_setcc-2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_setcc-2.ll +++ llvm/trunk/test/CodeGen/X86/vec_setcc-2.ll @@ -32,17 +32,15 @@ ; SSE41-NEXT: je LBB0_3 ; SSE41-NEXT: ## %bb.1: ## %for.body.preheader ; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26,26,26,26,26] -; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25,25,25,25,25] ; SSE41-NEXT: .p2align 4, 0x90 ; SSE41-NEXT: LBB0_2: ## %for.body ; SSE41-NEXT: ## =>This Inner Loop Header: Depth=1 -; SSE41-NEXT: movdqa (%rdi,%rax), %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm3 -; SSE41-NEXT: pmaxuw %xmm0, %xmm3 -; SSE41-NEXT: pcmpeqw %xmm2, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm3 -; SSE41-NEXT: movdqa %xmm3, (%rsi,%rax) +; SSE41-NEXT: movdqa (%rdi,%rax), %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: pminuw %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqw %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, (%rsi,%rax) ; SSE41-NEXT: addq $16, %rax ; SSE41-NEXT: decl %edx ; SSE41-NEXT: jne LBB0_2 @@ -146,11 +144,9 @@ define <16 x i8> @test_ult_byte(<16 x i8> %a) { ; CHECK-LABEL: test_ult_byte: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] -; CHECK-NEXT: pmaxub %xmm0, %xmm1 +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; CHECK-NEXT: pminub %xmm0, %xmm1 ; CHECK-NEXT: pcmpeqb %xmm1, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retq entry: %icmp = icmp ult <16 x i8> %a, @@ -187,11 +183,9 @@ define <16 x i1> @ugt_v16i8_splat(<16 x i8> %x) { ; CHECK-LABEL: ugt_v16i8_splat: ; CHECK: ## %bb.0: -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42] -; CHECK-NEXT: pminub %xmm0, %xmm1 +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43] +; CHECK-NEXT: pmaxub %xmm0, %xmm1 ; CHECK-NEXT: pcmpeqb %xmm1, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retq %cmp = icmp ugt <16 x i8> %x, ret <16 x i1> %cmp @@ -206,11 +200,9 @@ ; ; SSE41-LABEL: ugt_v8i16_splat: ; SSE41: ## %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [242,242,242,242,242,242,242,242] -; SSE41-NEXT: pminuw %xmm0, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [243,243,243,243,243,243,243,243] +; SSE41-NEXT: pmaxuw %xmm0, %xmm1 ; SSE41-NEXT: pcmpeqw %xmm1, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq %cmp = icmp ugt <8 x i16> %x, ret <8 x i1> %cmp @@ -225,11 +217,9 @@ ; ; SSE41-LABEL: ugt_v4i32_splat: ; SSE41: ## %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967254,4294967254,4294967254,4294967254] -; SSE41-NEXT: pminud %xmm0, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967255,4294967255,4294967255,4294967255] +; SSE41-NEXT: pmaxud %xmm0, %xmm1 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq %cmp = icmp ugt <4 x i32> %x, ret <4 x i1> %cmp @@ -341,11 +331,9 @@ define <16 x i1> @ult_v16i8_splat(<16 x i8> %x) { ; CHECK-LABEL: ult_v16i8_splat: ; CHECK: ## %bb.0: -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42] -; CHECK-NEXT: pmaxub %xmm0, %xmm1 +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41] +; CHECK-NEXT: pminub %xmm0, %xmm1 ; CHECK-NEXT: pcmpeqb %xmm1, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retq %cmp = icmp ult <16 x i8> %x, ret <16 x i1> %cmp @@ -361,11 +349,9 @@ ; ; SSE41-LABEL: ult_v8i16_splat: ; SSE41: ## %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [242,242,242,242,242,242,242,242] -; SSE41-NEXT: pmaxuw %xmm0, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [241,241,241,241,241,241,241,241] +; SSE41-NEXT: pminuw %xmm0, %xmm1 ; SSE41-NEXT: pcmpeqw %xmm1, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq %cmp = icmp ult <8 x i16> %x, ret <8 x i1> %cmp @@ -382,11 +368,9 @@ ; ; SSE41-LABEL: ult_v4i32_splat: ; SSE41: ## %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967254,4294967254,4294967254,4294967254] -; SSE41-NEXT: pmaxud %xmm0, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967253,4294967253,4294967253,4294967253] +; SSE41-NEXT: pminud %xmm0, %xmm1 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq %cmp = icmp ult <4 x i32> %x, ret <4 x i1> %cmp @@ -494,6 +478,30 @@ ret <2 x i1> %cmp } +; This should be simplified before we reach lowering, but +; make sure that we are not getting it wrong by underflowing. + +define <4 x i1> @ult_v4i32_splat_0_simplify(<4 x i32> %x) { +; CHECK-LABEL: ult_v4i32_splat_0_simplify: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq + %cmp = icmp ult <4 x i32> %x, + ret <4 x i1> %cmp +} + +; This should be simplified before we reach lowering, but +; make sure that we are not getting it wrong by overflowing. + +define <4 x i1> @ugt_v4i32_splat_maxval_simplify(<4 x i32> %x) { +; CHECK-LABEL: ugt_v4i32_splat_maxval_simplify: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq + %cmp = icmp ugt <4 x i32> %x, + ret <4 x i1> %cmp +} + define <4 x i1> @ugt_v4i32_nonsplat(<4 x i32> %x) { ; SSE2-LABEL: ugt_v4i32_nonsplat: ; SSE2: ## %bb.0: @@ -524,11 +532,9 @@ ; ; SSE41-LABEL: ugt_v4i32_splat_commute: ; SSE41: ## %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4,4,4,4] -; SSE41-NEXT: pmaxud %xmm0, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3,3,3,3] +; SSE41-NEXT: pminud %xmm0, %xmm1 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq %cmp = icmp ugt <4 x i32> , %x ret <4 x i1> %cmp @@ -549,11 +555,9 @@ ; SSE41-LABEL: PR39859: ; SSE41: ## %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [42,42,42,42,42,42,42,42] -; SSE41-NEXT: pminuw %xmm0, %xmm3 -; SSE41-NEXT: pcmpeqw %xmm0, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [43,43,43,43,43,43,43,43] +; SSE41-NEXT: pmaxuw %xmm2, %xmm0 +; SSE41-NEXT: pcmpeqw %xmm2, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq