Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -30732,12 +30732,7 @@
   if (!CmpLHS.hasOneUse())
     return SDValue();
 
-  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
-  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
-    return SDValue();
-
-  const unsigned Opc = CmpLHS.getOpcode();
-
+  unsigned Opc = CmpLHS.getOpcode();
   if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
     return SDValue();
 
@@ -30750,6 +30745,35 @@
   if (Opc == ISD::ATOMIC_LOAD_SUB)
     Addend = -Addend;
 
+  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
+  if (!CmpRHSC)
+    return SDValue();
+
+  APInt Comparison = CmpRHSC->getAPIntValue();
+
+  // If the addend is the negation of the comparison value, then we can do
+  // a full comparison by emitting the atomic arithmetic as a locked sub.
+  if (Comparison == -Addend) {
+    // The CC is fine, but we need to rewrite the LHS of the comparison as an
+    // atomic sub.
+    auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
+    auto AtomicSub = DAG.getAtomic(
+        ISD::ATOMIC_LOAD_SUB, SDLoc(CmpLHS), CmpLHS.getValueType(),
+        /*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
+        /*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
+        AN->getMemOperand());
+    auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG);
+    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
+                                  DAG.getUNDEF(CmpLHS.getValueType()));
+    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
+    return LockOp;
+  }
+
+  // We can handle comparisons with zero in a number of cases by manipulating
+  // the CC used.
+  if (!Comparison.isNullValue())
+    return SDValue();
+
   if (CC == X86::COND_S && Addend == 1)
     CC = X86::COND_LE;
   else if (CC == X86::COND_NS && Addend == 1)
Index: llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -192,4 +192,90 @@
   ret i8 %s2
 }
 
+define i8 @test_sub_1_cmp_1_setcc_eq(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_cmp_1_setcc_eq:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp eq i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_sub_1_cmp_1_setcc_ne(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_cmp_1_setcc_ne:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_sub_1_cmp_1_setcc_ugt(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_cmp_1_setcc_ugt:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp ugt i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+; FIXME: This test canonicalizes in a way that hides the fact that the
+; comparison can be folded into the atomic subtract.
+define i8 @test_sub_1_cmp_1_setcc_sle(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_cmp_1_setcc_sle:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movq $-1, %rax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    cmpq $2, %rax
+; CHECK-NEXT:    setl %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp sle i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_sub_3_cmp_3_setcc_eq(i64* %p) #0 {
+; CHECK-LABEL: test_sub_3_cmp_3_setcc_eq:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lock subq $3, (%rdi)
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 3 seq_cst
+  %tmp1 = icmp eq i64 %tmp0, 3
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+; FIXME: This test canonicalizes in a way that hides the fact that the
+; comparison can be folded into the atomic subtract.
+define i8 @test_sub_3_cmp_3_setcc_uge(i64* %p) #0 {
+; CHECK-LABEL: test_sub_3_cmp_3_setcc_uge:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movq $-3, %rax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    cmpq $2, %rax
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 3 seq_cst
+  %tmp1 = icmp uge i64 %tmp0, 3
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
 attributes #0 = { nounwind }
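A minimal C++ sketch of the source-level pattern this fold targets (the
function name and the use of std::atomic are illustrative, not part of the
patch). With the patch applied, comparing the result of fetch_sub against its
own operand compiles to a single `lock decq` (or `lock subq`) plus a setcc,
as in test_sub_1_cmp_1_setcc_eq above, rather than a `lock xaddq` followed by
a separate `cmpq`:

    #include <atomic>

    // fetch_sub returns the value *before* the subtraction, so the result
    // equals 1 exactly when this call dropped the count from 1 to 0.
    bool release_last_ref(std::atomic<long> &refcount) {
      return refcount.fetch_sub(1, std::memory_order_seq_cst) == 1;
    }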