Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -1006,9 +1006,10 @@ SelectionDAG &DAG) const; /// Emit nodes that will be selected as "cmp Op0,Op1", or something - /// equivalent, for use with the given x86 condition code. - SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl, - SelectionDAG &DAG) const; + /// equivalent, possibly adjusting the condition code to a more appropriate + /// x86 form. + SDValue EmitCmp(SDValue Op0, SDValue Op1, ISD::CondCode CC, SDLoc dl, + SelectionDAG &DAG, unsigned &X86CC) const; /// Convert a comparison if required by the subtarget. SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -3504,6 +3504,24 @@ llvm_unreachable("covered switch fell through?!"); } +/// Convert an integer condition code to an x86 one in the most straightforward +/// way possible, with no optimisation attempt. +static unsigned getSimpleX86IntCC(ISD::CondCode SetCCOpcode) { + switch (SetCCOpcode) { + default: llvm_unreachable("Invalid integer condition!"); + case ISD::SETEQ: return X86::COND_E; + case ISD::SETGT: return X86::COND_G; + case ISD::SETGE: return X86::COND_GE; + case ISD::SETLT: return X86::COND_L; + case ISD::SETLE: return X86::COND_LE; + case ISD::SETNE: return X86::COND_NE; + case ISD::SETULT: return X86::COND_B; + case ISD::SETUGT: return X86::COND_A; + case ISD::SETULE: return X86::COND_BE; + case ISD::SETUGE: return X86::COND_AE; + } +} + /// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86 /// specific condition code, returning the condition code and the LHS/RHS of the /// comparison to make. 
@@ -3527,19 +3545,7 @@ } } - switch (SetCCOpcode) { - default: llvm_unreachable("Invalid integer condition!"); - case ISD::SETEQ: return X86::COND_E; - case ISD::SETGT: return X86::COND_G; - case ISD::SETGE: return X86::COND_GE; - case ISD::SETLT: return X86::COND_L; - case ISD::SETLE: return X86::COND_LE; - case ISD::SETNE: return X86::COND_NE; - case ISD::SETULT: return X86::COND_B; - case ISD::SETUGT: return X86::COND_A; - case ISD::SETULE: return X86::COND_BE; - case ISD::SETUGE: return X86::COND_AE; - } + return getSimpleX86IntCC(SetCCOpcode); } // First determine if it is required or is profitable to flip the operands. @@ -10374,10 +10380,77 @@ return SDValue(New.getNode(), 1); } -/// Emit nodes that will be selected as "cmp Op0,Op1", or something -/// equivalent. -SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, - SDLoc dl, SelectionDAG &DAG) const { +static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT T = Op.getSimpleValueType(); + SDLoc DL(Op); + unsigned Reg = 0; + unsigned size = 0; + switch(T.SimpleTy) { + default: llvm_unreachable("Invalid value type!"); + case MVT::i8: Reg = X86::AL; size = 1; break; + case MVT::i16: Reg = X86::AX; size = 2; break; + case MVT::i32: Reg = X86::EAX; size = 4; break; + case MVT::i64: + assert(Subtarget->is64Bit() && "Node not type legal!"); + Reg = X86::RAX; size = 8; + break; + } + SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg, + Op.getOperand(2), SDValue()); + SDValue Ops[] = { cpIn.getValue(0), + Op.getOperand(1), + Op.getOperand(3), + DAG.getTargetConstant(size, MVT::i8), + cpIn.getValue(1) }; + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); + MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); + SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, + Ops, T, MMO); + SDValue cpOut = + DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); + return cpOut; +} + + +/// Emit nodes that will be 
selected as "cmp Op0,Op1", or something equivalent +/// and set X86CC to the needed condition code to make use of the +/// comparison. This may not be the obvious equivalent to CC if the comparison +/// can be achieved more efficiently using a different method. +SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, ISD::CondCode CC, + SDLoc dl, SelectionDAG &DAG, + unsigned &X86CC) const { + // A cmpxchg instruction will actually perform the comparison + if ((Op0.getOpcode() == ISD::ATOMIC_CMP_SWAP && Op0.getOperand(2) == Op1) || + (Op1.getOpcode() == ISD::ATOMIC_CMP_SWAP && Op1.getOperand(2) == Op0)) { + SDValue CmpSwap; + if (Op0.getOpcode() == ISD::ATOMIC_CMP_SWAP) { + // x86's cmpxchg instruction performs the equivalent of "cmp desired, + // loaded". + CmpSwap = Op0; + CC = getSetCCSwappedOperands(CC); + } else + CmpSwap = Op1; + + // Lowering the CmpSwap node gives us a getCopyFromReg SDValue representing + // the value loaded. I.e. the ATOMIC_CMP_SWAP + X86CC = getSimpleX86IntCC(CC); + SDValue EFLAGS = LowerCMP_SWAP(CmpSwap, Subtarget, DAG); + DAG.ReplaceAllUsesOfValueWith(CmpSwap.getValue(0), EFLAGS); + + // Glue on a copy from EFLAGS to be used in place of the required Cmp. 
+ EFLAGS = DAG.getCopyFromReg(EFLAGS.getValue(1), dl, X86::EFLAGS, + MVT::i32, EFLAGS.getValue(2)); + DAG.ReplaceAllUsesOfValueWith(CmpSwap.getValue(1), EFLAGS.getValue(1)); + + return EFLAGS; + } + + bool IsFP = Op1.getSimpleValueType().isFloatingPoint(); + X86CC = TranslateX86CC(CC, IsFP, Op0, Op1, DAG); + if (X86CC == X86::COND_INVALID) + return SDValue(); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) { if (C->getAPIntValue() == 0) return EmitTest(Op0, X86CC, dl, DAG); @@ -10385,7 +10458,7 @@ if (Op0.getValueType() == MVT::i1) llvm_unreachable("Unexpected comparison operation for MVT::i1 operands"); } - + if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { // Do the comparison at i32 if it's smaller, besides the Atom case. @@ -10951,12 +11024,11 @@ return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, MVT::i1), NewCC); } - bool isFP = Op1.getSimpleValueType().isFloatingPoint(); - unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); - if (X86CC == X86::COND_INVALID) + unsigned X86CC; + SDValue EFLAGS = EmitCmp(Op0, Op1, CC, dl, DAG, X86CC); + if (!EFLAGS.getNode()) return SDValue(); - SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG); EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), EFLAGS); @@ -11384,6 +11456,14 @@ if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) { Cond = Cmp; addTest = false; + } else if (Opc == ISD::CopyFromReg && + Cmp->getOperand(0)->getOpcode() == ISD::CopyFromReg && + Cmp->getOperand(0)->getOperand(0)->getOpcode() == + X86ISD::LCMPXCHG_DAG) { + assert(cast<RegisterSDNode>(Cmp->getOperand(1))->getReg() == X86::EFLAGS); + Chain = Cmp.getValue(1); + Cond = Cmp; + addTest = false; + } else { switch (cast<ConstantSDNode>(CC)->getZExtValue()) { default: break; @@ -14363,38 +14443,6 @@ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); } -static SDValue LowerCMP_SWAP(SDValue Op, const 
X86Subtarget *Subtarget, - SelectionDAG &DAG) { - MVT T = Op.getSimpleValueType(); - SDLoc DL(Op); - unsigned Reg = 0; - unsigned size = 0; - switch(T.SimpleTy) { - default: llvm_unreachable("Invalid value type!"); - case MVT::i8: Reg = X86::AL; size = 1; break; - case MVT::i16: Reg = X86::AX; size = 2; break; - case MVT::i32: Reg = X86::EAX; size = 4; break; - case MVT::i64: - assert(Subtarget->is64Bit() && "Node not type legal!"); - Reg = X86::RAX; size = 8; - break; - } - SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg, - Op.getOperand(2), SDValue()); - SDValue Ops[] = { cpIn.getValue(0), - Op.getOperand(1), - Op.getOperand(3), - DAG.getTargetConstant(size, MVT::i8), - cpIn.getValue(1) }; - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, - Ops, T, MMO); - SDValue cpOut = - DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); - return cpOut; -} - static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT SrcVT = Op.getOperand(0).getSimpleValueType(); Index: test/CodeGen/X86/cmpxchg-i1.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/cmpxchg-i1.ll @@ -0,0 +1,82 @@ +; RUN: llc -mtriple=x86_64 -o - %s | FileCheck %s + +define i1 @try_cmpxchg(i32* %addr, i32 %desired, i32 %new) { +; CHECK-LABEL: try_cmpxchg: +; CHECK: cmpxchgl +; CHECK-NOT: cmp +; CHECK: sete %al +; CHECK: retq + %old = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst + %success = icmp eq i32 %old, %desired + ret i1 %success +} + +define void @cmpxchg_flow(i64* %addr, i64 %desired, i64 %new) { +; CHECK-LABEL: cmpxchg_flow: +; CHECK: cmpxchgq +; CHECK-NOT: cmp +; CHECK-NOT: set +; CHECK: {{jne|jeq}} + %old = cmpxchg i64* %addr, i64 %desired, i64 %new seq_cst seq_cst + %success = icmp eq i64 %old, %desired + br i1 %success, label %true, 
label %false + +true: + call void @foo() + ret void + +false: + call void @bar() + ret void +} + +define i1 @cmpxchg_arithcmp(i16* %addr, i16 %desired, i16 %new) { +; CHECK-LABEL: cmpxchg_arithcmp: +; CHECK: cmpxchgw +; CHECK-NOT: cmp +; CHECK: setbe %al +; CHECK: retq + %old = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst seq_cst + %success = icmp uge i16 %old, %desired + ret i1 %success +} + +define i1 @cmpxchg_arithcmp_swapped(i8* %addr, i8 %desired, i8 %new) { +; CHECK-LABEL: cmpxchg_arithcmp_swapped: +; CHECK: cmpxchgb +; CHECK-NOT: cmp +; CHECK: setge %al +; CHECK: retq + %old = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst seq_cst + %success = icmp sge i8 %desired, %old + ret i1 %success +} + +define i64 @cmpxchg_sext(i32* %addr, i32 %desired, i32 %new) { +; CHECK-LABEL: cmpxchg_sext: +; CHECK-DAG: cmpxchgl +; CHECK-DAG: movq $-1, %rax +; CHECK-DAG: xorl %e[[ZERO:[a-z0-9]+]], %e[[ZERO]] +; CHECK-NOT: cmpl +; CHECK: cmovneq %r[[ZERO]], %rax +; CHECK: retq + %old = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst + %success = icmp eq i32 %old, %desired + %mask = sext i1 %success to i64 + ret i64 %mask +} + +define i32 @cmpxchg_zext(i32* %addr, i32 %desired, i32 %new) { +; CHECK-LABEL: cmpxchg_zext: +; CHECK: cmpxchgl +; CHECK-NOT: cmp +; CHECK: sete [[BYTE:%[a-z0-9]+]] +; CHECK: movzbl [[BYTE]], %eax + %old = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst + %success = icmp eq i32 %old, %desired + %mask = zext i1 %success to i32 + ret i32 %mask +} + +declare void @foo() +declare void @bar()