Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -58,6 +58,11 @@ SBCS, ANDS, + // Conditional compares. Operands: left,right,falsecc,cc,flags + CCMP, + CCMN, + FCCMP, + // Floating point comparison FCMP, @@ -494,6 +499,8 @@ bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + + bool shouldNormalizeToSelectSequence(LLVMContext&, EVT) const override; }; namespace AArch64 { Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -75,6 +75,9 @@ cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +/// Value type used for condition codes. +static const MVT MVT_CC = MVT::i32; + AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -764,6 +767,8 @@ case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; + case AArch64ISD::CCMP: return "AArch64ISD::CCMP"; + case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; case AArch64ISD::FMIN: return "AArch64ISD::FMIN"; case AArch64ISD::FMAX: return "AArch64ISD::FMAX"; @@ -1086,7 +1091,7 @@ EVT VT = LHS.getValueType(); if (VT.isFloatingPoint()) - return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::FCMP, dl, MVT_CC, LHS, RHS); // The CMP instruction is just an alias for SUBS, and representing it as // SUBS means that it's possible to get CSE with subtract operations. @@ -1120,10 +1125,130 @@ LHS = LHS.getOperand(0); } - return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) + return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) .getValue(1); } +static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue CCOp, + SDValue Condition, unsigned NZCV, + SDLoc DL, SelectionDAG &DAG) { + unsigned Opcode; + if (LHS.getValueType().isFloatingPoint()) + Opcode = AArch64ISD::FCCMP; + else if (RHS.getOpcode() == ISD::SUB && + isa(RHS.getOperand(0)) && + cast(RHS.getOperand(0))->isNullValue() && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // See emitComparison on why we can only do this for SETEQ and SETNE + Opcode = AArch64ISD::CCMN; + RHS = RHS.getOperand(1); + } else + Opcode = AArch64ISD::CCMP; + + SDValue NZCVOp = DAG.getConstant(NZCV, MVT::i32); + return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); +} + +/// Returns true if @p Val is a tree of AND/OR/SETCC operations. +static bool isConjunctionDisjunctionTree(const SDValue Val) { + if (!Val.hasOneUse()) + return false; + if (Val->getOpcode() == ISD::SETCC) + return true; + if (Val->getOpcode() == ISD::AND || Val->getOpcode() == ISD::OR) { + SDValue O0 = Val->getOperand(0); + SDValue O1 = Val->getOperand(1); + return isConjunctionDisjunctionTree(O0) && isConjunctionDisjunctionTree(O1); + } + return false; +} + +/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain +/// of CCMP/CFCMP ops. For example (SETCC_0 & SETCC_1) with condition cond0 and +/// cond1 can be transformed into "CMP; CCMP" with CCMP executing on cond_0 +/// and setting flags to inversed(cond_1) otherwise. +/// This recursive function produces DAG nodes that produce condition flags +/// suitable to determine the truth value of @p Val (which is AND/OR/SETCC) +/// by testing the result for the condition set to @p OutCC. If @p Negate is +/// set the opposite truth value is produced. If @p CCOp and @p Condition are +/// given then conditional comparison are created so that false is reported +/// when they are false. +static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, + AArch64CC::CondCode &OutCC, + bool Negate, + SDValue CCOp = SDValue(), + AArch64CC::CondCode Condition = + AArch64CC::AL) { + assert(isConjunctionDisjunctionTree(Val)); + // We're at a tree leaf, produce a c?f?cmp. + unsigned Opcode = Val->getOpcode(); + if (Opcode == ISD::SETCC) { + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + ISD::CondCode CC = cast(Val->getOperand(2))->get(); + bool isInteger = LHS.getValueType().isInteger(); + if (Negate) + CC = getSetCCInverse(CC, isInteger); + SDLoc DL(Val); + // Determine OutCC and handle FP special case. + if (isInteger) + OutCC = changeIntCCToAArch64CC(CC); + else { + assert(LHS.getValueType().isFloatingPoint()); + AArch64CC::CondCode ExtraCC; + changeFPCCToAArch64CC(CC, OutCC, ExtraCC); + // Surpisingly some floatingpoint conditions can't be tested with a single + // condition code. Construct an additional comparison in this case. + // See comment below on how we deal with OR conditions. + if (ExtraCC != AArch64CC::AL) { + SDValue ExtraCmp; + if (!CCOp.getNode()) + ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); + else { + SDValue ConditionOp = DAG.getConstant(Condition, MVT_CC); + // Note that we want the inverse of ExtraCC, so NZCV is not inversed. + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC); + ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, + NZCV, DL, DAG); + } + CCOp = ExtraCmp; + Condition = AArch64CC::getInvertedCondCode(ExtraCC); + OutCC = AArch64CC::getInvertedCondCode(OutCC); + } + } + + // Produce a normal comparison if we are first in the chain + if (!CCOp.getNode()) + return emitComparison(LHS, RHS, CC, DL, DAG); + // Otherwise produce a ccmp. + SDValue ConditionOp = DAG.getConstant(Condition, MVT_CC); + AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); + return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL, + DAG); + } + + // Construct comparison sequence for the left hand side. + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + + // We can only implement AND-like behaviour here, but negation is free. So we + // use (not (and (not x) (not y))) to implement (or x y). + bool isOr = Val->getOpcode() == ISD::OR; + assert((isOr || Val->getOpcode() == ISD::AND) && "Should have AND or OR."); + Negate ^= isOr; + + AArch64CC::CondCode LHSCC; + SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, LHSCC, isOr, CCOp, + Condition); + SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, OutCC, isOr, CmpL, + LHSCC); + if (Negate) + OutCC = AArch64CC::getInvertedCondCode(OutCC); + return CmpR; +} + static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { SDValue Cmp; @@ -1219,9 +1344,18 @@ } } } - Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, MVT::i32); + + if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa(RHS) && + cast(RHS)->isNullValue() && + isConjunctionDisjunctionTree(LHS)) { + bool Negate = CC == ISD::SETEQ; + Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC, Negate); + } else { + Cmp = emitComparison(LHS, RHS, CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); + } + + AArch64cc = DAG.getConstant(AArch64CC, MVT_CC); return Cmp; } @@ -8873,3 +9007,8 @@ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { return Ty->isArrayTy(); } + +bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, + EVT) const { + return false; +} Index: lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- lib/Target/AArch64/AArch64InstrFormats.td +++ lib/Target/AArch64/AArch64InstrFormats.td @@ -525,6 +525,13 @@ let ParserMatchClass = Imm0_31Operand; } +// True if the 32-bit immediate is in the range [0,31] +def imm32_0_31 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_31Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand, ImmLeaf, ImmLeaf; +}]> { + let ParserMatchClass = Imm0_15Operand; +} // An arithmetic shifter operand: // {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr @@ -2062,9 +2071,12 @@ //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsImm - : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, +class BaseCondComparisonImm + : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $imm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2084,19 +2096,13 @@ let Inst{3-0} = nzcv; } -multiclass CondSetFlagsImm { - def Wi : BaseCondSetFlagsImm { - let Inst{31} = 0; - } - def Xi : BaseCondSetFlagsImm { - let Inst{31} = 1; - } -} - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsReg - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseCondComparisonReg + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2116,11 +2122,19 @@ let Inst{3-0} = nzcv; } -multiclass CondSetFlagsReg { - def Wr : BaseCondSetFlagsReg { +multiclass CondComparison { + // immediate operand variants + def Wi : BaseCondComparisonImm { let Inst{31} = 0; } - def Xr : BaseCondSetFlagsReg { + def Xi : BaseCondComparisonImm { + let Inst{31} = 1; + } + // register operand variants + def Wr : BaseCondComparisonReg { + let Inst{31} = 0; + } + def Xr : BaseCondComparisonReg { let Inst{31} = 1; } } @@ -3923,11 +3937,14 @@ //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPCondComparison - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseFPCondComparison pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, Sched<[WriteFCmp]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + bits<5> Rn; bits<5> Rm; bits<4> nzcv; @@ -3943,16 +3960,18 @@ let Inst{3-0} = nzcv; } -multiclass FPCondComparison { - let Defs = [NZCV], Uses = [NZCV] in { - def Srr : BaseFPCondComparison { +multiclass FPCondComparison { + def Srr : BaseFPCondComparison { let Inst{22} = 0; } - - def Drr : BaseFPCondComparison { + def Drr : BaseFPCondComparison { let Inst{22} = 1; } - } // Defs = [NZCV], Uses = [NZCV] } //--- Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -64,6 +64,20 @@ SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>; +def SDT_AArch64CCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; +def SDT_AArch64FCCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; @@ -158,6 +172,10 @@ def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; +def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; +def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; +def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; + def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; @@ -948,13 +966,10 @@ def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; //===----------------------------------------------------------------------===// -// Conditionally set flags instructions. +// Conditionally compare instructions. //===----------------------------------------------------------------------===// -defm CCMN : CondSetFlagsImm<0, "ccmn">; -defm CCMP : CondSetFlagsImm<1, "ccmp">; - -defm CCMN : CondSetFlagsReg<0, "ccmn">; -defm CCMP : CondSetFlagsReg<1, "ccmp">; +defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; +defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; //===----------------------------------------------------------------------===// // Conditional select instructions. @@ -2463,7 +2478,7 @@ //===----------------------------------------------------------------------===// defm FCCMPE : FPCondComparison<1, "fccmpe">; -defm FCCMP : FPCondComparison<0, "fccmp">; +defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; //===----------------------------------------------------------------------===// // Floating point conditional select instruction. Index: test/CodeGen/AArch64/arm64-ccmp.ll =================================================================== --- test/CodeGen/AArch64/arm64-ccmp.ll +++ test/CodeGen/AArch64/arm64-ccmp.ll @@ -287,3 +287,43 @@ %code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16 br label %sw.bb.i.i } + +; CHECK-LABEL: select_and +define i64 @select_and(i32 %v1, i32 %v2, i64 %a, i64 %b) { +; CHECK: cmp +; CHECK: ccmp{{.*}}, #4, lt +; CHECK: csel{{.*}}, ne + %1 = icmp slt i32 %v1, %v2 + %2 = icmp ne i32 5, %v2 + %3 = and i1 %1, %2 + %sel = select i1 %3, i64 %a, i64 %b + ret i64 %sel +} + +; CHECK-LABEL: select_or +define i64 @select_or(i32 %v1, i32 %v2, i64 %a, i64 %b) { +; CHECK: cmp +; CHECK: ccmp{{.*}}, #0, ge +; CHECK: csel{{.*}}, ne + %1 = icmp slt i32 %v1, %v2 + %2 = icmp ne i32 5, %v2 + %3 = or i1 %1, %2 + %sel = select i1 %3, i64 %a, i64 %b + ret i64 %sel +} + +; CHECK-LABEL: select_complicated +define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) { +; CHECK: fcmp +; CHECK: fccmp{{.*}}, #4, vc +; CHECK: fccmp{{.*}}, #4, ne +; CHECK: fccmp{{.*}}, #4, ne +; CEHCK: csel{{.*}}, eq + %1 = fcmp one double %v1, %v2 + %2 = fcmp oeq double %v2, 13.0 + %3 = fcmp oeq double %v1, 42.0 + %or0 = or i1 %2, %3 + %or1 = or i1 %1, %or0 + %sel = select i1 %or1, i16 %a, i16 %b + ret i16 %sel +}