Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -11374,9 +11374,136 @@
   return V;
 }
 
+// Code size optimisation: rewrite calls to __aeabi_dcmplt and __aeabi_dcmpgt
+// (and the less/greater-or-equal variants) whose result only feeds a
+// conditional branch into calls to __aeabi_cdcmple and __aeabi_cdrcmple. This
+// avoids pulling more helper functions from the math library in at link time;
+// cdcmple and cdrcmple are one and the same function, the latter just swaps
+// the operands. The isel DAG is fixed up after the initial lowering and
+// softening because the libcalls are generated during type legalization,
+// which is early in isel, and the __aeabi_cd* functions differ from the other
+// EABI libcalls: they have no return value and only update the status flags,
+// so this fixup is the less intrusive approach.
+//
+// N is the ARMISD::BRCOND node being combined. Returns true if the DAG was
+// rewritten; returns false, leaving the DAG untouched, if the pattern does not
+// match.
+static bool rewriteEABICmpLibcalls(SDNode *N, SelectionDAG &DAG) {
+  // Operand 2 of the branch is its condition code, operand 4 the compare.
+  if (N->getNumOperands() < 5)
+    return false;
+
+  SDValue Cmp = N->getOperand(4);
+  if (Cmp.getOpcode() != ARMISD::CMPZ && Cmp.getOpcode() != ARMISD::CMP)
+    return false;
+
+  // The flag-setting helpers encode the whole comparison result, so the branch
+  // must be a plain "libcall result ==/!= 0" test for the rewrite to be valid.
+  ConstantSDNode *BrCC = dyn_cast<ConstantSDNode>(N->getOperand(2));
+  ConstantSDNode *CmpRHS = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
+  if (!BrCC || !CmpRHS || !CmpRHS->isNullValue())
+    return false;
+
+  // The compared value must be the register copy of the libcall's result, and
+  // that copy must carry a glue result (value 2) for the branch to consume.
+  SDValue CopyFromReg = Cmp.getOperand(0);
+  if (CopyFromReg.getOpcode() != ISD::CopyFromReg ||
+      CopyFromReg.getResNo() != 0 || CopyFromReg->getNumValues() != 3)
+    return false;
+
+  SDValue CSEnd = CopyFromReg.getOperand(0);
+  if (CSEnd.getOpcode() != ISD::CALLSEQ_END)
+    return false;
+
+  SDValue Call = CSEnd.getOperand(0);
+  if (Call.getOpcode() != ARMISD::CALL || Call.getNumOperands() < 2)
+    return false;
+
+  SDValue Symbol = Call.getOperand(1);
+  if (Symbol.getOpcode() != ISD::TargetExternalSymbol)
+    return false;
+
+  // FalseCC is the condition that holds once the flag-setting helper returns
+  // if the original comparison is false (see the ABI quote further down).
+  ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(Symbol);
+  StringRef OldName = ESN->getSymbol();
+  const char *NewName;
+  ARMCC::CondCodes FalseCC;
+  if (OldName == "__aeabi_dcmpgt") {
+    NewName = "__aeabi_cdrcmple";
+    FalseCC = ARMCC::HS;
+  } else if (OldName == "__aeabi_dcmplt") {
+    NewName = "__aeabi_cdcmple";
+    FalseCC = ARMCC::HS;
+  } else if (OldName == "__aeabi_dcmple") {
+    NewName = "__aeabi_cdcmple";
+    FalseCC = ARMCC::HI;
+  } else if (OldName == "__aeabi_dcmpge") {
+    NewName = "__aeabi_cdrcmple";
+    FalseCC = ARMCC::HI;
+  } else {
+    return false;
+  }
+
+  // The rewritten call no longer produces a value and the compare node is
+  // deleted, so the copied value and the compare must each have one user only.
+  if (!CopyFromReg.hasOneUse() || !Cmp.hasOneUse())
+    return false;
+
+  // From the Run-time ABI for the ARM Architecture:
+  // "The 3-way comparison functions c*cmple, c*cmpeq and c*rcmple return their
+  // results in the CPSR Z and C flags. C is clear only if the operands are
+  // ordered and the first operand is less than the second. Z is set only when
+  // the operands are ordered and equal."
+  //
+  // A branch on "result == 0" is taken when the comparison is false: for GT
+  // and LT that is HS, which tests C==1, and for GE and LE it is HI, which
+  // tests (C==1) && (Z==0). A branch on "result != 0" is taken when the
+  // comparison is true and needs the opposite conditions, LO and LS.
+  ARMCC::CondCodes NewCC;
+  switch (BrCC->getZExtValue()) {
+  case ARMCC::EQ:
+    NewCC = FalseCC;
+    break;
+  case ARMCC::NE:
+    NewCC = FalseCC == ARMCC::HS ? ARMCC::LO : ARMCC::LS;
+    break;
+  default:
+    return false;
+  }
+
+  // Retarget the call: keep every operand and change only the callee symbol.
+  // The operands are copied into owned storage because an ArrayRef bound to a
+  // braced list refers to a temporary that dies with the declaration.
+  SmallVector<SDValue, 8> CallOps(Call->op_begin(), Call->op_end());
+  CallOps[1] = DAG.getTargetExternalSymbol(NewName, Symbol.getValueType(),
+                                           ESN->getTargetFlags());
+  DAG.UpdateNodeOperands(Call.getNode(), CallOps);
+
+  // Make the branch consume the flags left by the call. Only this branch is
+  // touched: the condition-code constant node may be shared with unrelated
+  // users, so it must not be replaced DAG-wide.
+  SmallVector<SDValue, 5> BrOps(N->op_begin(), N->op_end());
+  BrOps[2] = DAG.getConstant(NewCC, SDLoc(N), MVT::i32);
+  BrOps[4] = SDValue(CopyFromReg.getNode(), 2);
+  SDNode *NewBr = DAG.UpdateNodeOperands(N, BrOps);
+  if (NewBr != N)
+    DAG.ReplaceAllUsesWith(N, NewBr);
+
+  if (Cmp.getNode()->use_empty())
+    DAG.RemoveDeadNode(Cmp.getNode());
+  return true;
+}
+
 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
 SDValue
 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
+  // On AEABI/AAPCS targets, first try to turn a double-comparison libcall that
+  // feeds this branch into its flag-setting variant.
+  if (getSubtarget()->isTargetAEABI() && getSubtarget()->isAAPCS_ABI() &&
+      rewriteEABICmpLibcalls(N, DAG))
+    return SDValue();
+
   SDValue Cmp = N->getOperand(4);
   if (Cmp.getOpcode() != ARMISD::CMPZ)
     // Only looking at NE cases.
Index: test/CodeGen/ARM/cdcmp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/cdcmp.ll
@@ -0,0 +1,122 @@
+; RUN: llc -mcpu=cortex-m3 -mtriple=thumb-none-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-windows %s -o - | FileCheck %s -check-prefix=WINDOWS
+;
+; Each function branches on an ordered double-precision comparison. On the
+; thumb-none-eabi run the flag-setting helper named in the function's directive
+; must be called; on the thumb-windows run that helper must not appear.
+
+; fcmp olt is expected to be lowered to a call to __aeabi_cdcmple.
+define i32 @OLT() {
+entry:
+; CHECK-LABEL: OLT:
+; CHECK: __aeabi_cdcmple
+; WINDOWS-NOT: __aeabi_cdcmple
+  %retval = alloca i32, align 4
+  %A = alloca double, align 8
+  %B = alloca double, align 8
+  store double 1.000000e+00, double* %A, align 8
+  store double 1.000000e+01, double* %B, align 8
+  %0 = load double, double* %A, align 8
+  %1 = load double, double* %B, align 8
+  %cmp = fcmp olt double %0, %1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 0, i32* %retval, align 4
+  br label %return
+
+if.end:
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval, align 4
+  ret i32 %2
+}
+
+; fcmp ole is expected to be lowered to a call to __aeabi_cdcmple.
+define i32 @OLE() {
+entry:
+; CHECK-LABEL: OLE:
+; CHECK: __aeabi_cdcmple
+; WINDOWS-NOT: __aeabi_cdcmple
+  %retval = alloca i32, align 4
+  %A = alloca double, align 8
+  %B = alloca double, align 8
+  store double 1.000000e+00, double* %A, align 8
+  store double 1.000000e+01, double* %B, align 8
+  %0 = load double, double* %A, align 8
+  %1 = load double, double* %B, align 8
+  %cmp = fcmp ole double %0, %1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 0, i32* %retval, align 4
+  br label %return
+
+if.end:
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval, align 4
+  ret i32 %2
+}
+
+; fcmp ogt is expected to be lowered to a call to __aeabi_cdrcmple.
+define i32 @OGT() {
+entry:
+; CHECK-LABEL: OGT:
+; CHECK: __aeabi_cdrcmple
+; WINDOWS-NOT: __aeabi_cdrcmple
+  %retval = alloca i32, align 4
+  %A = alloca double, align 8
+  %B = alloca double, align 8
+  store double 1.000000e+00, double* %A, align 8
+  store double 1.000000e+01, double* %B, align 8
+  %0 = load double, double* %A, align 8
+  %1 = load double, double* %B, align 8
+  %cmp = fcmp ogt double %0, %1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 0, i32* %retval, align 4
+  br label %return
+
+if.end:
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval, align 4
+  ret i32 %2
+}
+
+; fcmp oge is expected to be lowered to a call to __aeabi_cdrcmple.
+define i32 @OGE() {
+entry:
+; CHECK-LABEL: OGE:
+; CHECK: __aeabi_cdrcmple
+; WINDOWS-NOT: __aeabi_cdrcmple
+  %retval = alloca i32, align 4
+  %A = alloca double, align 8
+  %B = alloca double, align 8
+  store double 1.000000e+00, double* %A, align 8
+  store double 1.000000e+01, double* %B, align 8
+  %0 = load double, double* %A, align 8
+  %1 = load double, double* %B, align 8
+  %cmp = fcmp oge double %0, %1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 0, i32* %retval, align 4
+  br label %return
+
+if.end:
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval, align 4
+  ret i32 %2
+}