Index: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -741,6 +741,34 @@
   return GenericOpc;
 }
 
+/// Select the AArch64 compare opcode for the G_FCMP \p I.
+///
+/// When the right-hand operand (operand 3) is the floating-point constant
+/// +0.0, the immediate form (FCMPSri/FCMPDri) is chosen so that the constant
+/// does not have to be materialized in a register. Otherwise the
+/// register-register form (FCMPSrr/FCMPDrr) is chosen.
+///
+/// \returns the selected opcode, or 0 if the compared type is not a 32-bit or
+/// 64-bit scalar.
+static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
+  // Only 32- and 64-bit scalars are supported. Compare the full type rather
+  // than just its size so that non-scalar types of the same width (e.g.
+  // 64-bit vectors) are rejected.
+  const LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
+  if (CmpTy != LLT::scalar(32) && CmpTy != LLT::scalar(64))
+    return 0;
+
+  // If this is a compare against +0.0, then we don't have to explicitly
+  // materialize a constant.
+  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
+  const bool ShouldUseImm = FPImm && FPImm->isZero() && !FPImm->isNegative();
+
+  // Indexed as [use immediate form][operand is 64 bits wide].
+  static const unsigned CmpOpcTbl[2][2] = {
+      {AArch64::FCMPSrr, AArch64::FCMPDrr},
+      {AArch64::FCMPSri, AArch64::FCMPDri}};
+  return CmpOpcTbl[ShouldUseImm][CmpTy.getSizeInBits() == 64];
+}
+
 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
   switch (P) {
   default:
@@ -1845,15 +1873,9 @@
       return false;
     }
 
-    unsigned CmpOpc = 0;
-    LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
-    if (CmpTy == LLT::scalar(32)) {
-      CmpOpc = AArch64::FCMPSrr;
-    } else if (CmpTy == LLT::scalar(64)) {
-      CmpOpc = AArch64::FCMPDrr;
-    } else {
+    unsigned CmpOpc = selectFCMPOpc(I, MRI);
+    if (!CmpOpc)
       return false;
-    }
 
     // FIXME: regbank
 
@@ -1861,9 +1883,16 @@
     changeFCMPPredToAArch64CC(
         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
 
-    MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
-                               .addUse(I.getOperand(2).getReg())
-                               .addUse(I.getOperand(3).getReg());
+    // Partially build the compare. Decide if we need to add a use for the
+    // third operand based off whether or not we're comparing against 0.0.
+    auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
+                     .addUse(I.getOperand(2).getReg());
+
+    // If we don't have an immediate compare, then we need to add a use of the
+    // register which wasn't used for the immediate.
+    // Note that the immediate will always be the last operand.
+    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+      CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
 
     const unsigned DefReg = I.getOperand(0).getReg();
     unsigned Def1Reg = DefReg;
@@ -1893,8 +1922,7 @@
       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
     }
-
-    constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
+    constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
 
     I.eraseFromParent();
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir
@@ -0,0 +1,56 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# Verify that we get FCMPSri when we compare against +0.0 and that we get
+# FCMPSrr otherwise.
+
+...
+---
+name: zero
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: zero
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[CSINCWr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: notzero
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.1:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: notzero
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[FMOVSi:%[0-9]+]]:fpr32 = FMOVSi 112
+    ; CHECK: FCMPSrr [[COPY]], [[FMOVSi]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[CSINCWr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 1.000000e+00
+    %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0