Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -172,6 +172,7 @@
 
   bool tryOptVectorShuffle(MachineInstr &I) const;
   bool tryOptVectorDup(MachineInstr &MI) const;
+  bool tryOptSelect(MachineInstr &MI) const;
 
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
@@ -741,6 +742,22 @@
   return GenericOpc;
 }
 
+/// Helper function to select the opcode for a G_SELECT.
+/// Returns 0 when the result type of \p I has no matching (F)CSEL opcode.
+static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
+                                const RegisterBankInfo &RBI) {
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+  // A result outside the GPR bank needs the floating-point select.
+  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
+               AArch64::GPRRegBankID);
+  LLT Ty = MRI.getType(I.getOperand(0).getReg());
+  if (Ty == LLT::scalar(32))
+    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
+  if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
+    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
+  return 0;
+}
+
 /// Helper function to select the opcode for a G_FCMP.
 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
   // If this is a compare against +0.0, then we don't have to explicitly
@@ -1774,16 +1791,15 @@
     // select instead of an integer select.
     bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
                  AArch64::GPRRegBankID);
-    unsigned CSelOpc = 0;
 
-    if (Ty == LLT::scalar(32)) {
-      CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
-    } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
-      CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
-    } else {
-      return false;
-    }
+    // selectSelectOpc returns 0 for result types it cannot handle.
+    unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
+    if (!CSelOpc)
+      return false;
+
+    if (IsFP && tryOptSelect(I))
+      return true;
 
     MachineInstr &TstMI =
         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
              .addDef(AArch64::WZR)
@@ -2810,6 +2826,81 @@
   return &I;
 }
 
+/// Try to fold a single-use G_FCMP that feeds the condition of the G_SELECT
+/// \p I into the select, emitting an FCMP followed by an FCSEL. Returns true
+/// and erases \p I on success; returns false without emitting anything else.
+bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
+  MachineIRBuilder MIB(I);
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+
+  // First, check if the condition is defined by a compare.
+  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
+  while (CondDef) {
+    // We can only fold if all of the defs have one use. Debug uses are
+    // ignored so that debug info cannot change the selected code.
+    if (!MRI.hasOneNonDBGUse(CondDef->getOperand(0).getReg()))
+      return false;
+
+    // We can skip over G_TRUNC since the condition is 1-bit.
+    // Truncating/extending can have no impact on the value.
+    unsigned Opc = CondDef->getOpcode();
+    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
+      break;
+
+    // Can't see past copies from physregs.
+    unsigned SrcReg = CondDef->getOperand(1).getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+      return false;
+
+    CondDef = MRI.getVRegDef(SrcReg);
+  }
+
+  // Is the condition defined by a compare?
+  // TODO: Handle G_ICMP.
+  if (!CondDef || CondDef->getOpcode() != TargetOpcode::G_FCMP)
+    return false;
+
+  // Get the condition code for the select.
+  AArch64CC::CondCode CondCode;
+  AArch64CC::CondCode CondCode2;
+  changeFCMPPredToAArch64CC(
+      (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
+      CondCode2);
+
+  // CondCode2 is something other than AL only when the comparison needs a
+  // second condition code, i.e. two instructions, to be emitted.
+  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
+  // unnecessary.
+  if (CondCode2 != AArch64CC::AL)
+    return false;
+
+  // Make sure we'll be able to select both the compare and the select before
+  // emitting anything, so that bailing out leaves the function unchanged.
+  unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
+  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
+  if (!CmpOpc || !CSelOpc)
+    return false;
+
+  // Emit a new compare. The original G_FCMP and the copies/truncs leading to
+  // the select are not erased here; once the select is erased, nothing
+  // outside that single-use chain reads them.
+  auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
+  // FCMPSri/FCMPDri (compare against +0.0) take no second register operand.
+  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+    Cmp.addUse(CondDef->getOperand(3).getReg());
+
+  // Emit the select.
+  auto CSel =
+      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
+                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
+          .addImm(CondCode);
+  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
   // Try to match a vector splat operation into a dup instruction.
   // We're looking for this pattern:
Index: llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir
@@ -0,0 +1,351 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify the following:
+#
+# - We can fold compares into selects.
+# - This only happens when the result of the compare is only used by the select.
+#
+# Also verify that, for now:
+#
+# - We only support doing this with G_FCMP.
+# - We only support condition flags that require a single instruction.
+#
+
+...
+---
+name: fcmp_more_than_one_user_no_fold
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $s0, $s1, $w1
+
+    ; CHECK-LABEL: name: fcmp_more_than_one_user_no_fold
+    ; CHECK: liveins: $s0, $s1, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[CSINCWr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 1, implicit $nzcv
+    ; CHECK: $w1 = COPY [[CSINCWr]]
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2 ; %5 is read by both the G_TRUNC and the copy to $w1, so it must not be folded
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %2, %1
+    $w1 = COPY %5(s32)
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: using_icmp
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $s0, $w0
+
+    ; CHECK-LABEL: name: using_icmp
+    ; CHECK: liveins: $s0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: $wzr = SUBSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[CSINCWr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:gpr(s32) = COPY $w0
+    %1:fpr(s32) = COPY $s0
+    %2:gpr(s32) = G_CONSTANT i32 0
+    %5:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %6:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2 ; integer compare: only G_FCMP is folded for now, so the CSINC/ANDS sequence stays
+    %3:gpr(s1) = G_TRUNC %6(s32)
+    %7:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %7(s1), %1, %5
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: foeq
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: foeq
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 0, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2 ; single-use oeq compare: folded, the FCSEL carries condition code 0
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %2, %1
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: fueq
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: fueq
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(ueq), %0(s32), %2 ; ueq needs two condition codes (two CSINCWr + ORR), so it is not folded
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %2, %1
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: fone
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: fone
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(one), %0(s32), %2 ; one needs two condition codes (two CSINCWr + ORR), so it is not folded
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %1, %2
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: fune
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: fune
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(une), %0(s32), %2 ; single-use une compare: folded, the FCSEL carries condition code 1
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %1, %2
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: doeq
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: doeq
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
+    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[FMOVD0_]], [[COPY1]], 0, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(s64) = COPY $d1
+    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2 ; s64 variant of foeq: folded into FCMPDri + FCSELDrrr with condition code 0
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s64) = G_SELECT %6(s1), %2, %1
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: dueq
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: dueq
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
+    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[FMOVD0_]], [[COPY1]], 1, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(s64) = COPY $d1
+    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(ueq), %0(s64), %2 ; s64 variant of fueq: ueq needs two condition codes, so it is not folded
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s64) = G_SELECT %6(s1), %2, %1
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: done
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: done
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
+    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[FMOVD0_]], 1, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(s64) = COPY $d1
+    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(one), %0(s64), %2 ; s64 variant of fone: one needs two condition codes, so it is not folded
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s64) = G_SELECT %6(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: dune
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: dune
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
+    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[FMOVD0_]], 1, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(s64) = COPY $d1
+    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(une), %0(s64), %2 ; s64 variant of fune: folded into FCMPDri + FCSELDrrr with condition code 1
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s64) = G_SELECT %6(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...