diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
--- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -15,8 +15,11 @@
 #include "PPCRegisterBankInfo.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
 #include "llvm/Support/Debug.h"
@@ -121,6 +124,32 @@
   return true;
 }
 
+static unsigned selectLoadStoreOp(unsigned GenericOpc, unsigned RegBankID,
+                                  unsigned OpSize) {
+  const bool IsStore = GenericOpc == TargetOpcode::G_STORE;
+  switch (RegBankID) {
+  case PPC::GPRRegBankID:
+    switch (OpSize) {
+    case 64:
+      return IsStore ? PPC::STD : PPC::LD;
+    default:
+      llvm_unreachable("Unexpected size!");
+    }
+    break;
+  case PPC::FPRRegBankID:
+    switch (OpSize) {
+    case 64:
+      return IsStore ? PPC::STFD : PPC::LFD;
+    default:
+      llvm_unreachable("Unexpected size!");
+    }
+    break;
+  default:
+    llvm_unreachable("Unexpected register bank!");
+  }
+  return GenericOpc;
+}
+
 bool PPCInstructionSelector::selectIntToFP(MachineInstr &I,
                                            MachineBasicBlock &MBB,
                                            MachineRegisterInfo &MRI) const {
@@ -198,6 +227,43 @@
   switch (Opcode) {
   default:
     return false;
+  case TargetOpcode::G_LOAD:
+  case TargetOpcode::G_STORE: {
+    GLoadStore &LdSt = cast<GLoadStore>(I);
+    LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+
+    if (PtrTy != LLT::pointer(0, 64)) {
+      LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
+                        << ", expected: " << LLT::pointer(0, 64) << '\n');
+      return false;
+    }
+
+    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
+      const unsigned NewOpc = selectLoadStoreOp(
+          I.getOpcode(), RBI.getRegBank(LdSt.getReg(0), MRI, TRI)->getID(),
+          LdSt.getMemSizeInBits());
+
+      if (NewOpc == I.getOpcode())
+        return nullptr;
+
+      // For now, simply use the D-Form with the address as base and 0 as imm.
+      // FIXME: optimize loads/stores with specific address patterns.
+      I.setDesc(TII.get(NewOpc));
+      Register AddrReg = I.getOperand(1).getReg();
+      bool IsKill = I.getOperand(1).isKill();
+      I.getOperand(1).ChangeToImmediate(0);
+      I.addOperand(*I.getParent()->getParent(),
+                   MachineOperand::CreateReg(AddrReg, /* isDef */ false,
+                                             /* isImp */ false, IsKill));
+      return &I;
+    };
+
+    MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
+    if (!LoadStore)
+      return false;
+
+    return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
+  }
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_UITOFP:
     return selectIntToFP(I, MBB, MRI);
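Illustration, not part of the patch: the D-Form rewrite in `SelectLoadStoreAddressingMode` above turns a generic `G_LOAD`/`G_STORE`, whose operand 1 is the pointer, into the register+displacement shape that D-Form instructions such as `LD`/`STD` expect — operand 1 becomes the immediate displacement 0 and the original address register is re-appended as the base. A minimal standalone sketch of that operand shuffle on a toy instruction type (none of these names are LLVM API):

```cpp
#include <cstdint>
#include <string>
#include <vector>

struct Operand {
  enum Kind { Reg, Imm } K;
  int64_t V; // register number or immediate value
};

struct Inst {
  std::string Opcode;
  std::vector<Operand> Ops; // Ops[0] = def/src value, Ops[1] = address
};

// Mirrors the rewrite above: "LD %dst, %addr" becomes "LD %dst, 0, %addr",
// i.e. displacement 0 with the old address register appended as the base.
void rewriteToDForm(Inst &I, const std::string &NewOpc) {
  I.Opcode = NewOpc;
  Operand Addr = I.Ops[1];
  I.Ops[1] = {Operand::Imm, 0}; // zero displacement
  I.Ops.push_back(Addr);        // base register goes last
}
```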
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
--- a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
@@ -19,6 +19,7 @@
 
 PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
   using namespace TargetOpcode;
+  const LLT P0 = LLT::pointer(0, 64);
   const LLT S32 = LLT::scalar(32);
   const LLT S64 = LLT::scalar(64);
   getActionDefinitionsBuilder(G_IMPLICIT_DEF).legalFor({S64});
@@ -41,5 +42,10 @@
   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
       .legalForCartesianProduct({S32, S64}, {S64});
 
+  // For now, handle only 64-bit operations: we support only 64-bit integers,
+  // and zext/sext are not ready yet.
+  getActionDefinitionsBuilder({G_LOAD, G_STORE})
+      .legalForTypesWithMemDesc({{S64, P0, S64, 8}});
+
   getLegacyLegalizerInfo().computeTables();
 }
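Illustration, not part of the patch: in the mem-desc tuple `{S64, P0, S64, 8}` the fields are, per my reading of the `LegalizeRuleSet` API, the register value type (s64), the pointer type (p0: 64-bit, address space 0), the in-memory type (s64, so no extending or truncating access), and the minimum alignment in bits (8, i.e. merely byte-aligned). A standalone toy restatement of the legality check this single rule encodes:

```cpp
#include <cstdint>

// Invented plain-data stand-in for a legality query's memory descriptor.
struct MemAccess {
  unsigned ValueBits;    // size of the value in a register
  unsigned PtrBits;      // pointer width
  unsigned PtrAddrSpace; // pointer address space
  unsigned MemBits;      // size as stored in memory
  uint64_t AlignBits;    // alignment of the access, in bits
};

// {S64, P0, S64, 8}: 64-bit value, 64-bit AS0 pointer, full 64-bit memory
// access, at least byte-aligned. Everything else is (for now) not legal.
bool isLegal64BitLoadStore(const MemAccess &A) {
  return A.ValueBits == 64 && A.PtrBits == 64 && A.PtrAddrSpace == 0 &&
         A.MemBits == 64 && A.AlignBits >= 8;
}
```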
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
@@ -69,6 +69,23 @@
 
   InstructionMappings
   getInstrAlternativeMappings(const MachineInstr &MI) const override;
+
+private:
+  /// Maximum recursion depth for hasFPConstraints.
+  const unsigned MaxFPRSearchDepth = 2;
+
+  /// \returns true if \p MI only uses and defines FPRs.
+  bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                        const TargetRegisterInfo &TRI,
+                        unsigned Depth = 0) const;
+
+  /// \returns true if \p MI only uses FPRs.
+  bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                  const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
+  /// \returns true if \p MI only defines FPRs.
+  bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                     const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
 };
 
 } // namespace llvm
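Illustration, not part of the patch: the three predicates declared above walk the def-use graph to infer a register bank before one has been assigned. Explicit FP opcodes answer immediately; copy-like instructions consult an already-assigned bank; only PHIs recurse into their inputs, and `MaxFPRSearchDepth` caps that recursion so PHI cycles and long chains stay cheap. A toy, self-contained model of the bounded walk (node kinds invented):

```cpp
#include <vector>

// Invented stand-ins for MachineInstr kinds.
enum class Kind { FAdd, IntAdd, CopyFromFPR, CopyFromGPR, Phi };

constexpr unsigned MaxFPRSearchDepth = 2; // same bound as the patch

// Toy analogue of onlyDefinesFP()/hasFPConstraints(): FP producers are FPR;
// copies resolve via the bank already assigned to their source; PHIs recurse
// over their inputs up to the depth bound; anything else is not FP.
bool definesFP(Kind K, const std::vector<Kind> &PhiInputs,
               unsigned Depth = 0) {
  switch (K) {
  case Kind::FAdd:
    return true; // explicit floating-point opcode
  case Kind::CopyFromFPR:
    return true; // bank already known: FPR
  case Kind::CopyFromGPR:
  case Kind::IntAdd:
    return false; // known-integer, or not copy-like at all
  case Kind::Phi:
    if (Depth > MaxFPRSearchDepth)
      return false; // bound the search
    for (Kind In : PhiInputs)
      if (definesFP(In, {}, Depth + 1))
        return true;
    return false;
  }
  return false;
}
```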
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -119,6 +119,37 @@
       getValueMapping(PMI_GPR64)});
     break;
   }
+  case TargetOpcode::G_LOAD: {
+    // Check if the load feeds fp instructions.
+    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+               [&](const MachineInstr &UseMI) {
+                 // If we have at least one direct use in a FP instruction,
+                 // assume this was a floating point load in the IR. If it was
+                 // not, we would have had a bitcast before reaching that
+                 // instruction.
+                 //
+                 // Int->FP conversion operations are also captured in
+                 // onlyDefinesFP().
+                 return onlyUsesFP(UseMI, MRI, TRI);
+               }))
+      OperandsMapping = getOperandsMapping(
+          {getValueMapping(PMI_FPR64), getValueMapping(PMI_GPR64)});
+    else
+      OperandsMapping = getOperandsMapping(
+          {getValueMapping(PMI_GPR64), getValueMapping(PMI_GPR64)});
+    break;
+  }
+  case TargetOpcode::G_STORE: {
+    // Check if the store is fed by fp instructions.
+    MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(0).getReg());
+    if (onlyDefinesFP(*DefMI, MRI, TRI))
+      OperandsMapping = getOperandsMapping(
+          {getValueMapping(PMI_FPR64), getValueMapping(PMI_GPR64)});
+    else
+      OperandsMapping = getOperandsMapping(
+          {getValueMapping(PMI_GPR64), getValueMapping(PMI_GPR64)});
+    break;
+  }
   default:
     return getInvalidInstructionMapping();
   }
@@ -126,6 +157,128 @@
   return getInstructionMapping(MappingID, Cost, OperandsMapping, NumOperands);
 }
 
+/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
+/// having only floating-point operands.
+/// FIXME: this is copied from the AArch64 target. Refactor it into
+/// GlobalISel/Utils.cpp so it can be shared.
+static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
+  switch (Opc) {
+  case TargetOpcode::G_FADD:
+  case TargetOpcode::G_FSUB:
+  case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_FMA:
+  case TargetOpcode::G_FDIV:
+  case TargetOpcode::G_FCONSTANT:
+  case TargetOpcode::G_FPEXT:
+  case TargetOpcode::G_FPTRUNC:
+  case TargetOpcode::G_FCEIL:
+  case TargetOpcode::G_FFLOOR:
+  case TargetOpcode::G_FNEARBYINT:
+  case TargetOpcode::G_FNEG:
+  case TargetOpcode::G_FCOS:
+  case TargetOpcode::G_FSIN:
+  case TargetOpcode::G_FLOG10:
+  case TargetOpcode::G_FLOG:
+  case TargetOpcode::G_FLOG2:
+  case TargetOpcode::G_FSQRT:
+  case TargetOpcode::G_FABS:
+  case TargetOpcode::G_FEXP:
+  case TargetOpcode::G_FRINT:
+  case TargetOpcode::G_INTRINSIC_TRUNC:
+  case TargetOpcode::G_INTRINSIC_ROUND:
+  case TargetOpcode::G_FMAXNUM:
+  case TargetOpcode::G_FMINNUM:
+  case TargetOpcode::G_FMAXIMUM:
+  case TargetOpcode::G_FMINIMUM:
+    return true;
+  }
+  return false;
+}
+
+/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
+static bool isFPIntrinsic(unsigned ID) {
+  // TODO: Add more intrinsics.
+  switch (ID) {
+  default:
+    return false;
+  }
+}
+
+/// FIXME: this is copied from the AArch64 target. Refactor it into class
+/// RegisterBankInfo so it can be shared.
+bool PPCRegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
+                                           const MachineRegisterInfo &MRI,
+                                           const TargetRegisterInfo &TRI,
+                                           unsigned Depth) const {
+  unsigned Op = MI.getOpcode();
+  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
+    return true;
+
+  // Do we have an explicit floating point instruction?
+  if (isPreISelGenericFloatingPointOpcode(Op))
+    return true;
+
+  // No. Check if we have a copy-like instruction. If we do, then we could
+  // still be fed by floating point instructions.
+  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
+      !isPreISelGenericOptimizationHint(Op))
+    return false;
+
+  // Check if we already know the register bank.
+  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
+  if (RB == &PPC::FPRRegBank)
+    return true;
+  if (RB == &PPC::GPRRegBank)
+    return false;
+
+  // We don't know anything.
+  //
+  // If we have a phi, we may be able to infer that it will be assigned a FPR
+  // based off of its inputs.
+  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+    return false;
+
+  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
+    return Op.isReg() &&
+           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
+  });
+}
+
+/// FIXME: this is copied from the AArch64 target. Refactor it into class
+/// RegisterBankInfo so it can be shared.
+bool PPCRegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+                                     const MachineRegisterInfo &MRI,
+                                     const TargetRegisterInfo &TRI,
+                                     unsigned Depth) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FCMP:
+  case TargetOpcode::G_LROUND:
+  case TargetOpcode::G_LLROUND:
+    return true;
+  default:
+    break;
+  }
+  return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
+/// FIXME: this is copied from the AArch64 target. Refactor it into class
+/// RegisterBankInfo so it can be shared.
+bool PPCRegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+                                        const MachineRegisterInfo &MRI,
+                                        const TargetRegisterInfo &TRI,
+                                        unsigned Depth) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_SITOFP:
+  case TargetOpcode::G_UITOFP:
+    return true;
+  default:
+    break;
+  }
+  return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
 RegisterBankInfo::InstructionMappings
 PPCRegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
   // TODO Implement.
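Illustration, not part of the patch: the G_LOAD mapping above sends a load to the FPR bank as soon as one direct user is FP-constrained, and G_STORE mirrors this through its defining instruction. Note that `G_SITOFP` does not pull its input onto the FPR bank — it consumes an integer — which is why `load3_i64` in the test below still selects `ld` followed by `mtfprd`. A toy restatement of the decision (user kinds invented; the real code walks `MRI.use_nodbg_instructions`):

```cpp
#include <vector>

// Invented user kinds for a loaded value.
enum class Use { FAdd, FPToSI, IntAdd, SIToFP };

// Toy onlyUsesFP(): G_FPTOSI consumes an FP value, so it constrains its
// input to FPR; G_SITOFP consumes an integer, so it does not.
bool usesFP(Use U) { return U == Use::FAdd || U == Use::FPToSI; }

// Toy G_LOAD mapping: FPR iff at least one direct use is FP-constrained.
bool loadGoesToFPR(const std::vector<Use> &Users) {
  for (Use U : Users)
    if (usesFP(U))
      return true;
  return false;
}

// loadGoesToFPR({Use::IntAdd}) -> false: load2_i64 selects ld
// loadGoesToFPR({Use::SIToFP}) -> false: load3_i64 selects ld + mtfprd
// loadGoesToFPR({Use::FAdd})   -> true:  load2_f64 selects lfd
```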
diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/load-store-64bit.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/load-store-64bit.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/GlobalISel/load-store-64bit.ll
@@ -0,0 +1,248 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -global-isel -o - \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
+
+define i64 @load_i64(ptr %p) {
+; CHECK-LABEL: load_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %ret = load i64, ptr %p, align 8
+  ret i64 %ret
+}
+
+define i64 @load2_i64(ptr %p, i64 %a) {
+; CHECK-LABEL: load2_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    add r3, r3, r4
+; CHECK-NEXT:    blr
+entry:
+  %load = load i64, ptr %p, align 8
+  %ret = add i64 %load, %a
+  ret i64 %ret
+}
+
+define float @load3_i64(ptr %p) {
+; CHECK-LABEL: load3_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    xscvsxdsp f1, f0
+; CHECK-NEXT:    blr
+entry:
+  %load = load i64, ptr %p, align 8
+  %ret = sitofp i64 %load to float
+  ret float %ret
+}
+
+define double @load4_i64(ptr %p) {
+; CHECK-LABEL: load4_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    xscvsxddp f1, f0
+; CHECK-NEXT:    blr
+entry:
+  %load = load i64, ptr %p, align 8
+  %ret = sitofp i64 %load to double
+  ret double %ret
+}
+
+define float @load5_i64(ptr %p) {
+; CHECK-LABEL: load5_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    xscvuxdsp f1, f0
+; CHECK-NEXT:    blr
+entry:
+  %load = load i64, ptr %p, align 8
+  %ret = uitofp i64 %load to float
+  ret float %ret
+}
+
+define double @load6_i64(ptr %p) {
+; CHECK-LABEL: load6_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    xscvuxddp f1, f0
+; CHECK-NEXT:    blr
+entry:
+  %load = load i64, ptr %p, align 8
+  %ret = uitofp i64 %load to double
+  ret double %ret
+}
+
+define double @load_f64(ptr %p) {
+; CHECK-LABEL: load_f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lfd f1, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %ret = load double, ptr %p, align 8
+  ret double %ret
+}
+
+define double @load2_f64(ptr %p, double %a) {
+; CHECK-LABEL: load2_f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lfd f0, 0(r3)
+; CHECK-NEXT:    xsadddp f1, f0, f1
+; CHECK-NEXT:    blr
+entry:
+  %load = load double, ptr %p, align 8
+  %ret = fadd double %load, %a
+  ret double %ret
+}
+
+define i64 @load3_f64(ptr %p) {
+; CHECK-LABEL: load3_f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lfd f0, 0(r3)
+; CHECK-NEXT:    xscvdpsxds f0, f0
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    blr
+entry:
+  %load = load double, ptr %p, align 8
+  %ret = fptosi double %load to i64
+  ret i64 %ret
+}
+
+define i64 @load4_f64(ptr %p) {
+; CHECK-LABEL: load4_f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lfd f0, 0(r3)
+; CHECK-NEXT:    xscvdpuxds f0, f0
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    blr
+entry:
+  %load = load double, ptr %p, align 8
+  %ret = fptoui double %load to i64
+  ret i64 %ret
+}
+
+define void @store_i64(ptr %p) {
+; CHECK-LABEL: store_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  store i64 100, ptr %p, align 8
+  ret void
+}
+
+define void @store2_i64(ptr %p, i64 %a, i64 %b) {
+; CHECK-LABEL: store2_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    add r4, r4, r5
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %add = add i64 %a, %b
+  store i64 %add, ptr %p, align 8
+  ret void
+}
+
+define void @store3_i64(ptr %p, float %a) {
+; CHECK-LABEL: store3_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdpsxds f0, f1
+; CHECK-NEXT:    mffprd r4, f0
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %conv = fptosi float %a to i64
+  store i64 %conv, ptr %p, align 8
+  ret void
+}
+
+define void @store4_i64(ptr %p, double %a) {
+; CHECK-LABEL: store4_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdpsxds f0, f1
+; CHECK-NEXT:    mffprd r4, f0
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %conv = fptosi double %a to i64
+  store i64 %conv, ptr %p, align 8
+  ret void
+}
+
+define void @store5_i64(ptr %p, float %a) {
+; CHECK-LABEL: store5_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdpuxds f0, f1
+; CHECK-NEXT:    mffprd r4, f0
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %conv = fptoui float %a to i64
+  store i64 %conv, ptr %p, align 8
+  ret void
+}
+
+define void @store6_i64(ptr %p, double %a) {
+; CHECK-LABEL: store6_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdpuxds f0, f1
+; CHECK-NEXT:    mffprd r4, f0
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %conv = fptoui double %a to i64
+  store i64 %conv, ptr %p, align 8
+  ret void
+}
+
+define void @store_f64(ptr %p, double %a) {
+; CHECK-LABEL: store_f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stfd f1, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  store double %a, ptr %p, align 8
+  ret void
+}
+
+define void @store2_f64(ptr %p, double %a, double %b) {
+; CHECK-LABEL: store2_f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsadddp f0, f1, f2
+; CHECK-NEXT:    stfd f0, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %fadd = fadd double %a, %b
+  store double %fadd, ptr %p, align 8
+  ret void
+}
+
+define void @store3_f64(ptr %p, i64 %a) {
+; CHECK-LABEL: store3_f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtfprd f0, r4
+; CHECK-NEXT:    xscvsxddp f0, f0
+; CHECK-NEXT:    stfd f0, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %conv = sitofp i64 %a to double
+  store double %conv, ptr %p, align 8
+  ret void
+}
+
+define void @store4_f64(ptr %p, i64 %a) {
+; CHECK-LABEL: store4_f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtfprd f0, r4
+; CHECK-NEXT:    xscvuxddp f0, f0
+; CHECK-NEXT:    stfd f0, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %conv = uitofp i64 %a to double
+  store double %conv, ptr %p, align 8
+  ret void
+}
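Illustration, not part of the patch: the mnemonic each test above expects follows directly from the inferred register bank plus the single legal 64-bit access size. A standalone demo with the mapping hard-coded to mirror `selectLoadStoreOp`:

```cpp
#include <cassert>
#include <cstdio>

enum Bank { GPR, FPR };

// Mirrors selectLoadStoreOp for the only size the legalizer admits.
const char *pick(bool IsStore, Bank B, unsigned Bits) {
  assert(Bits == 64 && "only 64-bit accesses are legal in this patch");
  if (B == GPR)
    return IsStore ? "std" : "ld";
  return IsStore ? "stfd" : "lfd";
}

int main() {
  std::printf("%s\n", pick(false, GPR, 64)); // load_i64  -> ld
  std::printf("%s\n", pick(false, FPR, 64)); // load_f64  -> lfd
  std::printf("%s\n", pick(true, GPR, 64));  // store_i64 -> std
  std::printf("%s\n", pick(true, FPR, 64));  // store_f64 -> stfd
}
```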