diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -681,21 +681,27 @@
     // FIXME: Should be derived from the scheduling model.
     if (OpRegBankIdx[0] != PMI_FirstGPR)
       Cost = 2;
-    else
-      // Check if that load feeds fp instructions.
-      // In that case, we want the default mapping to be on FPR
-      // instead of blind map every scalar to GPR.
-      for (const MachineInstr &UseMI :
-           MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
-        // If we have at least one direct use in a FP instruction,
-        // assume this was a floating point load in the IR.
-        // If it was not, we would have had a bitcast before
-        // reaching that instruction.
-        if (onlyUsesFP(UseMI, MRI, TRI)) {
+    else {
+      // Check if the MMO on the load indicates the IR type is an FP type.
+      const auto &MMO = **MI.memoperands_begin();
+      const Value *LdVal = MMO.getValue();
+      if (LdVal) {
+        PointerType *PTy = cast<PointerType>(LdVal->getType());
+        if (PTy->getElementType()->isFPOrFPVectorTy()) {
           OpRegBankIdx[0] = PMI_FirstFPR;
           break;
         }
       }
+    }
+    // If we're going to assign GPR, have a second attempt to check for FP
+    // users, in case one of them is a COPY that writes to an FPR register.
+    for (const MachineInstr &UseMI :
+         MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
+      if (onlyUsesFP(UseMI, MRI, TRI)) {
+        OpRegBankIdx[0] = PMI_FirstFPR;
+        break;
+      }
+    }
     break;
   case TargetOpcode::G_STORE:
     // Check if that store is fed by fp instructions.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir
@@ -0,0 +1,88 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+  @var_fp = global float 0.0
+  @var_int = global i32 0
+
+  define float @fp_load_phi() { ret float undef }
+  define i32 @int_load_phi() { ret i32 undef }
+
+...
+---
+name: fp_load_phi
+legalized: true
+regBankSelected: false
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: fp_load_phi
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $w0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+  ; CHECK:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var_fp
+  ; CHECK:   %fp_load:fpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var_fp)
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:fpr(s32) = PHI %fp_load(s32), %bb.0, [[PHI]](s32), %bb.1
+  ; CHECK:   G_BRCOND [[COPY]](s32), %bb.1
+  ; CHECK: bb.2:
+  ; CHECK:   $s0 = COPY [[PHI]](s32)
+  ; CHECK:   RET_ReallyLR implicit $s0
+  ; Here we're checking that the load is assigned an FPR bank, since it's
+  ; loading from an fp type in the IR.
+  bb.0:
+    liveins: $w0
+    successors: %bb.1
+    %0:_(s32) = COPY $w0
+    %1:_(p0) = G_GLOBAL_VALUE @var_fp
+    %fp_load:_(s32) = G_LOAD %1 :: (load 4 from @var_fp)
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    %2:_(s32) = PHI %fp_load, %bb.0, %2, %bb.1
+    G_BRCOND %0, %bb.1
+
+  bb.2:
+    $s0 = COPY %2
+    RET_ReallyLR implicit $s0
+...
+# The load below reads from an i32 global, so it is expected to stay on GPR.
+---
+name: int_load_phi
+legalized: true
+regBankSelected: false
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: int_load_phi
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $w0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+  ; CHECK:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var_int
+  ; CHECK:   %int_load:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var_int)
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:gpr(s32) = PHI %int_load(s32), %bb.0, [[PHI]](s32), %bb.1
+  ; CHECK:   G_BRCOND [[COPY]](s32), %bb.1
+  ; CHECK: bb.2:
+  ; CHECK:   $s0 = COPY [[PHI]](s32)
+  ; CHECK:   RET_ReallyLR implicit $s0
+  bb.0:
+    liveins: $w0
+    successors: %bb.1
+    %0:_(s32) = COPY $w0
+    %1:_(p0) = G_GLOBAL_VALUE @var_int
+    %int_load:_(s32) = G_LOAD %1 :: (load 4 from @var_int)
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    %2:_(s32) = PHI %int_load, %bb.0, %2, %bb.1
+    G_BRCOND %0, %bb.1
+
+  bb.2:
+    $s0 = COPY %2
+    RET_ReallyLR implicit $s0
+...