Index: lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -317,12 +317,43 @@
   return GenericOpc;
 }
 
+/// Rewrite operand 1 of \p I so a value in a 32-bit GPR can be copied into a
+/// 16-bit FPR. Emits a COPY of \p SrcReg into a fresh FPR32 vreg and then a
+/// COPY of its hsub sub-register into a fresh FPR16 vreg, both inserted before
+/// \p I, and makes \p I read the FPR16 vreg. Always returns true.
+static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII,
+                                    MachineRegisterInfo &MRI, unsigned SrcReg) {
+  // Copies from gpr32 to fpr16 need to use a sub-register copy.
+  unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
+      .addDef(CopyReg)
+      .addUse(SrcReg);
+  unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass);
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
+      .addDef(SubRegCopy)
+      .addUse(CopyReg, 0, AArch64::hsub);
+
+  MachineOperand &RegOp = I.getOperand(1);
+  RegOp.setReg(SubRegCopy);
+  return true;
+}
+
 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
 
   unsigned DstReg = I.getOperand(0).getReg();
+  unsigned SrcReg = I.getOperand(1).getReg();
+
   if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+    if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) &&
+        !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+      const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+      const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(
+          MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true);
+      if (SrcRC == &AArch64::GPR32allRegClass)
+        return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
+    }
     assert(I.isCopy() && "Generic operators do not allow physical registers");
     return true;
   }
@@ -330,7 +361,6 @@
   const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
   const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
   (void)DstSize;
-  unsigned SrcReg = I.getOperand(1).getReg();
   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
   (void)SrcSize;
   assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
@@ -357,9 +387,7 @@
   }
 
   if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
-    const RegClassOrRegBank &RegClassOrBank =
-        MRI.getRegClassOrRegBank(SrcReg);
-
+    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg);
     const TargetRegisterClass *SrcRC =
         RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
     const RegisterBank *RB = nullptr;
@@ -378,6 +406,9 @@
           .addImm(AArch64::hsub);
       MachineOperand &RegOp = I.getOperand(1);
       RegOp.setReg(PromoteReg);
+    } else if (RC == &AArch64::FPR16RegClass &&
+               SrcRC == &AArch64::GPR32allRegClass) {
+      selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
     }
   }
 
Index: test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
===================================================================
--- test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
+++ test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
@@ -6,18 +6,21 @@
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   target triple = "aarch64-arm-none-eabi"
 
-  %struct.struct2 = type { [2 x half] }
+  ; Function Attrs: noinline nounwind optnone
+  define void @fp16_to_gpr([2 x half], [2 x half]* %addr) {
+    ret void
+  }
 
-  @global_arg0 = common dso_local global %struct.struct2 zeroinitializer, align 2
+  define void @gpr_to_fp16() {
+    ret void
+  }
 
-  ; Function Attrs: noinline nounwind optnone
-  define dso_local void @c_test([2 x half], [2 x half]* %addr) {
-    store [2 x half] %0, [2 x half]* %addr, align 2
+  define void @gpr_to_fp16_physreg() {
     ret void
   }
 ...
 ---
-name: c_test
+name: fp16_to_gpr
 alignment: 2
 legalized: true
 regBankSelected: true
@@ -40,7 +43,7 @@
   bb.1 (%ir-block.1):
     liveins: $h0, $h1, $x0
 
-    ; CHECK-LABEL: name: c_test
+    ; CHECK-LABEL: name: fp16_to_gpr
     ; CHECK: liveins: $h0, $h1, $x0
     ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
@@ -67,3 +70,58 @@
     RET_ReallyLR
 
 ...
+
+---
+name: gpr_to_fp16
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: fpr }
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: gpr_to_fp16
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
+    ; CHECK: $h0 = COPY [[COPY2]]
+    ; CHECK: RET_ReallyLR implicit $h0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s16) = G_TRUNC %0(s32)
+    %2:fpr(s16) = COPY %1(s16)
+    $h0 = COPY %2(s16)
+    RET_ReallyLR implicit $h0
+
+...
+---
+name: gpr_to_fp16_physreg
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: gpr_to_fp16_physreg
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
+    ; CHECK: $h0 = COPY [[COPY2]]
+    ; CHECK: RET_ReallyLR implicit $h0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s16) = G_TRUNC %0(s32)
+    $h0 = COPY %1(s16)
+    RET_ReallyLR implicit $h0
+
+...