Index: lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -356,6 +356,31 @@
     return false;
   }
 
+  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+    const RegClassOrRegBank &RegClassOrBank =
+        MRI.getRegClassOrRegBank(SrcReg);
+
+    const TargetRegisterClass *SrcRC =
+        RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+    const RegisterBank *RB = nullptr;
+    if (!SrcRC) {
+      RB = RegClassOrBank.get<const RegisterBank *>();
+      SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true);
+    }
+    // Copies from fpr16 to gpr32 need to use SUBREG_TO_REG.
+    if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) {
+      unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+      BuildMI(*I.getParent(), I, I.getDebugLoc(),
+              TII.get(AArch64::SUBREG_TO_REG))
+          .addDef(PromoteReg)
+          .addImm(0)
+          .addUse(SrcReg)
+          .addImm(AArch64::hsub);
+      MachineOperand &RegOp = I.getOperand(1);
+      RegOp.setReg(PromoteReg);
+    }
+  }
+
   // No need to constrain SrcReg. It will get constrained when
   // we hit another of its use or its defs.
   // Copies do not have constraints.
@@ -812,17 +837,25 @@
   case TargetOpcode::G_INSERT: {
     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
+    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+    unsigned DstSize = DstTy.getSizeInBits();
     // Larger inserts are vectors, same-size ones should be something else by
     // now (split up or turned into COPYs).
     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
       return false;
 
-    I.setDesc(TII.get(AArch64::BFMXri));
+    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
     unsigned LSB = I.getOperand(3).getImm();
     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
-    I.getOperand(3).setImm((64 - LSB) % 64);
+    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
     MachineInstrBuilder(MF, I).addImm(Width - 1);
 
+    if (DstSize < 64) {
+      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
+             "unexpected G_INSERT types");
+      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    }
+
     unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
             TII.get(AArch64::SUBREG_TO_REG))
Index: test/CodeGen/AArch64/GlobalISel/pr36345-fp16-copy-gpr.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/GlobalISel/pr36345-fp16-copy-gpr.mir
@@ -0,0 +1,119 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -o - -global-isel -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-arm-none-eabi"
+
+  %struct.struct2 = type { [2 x half] }
+
+  @global_arg0 = common global %struct.struct2 zeroinitializer, align 2
+
+  define void @c_test([2 x half]) #0 {
+    %2 = alloca %struct.struct2, align 2
+    %3 = getelementptr inbounds %struct.struct2, %struct.struct2* %2, i32 0, i32 0
+    store [2 x half] %0, [2 x half]* %3, align 2
+    %4 = bitcast %struct.struct2* %2 to i8*
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 bitcast (%struct.struct2* @global_arg0 to i8*), i8* align 2 %4, i64 4, i1 false)
+    ret void
+  }
+
+  ; Function Attrs: argmemonly nounwind
+  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+
+...
+---
+name: c_test
+alignment: 2
+exposesReturnsTwice: false
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr, preferred-register: '' }
+  - { id: 1, class: fpr, preferred-register: '' }
+  - { id: 2, class: fpr, preferred-register: '' }
+  - { id: 3, class: gpr, preferred-register: '' }
+  - { id: 4, class: gpr, preferred-register: '' }
+  - { id: 5, class: gpr, preferred-register: '' }
+  - { id: 6, class: gpr, preferred-register: '' }
+  - { id: 7, class: gpr, preferred-register: '' }
+  - { id: 8, class: gpr, preferred-register: '' }
+  - { id: 9, class: gpr, preferred-register: '' }
+  - { id: 10, class: gpr, preferred-register: '' }
+  - { id: 11, class: gpr, preferred-register: '' }
+  - { id: 12, class: gpr, preferred-register: '' }
+liveins:
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 2
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 4294967295
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  savePoint: ''
+  restorePoint: ''
+fixedStack:
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 4,
+      alignment: 2, stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+constants:
+body: |
+  bb.1 (%ir-block.1):
+    liveins: $h0, $h1
+
+    ; CHECK-LABEL: name: c_test
+    ; CHECK: liveins: $h0, $h1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
+    ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]]
+    ; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY2]], 0, 15
+    ; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG1]]
+    ; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY3]], 16, 15
+    ; CHECK: [[MOVaddr:%[0-9]+]]:gpr64 = MOVaddr target-flags(aarch64-page) @global_arg0, target-flags(aarch64-pageoff, aarch64-nc) @global_arg0
+    ; CHECK: [[COPY4:%[0-9]+]]:gpr64all = COPY [[MOVaddr]]
+    ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 4
+    ; CHECK: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
+    ; CHECK: STRWui [[BFMWri1]], [[ADDXri]], 0 :: (store 4 into %ir.3, align 2)
+    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[COPY4]]
+    ; CHECK: $x1 = COPY [[ADDXri]]
+    ; CHECK: $x2 = COPY [[MOVi64imm]]
+    ; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
+    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: RET_ReallyLR
+    %1:fpr(s16) = COPY $h0
+    %2:fpr(s16) = COPY $h1
+    %3:gpr(s32) = G_IMPLICIT_DEF
+    %11:gpr(s16) = COPY %1(s16)
+    %4:gpr(s32) = G_INSERT %3, %11(s16), 0
+    %12:gpr(s16) = COPY %2(s16)
+    %5:gpr(s32) = G_INSERT %4, %12(s16), 16
+    %0:gpr(s32) = COPY %5(s32)
+    %9:gpr(p0) = G_GLOBAL_VALUE @global_arg0
+    %8:gpr(p0) = COPY %9(p0)
+    %10:gpr(s64) = G_CONSTANT i64 4
+    %6:gpr(p0) = G_FRAME_INDEX %stack.0
+    %7:gpr(p0) = COPY %6(p0)
+    G_STORE %0(s32), %7(p0) :: (store 4 into %ir.3, align 2)
+    ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    $x0 = COPY %8(p0)
+    $x1 = COPY %6(p0)
+    $x2 = COPY %10(s64)
+    BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
+    ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    RET_ReallyLR
+
+...