diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -448,6 +448,18 @@ return true; } +/// Returns the minimum size the given register bank can hold. +static unsigned getMinSizeForRegBank(const RegisterBank &RB) { + switch (RB.getID()) { + case AArch64::GPRRegBankID: + return 32; + case AArch64::FPRRegBankID: + return 8; + default: + llvm_unreachable("Tried to get minimum size for unknown register bank."); + } +} + /// Check whether \p I is a currently unsupported binary operation: /// - it has an unsized type /// - an operand is not a vreg @@ -636,23 +648,16 @@ } #endif -/// Helper function for selectCopy. Inserts a subregister copy from -/// \p *From to \p *To, linking it up to \p I. +/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg +/// to \p *To. /// -/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into -/// -/// CopyReg (From class) = COPY SrcReg -/// SubRegCopy (To class) = COPY CopyReg:SubReg -/// Dst = COPY SubRegCopy -static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, - const RegisterBankInfo &RBI, Register SrcReg, - const TargetRegisterClass *From, - const TargetRegisterClass *To, - unsigned SubReg) { +/// E.g "To = COPY SrcReg:SubReg" +static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, + const RegisterBankInfo &RBI, Register SrcReg, + const TargetRegisterClass *To, unsigned SubReg) { MachineIRBuilder MIB(I); - auto Copy = MIB.buildCopy({From}, {SrcReg}); - auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {}) - .addReg(Copy.getReg(0), 0, SubReg); + auto SubRegCopy = + MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg); MachineOperand &RegOp = I.getOperand(1); RegOp.setReg(SubRegCopy.getReg(0)); @@ -747,25 +752,28 @@ unsigned SrcSize = 
TRI.getRegSizeInBits(*SrcRC); unsigned DstSize = TRI.getRegSizeInBits(*DstRC); - // If we're doing a cross-bank copy on different-sized registers, we need - // to do a bit more work. + // If the source register is bigger than the destination we need to perform + // a subregister copy. if (SrcSize > DstSize) { - // We're doing a cross-bank copy into a smaller register. We need a - // subregister copy. First, get a register class that's on the same bank - // as the destination, but the same size as the source. - const TargetRegisterClass *SubregRC = - getMinClassForRegBank(DstRegBank, SrcSize, true); - assert(SubregRC && "Didn't get a register class for subreg?"); - - // Get the appropriate subregister for the destination. unsigned SubReg = 0; - if (!getSubRegForClass(DstRC, TRI, SubReg)) { - LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n"); - return false; + + // If the source bank doesn't support a subregister copy small enough, + // then we first need to copy to the destination bank. + if (getMinSizeForRegBank(SrcRegBank) > DstSize) { + const TargetRegisterClass *SubregRC = + getMinClassForRegBank(DstRegBank, SrcSize, true); + getSubRegForClass(DstRC, TRI, SubReg); + + MachineIRBuilder MIB(I); + auto Copy = MIB.buildCopy({SubregRC}, {SrcReg}); + copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg); + } else { + const TargetRegisterClass *SubregRC = + getMinClassForRegBank(SrcRegBank, DstSize, true); + getSubRegForClass(SubregRC, TRI, SubReg); + copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg); } - // Now, insert a subregister copy using the new register class. 
- selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg); return CheckCopy(); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir @@ -412,10 +412,9 @@ ; CHECK: liveins: $x0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %imp:gpr64 = IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %imp - ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY2]], 0, 1 :: (load 8) + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %imp.sub_32 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY1]], 0, 1 :: (load 8) ; CHECK: $x1 = COPY %load ; CHECK: RET_ReallyLR implicit $x1 %base:gpr(p0) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir @@ -85,10 +85,9 @@ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: %copy:gpr64 = COPY $x0 ; CHECK: %fold_me:gpr64sp = ANDXri %copy, 4098 - ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me - ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: TBNZW [[COPY2]], 3, %bb.1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: TBNZW [[COPY1]], 3, %bb.1 ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir --- 
a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir @@ -113,10 +113,9 @@ ; CHECK: %copy:gpr32 = COPY $w0 ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32 ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31 - ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %zext - ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: TBNZW [[COPY2]], 3, %bb.1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %zext.sub_32 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: TBNZW [[COPY1]], 3, %bb.1 ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: $x0 = COPY %zext diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir @@ -49,10 +49,9 @@ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: %copy:gpr64 = COPY $x0 ; CHECK: %fold_me:gpr64 = UBFMXri %copy, 59, 58 - ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me - ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: TBNZW [[COPY2]], 3, %bb.1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: TBNZW [[COPY1]], 3, %bb.1 ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: RET_ReallyLR @@ -87,10 +86,9 @@ ; CHECK: %copy:gpr64 = COPY $x0 ; CHECK: %fold_cst:gpr64 = MOVi64imm -5 ; CHECK: %fold_me:gpr64 = LSLVXr %copy, %fold_cst - ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me - ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: TBNZW [[COPY2]], 3, %bb.1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32 + ; CHECK: 
[[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: TBNZW [[COPY1]], 3, %bb.1 ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: RET_ReallyLR @@ -125,10 +123,9 @@ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: %copy:gpr64 = COPY $x0 ; CHECK: %shl:gpr64 = UBFMXri %copy, 62, 61 - ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %shl - ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: TBNZW [[COPY2]], 3, %bb.1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %shl.sub_32 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: TBNZW [[COPY1]], 3, %bb.1 ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: %second_use:gpr64sp = ORRXri %shl, 8000 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir @@ -0,0 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +--- | + target triple = "aarch64" + + @a = global i128 0, align 16 + + define void @test_128_fpr_truncation() #0 { + entry: + %0 = load i128, i128* @a, align 16 + %conv = trunc i128 %0 to i64 + %and = and i64 %conv, 1 + %tobool = icmp ne i64 %and, 0 + br i1 %tobool, label %end, label %end + + end: + ret void + } +... 
+--- +name: test_128_fpr_truncation +alignment: 4 +legalized: true +regBankSelected: true +body: | + ; CHECK-LABEL: name: test_128_fpr_truncation + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @a, target-flags(aarch64-pageoff, aarch64-nc) @a + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[MOVaddr]], 0 :: (dereferenceable load 16 from @a) + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY [[LDRQui]].ssub + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: TBNZW [[COPY1]], 0, %bb.1 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + bb.0: + %1:gpr(p0) = G_GLOBAL_VALUE @a + %3:gpr(s64) = G_CONSTANT i64 1 + %5:gpr(s64) = G_CONSTANT i64 0 + %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (dereferenceable load 16 from @a) + %2:fpr(s64) = G_TRUNC %0:fpr(s128) + %8:gpr(s64) = COPY %2:fpr(s64) + %4:gpr(s64) = G_AND %8:gpr, %3:gpr + %7:gpr(s32) = G_ICMP intpred(ne), %4:gpr(s64), %5:gpr + %6:gpr(s1) = G_TRUNC %7:gpr(s32) + G_BRCOND %6:gpr(s1), %bb.1 + + bb.1: + RET_ReallyLR +...