Index: llvm/include/llvm/CodeGen/TargetRegisterInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -702,6 +702,16 @@
     return RC;
   }
 
+  /// Returns the register class required for a virtual register that is
+  /// involved in copying a value to/from \p PhysReg. Targets can override
+  /// this when such copies only work with a particular register class.
+  /// If nullptr is returned (the default implementation), the default
+  /// register class based on the type is used.
+  virtual const TargetRegisterClass *
+  getPhysRegCopyRegClass(unsigned PhysReg) const {
+    return nullptr;
+  }
+
   /// Returns the largest super class of RC that is legal to use in the current
   /// sub-target and has the same spill size.
   /// The returned register class can be used to create virtual registers which
Index: llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -166,6 +166,9 @@
   } else {
     DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
   }
+  // The target may require a specific register class for copies from SrcReg.
+  if (const auto *CopyRC = TRI->getPhysRegCopyRegClass(SrcReg))
+    DstRC = CopyRC;
 
   // If all uses are reading from the src physical register and copying the
   // register is either impossible or very expensive, then don't create a copy.
@@ -1017,6 +1020,15 @@
     if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
       break;
 
+    // If the target requires a specific register class for copies to DestReg,
+    // first copy the value into a temporary virtual register of that class.
+    if (const auto *CopyRC = TRI->getPhysRegCopyRegClass(DestReg)) {
+      unsigned TmpReg = MRI->createVirtualRegister(CopyRC);
+      BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), TmpReg).addReg(SrcReg);
+      SrcReg = TmpReg;
+    }
+
     BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
             DestReg).addReg(SrcReg);
     break;
Index: llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -58,6 +58,9 @@
   const TargetRegisterClass *
   getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
 
+  const TargetRegisterClass *
+  getPhysRegCopyRegClass(unsigned PhysReg) const override;
+
   bool getRegAllocationHints(unsigned VirtReg,
                              ArrayRef<MCPhysReg> Order,
                              SmallVectorImpl<MCPhysReg> &Hints,
Index: llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -421,3 +421,10 @@
   return RC;
 }
 
+const TargetRegisterClass *
+SystemZRegisterInfo::getPhysRegCopyRegClass(unsigned PhysReg) const {
+  // EAR / SAR can only use GR32 registers.
+  if (SystemZ::AR32BitRegClass.contains(PhysReg))
+    return &SystemZ::GR32BitRegClass;
+  return nullptr;
+}
Index: llvm/test/CodeGen/SystemZ/tls-08.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/tls-08.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O0 -stop-after=finalize-isel \
+; RUN:   2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; Test that copies to/from access registers are emitted from isel with GR32 regs.
+
+@x = dso_local thread_local global i32 0, align 4
+define weak_odr hidden i32* @fun0() {
+; CHECK-LABEL: name: fun0
+; CHECK: %{{[0-9]+}}:gr32bit = COPY $a0
+; CHECK: %{{[0-9]+}}:gr32bit = COPY $a1
+  ret i32* @x
+}
+
+define i32 @fun1() {
+; CHECK-LABEL: name: fun1
+; CHECK: %[[TMP:[0-9]+]]:gr32bit = COPY %{{[0-9]+}}
+; CHECK: $a1 = COPY %[[TMP]]
+; CHECK: %{{[0-9]+}}:gr32bit = COPY $a0
+  %val = call i32 asm "blah", "={a0}, {a1}" (i32 0)
+  ret i32 %val
+}
Index: llvm/test/CodeGen/SystemZ/tls-09.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/tls-09.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O0
+;
+; Test that a0 and a1 are copied successfully into GR32 registers.
+
+@x = dso_local thread_local global i32 0, align 4
+define i32 @fun0(i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext) {
+  %8 = alloca i32, align 4
+  %9 = alloca i32, align 4
+  %10 = alloca i32, align 4
+  %11 = alloca i32, align 4
+  %12 = alloca i32, align 4
+  %13 = alloca i32, align 4
+  %14 = alloca i32, align 4
+  %15 = load i32, i32* @x, align 4
+  store i32 %0, i32* %8, align 4
+  store i32 %1, i32* %9, align 4
+  store i32 %2, i32* %10, align 4
+  store i32 %3, i32* %11, align 4
+  store i32 %4, i32* %12, align 4
+  store i32 %5, i32* %13, align 4
+  store i32 %6, i32* %14, align 4
+  %16 = load i32, i32* %8, align 4
+  %17 = add nsw i32 %15, %16
+  %18 = load i32, i32* %9, align 4
+  %19 = add nsw i32 %17, %18
+  %20 = load i32, i32* %10, align 4
+  %21 = add nsw i32 %19, %20
+  %22 = load i32, i32* %11, align 4
+  %23 = add nsw i32 %21, %22
+  %24 = load i32, i32* %12, align 4
+  %25 = add nsw i32 %23, %24
+  %26 = load i32, i32* %13, align 4
+  %27 = add nsw i32 %25, %26
+  %28 = load i32, i32* %14, align 4
+  %29 = add nsw i32 %27, %28
+  ret i32 %29
+}