Index: llvm/lib/Target/SystemZ/CMakeLists.txt =================================================================== --- llvm/lib/Target/SystemZ/CMakeLists.txt +++ llvm/lib/Target/SystemZ/CMakeLists.txt @@ -16,6 +16,7 @@ SystemZAsmPrinter.cpp SystemZCallingConv.cpp SystemZConstantPoolValue.cpp + SystemZCopyPhysRegs.cpp SystemZElimCompare.cpp SystemZFrameLowering.cpp SystemZHazardRecognizer.cpp Index: llvm/lib/Target/SystemZ/SystemZ.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZ.h +++ llvm/lib/Target/SystemZ/SystemZ.h @@ -193,6 +193,7 @@ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM); FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM); FunctionPass *createSystemZTDCPass(); } // end namespace llvm Index: llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp =================================================================== --- /dev/null +++ llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp @@ -0,0 +1,120 @@ +//===---------- SystemZCopyPhysRegs.cpp - Handle phys reg copies ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass makes sure that a COPY of a physical register will be +// implementable after register allocation in copyPhysReg() (this could be +// done in EmitInstrWithCustomInserter() instead if COPY instructions would +// be passed to it). 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs" + +namespace llvm { + void initializeSystemZCopyPhysRegsPass(PassRegistry&); +} + +namespace { + +class SystemZCopyPhysRegs : public MachineFunctionPass { +public: + static char ID; + SystemZCopyPhysRegs() + : MachineFunctionPass(ID), TII(nullptr), MRI(nullptr) { + initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + + bool visitMBB(MachineBasicBlock &MBB); + + const SystemZInstrInfo *TII; + MachineRegisterInfo *MRI; +}; + +char SystemZCopyPhysRegs::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs", + SYSTEMZ_COPYPHYSREGS_NAME, false, false) + +FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) { + return new SystemZCopyPhysRegs(); +} + +void SystemZCopyPhysRegs::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + // Certain special registers can only be copied from a subset of the + // default register class of the type. It is therefore necessary to create + // the target copy instructions before regalloc instead of in copyPhysReg(). 
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; ) { + MachineInstr *MI = &*MBBI++; + if (!MI->isCopy()) + continue; + + DebugLoc DL = MI->getDebugLoc(); + Register SrcReg = MI->getOperand(1).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + if (DstReg.isVirtual() && + (SrcReg == SystemZ::CC || SystemZ::AR32BitRegClass.contains(SrcReg))) { + Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass); + if (SrcReg == SystemZ::CC) + BuildMI(MBB, MI, DL, TII->get(SystemZ::IPM), Tmp); + else + BuildMI(MBB, MI, DL, TII->get(SystemZ::EAR), Tmp).addReg(SrcReg); + MI->getOperand(1).setReg(Tmp); + Modified = true; + } + else if (SrcReg.isVirtual() && + SystemZ::AR32BitRegClass.contains(DstReg)) { + Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass); + MI->getOperand(0).setReg(Tmp); + BuildMI(MBB, MBBI, DL, TII->get(SystemZ::SAR), DstReg).addReg(Tmp); + Modified = true; + } + } + + return Modified; +} + +bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); + MRI = &F.getRegInfo(); + + bool Modified = false; + for (auto &MBB : F) + Modified |= visitMBB(MBB); + + return Modified; +} + Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -820,18 +820,11 @@ return; } - // Move CC value from/to a GR32. - if (SrcReg == SystemZ::CC) { - auto MIB = BuildMI(MBB, MBBI, DL, get(SystemZ::IPM), DestReg); - if (KillSrc) { - const MachineFunction *MF = MBB.getParent(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - MIB->addRegisterKilled(SrcReg, TRI); - } - return; - } + // Move CC value from a GR32. if (DestReg == SystemZ::CC) { - BuildMI(MBB, MBBI, DL, get(SystemZ::TMLH)) + unsigned Opcode = + SystemZ::GR32BitRegClass.contains(SrcReg) ? 
SystemZ::TMLH : SystemZ::TMHH; + BuildMI(MBB, MBBI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(3 << (SystemZ::IPM_CC - 16)); return; @@ -856,12 +849,6 @@ Opcode = SystemZ::VLR; else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::CPYA; - else if (SystemZ::AR32BitRegClass.contains(DestReg) && - SystemZ::GR32BitRegClass.contains(SrcReg)) - Opcode = SystemZ::SAR; - else if (SystemZ::GR32BitRegClass.contains(DestReg) && - SystemZ::AR32BitRegClass.contains(SrcReg)) - Opcode = SystemZ::EAR; else llvm_unreachable("Impossible reg-to-reg copy"); Index: llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -222,6 +222,7 @@ void addIRPasses() override; bool addInstSelector() override; bool addILPOpts() override; + void addPreRegAlloc() override; void addPostRewrite() override; void addPostRegAlloc() override; void addPreSched2() override; @@ -253,6 +254,10 @@ return true; } +void SystemZPassConfig::addPreRegAlloc() { + addPass(createSystemZCopyPhysRegsPass(getSystemZTargetMachine())); +} + void SystemZPassConfig::addPostRewrite() { addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); } Index: llvm/test/CodeGen/SystemZ/tls-08.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/tls-08.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O0 \ +; RUN: -stop-before=regallocfast 2>&1 | FileCheck %s +; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O3 \ +; RUN: -stop-before=livevars 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; Test that copies to/from access registers are handled before regalloc with +; GR32 regs. 
+ +@x = dso_local thread_local global i32 0, align 4 +define weak_odr hidden i32* @fun0() { +; CHECK: name: fun0 +; CHECK: {{%[0-9]+}}:gr32bit = EAR $a0 +; CHECK: {{%[0-9]+}}:gr32bit = EAR $a1 + ret i32* @x +} + +define i32 @fun1() { +; CHECK: name: fun1 +; CHECK: [[VREG0:%[0-9]+]]:gr32bit = COPY %0 +; CHECK-NEXT: $a1 = SAR [[VREG0]] +; CHECK: {{%[0-9]+}}:gr32bit = EAR $a0 + %val = call i32 asm "blah", "={a0}, {a1}" (i32 0) + ret i32 %val +} Index: llvm/test/CodeGen/SystemZ/tls-09.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/tls-09.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O0 +; +; Test that a0 and a1 are copied successfully into GR32 registers. + +@x = dso_local thread_local global i32 0, align 4 +define i32 @fun0(i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext) { + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + %10 = alloca i32, align 4 + %11 = alloca i32, align 4 + %12 = alloca i32, align 4 + %13 = alloca i32, align 4 + %14 = alloca i32, align 4 + %15 = load i32, i32* @x, align 4 + store i32 %0, i32* %8, align 4 + store i32 %1, i32* %9, align 4 + store i32 %2, i32* %10, align 4 + store i32 %3, i32* %11, align 4 + store i32 %4, i32* %12, align 4 + store i32 %5, i32* %13, align 4 + store i32 %6, i32* %14, align 4 + %16 = load i32, i32* %8, align 4 + %17 = add nsw i32 %15, %16 + %18 = load i32, i32* %9, align 4 + %19 = add nsw i32 %17, %18 + %20 = load i32, i32* %10, align 4 + %21 = add nsw i32 %19, %20 + %22 = load i32, i32* %11, align 4 + %23 = add nsw i32 %21, %22 + %24 = load i32, i32* %12, align 4 + %25 = add nsw i32 %23, %24 + %26 = load i32, i32* %13, align 4 + %27 = add nsw i32 %25, %26 + %28 = load i32, i32* %14, align 4 + %29 = add nsw i32 %27, %28 + ret i32 %29 +} Index: llvm/test/CodeGen/SystemZ/tls-10.mir =================================================================== --- /dev/null +++ 
llvm/test/CodeGen/SystemZ/tls-10.mir @@ -0,0 +1,24 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z196 -O0 -start-after=finalize-isel \ +# RUN: -stop-before=regallocfast -o - %s | FileCheck %s +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z196 -O3 -start-after=finalize-isel \ +# RUN: -stop-before=livevars -o - %s | FileCheck %s +# +# Test that a COPY from CC gets implemented with an IPM to a GR32 reg. + +--- +name: fun0 +tracksRegLiveness: true +registers: + - { id: 0, class: grx32bit } +body: | + bb.0: + liveins: $cc + ; CHECK-LABEL: name: fun0 + ; CHECK: %1:gr32bit = IPM implicit $cc + ; CHECK-NEXT: %0:grx32bit = COPY %1 + ; CHECK-NEXT: $r2l = COPY %0 + ; CHECK-NEXT: Return implicit $r2l + %0:grx32bit = COPY $cc + $r2l = COPY %0 + Return implicit $r2l +... Index: llvm/test/CodeGen/SystemZ/tls-11.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/tls-11.mir @@ -0,0 +1,18 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z196 -O0 -start-before=prologepilog \ +# RUN: -o - %s | FileCheck %s +# +# Test that a COPY to CC gets implemented with a tmlh or tmhh depending on +# the source register. + +--- +name: fun0 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r3l, $r4h + ; CHECK-LABEL: fun0 + ; CHECK: tmlh %r3, 12288 + ; CHECK: tmhh %r4, 12288 + $cc = COPY $r3l + $cc = COPY $r4h +...