diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt
--- a/llvm/lib/Target/VE/CMakeLists.txt
+++ b/llvm/lib/Target/VE/CMakeLists.txt
@@ -12,6 +12,7 @@
 add_public_tablegen_target(VECommonTableGen)
 
 add_llvm_target(VECodeGen
+  LVLGen.cpp
   VEAsmPrinter.cpp
   VEFrameLowering.cpp
   VEISelDAGToDAG.cpp
diff --git a/llvm/lib/Target/VE/LVLGen.cpp b/llvm/lib/Target/VE/LVLGen.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/VE/LVLGen.cpp
@@ -0,0 +1,132 @@
+//===-- LVLGen.cpp - LVL instruction generator ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VE.h"
+#include "VESubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lvl-gen"
+
+namespace {
+struct LVLGen : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+
+  static char ID;
+  LVLGen() : MachineFunctionPass(ID) {}
+  bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  unsigned getVL(const MachineInstr &MI);
+  int getVLIndex(unsigned Opcode);
+};
+char LVLGen::ID = 0;
+
+} // end of anonymous namespace
+
+FunctionPass *llvm::createLVLGenPass() { return new LVLGen; }
+
+int LVLGen::getVLIndex(unsigned Opcode) {
+  const MCInstrDesc &MCID = TII->get(Opcode);
+
+  // If an instruction has VLIndex information, return it.
+  if (HAS_VLINDEX(MCID.TSFlags))
+    return GET_VLINDEX(MCID.TSFlags);
+
+  return -1;
+}
+
+// Returns the register holding the vector length of MI. VE::NoRegister is
+// returned when MI does not have a vector length operand.
+unsigned LVLGen::getVL(const MachineInstr &MI) {
+  int Index = getVLIndex(MI.getOpcode());
+  if (Index >= 0)
+    return MI.getOperand(Index).getReg();
+
+  return VE::NoRegister;
+}
+
+bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+#define RegName(no)                                                            \
+  (MBB.getParent()->getSubtarget().getRegisterInfo()->getName(no))
+
+  bool Changed = false;
+  bool HasRegForVL = false;
+  unsigned RegForVL;
+
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+    MachineBasicBlock::iterator MI = I;
+
+    unsigned Reg = getVL(*MI);
+    if (Reg != VE::NoRegister) {
+      LLVM_DEBUG(dbgs() << "Vector instruction found: ");
+      LLVM_DEBUG(MI->dump());
+      LLVM_DEBUG(dbgs() << "Vector length is " << RegName(Reg) << ". ");
+      LLVM_DEBUG(dbgs() << "Current VL is "
+                        << (HasRegForVL ? RegName(RegForVL) : "unknown")
+                        << ". ");
+
+      if (!HasRegForVL || RegForVL != Reg) {
+        LLVM_DEBUG(dbgs() << "Generate an LVL instruction to load "
+                          << RegName(Reg) << ".\n");
+        BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
+        HasRegForVL = true;
+        RegForVL = Reg;
+        Changed = true;
+      } else {
+        LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
+      }
+    } else if (HasRegForVL) {
+      // The old VL is overwritten, so disable HasRegForVL.
+      if (MI->findRegisterDefOperandIdx(RegForVL, false, false, TRI) != -1) {
+        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is overwritten: ");
+        LLVM_DEBUG(MI->dump());
+        HasRegForVL = false;
+      }
+    }
+    if (HasRegForVL) {
+      // The latest VL is killed, so disable HasRegForVL.
+      if (MI->killsRegister(RegForVL, TRI)) {
+        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
+        LLVM_DEBUG(MI->dump());
+        HasRegForVL = false;
+      }
+    }
+
+    ++I;
+  }
+  return Changed;
+}
+
+bool LVLGen::runOnMachineFunction(MachineFunction &F) {
+  LLVM_DEBUG(dbgs() << "********** Begin LVLGen **********\n");
+  LLVM_DEBUG(dbgs() << "********** Function: " << F.getName() << '\n');
+  LLVM_DEBUG(F.dump());
+
+  bool Changed = false;
+
+  const VESubtarget &Subtarget = F.getSubtarget<VESubtarget>();
+  TII = Subtarget.getInstrInfo();
+  TRI = Subtarget.getRegisterInfo();
+
+  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+    Changed |= runOnMachineBasicBlock(*FI);
+
+  if (Changed) {
+    LLVM_DEBUG(dbgs() << "\n");
+    LLVM_DEBUG(F.dump());
+  }
+  LLVM_DEBUG(dbgs() << "********** End LVLGen **********\n");
+  return Changed;
+}
diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -29,6 +29,7 @@
 
 FunctionPass *createVEISelDag(VETargetMachine &TM);
 FunctionPass *createVEPromoteToI1Pass();
+FunctionPass *createLVLGenPass();
 
 void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                  AsmPrinter &AP);
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -352,6 +352,25 @@
     BuildMI(MBB, I, DL, get(VE::ORri), DestReg)
         .addReg(SrcReg, getKillRegState(KillSrc))
        .addImm(0);
+  } else if (VE::V64RegClass.contains(DestReg, SrcReg)) {
+    // Generate the following instructions:
+    //   %s16 = LEAzii 0, 0, 256
+    //   VORmvl %dest, (0)1, %src, %sw16 ; %sw16 = 32-bit sub-reg of %s16
+    // TODO: Reuse a register if VL is already assigned to a register.
+    // FIXME: It would be better to scavenge a register here instead of
+    // reserving SX16 all of the time.
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    Register TmpReg = VE::SX16;
+    Register SubTmp = TRI->getSubReg(TmpReg, VE::sub_i32);
+    BuildMI(MBB, I, DL, get(VE::LEAzii), TmpReg)
+        .addImm(0)
+        .addImm(0)
+        .addImm(256);
+    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(VE::VORmvl), DestReg)
+                                  .addImm(M1(0)) // Represent (0)1.
+                                  .addReg(SrcReg, getKillRegState(KillSrc))
+                                  .addReg(SubTmp, getKillRegState(true));
+    MIB.getInstr()->addRegisterKilled(TmpReg, TRI, true);
   } else if (VE::F128RegClass.contains(DestReg, SrcReg)) {
     // Use two instructions.
     const unsigned SubRegIdx[] = {VE::sub_even, VE::sub_odd};
diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp
--- a/llvm/lib/Target/VE/VETargetMachine.cpp
+++ b/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -98,6 +98,7 @@
 
   void addIRPasses() override;
   bool addInstSelector() override;
+  void addPreEmitPass() override;
 };
 } // namespace
 
@@ -115,3 +116,8 @@
   addPass(createVEISelDag(getVETargetMachine()));
   return false;
 }
+
+void VEPassConfig::addPreEmitPass() {
+  // LVLGen should be called after scheduling and register allocation.
+  addPass(createLVLGenPass());
+}
diff --git a/llvm/test/CodeGen/VE/Vector/fastcc.ll b/llvm/test/CodeGen/VE/Vector/fastcc.ll
--- a/llvm/test/CodeGen/VE/Vector/fastcc.ll
+++ b/llvm/test/CodeGen/VE/Vector/fastcc.ll
@@ -52,36 +52,65 @@
   ret <256 x i32> %p0
 }
 
-; TODO: Uncomment tests when vreg-to-vreg copy is upstream.
-; define fastcc <256 x i32> @vreg_arg_v256i32_r1(<256 x i32> %p0, <256 x i32> %p1) {
-;   ret <256 x i32> %p1
-; }
-;
-; define fastcc <256 x i32> @vreg_arg_v256i32_r2(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2) {
-;   ret <256 x i32> %p2
-; }
-;
-; define fastcc <256 x i32> @vreg_arg_v256i32_r3(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3) {
-;   ret <256 x i32> %p3
-; }
-;
-; define fastcc <256 x i32> @vreg_arg_v256i32_r4(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4) {
-;   ret <256 x i32> %p4
-; }
-;
-; define fastcc <256 x i32> @vreg_arg_v256i32_r5(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5) {
-;   ret <256 x i32> %p5
-; }
-;
-; define fastcc <256 x i32> @vreg_arg_v256i32_r6(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6) {
-;   ret <256 x i32> %p6
-; }
-;
+define fastcc <256 x i32> @vreg_arg_v256i32_r1(<256 x i32> %p0, <256 x i32> %p1) {
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: or %s11, 0, %s9
+  ret <256 x i32> %p1
+}
+
+define fastcc <256 x i32> @vreg_arg_v256i32_r2(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2) {
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v2
+; CHECK-NEXT: or %s11, 0, %s9
+  ret <256 x i32> %p2
+}
+
+define fastcc <256 x i32> @vreg_arg_v256i32_r3(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3) {
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v3
+; CHECK-NEXT: or %s11, 0, %s9
+  ret <256 x i32> %p3
+}
+
+define fastcc <256 x i32> @vreg_arg_v256i32_r4(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4) {
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v4
+; CHECK-NEXT: or %s11, 0, %s9
+  ret <256 x i32> %p4
+}
+
+define fastcc <256 x i32> @vreg_arg_v256i32_r5(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5) {
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v5
+; CHECK-NEXT: or %s11, 0, %s9
+  ret <256 x i32> %p5
+}
+
+define fastcc <256 x i32> @vreg_arg_v256i32_r6(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6) {
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v6
+; CHECK-NEXT: or %s11, 0, %s9
+  ret <256 x i32> %p6
+}
+
+; TODO: Uncomment test when vector loads are upstream (vreg stack passing).
 ; define <256 x i32> @vreg_arg_v256i32_r7(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6, <256 x i32> %p7) {
 ;   ret <256 x i32> %p7
 ; }
 
-; TODO: Uncomment test when vector loads are upstream (vreg stack passing).
 ; define <256 x i32> @vreg_arg_v256i32_r8(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6, <256 x i32> %p7, <256 x i32> %p8) {
 ;   ret <256 x i32> %p8
 ; }
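
For reference, a minimal sketch of the sequence this patch yields for one
vreg-to-vreg copy, reconstructed from the fastcc.ll CHECK lines above (the
block label and register numbers are whatever the register allocator picks;
the comments are editorial, not part of the emitted output):

  # vreg_arg_v256i32_r1: return %p1, i.e. copy %v1 into %v0
  lea %s16, 256        # copyPhysReg materializes the vector length 256 in %s16
  lvl %s16             # inserted by LVLGen: load the VL register from %s16
  vor %v0, (0)1, %v1   # VORmvl copies %v1 to %v0 under the current VL

Within a basic block, LVLGen remembers the scalar register last loaded into
VL and omits the lvl for subsequent vector instructions that use the same
register, forgetting the tracked value once that register is overwritten or
killed.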