diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -40,6 +40,7 @@ RISCVRedundantCopyElimination.cpp RISCVRegisterBankInfo.cpp RISCVRegisterInfo.cpp + RISCVRVVInitUndef.cpp RISCVSExtWRemoval.cpp RISCVSubtarget.cpp RISCVTargetMachine.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -68,6 +68,9 @@ FunctionPass *createRISCVRedundantCopyEliminationPass(); void initializeRISCVRedundantCopyEliminationPass(PassRegistry &); +FunctionPass *createRISCVInitUndefPass(); +void initializeRISCVInitUndefPass(PassRegistry &); + InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, RISCVSubtarget &, RISCVRegisterBankInfo &); diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -47,6 +47,8 @@ MachineBasicBlock::iterator &NextMBBI); bool expandCCOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); + bool removeTempRVVInitUndef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); bool expandVMSET_VMCLR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opcode); @@ -132,11 +134,33 @@ case RISCV::PseudoVRELOAD7_M1: case RISCV::PseudoVRELOAD8_M1: return expandVRELOAD(MBB, MBBI); + case RISCV::PseudoRVVInitUndefM1: + case RISCV::PseudoRVVInitUndefM2: + case RISCV::PseudoRVVInitUndefM4: + case RISCV::PseudoRVVInitUndefM8: + case RISCV::PseudoRVVInitUndefM1NoV0: + case RISCV::PseudoRVVInitUndefM2NoV0: + case RISCV::PseudoRVVInitUndefM4NoV0: + case RISCV::PseudoRVVInitUndefM8NoV0: + return removeTempRVVInitUndef(MBB, MBBI); 
} return false; } +bool RISCVExpandPseudo::removeTempRVVInitUndef( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { + MachineRegisterInfo *MRI = &(MBB.getParent()->getRegInfo()); + MachineInstr &MI = *MBBI; + Register Reg = MI.getOperand(0).getReg(); + + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) + MO.setIsUndef(); + + MI.eraseFromParent(); + return true; +} + bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1764,6 +1764,18 @@ (AddiPairImmSmall AddiPair:$rs2))>; } +/// Empty pseudo for RISCVInitUndefPass +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 0, isCodeGenOnly = 1 in { + def PseudoRVVInitUndefM1 : Pseudo<(outs VR:$vd), (ins), [], "">; + def PseudoRVVInitUndefM2 : Pseudo<(outs VRM2:$vd), (ins), [], "">; + def PseudoRVVInitUndefM4 : Pseudo<(outs VRM4:$vd), (ins), [], "">; + def PseudoRVVInitUndefM8 : Pseudo<(outs VRM8:$vd), (ins), [], "">; + def PseudoRVVInitUndefM1NoV0 : Pseudo<(outs VRNoV0:$vd), (ins), [], "">; + def PseudoRVVInitUndefM2NoV0 : Pseudo<(outs VRM2NoV0:$vd), (ins), [], "">; + def PseudoRVVInitUndefM4NoV0 : Pseudo<(outs VRM4NoV0:$vd), (ins), [], "">; + def PseudoRVVInitUndefM8NoV0 : Pseudo<(outs VRM8NoV0:$vd), (ins), [], "">; +} + //===----------------------------------------------------------------------===// // Standard extensions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp @@ -0,0 +1,198 @@ +//===- RISCVInitUndef.cpp - Initialize undef vector value to pesudo -------===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function pass that initializes undef vector value to +// temporary pseudo instruction and remove it in expandpseudo pass to prevent +// register allocation resulting in a constraint violated result for vector +// instruction. +// +// RISC-V vector instruction has register overlapping constraint for certain +// instructions, and will cause illegal instruction trap if violated, we use +// early clobber to model this constraint, but it can't prevent register +// allocator allocated same or overlapped if the input register is undef value, +// so convert IMPLICIT_DEF to temporary pseudo instruction and remove it later +// could prevent that happen, it's not the best way to resolve this, and it might +// change the order of program or increase the register pressure, so ideally we +// should model the constraint right, but before we model the constraint right, +// it's the only way to prevent that happen. 
+// +// See also: https://github.com/llvm/llvm-project/issues/50157 +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +#define DEBUG_TYPE "riscv-init-undef" +#define RISCV_INIT_UNDEF_NAME "RISCV init undef pass" + +namespace { + +class RISCVInitUndef : public MachineFunctionPass { + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + +public: + static char ID; + + RISCVInitUndef() : MachineFunctionPass(ID) { + initializeRISCVInitUndefPass(*PassRegistry::getPassRegistry()); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return RISCV_INIT_UNDEF_NAME; } + +private: + bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); + bool handleImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &Inst); + bool isVectorRegClass(const Register &R); +}; + +} // end anonymous namespace + +char RISCVInitUndef::ID = 0; + +INITIALIZE_PASS(RISCVInitUndef, DEBUG_TYPE, RISCV_INIT_UNDEF_NAME, false, false) + +bool RISCVInitUndef::isVectorRegClass(const Register &R) { + unsigned RegClassID = MRI->getRegClass(R)->getID(); + switch (RegClassID) { + case RISCV::VRRegClassID: + case RISCV::VRM2RegClassID: + case RISCV::VRM4RegClassID: + case RISCV::VRM8RegClassID: + case RISCV::VRNoV0RegClassID: + case RISCV::VRM2NoV0RegClassID: + case RISCV::VRM4NoV0RegClassID: + case RISCV::VRM8NoV0RegClassID: + return true; + default: + return false; + } +} + +bool RISCVInitUndef::handleImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &Inst) { + const TargetRegisterInfo &TRI = + *MBB.getParent()->getSubtarget().getRegisterInfo(); + + assert(Inst->getOpcode() == TargetOpcode::IMPLICIT_DEF); + + unsigned Reg = 
Inst->getOperand(0).getReg(); + if (!Register::isVirtualRegister(Reg)) + return false; + + bool NeedZeroInit = false; + SmallVector<MachineOperand *> UserMOs; + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + MachineInstr *UserMI = MO.getParent(); + + bool HasEarlyClobber = false; + bool TiedToDef = false; + for (MachineOperand &UseMO : UserMI->operands()) { + if (!UseMO.isReg()) + continue; + if (UseMO.isEarlyClobber()) { + HasEarlyClobber = true; + } else if (UseMO.isUse() && UseMO.isTied() && + TRI.regsOverlap(UseMO.getReg(), Reg)) { + TiedToDef = true; + } + } + if (HasEarlyClobber && !TiedToDef) { + NeedZeroInit = true; + UserMOs.push_back(&MO); + } + } + + if (!NeedZeroInit) + return false; + + LLVM_DEBUG( + dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register " + << Reg << '\n'); + + unsigned Opcode; + unsigned RegClassID = MRI->getRegClass(Reg)->getID(); + switch (RegClassID) { + case RISCV::VRRegClassID: + Opcode = RISCV::PseudoRVVInitUndefM1; + break; + case RISCV::VRNoV0RegClassID: + Opcode = RISCV::PseudoRVVInitUndefM1NoV0; + break; + case RISCV::VRM2RegClassID: + Opcode = RISCV::PseudoRVVInitUndefM2; + break; + case RISCV::VRM2NoV0RegClassID: + Opcode = RISCV::PseudoRVVInitUndefM2NoV0; + break; + case RISCV::VRM4RegClassID: + Opcode = RISCV::PseudoRVVInitUndefM4; + break; + case RISCV::VRM4NoV0RegClassID: + Opcode = RISCV::PseudoRVVInitUndefM4NoV0; + break; + case RISCV::VRM8RegClassID: + Opcode = RISCV::PseudoRVVInitUndefM8; + break; + case RISCV::VRM8NoV0RegClassID: + Opcode = RISCV::PseudoRVVInitUndefM8NoV0; + break; + default: + llvm_unreachable("Unexpected register class."); + } + + BuildMI(MBB, Inst, Inst->getDebugLoc(), TII->get(Opcode), Reg); + + Inst = MBB.erase(Inst); + + for (auto MO : UserMOs) + MO->setIsUndef(false); + + return true; +} + +bool RISCVInitUndef::processBasicBlock(MachineFunction &MF, + MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { + 
MachineInstr &MI = *I; + if (MI.isImplicitDef()) { + auto DstReg = MI.getOperand(0).getReg(); + if (isVectorRegClass(DstReg)) + Changed |= handleImplicitDef(MBB, I); + } + } + return Changed; +} + +bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) { + const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + if (!ST.hasVInstructions()) + return false; + + MRI = &MF.getRegInfo(); + TII = ST.getInstrInfo(); + + bool Changed = false; + for (MachineBasicBlock &BB : MF) + Changed |= processBasicBlock(MF, BB); + + return Changed; +} + +FunctionPass *llvm::createRISCVInitUndefPass() { return new RISCVInitUndef(); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -65,6 +65,7 @@ initializeRISCVPreRAExpandPseudoPass(*PR); initializeRISCVExpandPseudoPass(*PR); initializeRISCVInsertVSETVLIPass(*PR); + initializeRISCVInitUndefPass(*PR); } static StringRef computeDataLayout(const Triple &TT) { @@ -280,6 +281,8 @@ if (TM->getOptLevel() != CodeGenOpt::None) addPass(createRISCVMergeBaseOffsetOptPass()); addPass(createRISCVInsertVSETVLIPass()); + if (getOptimizeRegAlloc()) + addPass(createRISCVInitUndefPass()); } void RISCVPassConfig::addPostRegAlloc() { diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -104,6 +104,7 @@ ; CHECK-NEXT: RISCV Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: RISCV Merge Base Offset ; CHECK-NEXT: RISCV Insert VSETVLI pass +; CHECK-NEXT: RISCV init undef pass ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Process Implicit Definitions ; CHECK-NEXT: Remove unreachable machine basic blocks diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll --- 
a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll +++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -25,26 +25,26 @@ ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: li a0, 55 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vloxseg2ei32.v v8, (a0), v8 +; CHECK-NEXT: vloxseg2ei32.v v16, (a0), v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs4r.v v20, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: li s0, 36 ; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma -; CHECK-NEXT: vfwadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: vfwadd.vv v16, v8, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: call func@plt ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma @@ -101,23 +101,23 @@ ; SUBREGLIVENESS-NEXT: sub sp, sp, a0 ; SUBREGLIVENESS-NEXT: li a0, 55 ; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v8, (a0), v8 +; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v16, (a0), v8 ; SUBREGLIVENESS-NEXT: csrr a0, vlenb ; SUBREGLIVENESS-NEXT: slli a0, a0, 3 ; SUBREGLIVENESS-NEXT: add a0, sp, a0 ; SUBREGLIVENESS-NEXT: addi a0, a0, 16 ; SUBREGLIVENESS-NEXT: csrr a1, vlenb ; SUBREGLIVENESS-NEXT: slli a1, a1, 2 -; SUBREGLIVENESS-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SUBREGLIVENESS-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill ; 
SUBREGLIVENESS-NEXT: add a0, a0, a1 -; SUBREGLIVENESS-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill +; SUBREGLIVENESS-NEXT: vs4r.v v20, (a0) # Unknown-size Folded Spill ; SUBREGLIVENESS-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; SUBREGLIVENESS-NEXT: vmclr.m v0 ; SUBREGLIVENESS-NEXT: li s0, 36 ; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma -; SUBREGLIVENESS-NEXT: vfwadd.vv v8, v8, v8, v0.t +; SUBREGLIVENESS-NEXT: vfwadd.vv v16, v8, v8, v0.t ; SUBREGLIVENESS-NEXT: addi a0, sp, 16 -; SUBREGLIVENESS-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; SUBREGLIVENESS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; SUBREGLIVENESS-NEXT: call func@plt ; SUBREGLIVENESS-NEXT: li a0, 32 ; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple riscv64 -mattr=+v < %s | FileCheck %s + +define dso_local signext i32 @undef_early_clobber_chain() { +; CHECK-LABEL: undef_early_clobber_chain: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -400 +; CHECK-NEXT: .cfi_def_cfa_offset 400 +; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma +; CHECK-NEXT: vrgather.vi v9, v8, 0 +; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: addi sp, sp, 400 +; CHECK-NEXT: ret +entry: + %dst = alloca [100 x float], align 8 + call void @llvm.lifetime.start.p0(i64 400, ptr nonnull %dst) #4 + %0 = tail call <vscale x 2 x float> @llvm.riscv.vrgather.vx.nxv2f32.i64(<vscale x 2 x float> undef, <vscale x 2 x float> undef, i64 0, i64 0) + call void @llvm.riscv.vse.nxv2f32.i64(<vscale x 2 x float> %0, ptr nonnull %dst, i64 0) + call void @llvm.lifetime.end.p0(i64 400, ptr nonnull %dst) #4 + ret i32 0 +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare 
<vscale x 2 x float> @llvm.riscv.vrgather.vx.nxv2f32.i64(<vscale x 2 x float>, <vscale x 2 x float>, i64, i64) #2 +declare void @llvm.riscv.vse.nxv2f32.i64(<vscale x 2 x float>, ptr nocapture, i64) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)