diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4820,6 +4820,19 @@ return TailMBB; } +static MachineInstr *elideCopies(MachineInstr *MI, + const MachineRegisterInfo &MRI) { + while (true) { + if (!MI->isFullCopy()) + return MI; + if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) + return nullptr; + MI = MRI.getVRegDef(MI->getOperand(1).getReg()); + if (!MI) + return nullptr; + } +} + static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, int VLIndex, unsigned SEWIndex, RISCVVLMUL VLMul, bool ForceTailAgnostic) { @@ -4880,8 +4893,11 @@ // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. const MachineOperand &UseMO = MI.getOperand(UseOpIdx); MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); - if (UseMI && UseMI->isImplicitDef()) - TailAgnostic = true; + if (UseMI) { + UseMI = elideCopies(UseMI, MRI); + if (UseMI && UseMI->isImplicitDef()) + TailAgnostic = true; + } } // For simplicity we reuse the vtype representation here. diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll @@ -126,7 +126,7 @@ define @masked_load_nxv8f64(* %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv8f64(* %a, i32 8, %mask, undef) @@ -148,7 +148,7 @@ define @masked_load_nxv16f32(* %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv16f32(* %a, i32 4, %mask, undef) @@ -159,7 +159,7 @@ define @masked_load_nxv32f16(* %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m8,tu,mu +; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv32f16(* %a, i32 2, %mask, undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll @@ -170,7 +170,7 @@ define @masked_load_nxv8i64(* %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv8i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv8i64(* %a, i32 8, %mask, undef) @@ -203,7 +203,7 @@ define @masked_load_nxv16i32(* %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv16i32(* %a, i32 4, %mask, undef) @@ -225,7 +225,7 @@ define @masked_load_nxv32i16(* %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m8,tu,mu +; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv32i16(* %a, i32 2, %mask, undef) @@ -236,7 +236,7 @@ define @masked_load_nxv64i8(* %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,m8,tu,mu +; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu ; CHECK-NEXT: vle8.v v8, (a0), v0.t ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv64i8(* %a, i32 1, %mask, undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir @@ -0,0 +1,68 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -mtriple=riscv64 -mattr=experimental-v -riscv-v-vector-bits-min=128 -run-pass=finalize-isel -o - | FileCheck %s + +# This test makes sure we peak through the COPY instruction between the +# IMPLICIT_DEF and PseudoVLE64_V_M8_MASK in order to select the tail agnostic +# policy. The test is working if the second argument to PseudoVSETVLI has bit 6 +# set. + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.ll" + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "riscv64" + + ; Function Attrs: nounwind + define @masked_load_nxv8i64(* %a, %mask) #0 { + %load = call @llvm.masked.load.nxv8i64.p0nxv8i64(* %a, i32 8, %mask, undef) + ret %load + } + + ; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn + declare @llvm.masked.load.nxv8i64.p0nxv8i64(*, i32 immarg, , ) #1 + + attributes #0 = { nounwind "target-features"="+experimental-v" } + attributes #1 = { argmemonly nofree nosync nounwind readonly willreturn "target-features"="+experimental-v" } + +... +--- +name: masked_load_nxv8i64 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: vr } + - { id: 2, class: vrm8nov0 } + - { id: 3, class: vrm8 } + - { id: 4, class: vrm8nov0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$v0', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $x10, $v0 + + ; CHECK-LABEL: name: masked_load_nxv8i64 + ; CHECK: liveins: $x10, $v0 + ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: $v0 = COPY [[COPY]] + ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; CHECK: [[COPY2:%[0-9]+]]:vrm8nov0 = COPY [[DEF]] + ; CHECK: dead %5:gpr = PseudoVSETVLI $x0, 91, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $noreg, 64, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8) + ; CHECK: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]] + ; CHECK: PseudoRET implicit $v8m8 + %1:vr = COPY $v0 + %0:gpr = COPY $x10 + $v0 = COPY %1 + %3:vrm8 = IMPLICIT_DEF + %4:vrm8nov0 = COPY %3 + %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 64, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8) + $v8m8 = COPY %2 + PseudoRET implicit $v8m8 + +...