diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1104,6 +1104,8 @@
 // On RV32, 64-bit integers are split into their high and low parts and held
 // in two different registers, so the trunc is free since the low register can
 // just be used.
+// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
+// isTruncateFree?
 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
     return false;
@@ -1113,8 +1115,10 @@
 }
 
 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
-  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
-      !SrcVT.isInteger() || !DstVT.isInteger())
+  // We consider i64->i32 free on RV64 since we have good selection of W
+  // instructions that make promoting operations back to i64 free in many cases.
+  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
+      !DstVT.isInteger())
     return false;
   unsigned SrcBits = SrcVT.getSizeInBits();
   unsigned DestBits = DstVT.getSizeInBits();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1725,17 +1725,10 @@
 ;
 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
 ; RV64ZVE32F:       # %bb.0:
-; RV64ZVE32F-NEXT:    addi sp, sp, -16
-; RV64ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; RV64ZVE32F-NEXT:    sw a1, 12(sp)
-; RV64ZVE32F-NEXT:    sw a0, 8(sp)
-; RV64ZVE32F-NEXT:    addi a0, sp, 12
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
-; RV64ZVE32F-NEXT:    vle32.v v9, (a0)
-; RV64ZVE32F-NEXT:    addi a0, sp, 8
-; RV64ZVE32F-NEXT:    vle32.v v8, (a0)
-; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
-; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
+; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
 ; RV64ZVE32F-NEXT:    vmv.x.s a0, v0
 ; RV64ZVE32F-NEXT:    andi a1, a0, 1
@@ -1744,7 +1737,6 @@
 ; RV64ZVE32F-NEXT:    andi a0, a0, 2
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB24_4
 ; RV64ZVE32F-NEXT:  .LBB24_2: # %else2
-; RV64ZVE32F-NEXT:    addi sp, sp, 16
 ; RV64ZVE32F-NEXT:    ret
 ; RV64ZVE32F-NEXT:  .LBB24_3: # %cond.store
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
@@ -1755,7 +1747,6 @@
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
 ; RV64ZVE32F-NEXT:    vse32.v v8, (a3)
-; RV64ZVE32F-NEXT:    addi sp, sp, 16
 ; RV64ZVE32F-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i32>
   call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %tval, <2 x i32*> %ptrs, i32 4, <2 x i1> %m)
diff --git a/llvm/test/CodeGen/RISCV/trunc-free.ll b/llvm/test/CodeGen/RISCV/trunc-free.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/trunc-free.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+
+; Make sure we use lwu for the load, and don't emit
+; a sext.w for the compare. This requires isTruncateFree
+; to return true for i64->i32. Otherwise we emit a
+; lw and a shift pair for the zext.
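+; As a rough, hand-written illustration (register choices here are
+; hypothetical, not autogenerated output), that shift-pair lowering
+; of the zext would look something like:
+;   lw   a0, 0(a0)
+;   slli a1, a0, 32
+;   srli a1, a1, 32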
+
+define void @foo(i32* %p, i64* %q, i32* %r) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lwu a0, 0(a0)
+; CHECK-NEXT:    sd a0, 0(a1)
+; CHECK-NEXT:    beqz a0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %if
+; CHECK-NEXT:    sw a0, 0(a2)
+; CHECK-NEXT:  .LBB0_2: # %end
+; CHECK-NEXT:    ret
+  %a = load i32, i32* %p
+  %b = zext i32 %a to i64
+  store i64 %b, i64* %q
+  %c = icmp ne i32 %a, 0
+  br i1 %c, label %if, label %end
+
+if:
+  store i32 %a, i32* %r
+  br label %end
+
+end:
+  ret void
+}
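+
+; Likewise, a rough hand-written sketch (hypothetical registers) of the
+; sext.w we want to avoid feeding the compare, something like:
+;   sext.w a3, a0
+;   beqz a3, .LBB0_2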