diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -179,6 +179,8 @@
   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                    MVT::i1, Promote);
+  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
+                   MVT::i1, Promote);
 
   // TODO: add all necessary setOperationAction calls.
   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
@@ -203,6 +205,8 @@
   if (Subtarget.is64Bit()) {
     setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 
+    setOperationAction(ISD::LOAD, MVT::i32, Custom);
+
     setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                        MVT::i32, Custom);
@@ -1092,6 +1096,8 @@
 // On RV32, 64-bit integers are split into their high and low parts and held
 // in two different registers, so the trunc is free since the low register can
 // just be used.
+// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
+// isTruncateFree?
 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
     return false;
@@ -1101,7 +1107,9 @@
 }
 
 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
-  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
+  // We consider i64->i32 free on RV64 since we have good selection of W
+  // instructions that make promoting operations back to i64 free in many cases.
+  if (SrcVT.isVector() || DstVT.isVector() ||
       !SrcVT.isInteger() || !DstVT.isInteger())
     return false;
   unsigned SrcBits = SrcVT.getSizeInBits();
@@ -7073,6 +7081,22 @@
     Results.push_back(RCW.getValue(2));
     break;
   }
+  case ISD::LOAD: {
+    if (!ISD::isNON_EXTLoad(N))
+      return;
+
+    // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
+    // sext_inreg we emit for ADD/SUB/MUL/SLLI.
+    LoadSDNode *Ld = cast<LoadSDNode>(N);
+
+    SDLoc dl(N);
+    SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
+                                 Ld->getBasePtr(), Ld->getMemoryVT(),
+                                 Ld->getMemOperand());
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
+    Results.push_back(Res.getValue(1));
+    return;
+  }
   case ISD::MUL: {
     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
     unsigned XLen = Subtarget.getXLen();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1725,17 +1725,10 @@
 ;
 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: addi sp, sp, -16
-; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; RV64ZVE32F-NEXT: sw a1, 12(sp)
-; RV64ZVE32F-NEXT: sw a0, 8(sp)
-; RV64ZVE32F-NEXT: addi a0, sp, 12
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; RV64ZVE32F-NEXT: vle32.v v9, (a0)
-; RV64ZVE32F-NEXT: addi a0, sp, 8
-; RV64ZVE32F-NEXT: vle32.v v8, (a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, mu
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
 ; RV64ZVE32F-NEXT: andi a1, a0, 1
@@ -1744,7 +1737,6 @@
 ; RV64ZVE32F-NEXT: andi a0, a0, 2
 ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4
 ; RV64ZVE32F-NEXT: .LBB24_2: # %else2
-; RV64ZVE32F-NEXT: addi sp, sp, 16
 ; RV64ZVE32F-NEXT: ret
 ; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
@@ -1755,7 +1747,6 @@
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
-; RV64ZVE32F-NEXT: addi sp, sp, 16
 ; RV64ZVE32F-NEXT: ret
   %tval = trunc <2 x i64> %val to <2 x i32>
   call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %tval, <2 x i32*> %ptrs, i32 4, <2 x i1> %m)
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -65,9 +65,10 @@
 
 declare signext i32 @bar(i32 signext)
 
-; The load here will be an anyext load in isel and sext.w will be emitted for
-; the ret. Make sure we can look through logic ops to prove the sext.w is
-; unnecessary.
+; The load here was previously an aext load, but this has since been changed
+; to a signext load allowing us to remove a sext.w before isel. Thus we get
+; the same result with or without the sext.w removal pass.
+; Test has been left for coverage purposes.
 define signext i32 @test2(i32* %p, i32 signext %b) nounwind {
 ; RV64I-LABEL: test2:
 ; RV64I: # %bb.0:
@@ -92,7 +93,6 @@
 ; NOREMOVAL-NEXT: li a2, -2
 ; NOREMOVAL-NEXT: rolw a1, a2, a1
 ; NOREMOVAL-NEXT: and a0, a1, a0
-; NOREMOVAL-NEXT: sext.w a0, a0
 ; NOREMOVAL-NEXT: ret
   %a = load i32, i32* %p
   %shl = shl i32 1, %b
@@ -125,7 +125,6 @@
 ; NOREMOVAL-NEXT: li a2, -2
 ; NOREMOVAL-NEXT: rolw a1, a2, a1
 ; NOREMOVAL-NEXT: or a0, a1, a0
-; NOREMOVAL-NEXT: sext.w a0, a0
 ; NOREMOVAL-NEXT: ret
   %a = load i32, i32* %p
   %shl = shl i32 1, %b
@@ -158,7 +157,6 @@
 ; NOREMOVAL-NEXT: li a2, 1
 ; NOREMOVAL-NEXT: sllw a1, a2, a1
 ; NOREMOVAL-NEXT: xnor a0, a1, a0
-; NOREMOVAL-NEXT: sext.w a0, a0
 ; NOREMOVAL-NEXT: ret
   %a = load i32, i32* %p
   %shl = shl i32 1, %b
diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
--- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
+++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
@@ -46,7 +46,7 @@
 ;
 ; RV64-LABEL: vec3_setcc_crash:
 ; RV64: # %bb.0:
-; RV64-NEXT: lwu a0, 0(a0)
+; RV64-NEXT: lw a0, 0(a0)
 ; RV64-NEXT: slli a2, a0, 40
 ; RV64-NEXT: slli a3, a0, 56
 ; RV64-NEXT: slli a4, a0, 48
@@ -73,7 +73,7 @@
 ; RV64-NEXT: li a0, 0
 ; RV64-NEXT: j .LBB0_8
 ; RV64-NEXT: .LBB0_7:
-; RV64-NEXT: srli a0, a0, 16
+; RV64-NEXT: srliw a0, a0, 16
 ; RV64-NEXT: .LBB0_8:
 ; RV64-NEXT: sb a0, 2(a1)
 ; RV64-NEXT: sh a2, 0(a1)
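
A minimal sketch of the pattern this change is aimed at (the function and its name below are hypothetical, not one of the tests updated above): with ISD::LOAD custom type-legalized on RV64, ReplaceNodeResults turns a plain i32 load into an i64 SEXTLOAD plus a truncate, so the loaded value is already sign-extended in its register.

; Hypothetical illustration, not part of this patch's test changes.
; The i32 load is legalized as a sign-extending load (lw), and %b arrives
; sign-extended per the ABI, so the result of the and is sign-extended as
; well and no extra sext.w is needed for the signext return, even without
; the sext.w removal pass.
define signext i32 @and_of_load(i32* %p, i32 signext %b) nounwind {
  %a = load i32, i32* %p
  %r = and i32 %a, %b
  ret i32 %r
}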