diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -179,6 +179,8 @@
   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                    MVT::i1, Promote);
+  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
+                   MVT::i1, Promote);
 
   // TODO: add all necessary setOperationAction calls.
   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
@@ -203,6 +205,8 @@
   if (Subtarget.is64Bit()) {
     setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 
+    setOperationAction(ISD::LOAD, MVT::i32, Custom);
+
     setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                        MVT::i32, Custom);
@@ -1092,6 +1096,8 @@
 // On RV32, 64-bit integers are split into their high and low parts and held
 // in two different registers, so the trunc is free since the low register can
 // just be used.
+// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
+// isTruncateFree?
 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
     return false;
@@ -1101,7 +1107,9 @@
 }
 
 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
-  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
+  // We consider i64->i32 free on RV64 since we have good selection of W
+  // instructions that make promoting operations back to i64 free in many cases.
+  if (SrcVT.isVector() || DstVT.isVector() ||
       !SrcVT.isInteger() || !DstVT.isInteger())
     return false;
   unsigned SrcBits = SrcVT.getSizeInBits();
@@ -7073,6 +7081,22 @@
     Results.push_back(RCW.getValue(2));
     break;
   }
+  case ISD::LOAD: {
+    if (!ISD::isNON_EXTLoad(N))
+      return;
+
+    // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
+    // sext_inreg we emit for ADD/SUB/MUL/SLLI.
+    LoadSDNode *Ld = cast<LoadSDNode>(N);
+
+    SDLoc dl(N);
+    SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
+                                 Ld->getBasePtr(), Ld->getMemoryVT(),
+                                 Ld->getMemOperand());
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
+    Results.push_back(Res.getValue(1));
+    return;
+  }
   case ISD::MUL: {
     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
     unsigned XLen = Subtarget.getXLen();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1725,17 +1725,10 @@
 ;
 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: addi sp, sp, -16
-; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16
-; RV64ZVE32F-NEXT: sw a1, 12(sp)
-; RV64ZVE32F-NEXT: sw a0, 8(sp)
-; RV64ZVE32F-NEXT: addi a0, sp, 12
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; RV64ZVE32F-NEXT: vle32.v v9, (a0)
-; RV64ZVE32F-NEXT: addi a0, sp, 8
-; RV64ZVE32F-NEXT: vle32.v v8, (a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, mu
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
 ; RV64ZVE32F-NEXT: andi a1, a0, 1
@@ -1744,7 +1737,6 @@
 ; RV64ZVE32F-NEXT: andi a0, a0, 2
 ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4
 ; RV64ZVE32F-NEXT: .LBB24_2: # %else2
-; RV64ZVE32F-NEXT: addi sp, sp, 16
 ; RV64ZVE32F-NEXT: ret
 ; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
@@ -1755,7 +1747,6 @@
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
-; RV64ZVE32F-NEXT: addi sp, sp, 16
 ; RV64ZVE32F-NEXT: ret
   %tval = trunc <2 x i64> %val to <2 x i32>
   call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %tval, <2 x i32*> %ptrs, i32 4, <2 x i1> %m)
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -65,9 +65,10 @@
 
 declare signext i32 @bar(i32 signext)
 
-; The load here will be an anyext load in isel and sext.w will be emitted for
-; the ret. Make sure we can look through logic ops to prove the sext.w is
-; unnecessary.
+; The load here was previously an aext load, but this has since been changed
+; to a signext load allowing us to remove a sext.w before isel. Thus we get
+; the same result with or without the sext.w removal pass.
+; Test has been left for coverage purposes.
 define signext i32 @test2(i32* %p, i32 signext %b) nounwind {
 ; RV64I-LABEL: test2:
 ; RV64I: # %bb.0:
@@ -92,7 +93,6 @@
 ; NOREMOVAL-NEXT: li a2, -2
 ; NOREMOVAL-NEXT: rolw a1, a2, a1
 ; NOREMOVAL-NEXT: and a0, a1, a0
-; NOREMOVAL-NEXT: sext.w a0, a0
 ; NOREMOVAL-NEXT: ret
   %a = load i32, i32* %p
   %shl = shl i32 1, %b
@@ -125,7 +125,6 @@
 ; NOREMOVAL-NEXT: li a2, -2
 ; NOREMOVAL-NEXT: rolw a1, a2, a1
 ; NOREMOVAL-NEXT: or a0, a1, a0
-; NOREMOVAL-NEXT: sext.w a0, a0
 ; NOREMOVAL-NEXT: ret
   %a = load i32, i32* %p
   %shl = shl i32 1, %b
@@ -158,7 +157,6 @@
 ; NOREMOVAL-NEXT: li a2, 1
 ; NOREMOVAL-NEXT: sllw a1, a2, a1
 ; NOREMOVAL-NEXT: xnor a0, a1, a0
-; NOREMOVAL-NEXT: sext.w a0, a0
 ; NOREMOVAL-NEXT: ret
   %a = load i32, i32* %p
   %shl = shl i32 1, %b
diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
--- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
+++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
@@ -46,7 +46,7 @@
 ;
 ; RV64-LABEL: vec3_setcc_crash:
 ; RV64: # %bb.0:
-; RV64-NEXT: lwu a0, 0(a0)
+; RV64-NEXT: lw a0, 0(a0)
 ; RV64-NEXT: slli a2, a0, 40
 ; RV64-NEXT: slli a3, a0, 56
 ; RV64-NEXT: slli a4, a0, 48
@@ -73,7 +73,7 @@
 ; RV64-NEXT: li a0, 0
 ; RV64-NEXT: j .LBB0_8
 ; RV64-NEXT: .LBB0_7:
-; RV64-NEXT: srli a0, a0, 16
+; RV64-NEXT: srliw a0, a0, 16
 ; RV64-NEXT: .LBB0_8:
 ; RV64-NEXT: sb a0, 2(a1)
 ; RV64-NEXT: sh a2, 0(a1)
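
A minimal sketch of the pattern this change is aimed at (the function and its name below are hypothetical, not one of the tests updated above): with ISD::LOAD custom type-legalized on RV64, ReplaceNodeResults turns a plain i32 load into an i64 SEXTLOAD plus a truncate, so the loaded value is already sign-extended in its register.

; Hypothetical illustration, not part of this patch's test changes.
; The i32 load is legalized as a sign-extending load (lw), and %b arrives
; sign-extended per the ABI, so the result of the and is sign-extended as
; well and no extra sext.w is needed for the signext return, even without
; the sext.w removal pass.
define signext i32 @and_of_load(i32* %p, i32 signext %b) nounwind {
  %a = load i32, i32* %p
  %r = and i32 %a, %b
  ret i32 %r
}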