diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -50,6 +50,7 @@
 private:
   bool optimizeZExt(ZExtInst *I);
+  bool optimizeAndExt(BinaryOperator *BO);
 };
 
 } // end anonymous namespace
@@ -83,6 +84,57 @@
   return false;
 }
 
+// Try to optimize (i64 (and (zext/sext (i32 X)), C1)) if C1 has bit 31 set,
+// but bits 63:32 are zero. If we can prove that bit 31 of X is 0, we can fill
+// the upper 32 bits with ones. A separate transform will turn (zext X) into
+// (sext X) for the same condition.
+bool RISCVCodeGenPrepare::optimizeAndExt(BinaryOperator *BO) {
+  if (!ST->is64Bit())
+    return false;
+
+  if (BO->getOpcode() != Instruction::And)
+    return false;
+
+  if (!BO->getType()->isIntegerTy(64))
+    return false;
+
+  // Left hand side should be sext or zext.
+  Instruction *LHS = dyn_cast<Instruction>(BO->getOperand(0));
+  if (!LHS || (!isa<SExtInst>(LHS) && !isa<ZExtInst>(LHS)))
+    return false;
+
+  Value *LHSSrc = LHS->getOperand(0);
+  if (!LHSSrc->getType()->isIntegerTy(32))
+    return false;
+
+  // Right hand side should be a constant.
+  Value *RHS = BO->getOperand(1);
+
+  auto *CI = dyn_cast<ConstantInt>(RHS);
+  if (!CI)
+    return false;
+  uint64_t C = CI->getZExtValue();
+
+  // Look for constants that fit in 32 bits but not simm12, and can be made
+  // into simm12 by sign extending bit 31. This will allow use of ANDI.
+  // TODO: Is it worth making simm32?
+  if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
+    return false;
+
+  // If we can determine the sign bit of the input is 0, we can replace the
+  // And mask constant.
+  if (!isImpliedByDomCondition(ICmpInst::ICMP_SGE, LHSSrc,
+                               Constant::getNullValue(LHSSrc->getType()),
+                               LHS, *DL))
+    return false;
+
+  // Sign extend the constant and replace the And operand.
+  C = SignExtend64<32>(C);
+  BO->setOperand(1, ConstantInt::get(LHS->getType(), C));
+
+  return true;
+}
+
 bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
@@ -98,6 +150,8 @@
     for (Instruction &I : llvm::make_early_inc_range(BB)) {
       if (auto *ZExt = dyn_cast<ZExtInst>(&I))
        MadeChange |= optimizeZExt(ZExt);
+      else if (I.getOpcode() == Instruction::And)
+        MadeChange |= optimizeAndExt(cast<BinaryOperator>(&I));
     }
   }
 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -166,7 +166,6 @@
   }
 
   void addIRPasses() override;
-  void addCodeGenPrepare() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addIRTranslator() override;
@@ -192,13 +191,10 @@
   if (getOptLevel() != CodeGenOpt::None)
     addPass(createRISCVGatherScatterLoweringPass());
 
-  TargetPassConfig::addIRPasses();
-}
-
-void RISCVPassConfig::addCodeGenPrepare() {
   if (getOptLevel() != CodeGenOpt::None)
     addPass(createRISCVCodeGenPreparePass());
-  TargetPassConfig::addCodeGenPrepare();
+
+  TargetPassConfig::addIRPasses();
 }
 
 bool RISCVPassConfig::addPreISel() {
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -26,6 +26,7 @@
 ; CHECK-NEXT: Dominator Tree Construction
 ; CHECK-NEXT: Natural Loop Information
 ; CHECK-NEXT: RISCV gather/scatter lowering
+; CHECK-NEXT: RISCV CodeGenPrepare
 ; CHECK-NEXT: Module Verifier
 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT: Canonicalize natural loops
@@ -57,7 +58,6 @@
 ; CHECK-NEXT: Expand reduction intrinsics
 ; CHECK-NEXT: Natural Loop Information
 ; CHECK-NEXT: TLS Variable Hoist
-; CHECK-NEXT: RISCV CodeGenPrepare
 ; CHECK-NEXT: CodeGen Prepare
 ; CHECK-NEXT: Dominator Tree Construction
 ; CHECK-NEXT: Exception handling preparation
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
--- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
@@ -40,3 +40,88 @@
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
+
+; Make sure we convert the 4294967294 in for.body.preheader.new to -2 based on
+; the upper 33 bits being zero by the dominating condition %cmp3.
+define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: blez a1, .LBB1_7
+; CHECK-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-NEXT: li a3, 1
+; CHECK-NEXT: andi a2, a1, 1
+; CHECK-NEXT: bne a1, a3, .LBB1_3
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: j .LBB1_5
+; CHECK-NEXT: .LBB1_3: # %for.body.preheader.new
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: andi a1, a1, -2
+; CHECK-NEXT: addi a4, a0, 4
+; CHECK-NEXT: .LBB1_4: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: lw a5, -4(a4)
+; CHECK-NEXT: lw a6, 0(a4)
+; CHECK-NEXT: addiw a5, a5, 4
+; CHECK-NEXT: sw a5, -4(a4)
+; CHECK-NEXT: addiw a5, a6, 4
+; CHECK-NEXT: sw a5, 0(a4)
+; CHECK-NEXT: addi a3, a3, 2
+; CHECK-NEXT: addi a4, a4, 8
+; CHECK-NEXT: bne a1, a3, .LBB1_4
+; CHECK-NEXT: .LBB1_5: # %for.cond.cleanup.loopexit.unr-lcssa
+; CHECK-NEXT: beqz a2, .LBB1_7
+; CHECK-NEXT: # %bb.6: # %for.body.epil
+; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: lw a1, 0(a0)
+; CHECK-NEXT: addiw a1, a1, 4
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: .LBB1_7: # %for.cond.cleanup
+; CHECK-NEXT: ret
+entry:
+  %cmp3 = icmp sgt i32 %n, 0
+  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  %xtraiter = and i64 %wide.trip.count, 1
+  %0 = icmp eq i32 %n, 1
+  br i1 %0, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new
+
+for.body.preheader.new:                           ; preds = %for.body.preheader
+  %unroll_iter = and i64 %wide.trip.count, 4294967294
+  br label %for.body
+
+for.cond.cleanup.loopexit.unr-lcssa:              ; preds = %for.body, %for.body.preheader
+  %indvars.iv.unr = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next.1, %for.body ]
+  %lcmp.mod.not = icmp eq i64 %xtraiter, 0
+  br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil
+
+for.body.epil:                                    ; preds = %for.cond.cleanup.loopexit.unr-lcssa
+  %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.unr
+  %1 = load i32, ptr %arrayidx.epil, align 4
+  %add.epil = add nsw i32 %1, 4
+  store i32 %add.epil, ptr %arrayidx.epil, align 4
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body.epil, %for.cond.cleanup.loopexit.unr-lcssa, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body, %for.body.preheader.new
+  %indvars.iv = phi i64 [ 0, %for.body.preheader.new ], [ %indvars.iv.next.1, %for.body ]
+  %niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.1, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %2, 4
+  store i32 %add, ptr %arrayidx, align 4
+  %indvars.iv.next = or i64 %indvars.iv, 1
+  %arrayidx.1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.next
+  %3 = load i32, ptr %arrayidx.1, align 4
+  %add.1 = add nsw i32 %3, 4
+  store i32 %add.1, ptr %arrayidx.1, align 4
+  %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
+  %niter.next.1 = add i64 %niter, 2
+  %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
+  br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body
+}
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll
--- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll
@@ -51,3 +51,94 @@
   %exitcond.not = icmp eq i64 %lsr.iv.next, 0
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 }
+
+; Make sure we convert the 4294967294 in for.body.preheader.new to -2 based on
+; the upper 33 bits being zero by the dominating condition %cmp3.
+define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[N]] to i64
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[N]], 1
+; CHECK-NEXT: br i1 [[TMP0]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK: for.body.preheader.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], -2
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit.unr-lcssa:
+; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL:%.*]]
+; CHECK: for.body.epil:
+; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV_UNR]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nsw i32 [[TMP1]], 4
+; CHECK-NEXT: store i32 [[ADD_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_1]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 4
+; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP3]], 4
+; CHECK-NEXT: store i32 [[ADD_1]], ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]]
+;
+entry:
+  %cmp3 = icmp sgt i32 %n, 0
+  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  %xtraiter = and i64 %wide.trip.count, 1
+  %0 = icmp eq i32 %n, 1
+  br i1 %0, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new
+
+for.body.preheader.new:                           ; preds = %for.body.preheader
+  %unroll_iter = and i64 %wide.trip.count, 4294967294
+  br label %for.body
+
+for.cond.cleanup.loopexit.unr-lcssa:              ; preds = %for.body, %for.body.preheader
+  %indvars.iv.unr = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next.1, %for.body ]
+  %lcmp.mod.not = icmp eq i64 %xtraiter, 0
+  br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil
+
+for.body.epil:                                    ; preds = %for.cond.cleanup.loopexit.unr-lcssa
+  %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.unr
+  %1 = load i32, ptr %arrayidx.epil, align 4
+  %add.epil = add nsw i32 %1, 4
+  store i32 %add.epil, ptr %arrayidx.epil, align 4
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body.epil, %for.cond.cleanup.loopexit.unr-lcssa, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body, %for.body.preheader.new
+  %indvars.iv = phi i64 [ 0, %for.body.preheader.new ], [ %indvars.iv.next.1, %for.body ]
+  %niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.1, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %2, 4
+  store i32 %add, ptr %arrayidx, align 4
+  %indvars.iv.next = or i64 %indvars.iv, 1
+  %arrayidx.1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.next
+  %3 = load i32, ptr %arrayidx.1, align 4
+  %add.1 = add nsw i32 %3, 4
+  store i32 %add.1, ptr %arrayidx.1, align 4
+  %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
+  %niter.next.1 = add i64 %niter, 2
+  %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
+  br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body
+}
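
Illustrative note (not part of the patch): a minimal sketch, in the spirit of @test2 above, of the IR shape that optimizeAndExt targets. The function name @sketch, the value %x, and the branch structure are hypothetical and chosen only for illustration; the dominating signed comparison is what lets isImpliedByDomCondition prove that bit 31 of the 32-bit input is zero.

; Hypothetical input, reduced to the bare pattern.
define i64 @sketch(i32 signext %x) {
entry:
  ; Dominating condition: %x is known positive in %pos.
  %nonneg = icmp sgt i32 %x, 0
  br i1 %nonneg, label %pos, label %done

pos:
  ; 4294967294 (0xFFFFFFFE) has bit 31 set, fits in 32 bits, but not in simm12.
  %ext = zext i32 %x to i64
  %masked = and i64 %ext, 4294967294
  ret i64 %masked

done:
  ret i64 0
}

Together with the existing zext-to-sext transform in optimizeZExt, the pass is expected to rewrite %ext to a sext and the mask to -2. Since -2 is a valid simm12, instruction selection can then use a single ANDI instead of materializing the 32-bit constant, which is the same effect the new tests check for the unroll count in for.body.preheader.new.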