diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -50,7 +50,6 @@
   bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
-  bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
 
   // Include the pieces autogenerated from the target description.
 #include "RISCVGenDAGISel.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -376,62 +376,6 @@
   return true;
 }
 
-// Check that it is a RORIW (i32 Right Rotate Immediate on RV64).
-// We first check that it is the right node tree:
-//
-// (SIGN_EXTEND_INREG (OR (SHL RS1, VC2),
-//                        (SRL (AND RS1, VC3), VC1)))
-//
-// Then we check that the constant operands respect these constraints:
-//
-// VC2 == 32 - VC1
-// VC3 | maskTrailingOnes<uint64_t>(VC1) == 0xffffffff
-//
-// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32
-// and VC3 being 0xffffffff after accounting for SimplifyDemandedBits removing
-// some bits due to the right shift.
-
-bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
-  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
-      Subtarget->getXLenVT() == MVT::i64 &&
-      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
-    if (N.getOperand(0).getOpcode() == ISD::OR) {
-      SDValue Or = N.getOperand(0);
-      SDValue Shl = Or.getOperand(0);
-      SDValue Srl = Or.getOperand(1);
-
-      // OR is commutable so canonicalize SHL to LHS.
-      if (Srl.getOpcode() == ISD::SHL)
-        std::swap(Shl, Srl);
-
-      if (Shl.getOpcode() == ISD::SHL && Srl.getOpcode() == ISD::SRL) {
-        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
-          SDValue And = Srl.getOperand(0);
-          if (And.getOperand(0) == Shl.getOperand(0) &&
-              isa<ConstantSDNode>(Srl.getOperand(1)) &&
-              isa<ConstantSDNode>(Shl.getOperand(1)) &&
-              isa<ConstantSDNode>(And.getOperand(1))) {
-            uint64_t VC1 = Srl.getConstantOperandVal(1);
-            uint64_t VC2 = Shl.getConstantOperandVal(1);
-            uint64_t VC3 = And.getConstantOperandVal(1);
-            // The mask needs to be 0xffffffff, but SimplifyDemandedBits may
-            // have removed lower bits that aren't necessary due to the right
-            // shift.
-            if (VC2 == (32 - VC1) &&
-                (VC3 | maskTrailingOnes<uint64_t>(VC1)) == 0xffffffff) {
-              RS1 = Shl.getOperand(0);
-              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
-                                                Srl.getOperand(1).getValueType());
-              return true;
-            }
-          }
-        }
-      }
-    }
-  }
-  return false;
-}
-
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -42,6 +42,10 @@
   DIVW,
   DIVUW,
   REMUW,
+  // RV64IB rotates, directly matching the semantics of the named RISC-V
+  // instructions.
+  ROLW,
+  RORW,
   // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
   // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
   // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
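Note: the legalization half of this change rides on the existing customLegalizeToWOp
helper in RISCVISelLowering.cpp rather than adding any rotate-specific code. For
reviewers unfamiliar with that helper, here is a minimal sketch of what it does,
reconstructed from the surrounding code of this revision (not part of this diff;
details may differ):

static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  // Map e.g. ISD::ROTL -> RISCVISD::ROLW via the opcode switch extended below.
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  // Any-extend both i32 operands to i64; the *W nodes only read the bits that
  // matter (low 32 bits of the value, low 5 bits of the shift amount).
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires a result of the original i32 type.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}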
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -151,7 +151,12 @@
   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
 
-  if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
+  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
+    if (Subtarget.is64Bit()) {
+      setOperationAction(ISD::ROTL, MVT::i32, Custom);
+      setOperationAction(ISD::ROTR, MVT::i32, Custom);
+    }
+  } else {
     setOperationAction(ISD::ROTL, XLenVT, Expand);
     setOperationAction(ISD::ROTR, XLenVT, Expand);
   }
@@ -908,6 +913,10 @@
     return RISCVISD::DIVUW;
   case ISD::UREM:
     return RISCVISD::REMUW;
+  case ISD::ROTL:
+    return RISCVISD::ROLW;
+  case ISD::ROTR:
+    return RISCVISD::RORW;
   case RISCVISD::GREVI:
     return RISCVISD::GREVIW;
   case RISCVISD::GORCI:
@@ -1013,6 +1022,12 @@
       return;
     Results.push_back(customLegalizeToWOp(N, DAG));
     break;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
   case ISD::SDIV:
   case ISD::UDIV:
   case ISD::UREM:
@@ -1267,7 +1282,9 @@
   }
   case RISCVISD::SLLW:
   case RISCVISD::SRAW:
-  case RISCVISD::SRLW: {
+  case RISCVISD::SRLW:
+  case RISCVISD::ROLW:
+  case RISCVISD::RORW: {
     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
     SDValue LHS = N->getOperand(0);
     SDValue RHS = N->getOperand(1);
@@ -1392,6 +1409,8 @@
   case RISCVISD::DIVW:
   case RISCVISD::DIVUW:
   case RISCVISD::REMUW:
+  case RISCVISD::ROLW:
+  case RISCVISD::RORW:
   case RISCVISD::GREVIW:
   case RISCVISD::GORCIW:
     // TODO: As the result is sign-extended, this is conservatively correct. A
@@ -2829,6 +2848,8 @@
   NODE_NAME_CASE(DIVW)
   NODE_NAME_CASE(DIVUW)
   NODE_NAME_CASE(REMUW)
+  NODE_NAME_CASE(ROLW)
+  NODE_NAME_CASE(RORW)
   NODE_NAME_CASE(FMV_W_X_RV64)
   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
   NODE_NAME_CASE(READ_CYCLE_WIDE)
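Note: the practical effect is easiest to see at the source level. The classic
rotate idiom below is canonicalized by InstCombine to llvm.fshr.i32, which
SelectionDAG turns into ISD::ROTR on i32; with this change (and Zbb or Zbp on
RV64) that node is custom-legalized to RISCVISD::RORW and should select a single
rorw/roriw instead of a shift+shift+or sequence. Illustrative C++ only, not
taken from the patch:

#include <cstdint>

uint32_t rotr32(uint32_t X, uint32_t N) {
  // Branch-free rotate; the masks keep both shift amounts in range for N == 0.
  return (X >> (N & 31)) | (X << ((32 - N) & 31));
}

Previously this only selected RORIW when a sign_extend_inreg wrapped the OR
(that is what SelectRORIW matched), which is why the *_nosext tests further
down used to emit the three-instruction sequence.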
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -17,6 +17,9 @@
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
 
+def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
+def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
+
 def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
   let Name = "UImmLog2XLenHalf";
   let RenderMethod = "addImmOperands";
@@ -655,7 +658,6 @@
 def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW">;
 def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW">;
 def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW">;
-def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW">;
 
 let Predicates = [HasStdExtZbbOrZbp] in {
 def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
@@ -724,17 +726,11 @@
 let Predicates = [HasStdExtZbp, IsRV32] in {
 def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>;
-// FIXME: Is grev better than rori?
-def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
-def : Pat<(rotr GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
 def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
 def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
 } // Predicates = [HasStdExtZbp, IsRV32]
 
 let Predicates = [HasStdExtZbp, IsRV64] in {
-// FIXME: Is grev better than rori?
-def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
-def : Pat<(rotr GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
 def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
 def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
 } // Predicates = [HasStdExtZbp, IsRV64]
@@ -890,12 +886,14 @@
 } // Predicates = [HasStdExtZbb, IsRV64]
 
 let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
-def : Pat<(or (riscv_sllw GPR:$rs1, GPR:$rs2),
-              (riscv_srlw GPR:$rs1, (ineg GPR:$rs2))),
+def : Pat<(riscv_rolw GPR:$rs1, GPR:$rs2),
           (ROLW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (riscv_sllw GPR:$rs1, (ineg GPR:$rs2)),
-              (riscv_srlw GPR:$rs1, GPR:$rs2)),
+def : Pat<(riscv_rorw GPR:$rs1, GPR:$rs2),
           (RORW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_rorw GPR:$rs1, uimm5:$rs2),
+          (RORIW GPR:$rs1, uimm5:$rs2)>;
+def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
+          (RORIW GPR:$rs1, (ImmROTL2RW uimm5:$rs2))>;
 } // Predicates = [HasStdExtZbbOrZbp, IsRV64]
 
 let Predicates = [HasStdExtZbs, IsRV64] in {
@@ -916,10 +914,6 @@
           (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
 } // Predicates = [HasStdExtZbb, IsRV64]
 
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
-def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt),
-          (RORIW GPR:$rs1, uimmlog2xlen:$shamt)>;
-
 let Predicates = [HasStdExtZbp, IsRV64] in {
 def : Pat<(riscv_greviw GPR:$rs1, timm:$shamt), (GREVIW GPR:$rs1, timm:$shamt)>;
 def : Pat<(riscv_gorciw GPR:$rs1, timm:$shamt), (GORCIW GPR:$rs1, timm:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv32Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
@@ -1126,12 +1126,12 @@
 ;
 ; RV32IB-LABEL: grev16_i32:
 ; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    rev16 a0, a0
+; RV32IB-NEXT:    rori a0, a0, 16
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: grev16_i32:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    rev16 a0, a0
+; RV32IBP-NEXT:    rori a0, a0, 16
 ; RV32IBP-NEXT:    ret
   %shl = shl i32 %a, 16
   %shr = lshr i32 %a, 16
@@ -1152,12 +1152,12 @@
 ;
 ; RV32IB-LABEL: grev16_i32_fshl:
 ; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    rev16 a0, a0
+; RV32IB-NEXT:    rori a0, a0, 16
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: grev16_i32_fshl:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    rev16 a0, a0
+; RV32IBP-NEXT:    rori a0, a0, 16
 ; RV32IBP-NEXT:    ret
   %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 16)
   ret i32 %or
@@ -1173,12 +1173,12 @@
 ;
 ; RV32IB-LABEL: grev16_i32_fshr:
 ; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    rev16 a0, a0
+; RV32IB-NEXT:    rori a0, a0, 16
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: grev16_i32_fshr:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    rev16 a0, a0
+; RV32IBP-NEXT:    rori a0, a0, 16
 ; RV32IBP-NEXT:    ret
   %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 16)
   ret i32 %or
@@ -1197,14 +1197,14 @@
 ;
 ; RV32IB-LABEL: grev16_i64:
 ; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    rev16 a0, a0
-; RV32IB-NEXT:    rev16 a1, a1
+; RV32IB-NEXT:    rori a0, a0, 16
+; RV32IB-NEXT:    rori a1, a1, 16
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: grev16_i64:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    rev16 a0, a0
-; RV32IBP-NEXT:    rev16 a1, a1
+; RV32IBP-NEXT:    rori a0, a0, 16
+; RV32IBP-NEXT:    rori a1, a1, 16
 ; RV32IBP-NEXT:    ret
   %and = shl i64 %a, 16
   %shl = and i64 %and, -281470681808896
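Note on the immediate patterns above: riscv_rolw with a constant shift amount is
selected as RORIW with the complementary amount (via the ImmROTL2RW operand
transform), which is sound because rotl(x, k) == rotr(x, (32 - k) mod 32) for
32-bit values. A standalone sanity check of that identity, in plain C++ that is
unrelated to the LLVM sources:

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned K) {
  return (X << (K & 31)) | (X >> ((32 - K) & 31));
}
static uint32_t rotr32(uint32_t X, unsigned K) {
  return (X >> (K & 31)) | (X << ((32 - K) & 31));
}

int main() {
  // Verify rotl by K equals rotr by (32 - K) mod 32 for every amount.
  for (unsigned K = 0; K < 32; ++K)
    assert(rotl32(0xDEADBEEF, K) == rotr32(0xDEADBEEF, (32 - K) & 31));
  return 0;
}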
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
@@ -374,7 +374,6 @@
 }
 
 ; Similar to rori_i32_fshl, but doesn't sign extend the result.
-; FIXME: We should be using RORIW, but we need a sext_inreg.
 define void @rori_i32_fshl_nosext(i32 signext %a, i32* %x) nounwind {
 ; RV64I-LABEL: rori_i32_fshl_nosext:
 ; RV64I:       # %bb.0:
@@ -386,25 +385,19 @@
 ;
 ; RV64IB-LABEL: rori_i32_fshl_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    srliw a2, a0, 1
-; RV64IB-NEXT:    slli a0, a0, 31
-; RV64IB-NEXT:    or a0, a0, a2
+; RV64IB-NEXT:    roriw a0, a0, 1
 ; RV64IB-NEXT:    sw a0, 0(a1)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBB-LABEL: rori_i32_fshl_nosext:
 ; RV64IBB:       # %bb.0:
-; RV64IBB-NEXT:    srliw a2, a0, 1
-; RV64IBB-NEXT:    slli a0, a0, 31
-; RV64IBB-NEXT:    or a0, a0, a2
+; RV64IBB-NEXT:    roriw a0, a0, 1
 ; RV64IBB-NEXT:    sw a0, 0(a1)
 ; RV64IBB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: rori_i32_fshl_nosext:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    srliw a2, a0, 1
-; RV64IBP-NEXT:    slli a0, a0, 31
-; RV64IBP-NEXT:    or a0, a0, a2
+; RV64IBP-NEXT:    roriw a0, a0, 1
 ; RV64IBP-NEXT:    sw a0, 0(a1)
 ; RV64IBP-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
@@ -440,7 +433,6 @@
 }
 
 ; Similar to rori_i32_fshr, but doesn't sign extend the result.
-; FIXME: We should be using RORIW, but we need a sext_inreg.
 define void @rori_i32_fshr_nosext(i32 signext %a, i32* %x) nounwind {
 ; RV64I-LABEL: rori_i32_fshr_nosext:
 ; RV64I:       # %bb.0:
@@ -452,25 +444,19 @@
 ;
 ; RV64IB-LABEL: rori_i32_fshr_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a2, a0, 1
-; RV64IB-NEXT:    srliw a0, a0, 31
-; RV64IB-NEXT:    or a0, a0, a2
+; RV64IB-NEXT:    roriw a0, a0, 31
 ; RV64IB-NEXT:    sw a0, 0(a1)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBB-LABEL: rori_i32_fshr_nosext:
 ; RV64IBB:       # %bb.0:
-; RV64IBB-NEXT:    slli a2, a0, 1
-; RV64IBB-NEXT:    srliw a0, a0, 31
-; RV64IBB-NEXT:    or a0, a0, a2
+; RV64IBB-NEXT:    roriw a0, a0, 31
 ; RV64IBB-NEXT:    sw a0, 0(a1)
 ; RV64IBB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: rori_i32_fshr_nosext:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    slli a2, a0, 1
-; RV64IBP-NEXT:    srliw a0, a0, 31
-; RV64IBP-NEXT:    or a0, a0, a2
+; RV64IBP-NEXT:    roriw a0, a0, 31
 ; RV64IBP-NEXT:    sw a0, 0(a1)
 ; RV64IBP-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
@@ -1377,12 +1377,12 @@
 ;
 ; RV64IB-LABEL: grev32:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    rev32 a0, a0
+; RV64IB-NEXT:    rori a0, a0, 32
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: grev32:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    rev32 a0, a0
+; RV64IBP-NEXT:    rori a0, a0, 32
 ; RV64IBP-NEXT:    ret
   %shl = shl i64 %a, 32
   %shr = lshr i64 %a, 32
@@ -1403,12 +1403,12 @@
 ;
 ; RV64IB-LABEL: grev32_fshl:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    rev32 a0, a0
+; RV64IB-NEXT:    rori a0, a0, 32
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: grev32_fshl:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    rev32 a0, a0
+; RV64IBP-NEXT:    rori a0, a0, 32
 ; RV64IBP-NEXT:    ret
   %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 32)
   ret i64 %or
@@ -1424,12 +1424,12 @@
 ;
 ; RV64IB-LABEL: grev32_fshr:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    rev32 a0, a0
+; RV64IB-NEXT:    rori a0, a0, 32
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: grev32_fshr:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    rev32 a0, a0
+; RV64IBP-NEXT:    rori a0, a0, 32
 ; RV64IBP-NEXT:    ret
   %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 32)
   ret i64 %or
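Closing note on the rv64Zbp.ll updates: grev (rev32) with shift amount 32 swaps
the two 32-bit halves of a 64-bit register, which is exactly a rotate by 32, so
rori is an equally correct encoding; preferring the rotate also settles the
removed "Is grev better than rori?" FIXMEs. A small demonstration of the
equivalence, illustrative C++ only:

#include <cassert>
#include <cstdint>

static uint64_t rotr64(uint64_t X, unsigned K) {
  return (X >> (K & 63)) | (X << ((64 - K) & 63));
}

int main() {
  // Rotating by 32 swaps the 32-bit halves, matching rev32/grev with shamt 32.
  assert(rotr64(0x0123456789ABCDEFULL, 32) == 0x89ABCDEF01234567ULL);
  return 0;
}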