diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5060,9 +5060,26 @@ case ISD::SRL: assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); - if (N->getOperand(1).getOpcode() == ISD::Constant) - return; - Results.push_back(customLegalizeToWOp(N, DAG)); + if (N->getOperand(1).getOpcode() != ISD::Constant) { + Results.push_back(customLegalizeToWOp(N, DAG)); + break; + } + + // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is + // similar to customLegalizeToWOpWithSExt, but we must zero_extend the + // shift amount. + if (N->getOpcode() == ISD::SHL) { + SDLoc DL(N); + SDValue NewOp0 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = + DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1); + SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, + DAG.getValueType(MVT::i32)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); + } + break; case ISD::ROTL: case ISD::ROTR: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -1141,6 +1141,9 @@ (and GPR:$rs1, 0x000000000000FFFF)), i32)), (PACKW GPR:$rs1, GPR:$rs2)>; +def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32), + (and GPR:$rs1, 0x000000000000FFFF))), + (PACKW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000), (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))), (PACKUW GPR:$rs1, GPR:$rs2)>; diff --git a/llvm/test/CodeGen/RISCV/alu32.ll b/llvm/test/CodeGen/RISCV/alu32.ll --- a/llvm/test/CodeGen/RISCV/alu32.ll +++ b/llvm/test/CodeGen/RISCV/alu32.ll @@ -106,7 +106,7 @@ ; ; RV64I-LABEL: slli: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 7 +; RV64I-NEXT: slliw a0, a0, 7 ; RV64I-NEXT: ret %1 = shl i32 %a, 7 ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll --- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll @@ -63,7 +63,7 @@ ; RV64I-NEXT: slli a2, a0, 8 ; RV64I-NEXT: lui a3, 4080 ; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: slliw a0, a0, 24 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll --- a/llvm/test/CodeGen/RISCV/copysign-casts.ll +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -205,9 +205,9 @@ ; RV64I-NEXT: lui a2, 524288 ; RV64I-NEXT: addiw a2, a2, -1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, 1048568 +; RV64I-NEXT: lui a2, 8 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slliw a1, a1, 16 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -117,12 +117,12 @@ ; ; RV64I-LABEL: mul_pow2: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: slliw a0, a0, 3 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: mul_pow2: ; RV64IM: # %bb.0: -; RV64IM-NEXT: slli a0, a0, 3 +; RV64IM-NEXT: slliw a0, a0, 3 ; RV64IM-NEXT: ret %1 = mul i32 %a, 8 ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll --- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll @@ -1422,7 +1422,7 @@ define i32 @aext_slliw_aext(i32 %a) nounwind { ; RV64I-LABEL: aext_slliw_aext: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: slliw a0, a0, 1 ; RV64I-NEXT: ret %1 = shl i32 %a, 1 ret i32 %1 @@ -1431,7 +1431,7 @@ define i32 @aext_slliw_sext(i32 signext %a) nounwind { ; RV64I-LABEL: aext_slliw_sext: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: slliw a0, a0, 2 ; RV64I-NEXT: ret %1 = shl i32 %a, 2 ret i32 %1 @@ -1440,7 +1440,7 @@ define i32 @aext_slliw_zext(i32 zeroext %a) nounwind { ; RV64I-LABEL: aext_slliw_zext: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: slliw a0, a0, 3 ; RV64I-NEXT: ret %1 = shl i32 %a, 3 ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll --- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll @@ -412,9 +412,8 @@ ; RV64I-LABEL: rori_i32_fshl: ; RV64I: # %bb.0: ; RV64I-NEXT: srliw a1, a0, 1 -; RV64I-NEXT: slli a0, a0, 31 +; RV64I-NEXT: slliw a0, a0, 31 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: rori_i32_fshl: @@ -470,10 +469,9 @@ define signext i32 @rori_i32_fshr(i32 signext %a) nounwind { ; RV64I-LABEL: rori_i32_fshr: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: srliw a0, a0, 31 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: rori_i32_fshr: @@ -532,34 +530,30 @@ define signext i32 @not_rori_i32(i32 signext %x, i32 signext %y) nounwind { ; RV64I-LABEL: not_rori_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 31 +; RV64I-NEXT: slliw a0, a0, 31 ; RV64I-NEXT: srliw a1, a1, 1 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: not_rori_i32: ; RV64B: # %bb.0: -; RV64B-NEXT: slli a0, a0, 31 +; RV64B-NEXT: slliw a0, a0, 31 ; RV64B-NEXT: srliw a1, a1, 1 ; RV64B-NEXT: or a0, a0, a1 -; RV64B-NEXT: sext.w a0, a0 ; RV64B-NEXT: ret ; ; RV64ZBB-LABEL: not_rori_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: slli a0, a0, 31 +; RV64ZBB-NEXT: slliw a0, a0, 31 ; RV64ZBB-NEXT: srliw a1, a1, 1 ; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: sext.w a0, a0 ; RV64ZBB-NEXT: ret ; ; RV64ZBP-LABEL: not_rori_i32: ; RV64ZBP: # %bb.0: -; RV64ZBP-NEXT: slli a0, a0, 31 +; RV64ZBP-NEXT: slliw a0, a0, 31 ; RV64ZBP-NEXT: srliw a1, a1, 1 ; RV64ZBP-NEXT: or a0, a0, a1 -; RV64ZBP-NEXT: sext.w a0, a0 ; RV64ZBP-NEXT: ret %a = shl i32 %x, 31 %b = lshr i32 %y, 1 diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -1515,10 +1515,9 @@ ; RV64I-NEXT: slli a2, a0, 8 ; RV64I-NEXT: lui a3, 4080 ; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: slliw a0, a0, 24 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: bswap_i32: diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll --- a/llvm/test/CodeGen/RISCV/rv64zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll @@ -9,7 +9,7 @@ define signext i32 @gorc1_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc1_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -19,7 +19,6 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: gorc1_i32: @@ -88,7 +87,7 @@ define signext i32 @gorc2_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc2_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -98,7 +97,6 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: gorc2_i32: @@ -167,7 +165,7 @@ define signext i32 @gorc3_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc3_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -177,7 +175,7 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -187,7 +185,6 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: gorc3_i32: @@ -290,7 +287,7 @@ define signext i32 @gorc4_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc4_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slliw a1, a0, 4 ; RV64I-NEXT: lui a2, 986895 ; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 @@ -300,7 +297,6 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: gorc4_i32: @@ -369,7 +365,7 @@ define signext i32 @gorc5_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc5_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -379,7 +375,7 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slliw a1, a0, 4 ; RV64I-NEXT: lui a2, 986895 ; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 @@ -389,7 +385,6 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: gorc5_i32: @@ -492,7 +487,7 @@ define signext i32 @gorc6_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc6_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -502,7 +497,7 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slliw a1, a0, 4 ; RV64I-NEXT: lui a2, 986895 ; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 @@ -512,7 +507,6 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: gorc6_i32: @@ -615,7 +609,7 @@ define signext i32 @gorc7_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc7_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -625,7 +619,7 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -782,7 +776,7 @@ define signext i32 @gorc8_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc8_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slliw a1, a0, 8 ; RV64I-NEXT: lui a2, 1044496 ; RV64I-NEXT: addiw a2, a2, -256 ; RV64I-NEXT: and a1, a1, a2 @@ -792,7 +786,6 @@ ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: gorc8_i32: @@ -857,11 +850,10 @@ define signext i32 @gorc16_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc16_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: slliw a1, a0, 16 ; RV64I-NEXT: srliw a2, a0, 16 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: gorc16_i32: @@ -884,7 +876,7 @@ ; RV64I-LABEL: gorc16_rotl_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: slliw a2, a0, 16 ; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -906,7 +898,7 @@ define i32 @gorc16_rotr_i32(i32 %a) nounwind { ; RV64I-LABEL: gorc16_rotr_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: slliw a1, a0, 16 ; RV64I-NEXT: srliw a2, a0, 16 ; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: or a0, a1, a0 @@ -991,7 +983,7 @@ define signext i32 @gorc2b_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc2b_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -1001,13 +993,12 @@ ; RV64I-NEXT: and a3, a3, a4 ; RV64I-NEXT: or a0, a3, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 2 ; RV64I-NEXT: and a2, a2, a4 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: gorc2b_i32: @@ -1096,7 +1087,7 @@ define signext i32 @gorc3b_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc3b_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -1106,7 +1097,7 @@ ; RV64I-NEXT: and a3, a3, a4 ; RV64I-NEXT: or a0, a3, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a3, 838861 ; RV64I-NEXT: addiw a3, a3, -820 ; RV64I-NEXT: and a1, a1, a3 @@ -1290,7 +1281,7 @@ define signext i32 @grev1_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev1_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -1299,7 +1290,6 @@ ; RV64I-NEXT: addiw a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev1_i32: @@ -1365,7 +1355,7 @@ define signext i32 @grev2_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev2_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -1374,7 +1364,6 @@ ; RV64I-NEXT: addiw a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev2_i32: @@ -1440,7 +1429,7 @@ define signext i32 @grev3_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev3_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -1449,7 +1438,7 @@ ; RV64I-NEXT: addiw a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -1458,7 +1447,6 @@ ; RV64I-NEXT: addiw a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev3_i32: @@ -1555,7 +1543,7 @@ define signext i32 @grev4_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev4_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slliw a1, a0, 4 ; RV64I-NEXT: lui a2, 986895 ; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 @@ -1564,7 +1552,6 @@ ; RV64I-NEXT: addiw a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev4_i32: @@ -1630,7 +1617,7 @@ define signext i32 @grev5_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev5_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -1639,7 +1626,7 @@ ; RV64I-NEXT: addiw a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slliw a1, a0, 4 ; RV64I-NEXT: lui a2, 986895 ; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 @@ -1648,7 +1635,6 @@ ; RV64I-NEXT: addiw a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev5_i32: @@ -1746,7 +1732,7 @@ define signext i32 @grev6_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev6_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -1755,7 +1741,7 @@ ; RV64I-NEXT: addiw a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slliw a1, a0, 4 ; RV64I-NEXT: lui a2, 986895 ; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 @@ -1764,7 +1750,6 @@ ; RV64I-NEXT: addiw a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev6_i32: @@ -1861,7 +1846,7 @@ define signext i32 @grev7_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev7_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -1870,7 +1855,7 @@ ; RV64I-NEXT: addiw a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -1879,7 +1864,7 @@ ; RV64I-NEXT: addiw a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slliw a1, a0, 4 ; RV64I-NEXT: lui a2, 986895 ; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 @@ -1888,7 +1873,6 @@ ; RV64I-NEXT: addiw a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev7_i32: @@ -2016,7 +2000,7 @@ define signext i32 @grev8_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev8_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slliw a1, a0, 8 ; RV64I-NEXT: lui a2, 1044496 ; RV64I-NEXT: addiw a2, a2, -256 ; RV64I-NEXT: and a1, a1, a2 @@ -2025,7 +2009,6 @@ ; RV64I-NEXT: addiw a2, a2, 255 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev8_i32: @@ -2087,10 +2070,9 @@ define signext i32 @grev16_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev16_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: slliw a1, a0, 16 ; RV64I-NEXT: srliw a0, a0, 16 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev16_i32: @@ -2115,9 +2097,8 @@ ; RV64I-LABEL: grev16_i32_fshl: ; RV64I: # %bb.0: ; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: slliw a0, a0, 16 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev16_i32_fshl: @@ -2136,10 +2117,9 @@ define signext i32 @grev16_i32_fshr(i32 signext %a) nounwind { ; RV64I-LABEL: grev16_i32_fshr: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: slliw a1, a0, 16 ; RV64I-NEXT: srliw a0, a0, 16 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev16_i32_fshr: @@ -2215,7 +2195,7 @@ define signext i32 @grev3b_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev3b_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 @@ -2224,7 +2204,7 @@ ; RV64I-NEXT: addiw a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -2233,7 +2213,6 @@ ; RV64I-NEXT: addiw a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev3b_i32: @@ -2331,7 +2310,7 @@ define signext i32 @grev2b_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev2b_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -2340,7 +2319,7 @@ ; RV64I-NEXT: addiw a3, a3, 1365 ; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a4, 838861 ; RV64I-NEXT: addiw a4, a4, -820 ; RV64I-NEXT: and a1, a1, a4 @@ -2349,12 +2328,11 @@ ; RV64I-NEXT: addiw a4, a4, 819 ; RV64I-NEXT: and a0, a0, a4 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 1 ; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev2b_i32: @@ -2468,7 +2446,7 @@ define signext i32 @grev0_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev0_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 @@ -2477,7 +2455,7 @@ ; RV64I-NEXT: addiw a3, a3, 1365 ; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: lui a4, 838861 ; RV64I-NEXT: addiw a4, a4, -820 ; RV64I-NEXT: and a1, a1, a4 @@ -2486,17 +2464,16 @@ ; RV64I-NEXT: addiw a5, a5, 819 ; RV64I-NEXT: and a0, a0, a5 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slliw a1, a0, 1 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 1 ; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slliw a1, a0, 2 ; RV64I-NEXT: and a1, a1, a4 ; RV64I-NEXT: srli a0, a0, 2 ; RV64I-NEXT: and a0, a0, a5 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: grev0_i32: @@ -2702,10 +2679,9 @@ ; RV64I-NEXT: slli a2, a0, 8 ; RV64I-NEXT: lui a3, 4080 ; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: slliw a0, a0, 24 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: bswap_i32: @@ -2897,7 +2873,7 @@ ; RV64I-NEXT: slli a2, a0, 8 ; RV64I-NEXT: lui a3, 4080 ; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: slliw a0, a0, 24 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 4 @@ -2905,23 +2881,22 @@ ; RV64I-NEXT: addiw a2, a2, -241 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: slliw a0, a0, 4 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 2 ; RV64I-NEXT: lui a2, 209715 ; RV64I-NEXT: addiw a2, a2, 819 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: slliw a0, a0, 2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: slliw a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: bitreverse_i32: @@ -2950,7 +2925,7 @@ ; RV64I-NEXT: slli a3, a0, 8 ; RV64I-NEXT: lui a4, 4080 ; RV64I-NEXT: and a3, a3, a4 -; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: slliw a0, a0, 24 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: srli a2, a0, 4 @@ -2958,14 +2933,14 @@ ; RV64I-NEXT: addiw a3, a3, -241 ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: slliw a0, a0, 4 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: srli a2, a0, 2 ; RV64I-NEXT: lui a3, 209715 ; RV64I-NEXT: addiw a3, a3, 819 ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: slliw a0, a0, 2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: srli a2, a0, 1 ; RV64I-NEXT: lui a3, 349525 @@ -3084,21 +3059,18 @@ define i32 @bswap_rotr_i32(i32 %a) { ; RV64I-LABEL: bswap_rotr_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: lui a2, 4080 ; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: srliw a2, a0, 24 -; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: slli a2, a0, 8 -; RV64I-NEXT: lui a3, 4080 -; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: srliw a0, a0, 8 +; RV64I-NEXT: andi a0, a0, -256 ; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a1, a0, a1 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: srliw a0, a0, 16 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: slliw a0, a0, 16 +; RV64I-NEXT: srliw a1, a1, 16 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: bswap_rotr_i32: @@ -3118,20 +3090,17 @@ define i32 @bswap_rotl_i32(i32 %a) { ; RV64I-LABEL: bswap_rotl_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srliw a2, a0, 24 -; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a1, a0, 24 +; RV64I-NEXT: srliw a2, a0, 8 +; RV64I-NEXT: andi a2, a2, -256 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: slli a2, a0, 8 ; RV64I-NEXT: lui a3, 4080 ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: slli a0, a0, 24 ; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a1, a0, a1 -; RV64I-NEXT: slli a1, a1, 16 ; RV64I-NEXT: srliw a0, a0, 16 +; RV64I-NEXT: slliw a1, a1, 16 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; @@ -3161,7 +3130,7 @@ ; RV64I-NEXT: slli a3, a0, 8 ; RV64I-NEXT: lui a4, 4080 ; RV64I-NEXT: and a3, a3, a4 -; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: slliw a0, a0, 24 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 4 @@ -3169,29 +3138,29 @@ ; RV64I-NEXT: addiw a3, a3, -241 ; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: slliw a0, a0, 4 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 2 ; RV64I-NEXT: lui a3, 209715 ; RV64I-NEXT: addiw a3, a3, 819 ; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: slliw a0, a0, 2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a3, 349525 ; RV64I-NEXT: addiw a3, a3, 1365 ; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: slliw a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 8 +; RV64I-NEXT: srliw a1, a0, 8 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 24 +; RV64I-NEXT: srliw a2, a0, 24 ; RV64I-NEXT: or a1, a1, a2 ; RV64I-NEXT: slli a2, a0, 8 ; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: slliw a0, a0, 24 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -3704,9 +3673,8 @@ ; RV64I-NEXT: lui a2, 16 ; RV64I-NEXT: addiw a2, a2, -1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slliw a1, a1, 16 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64B-LABEL: pack_i32: