diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -46,6 +46,10 @@ // instructions. ROLW, RORW, + // RV64IB funnel shifts, with the semantics of the named RISC-V instructions, + // but the same operand order as fshl/fshr intrinsics. + FSRW, + FSLW, // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X. // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -181,6 +181,11 @@ if (Subtarget.hasStdExtZbt()) { setOperationAction(ISD::FSHL, XLenVT, Legal); setOperationAction(ISD::FSHR, XLenVT, Legal); + + if (Subtarget.is64Bit()) { + setOperationAction(ISD::FSHL, MVT::i32, Custom); + setOperationAction(ISD::FSHR, MVT::i32, Custom); + } } ISD::CondCode FPCCToExtend[] = { @@ -1091,6 +1096,26 @@ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); break; } + case ISD::FSHL: + case ISD::FSHR: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); + SDValue NewOp0 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewOp2 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); + // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. + // Mask the shift amount to 5 bits. + NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, + DAG.getConstant(0x1f, DL, MVT::i64)); + unsigned Opc = + N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW; + SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); + break; + } } } @@ -1322,6 +1347,24 @@ } break; } + case RISCVISD::FSLW: + case RISCVISD::FSRW: { + // Only the lower 32 bits of Values and lower 6 bits of shift amount are + // read. + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue ShAmt = N->getOperand(2); + APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); + APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); + if (SimplifyDemandedBits(Op0, OpMask, DCI) || + SimplifyDemandedBits(Op1, OpMask, DCI) || + SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); + return SDValue(N, 0); + } + break; + } case RISCVISD::GREVIW: case RISCVISD::GORCIW: { // Only the lower 32 bits of the first operand are read @@ -1454,6 +1497,8 @@ case RISCVISD::RORW: case RISCVISD::GREVIW: case RISCVISD::GORCIW: + case RISCVISD::FSLW: + case RISCVISD::FSRW: // TODO: As the result is sign-extended, this is conservatively correct. A // more precise answer could be calculated for SRAW depending on known // bits in the shift amount. @@ -2951,6 +2996,8 @@ NODE_NAME_CASE(REMUW) NODE_NAME_CASE(ROLW) NODE_NAME_CASE(RORW) + NODE_NAME_CASE(FSLW) + NODE_NAME_CASE(FSRW) NODE_NAME_CASE(FMV_W_X_RV64) NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) NODE_NAME_CASE(READ_CYCLE_WIDE) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -17,8 +17,10 @@ // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// -def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>; -def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>; +def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>; +def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>; +def riscv_fslw : SDNode<"RISCVISD::FSLW", SDTIntShiftDOp>; +def riscv_fsrw : SDNode<"RISCVISD::FSRW", SDTIntShiftDOp>; def UImmLog2XLenHalfAsmOperand : AsmOperandClass { let Name = "UImmLog2XLenHalf"; @@ -920,21 +922,13 @@ } // Predicates = [HasStdExtZbp, IsRV64] let Predicates = [HasStdExtZbt, IsRV64] in { -def : Pat<(sext_inreg (fshl GPR:$rs1, (shl GPR:$rs3, (i64 32)), - (and GPR:$rs2, (i64 31))), - i32), - (FSLW GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>; -def : Pat<(sext_inreg (fshr GPR:$rs3, (shl GPR:$rs1, (i64 32)), - (or GPR:$rs2, (i64 32))), - i32), - (FSRW GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>; -def : Pat<(sext_inreg (fshr GPR:$rs3, (shl GPR:$rs1, (i64 32)), - uimm6gt32:$shamt), - i32), - (FSRIW GPR:$rs1, GPR:$rs3, (ImmSub32 uimm6gt32:$shamt))>; -def : Pat<(sext_inreg (fshl GPR:$rs3, (shl GPR:$rs1, (i64 32)), - uimm5:$shamt), - i32), +def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2), + (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, GPR:$rs2), + (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, uimm5:$shamt), + (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>; +def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt), (FSRIW GPR:$rs1, GPR:$rs3, (ImmROTL2RW uimm5:$shamt))>; } // Predicates = [HasStdExtZbt, IsRV64] diff --git a/llvm/test/CodeGen/RISCV/rv64Zbt.ll b/llvm/test/CodeGen/RISCV/rv64Zbt.ll --- a/llvm/test/CodeGen/RISCV/rv64Zbt.ll +++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll @@ -134,7 +134,6 @@ } ; Similar to fshl_i32 but result is not sign extended. -; FIXME: This should use fslw define void @fshl_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32* %x) nounwind { ; RV64I-LABEL: fshl_i32_nosext: ; RV64I: # %bb.0: @@ -150,19 +149,15 @@ ; ; RV64IB-LABEL: fshl_i32_nosext: ; RV64IB: # %bb.0: -; RV64IB-NEXT: slli a1, a1, 32 ; RV64IB-NEXT: andi a2, a2, 31 -; RV64IB-NEXT: andi a2, a2, 63 -; RV64IB-NEXT: fsl a0, a0, a1, a2 +; RV64IB-NEXT: fslw a0, a0, a1, a2 ; RV64IB-NEXT: sw a0, 0(a3) ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: fshl_i32_nosext: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: slli a1, a1, 32 ; RV64IBT-NEXT: andi a2, a2, 31 -; RV64IBT-NEXT: andi a2, a2, 63 -; RV64IBT-NEXT: fsl a0, a0, a1, a2 +; RV64IBT-NEXT: fslw a0, a0, a1, a2 ; RV64IBT-NEXT: sw a0, 0(a3) ; RV64IBT-NEXT: ret %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) @@ -227,7 +222,6 @@ } ; Similar to fshr_i32 but result is not sign extended. -; FIXME: This should use fsrw define void @fshr_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32* %x) nounwind { ; RV64I-LABEL: fshr_i32_nosext: ; RV64I: # %bb.0: @@ -242,19 +236,15 @@ ; ; RV64IB-LABEL: fshr_i32_nosext: ; RV64IB: # %bb.0: -; RV64IB-NEXT: slli a1, a1, 32 -; RV64IB-NEXT: ori a2, a2, 32 -; RV64IB-NEXT: andi a2, a2, 63 -; RV64IB-NEXT: fsr a0, a1, a0, a2 +; RV64IB-NEXT: andi a2, a2, 31 +; RV64IB-NEXT: fsrw a0, a1, a0, a2 ; RV64IB-NEXT: sw a0, 0(a3) ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: fshr_i32_nosext: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: slli a1, a1, 32 -; RV64IBT-NEXT: ori a2, a2, 32 -; RV64IBT-NEXT: andi a2, a2, 63 -; RV64IBT-NEXT: fsr a0, a1, a0, a2 +; RV64IBT-NEXT: andi a2, a2, 31 +; RV64IBT-NEXT: fsrw a0, a1, a0, a2 ; RV64IBT-NEXT: sw a0, 0(a3) ; RV64IBT-NEXT: ret %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) @@ -312,7 +302,6 @@ } ; Similar to fshr_i32 but result is not sign extended. -; FIXME: This should use fsriw define void @fshri_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind { ; RV64I-LABEL: fshri_i32_nosext: ; RV64I: # %bb.0: @@ -324,15 +313,13 @@ ; ; RV64IB-LABEL: fshri_i32_nosext: ; RV64IB: # %bb.0: -; RV64IB-NEXT: slli a1, a1, 32 -; RV64IB-NEXT: fsri a0, a1, a0, 37 +; RV64IB-NEXT: fsriw a0, a1, a0, 5 ; RV64IB-NEXT: sw a0, 0(a2) ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: fshri_i32_nosext: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: slli a1, a1, 32 -; RV64IBT-NEXT: fsri a0, a1, a0, 37 +; RV64IBT-NEXT: fsriw a0, a1, a0, 5 ; RV64IBT-NEXT: sw a0, 0(a2) ; RV64IBT-NEXT: ret %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5) @@ -384,7 +371,6 @@ } ; Similar to fshl_i32 but result is not sign extended. -; FIXME: This should use fsriw define void @fshli_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind { ; RV64I-LABEL: fshli_i32_nosext: ; RV64I: # %bb.0: @@ -396,15 +382,13 @@ ; ; RV64IB-LABEL: fshli_i32_nosext: ; RV64IB: # %bb.0: -; RV64IB-NEXT: slli a1, a1, 32 -; RV64IB-NEXT: fsri a0, a1, a0, 59 +; RV64IB-NEXT: fsriw a0, a1, a0, 27 ; RV64IB-NEXT: sw a0, 0(a2) ; RV64IB-NEXT: ret ; ; RV64IBT-LABEL: fshli_i32_nosext: ; RV64IBT: # %bb.0: -; RV64IBT-NEXT: slli a1, a1, 32 -; RV64IBT-NEXT: fsri a0, a1, a0, 59 +; RV64IBT-NEXT: fsriw a0, a1, a0, 27 ; RV64IBT-NEXT: sw a0, 0(a2) ; RV64IBT-NEXT: ret %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 5)