diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -50,7 +50,6 @@
   bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
-  bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
 
   // Include the pieces autogenerated from the target description.
 #include "RISCVGenDAGISel.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -376,62 +376,6 @@
   return true;
 }
 
-// Check that it is a RORIW (i32 Right Rotate Immediate on RV64).
-// We first check that it is the right node tree:
-//
-// (SIGN_EXTEND_INREG (OR (SHL RS1, VC2),
-//                        (SRL (AND RS1, VC3), VC1)))
-//
-// Then we check that the constant operands respect these constraints:
-//
-// VC2 == 32 - VC1
-// VC3 | maskTrailingOnes<uint64_t>(VC1) == 0xffffffff
-//
-// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32
-// and VC3 being 0xffffffff after accounting for SimplifyDemandedBits removing
-// some bits due to the right shift.
-
-bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
-  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
-      Subtarget->getXLenVT() == MVT::i64 &&
-      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
-    if (N.getOperand(0).getOpcode() == ISD::OR) {
-      SDValue Or = N.getOperand(0);
-      SDValue Shl = Or.getOperand(0);
-      SDValue Srl = Or.getOperand(1);
-
-      // OR is commutable so canonicalize SHL to LHS.
-      if (Srl.getOpcode() == ISD::SHL)
-        std::swap(Shl, Srl);
-
-      if (Shl.getOpcode() == ISD::SHL && Srl.getOpcode() == ISD::SRL) {
-        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
-          SDValue And = Srl.getOperand(0);
-          if (And.getOperand(0) == Shl.getOperand(0) &&
-              isa<ConstantSDNode>(Srl.getOperand(1)) &&
-              isa<ConstantSDNode>(Shl.getOperand(1)) &&
-              isa<ConstantSDNode>(And.getOperand(1))) {
-            uint64_t VC1 = Srl.getConstantOperandVal(1);
-            uint64_t VC2 = Shl.getConstantOperandVal(1);
-            uint64_t VC3 = And.getConstantOperandVal(1);
-            // The mask needs to be 0xffffffff, but SimplifyDemandedBits may
-            // have removed lower bits that aren't necessary due to the right
-            // shift.
-            if (VC2 == (32 - VC1) &&
-                (VC3 | maskTrailingOnes<uint64_t>(VC1)) == 0xffffffff) {
-              RS1 = Shl.getOperand(0);
-              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
-                                                Srl.getOperand(1).getValueType());
-              return true;
-            }
-          }
-        }
-      }
-    }
-  }
-  return false;
-}
-
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -42,6 +42,10 @@
   DIVW,
   DIVUW,
   REMUW,
+  // RV64IB rotates, directly matching the semantics of the named RISC-V
+  // instructions.
+  ROLW,
+  RORW,
   // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
   // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
   // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
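Note: the legalization half of this change rides on the existing customLegalizeToWOp
helper in RISCVISelLowering.cpp rather than adding any rotate-specific code. For
reviewers unfamiliar with that helper, here is a minimal sketch of what it does,
reconstructed from the surrounding code of this revision (not part of this diff;
details may differ):

static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  // Map e.g. ISD::ROTL -> RISCVISD::ROLW via the opcode switch extended below.
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  // Any-extend both i32 operands to i64; the *W nodes only read the bits that
  // matter (low 32 bits of the value, low 5 bits of the shift amount).
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires a result of the original i32 type.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}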
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -151,7 +151,12 @@
   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
 
-  if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
+  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
+    if (Subtarget.is64Bit()) {
+      setOperationAction(ISD::ROTL, MVT::i32, Custom);
+      setOperationAction(ISD::ROTR, MVT::i32, Custom);
+    }
+  } else {
     setOperationAction(ISD::ROTL, XLenVT, Expand);
     setOperationAction(ISD::ROTR, XLenVT, Expand);
   }
@@ -908,6 +913,10 @@
     return RISCVISD::DIVUW;
   case ISD::UREM:
     return RISCVISD::REMUW;
+  case ISD::ROTL:
+    return RISCVISD::ROLW;
+  case ISD::ROTR:
+    return RISCVISD::RORW;
   case RISCVISD::GREVI:
     return RISCVISD::GREVIW;
   case RISCVISD::GORCI:
@@ -1013,6 +1022,12 @@
       return;
     Results.push_back(customLegalizeToWOp(N, DAG));
     break;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
   case ISD::SDIV:
   case ISD::UDIV:
   case ISD::UREM:
@@ -1267,7 +1282,9 @@
   }
   case RISCVISD::SLLW:
   case RISCVISD::SRAW:
-  case RISCVISD::SRLW: {
+  case RISCVISD::SRLW:
+  case RISCVISD::ROLW:
+  case RISCVISD::RORW: {
     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
     SDValue LHS = N->getOperand(0);
     SDValue RHS = N->getOperand(1);
@@ -1392,6 +1409,8 @@
   case RISCVISD::DIVW:
   case RISCVISD::DIVUW:
   case RISCVISD::REMUW:
+  case RISCVISD::ROLW:
+  case RISCVISD::RORW:
   case RISCVISD::GREVIW:
   case RISCVISD::GORCIW:
     // TODO: As the result is sign-extended, this is conservatively correct. A
@@ -2829,6 +2848,8 @@
   NODE_NAME_CASE(DIVW)
   NODE_NAME_CASE(DIVUW)
   NODE_NAME_CASE(REMUW)
+  NODE_NAME_CASE(ROLW)
+  NODE_NAME_CASE(RORW)
   NODE_NAME_CASE(FMV_W_X_RV64)
   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
   NODE_NAME_CASE(READ_CYCLE_WIDE)
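Note: the practical effect is easiest to see at the source level. The classic
rotate idiom below is canonicalized by InstCombine to llvm.fshr.i32, which
SelectionDAG turns into ISD::ROTR on i32; with this change (and Zbb or Zbp on
RV64) that node is custom-legalized to RISCVISD::RORW and should select a single
rorw/roriw instead of a shift+shift+or sequence. Illustrative C++ only, not
taken from the patch:

#include <cstdint>

uint32_t rotr32(uint32_t X, uint32_t N) {
  // Branch-free rotate; the masks keep both shift amounts in range for N == 0.
  return (X >> (N & 31)) | (X << ((32 - N) & 31));
}

Previously this only selected RORIW when a sign_extend_inreg wrapped the OR
(that is what SelectRORIW matched), which is why the *_nosext tests further
down used to emit the three-instruction sequence.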
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -17,6 +17,9 @@
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
 
+def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
+def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
+
 def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
   let Name = "UImmLog2XLenHalf";
   let RenderMethod = "addImmOperands";
@@ -655,7 +658,6 @@
 def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW">;
 def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW">;
 def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW">;
-def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW">;
 
 let Predicates = [HasStdExtZbbOrZbp] in {
 def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
@@ -724,17 +726,11 @@
 let Predicates = [HasStdExtZbp, IsRV32] in {
 def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>;
-// FIXME: Is grev better than rori?
-def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
-def : Pat<(rotr GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
 def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
 def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
 } // Predicates = [HasStdExtZbp, IsRV32]
 
 let Predicates = [HasStdExtZbp, IsRV64] in {
-// FIXME: Is grev better than rori?
-def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
-def : Pat<(rotr GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
 def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
 def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
 } // Predicates = [HasStdExtZbp, IsRV64]
@@ -890,12 +886,14 @@
 } // Predicates = [HasStdExtZbb, IsRV64]
 
 let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
-def : Pat<(or (riscv_sllw GPR:$rs1, GPR:$rs2),
-              (riscv_srlw GPR:$rs1, (ineg GPR:$rs2))),
+def : Pat<(riscv_rolw GPR:$rs1, GPR:$rs2),
           (ROLW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (riscv_sllw GPR:$rs1, (ineg GPR:$rs2)),
-              (riscv_srlw GPR:$rs1, GPR:$rs2)),
+def : Pat<(riscv_rorw GPR:$rs1, GPR:$rs2),
           (RORW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_rorw GPR:$rs1, uimm5:$rs2),
+          (RORIW GPR:$rs1, uimm5:$rs2)>;
+def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
+          (RORIW GPR:$rs1, (ImmROTL2RW uimm5:$rs2))>;
 } // Predicates = [HasStdExtZbbOrZbp, IsRV64]
 
 let Predicates = [HasStdExtZbs, IsRV64] in {
@@ -916,10 +914,6 @@
           (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
 } // Predicates = [HasStdExtZbb, IsRV64]
 
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
-def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt),
-          (RORIW GPR:$rs1, uimmlog2xlen:$shamt)>;
-
 let Predicates = [HasStdExtZbp, IsRV64] in {
 def : Pat<(riscv_greviw GPR:$rs1, timm:$shamt), (GREVIW GPR:$rs1, timm:$shamt)>;
 def : Pat<(riscv_gorciw GPR:$rs1, timm:$shamt), (GORCIW GPR:$rs1, timm:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv32Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
@@ -1126,12 +1126,12 @@
 ;
 ; RV32IB-LABEL: grev16_i32:
 ; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    rev16 a0, a0
+; RV32IB-NEXT:    rori a0, a0, 16
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: grev16_i32:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    rev16 a0, a0
+; RV32IBP-NEXT:    rori a0, a0, 16
 ; RV32IBP-NEXT:    ret
   %shl = shl i32 %a, 16
   %shr = lshr i32 %a, 16
@@ -1152,12 +1152,12 @@
 ;
 ; RV32IB-LABEL: grev16_i32_fshl:
 ; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    rev16 a0, a0
+; RV32IB-NEXT:    rori a0, a0, 16
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: grev16_i32_fshl:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    rev16 a0, a0
+; RV32IBP-NEXT:    rori a0, a0, 16
 ; RV32IBP-NEXT:    ret
   %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 16)
   ret i32 %or
@@ -1173,12 +1173,12 @@
 ;
 ; RV32IB-LABEL: grev16_i32_fshr:
 ; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    rev16 a0, a0
+; RV32IB-NEXT:    rori a0, a0, 16
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: grev16_i32_fshr:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    rev16 a0, a0
+; RV32IBP-NEXT:    rori a0, a0, 16
 ; RV32IBP-NEXT:    ret
   %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 16)
   ret i32 %or
@@ -1197,14 +1197,14 @@
 ;
 ; RV32IB-LABEL: grev16_i64:
 ; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    rev16 a0, a0
-; RV32IB-NEXT:    rev16 a1, a1
+; RV32IB-NEXT:    rori a0, a0, 16
+; RV32IB-NEXT:    rori a1, a1, 16
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: grev16_i64:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    rev16 a0, a0
-; RV32IBP-NEXT:    rev16 a1, a1
+; RV32IBP-NEXT:    rori a0, a0, 16
+; RV32IBP-NEXT:    rori a1, a1, 16
 ; RV32IBP-NEXT:    ret
   %and = shl i64 %a, 16
   %shl = and i64 %and, -281470681808896
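Note on the immediate patterns above: riscv_rolw with a constant shift amount is
selected as RORIW with the complementary amount (via the ImmROTL2RW operand
transform), which is sound because rotl(x, k) == rotr(x, (32 - k) mod 32) for
32-bit values. A standalone sanity check of that identity, in plain C++ that is
unrelated to the LLVM sources:

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned K) {
  return (X << (K & 31)) | (X >> ((32 - K) & 31));
}
static uint32_t rotr32(uint32_t X, unsigned K) {
  return (X >> (K & 31)) | (X << ((32 - K) & 31));
}

int main() {
  // Verify rotl by K equals rotr by (32 - K) mod 32 for every amount.
  for (unsigned K = 0; K < 32; ++K)
    assert(rotl32(0xDEADBEEF, K) == rotr32(0xDEADBEEF, (32 - K) & 31));
  return 0;
}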
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
@@ -374,7 +374,6 @@
 }
 
 ; Similar to rori_i32_fshl, but doesn't sign extend the result.
-; FIXME: We should be using RORIW, but we need a sext_inreg.
 define void @rori_i32_fshl_nosext(i32 signext %a, i32* %x) nounwind {
 ; RV64I-LABEL: rori_i32_fshl_nosext:
 ; RV64I:       # %bb.0:
@@ -386,25 +385,19 @@
 ;
 ; RV64IB-LABEL: rori_i32_fshl_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    srliw a2, a0, 1
-; RV64IB-NEXT:    slli a0, a0, 31
-; RV64IB-NEXT:    or a0, a0, a2
+; RV64IB-NEXT:    roriw a0, a0, 1
 ; RV64IB-NEXT:    sw a0, 0(a1)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBB-LABEL: rori_i32_fshl_nosext:
 ; RV64IBB:       # %bb.0:
-; RV64IBB-NEXT:    srliw a2, a0, 1
-; RV64IBB-NEXT:    slli a0, a0, 31
-; RV64IBB-NEXT:    or a0, a0, a2
+; RV64IBB-NEXT:    roriw a0, a0, 1
 ; RV64IBB-NEXT:    sw a0, 0(a1)
 ; RV64IBB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: rori_i32_fshl_nosext:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    srliw a2, a0, 1
-; RV64IBP-NEXT:    slli a0, a0, 31
-; RV64IBP-NEXT:    or a0, a0, a2
+; RV64IBP-NEXT:    roriw a0, a0, 1
 ; RV64IBP-NEXT:    sw a0, 0(a1)
 ; RV64IBP-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
@@ -440,7 +433,6 @@
 }
 
 ; Similar to rori_i32_fshr, but doesn't sign extend the result.
-; FIXME: We should be using RORIW, but we need a sext_inreg.
 define void @rori_i32_fshr_nosext(i32 signext %a, i32* %x) nounwind {
 ; RV64I-LABEL: rori_i32_fshr_nosext:
 ; RV64I:       # %bb.0:
@@ -452,25 +444,19 @@
 ;
 ; RV64IB-LABEL: rori_i32_fshr_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a2, a0, 1
-; RV64IB-NEXT:    srliw a0, a0, 31
-; RV64IB-NEXT:    or a0, a0, a2
+; RV64IB-NEXT:    roriw a0, a0, 31
 ; RV64IB-NEXT:    sw a0, 0(a1)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBB-LABEL: rori_i32_fshr_nosext:
 ; RV64IBB:       # %bb.0:
-; RV64IBB-NEXT:    slli a2, a0, 1
-; RV64IBB-NEXT:    srliw a0, a0, 31
-; RV64IBB-NEXT:    or a0, a0, a2
+; RV64IBB-NEXT:    roriw a0, a0, 31
 ; RV64IBB-NEXT:    sw a0, 0(a1)
 ; RV64IBB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: rori_i32_fshr_nosext:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    slli a2, a0, 1
-; RV64IBP-NEXT:    srliw a0, a0, 31
-; RV64IBP-NEXT:    or a0, a0, a2
+; RV64IBP-NEXT:    roriw a0, a0, 31
 ; RV64IBP-NEXT:    sw a0, 0(a1)
 ; RV64IBP-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
@@ -1377,12 +1377,12 @@
 ;
 ; RV64IB-LABEL: grev32:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    rev32 a0, a0
+; RV64IB-NEXT:    rori a0, a0, 32
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: grev32:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    rev32 a0, a0
+; RV64IBP-NEXT:    rori a0, a0, 32
 ; RV64IBP-NEXT:    ret
   %shl = shl i64 %a, 32
   %shr = lshr i64 %a, 32
@@ -1403,12 +1403,12 @@
 ;
 ; RV64IB-LABEL: grev32_fshl:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    rev32 a0, a0
+; RV64IB-NEXT:    rori a0, a0, 32
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: grev32_fshl:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    rev32 a0, a0
+; RV64IBP-NEXT:    rori a0, a0, 32
 ; RV64IBP-NEXT:    ret
   %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 32)
   ret i64 %or
@@ -1424,12 +1424,12 @@
 ;
 ; RV64IB-LABEL: grev32_fshr:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    rev32 a0, a0
+; RV64IB-NEXT:    rori a0, a0, 32
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: grev32_fshr:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    rev32 a0, a0
+; RV64IBP-NEXT:    rori a0, a0, 32
 ; RV64IBP-NEXT:    ret
   %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 32)
   ret i64 %or
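Closing note on the rv64Zbp.ll updates: grev (rev32) with shift amount 32 swaps
the two 32-bit halves of a 64-bit register, which is exactly a rotate by 32, so
rori is an equally correct encoding; preferring the rotate also settles the
removed "Is grev better than rori?" FIXMEs. A small demonstration of the
equivalence, illustrative C++ only:

#include <cassert>
#include <cstdint>

static uint64_t rotr64(uint64_t X, unsigned K) {
  return (X >> (K & 63)) | (X << ((64 - K) & 63));
}

int main() {
  // Rotating by 32 swaps the 32-bit halves, matching rev32/grev with shamt 32.
  assert(rotr64(0x0123456789ABCDEFULL, 32) == 0x89ABCDEF01234567ULL);
  return 0;
}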