diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1354,6 +1354,7 @@
 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
 // the inner pattern will first be matched as GREVI and then the outer
 // pattern will be matched to GORC via the first rule above.
+// 4. (or (rotl/rotr x, bitwidth/2), x)
 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
   EVT VT = Op.getValueType();
@@ -1363,15 +1364,29 @@
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
 
+    auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
+      if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
+          isPowerOf2_32(Reverse.getConstantOperandVal(1)))
+        return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
+      // We can also form GORCI from ROTL/ROTR by half the bitwidth.
+      if ((Reverse.getOpcode() == ISD::ROTL ||
+           Reverse.getOpcode() == ISD::ROTR) &&
+          Reverse.getOperand(0) == X &&
+          isa<ConstantSDNode>(Reverse.getOperand(1))) {
+        uint64_t RotAmt = Reverse.getConstantOperandVal(1);
+        if (RotAmt == (VT.getSizeInBits() / 2))
+          return DAG.getNode(
+              RISCVISD::GORCI, DL, VT, X,
+              DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
+      }
+      return SDValue();
+    };
+
     // Check for either commutable permutation of (or (GREVI x, shamt), x)
-    for (const auto &OpPair :
-         {std::make_pair(Op0, Op1), std::make_pair(Op1, Op0)}) {
-      if (OpPair.first.getOpcode() == RISCVISD::GREVI &&
-          OpPair.first.getOperand(0) == OpPair.second &&
-          isPowerOf2_32(OpPair.first.getConstantOperandVal(1)))
-        return DAG.getNode(RISCVISD::GORCI, DL, VT, OpPair.second,
-                           OpPair.first.getOperand(1));
-    }
+    if (SDValue V = MatchOROfReverse(Op0, Op1))
+      return V;
+    if (SDValue V = MatchOROfReverse(Op1, Op0))
+      return V;
 
     // OR is commutable so canonicalize its OR operand to the left
     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv32Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
@@ -813,6 +813,52 @@
   ret i32 %or2
 }
 
+define i32 @gorc16_rotl_i32(i32 %a) nounwind {
+; RV32I-LABEL: gorc16_rotl_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 16
+; RV32I-NEXT:    slli a2, a0, 16
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc16_rotl_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc16 a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc16_rotl_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc16 a0, a0
+; RV32IBP-NEXT:    ret
+  %rot = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 16)
+  %or = or i32 %rot, %a
+  ret i32 %or
+}
+
+define i32 @gorc16_rotr_i32(i32 %a) nounwind {
+; RV32I-LABEL: gorc16_rotr_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srli a2, a0, 16
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc16_rotr_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc16 a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc16_rotr_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc16 a0, a0
+; RV32IBP-NEXT:    ret
+  %rot = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 16)
+  %or = or i32 %rot, %a
+  ret i32 %or
+}
+
 define i64 @gorc16_i64(i64 %a) nounwind {
 ; RV32I-LABEL: gorc16_i64:
 ; RV32I:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
@@ -896,6 +896,52 @@
   ret i32 %or2
 }
 
+define i32 @gorc16_rotl_i32(i32 %a) nounwind {
+; RV64I-LABEL: gorc16_rotl_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    slli a2, a0, 16
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc16_rotl_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    gorciw a0, a0, 16
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc16_rotl_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    gorciw a0, a0, 16
+; RV64IBP-NEXT:    ret
+  %rot = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 16)
+  %or = or i32 %rot, %a
+  ret i32 %or
+}
+
+define i32 @gorc16_rotr_i32(i32 %a) nounwind {
+; RV64I-LABEL: gorc16_rotr_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 16
+; RV64I-NEXT:    srliw a2, a0, 16
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc16_rotr_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    gorciw a0, a0, 16
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc16_rotr_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    gorciw a0, a0, 16
+; RV64IBP-NEXT:    ret
+  %rot = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 16)
+  %or = or i32 %rot, %a
+  ret i32 %or
+}
+
 define i64 @gorc16_i64(i64 %a) nounwind {
 ; RV64I-LABEL: gorc16_i64:
 ; RV64I:       # %bb.0:
@@ -1223,6 +1269,52 @@
   ret i64 %or3b
 }
 
+define i64 @gorc32_rotl(i64 %a) nounwind {
+; RV64I-LABEL: gorc32_rotl:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    slli a2, a0, 32
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc32_rotl:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orc32 a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc32_rotl:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orc32 a0, a0
+; RV64IBP-NEXT:    ret
+  %rot = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 32)
+  %or = or i64 %rot, %a
+  ret i64 %or
+}
+
+define i64 @gorc32_rotr(i64 %a) nounwind {
+; RV64I-LABEL: gorc32_rotr:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    srli a2, a0, 32
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc32_rotr:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orc32 a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc32_rotr:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orc32 a0, a0
+; RV64IBP-NEXT:    ret
+  %rot = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 32)
+  %or = or i64 %rot, %a
+  ret i64 %or
+}
+
 define signext i32 @grev1_i32(i32 signext %a) nounwind {
 ; RV64I-LABEL: grev1_i32:
 ; RV64I:       # %bb.0: