Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4978,8 +4978,15 @@
   if (!TLI.isTypeLegal(VT)) return nullptr;
 
   // The target must have at least one rotate flavor.
-  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
-  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+  // Once operations have been legalized, only accept a rotate the target
+  // marks Legal; before that, a Custom-lowered rotate is acceptable as well.
+  auto HasOperation = [this] (unsigned Opc, EVT T) {
+    if (LegalOperations)
+      return TLI.isOperationLegal(Opc, T);
+    return TLI.isOperationLegalOrCustom(Opc, T);
+  };
+  bool HasROTL = HasOperation(ISD::ROTL, VT);
+  bool HasROTR = HasOperation(ISD::ROTR, VT);
   if (!HasROTL && !HasROTR) return nullptr;
 
   // Check for truncated rotate.
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3900,6 +3900,48 @@
     ReplaceNode(SDValue(Node, 0), Result);
     break;
   }
+  case ISD::ROTL:
+  case ISD::ROTR: {
+    bool IsLeft = Node->getOpcode() == ISD::ROTL;
+    SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1);
+    EVT ResVT = Node->getValueType(0);
+    EVT OpVT = Op0.getValueType();
+    assert(OpVT == ResVT &&
+           "The result and the operand types of rotate should match");
+    EVT ShVT = Op1.getValueType();
+    // Use the scalar (element) width; for a scalar type this is the same
+    // value as getSizeInBits().
+    SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT);
+
+    // If a rotate in the other direction is legal, use it. The reversed
+    // amount (w - c) is built only here, so the fallback adds no unused SUB.
+    unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
+    if (TLI.isOperationLegal(RevRot, ResVT)) {
+      SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
+      Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub));
+      break;
+    }
+
+    // Otherwise,
+    //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1)))
+    //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1)))
+    //
+    unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
+    unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
+    SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT,
+                                 Width, DAG.getConstant(1, dl, ShVT));
+    SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT,
+                                 DAG.getConstant(0, dl, ShVT), Op1);
+    SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1);
+    SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1);
+
+    SDValue Or = DAG.getNode(ISD::OR, dl, ResVT,
+                             DAG.getNode(ShOpc, dl, ResVT, Op0, And0),
+                             DAG.getNode(HsOpc, dl, ResVT, Op0, And1));
+    Results.push_back(Or);
+    break;
+  }
+
   case ISD::GLOBAL_OFFSET_TABLE:
   case ISD::GlobalAddress:
   case ISD::GlobalTLSAddress:
Index: lib/Target/Hexagon/HexagonISelLowering.h
===================================================================
--- lib/Target/Hexagon/HexagonISelLowering.h
+++ lib/Target/Hexagon/HexagonISelLowering.h
@@ -159,6 +159,7 @@
     SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/Hexagon/HexagonISelLowering.cpp
===================================================================
--- lib/Target/Hexagon/HexagonISelLowering.cpp
+++ lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1356,15 +1356,14 @@
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
   setOperationAction(ISD::BSWAP, MVT::i32, Legal);
   setOperationAction(ISD::BSWAP, MVT::i64, Legal);
-  setOperationAction(ISD::MUL, MVT::i64, Legal);
 
   for (unsigned IntExpOp :
        { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
          ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
          ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
          ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
-    setOperationAction(IntExpOp, MVT::i32, Expand);
-    setOperationAction(IntExpOp, MVT::i64, Expand);
+    for (MVT VT : MVT::integer_valuetypes())
+      setOperationAction(IntExpOp, VT, Expand);
   }
 
   for (unsigned FPExpOp :
@@ -1508,6 +1507,10 @@
 
   // Subtarget-specific operation actions.
   //
+  if (Subtarget.hasV60TOps()) {
+    setOperationAction(ISD::ROTL, MVT::i32, Custom);
+    setOperationAction(ISD::ROTL, MVT::i64, Custom);
+  }
   if (Subtarget.hasV5TOps()) {
     setOperationAction(ISD::FMA, MVT::f64, Expand);
     setOperationAction(ISD::FADD, MVT::f64, Expand);
@@ -2093,6 +2096,15 @@
 }
 
 SDValue
+HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+  // A rotate whose amount is a constant node is kept as is; for any other
+  // amount return an empty SDValue.
+  if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
+    return Op;
+  return SDValue();
+}
+
+SDValue
 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
   MVT ResTy = ty(Op);
   SDValue InpV = Op.getOperand(0);
@@ -2793,6 +2805,7 @@
     case ISD::SRA:
     case ISD::SHL:
     case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
+    case ISD::ROTL: return LowerROTL(Op, DAG);
     case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
     case ISD::JumpTable: return LowerJumpTable(Op, DAG);
     case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
Index: lib/Target/Hexagon/HexagonPatterns.td
===================================================================
--- lib/Target/Hexagon/HexagonPatterns.td
+++ lib/Target/Hexagon/HexagonPatterns.td
@@ -300,6 +300,7 @@
 def Sub: pf2; def Or: pf2; def Srl: pf2;
 def Mul: pf2; def Xor: pf2; def Shl: pf2;
+def Rol: pf2<rotl>;
 
 // --(1) Immediate -------------------------------------------------------
 //
 
@@ -988,6 +989,10 @@
 def: OpR_RR_pat;
 def: OpR_RR_pat;
 
+let Predicates = [HasV60T] in {
+  def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
+  def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
+}
 def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
          (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
 
@@ -1033,6 +1038,20 @@
   def: AccRRI_pat, I64, u6_0ImmPred>;
   def: AccRRI_pat, I64, u6_0ImmPred>;
   def: AccRRI_pat, I64, u6_0ImmPred>;
+
+  let Predicates = [HasV60T] in {
+    def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>;
+
+    def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>;
+  }
 }
 
 let AddedComplexity = 100 in {
Index: test/CodeGen/Hexagon/rotate.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Hexagon/rotate.ll
@@ -0,0 +1,183 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+target triple = "hexagon"
+
+; CHECK-LABEL: f0
+; CHECK: r0 = rol(r0,#7)
+define dso_local i32 @f0(i32 %a0) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i32 %a0, 7
+  %v1 = lshr i32 %a0, 25
+  %v2 = or i32 %v0, %v1
+  ret i32 %v2
+}
+
+; CHECK-LABEL: f1
+; No variable-shift rotates.
+; CHECK-NOT: rol
+define dso_local i32 @f1(i32 %a0, i32 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i32 %a0, %a1
+  %v1 = sub i32 32, %a1
+  %v2 = lshr i32 %a0, %v1
+  %v3 = or i32 %v2, %v0
+  ret i32 %v3
+}
+
+; CHECK-LABEL: f2
+; CHECK: r0 = rol(r0,#25)
+define dso_local i32 @f2(i32 %a0) local_unnamed_addr #0 {
+b0:
+  %v0 = lshr i32 %a0, 7
+  %v1 = shl i32 %a0, 25
+  %v2 = or i32 %v0, %v1
+  ret i32 %v2
+}
+
+; CHECK-LABEL: f3
+; No variable-shift rotates.
+; CHECK-NOT: rol
+define dso_local i32 @f3(i32 %a0, i32 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = lshr i32 %a0, %a1
+  %v1 = sub i32 32, %a1
+  %v2 = shl i32 %a0, %v1
+  %v3 = or i32 %v2, %v0
+  ret i32 %v3
+}
+
+; CHECK-LABEL: f4
+; CHECK: r1:0 = rol(r1:0,#7)
+define dso_local i64 @f4(i64 %a0) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i64 %a0, 7
+  %v1 = lshr i64 %a0, 57
+  %v2 = or i64 %v0, %v1
+  ret i64 %v2
+}
+
+; CHECK-LABEL: f5
+; CHECK-NOT: rol
+define dso_local i64 @f5(i64 %a0, i32 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = zext i32 %a1 to i64
+  %v1 = shl i64 %a0, %v0
+  %v2 = sub i32 64, %a1
+  %v3 = zext i32 %v2 to i64
+  %v4 = lshr i64 %a0, %v3
+  %v5 = or i64 %v4, %v1
+  ret i64 %v5
+}
+
+; CHECK-LABEL: f6
+; CHECK: r1:0 = rol(r1:0,#57)
+define dso_local i64 @f6(i64 %a0) local_unnamed_addr #0 {
+b0:
+  %v0 = lshr i64 %a0, 7
+  %v1 = shl i64 %a0, 57
+  %v2 = or i64 %v0, %v1
+  ret i64 %v2
+}
+
+; CHECK-LABEL: f7
+; CHECK-NOT: rol
+define dso_local i64 @f7(i64 %a0, i32 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = zext i32 %a1 to i64
+  %v1 = lshr i64 %a0, %v0
+  %v2 = sub i32 64, %a1
+  %v3 = zext i32 %v2 to i64
+  %v4 = shl i64 %a0, %v3
+  %v5 = or i64 %v4, %v1
+  ret i64 %v5
+}
+
+; CHECK-LABEL: f8
+; CHECK: r0 += rol(r1,#7)
+define dso_local i32 @f8(i32 %a0, i32 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i32 %a1, 7
+  %v1 = lshr i32 %a1, 25
+  %v2 = or i32 %v0, %v1
+  %v3 = add i32 %v2, %a0
+  ret i32 %v3
+}
+
+; CHECK-LABEL: f9
+; CHECK: r0 -= rol(r1,#7)
+define dso_local i32 @f9(i32 %a0, i32 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i32 %a1, 7
+  %v1 = lshr i32 %a1, 25
+  %v2 = or i32 %v0, %v1
+  %v3 = sub i32 %a0, %v2
+  ret i32 %v3
+}
+
+; CHECK-LABEL: f10
+; CHECK: r0 &= rol(r1,#7)
+define dso_local i32 @f10(i32 %a0, i32 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i32 %a1, 7
+  %v1 = lshr i32 %a1, 25
+  %v2 = or i32 %v0, %v1
+  %v3 = and i32 %v2, %a0
+  ret i32 %v3
+}
+
+; CHECK-LABEL: f12
+; CHECK: r0 ^= rol(r1,#7)
+define dso_local i32 @f12(i32 %a0, i32 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i32 %a1, 7
+  %v1 = lshr i32 %a1, 25
+  %v2 = or i32 %v0, %v1
+  %v3 = xor i32 %v2, %a0
+  ret i32 %v3
+}
+
+; CHECK-LABEL: f13
+; CHECK: r1:0 += rol(r3:2,#7)
+define dso_local i64 @f13(i64 %a0, i64 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i64 %a1, 7
+  %v1 = lshr i64 %a1, 57
+  %v2 = or i64 %v0, %v1
+  %v3 = add i64 %v2, %a0
+  ret i64 %v3
+}
+
+; CHECK-LABEL: f14
+; CHECK: r1:0 -= rol(r3:2,#7)
+define dso_local i64 @f14(i64 %a0, i64 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i64 %a1, 7
+  %v1 = lshr i64 %a1, 57
+  %v2 = or i64 %v0, %v1
+  %v3 = sub i64 %a0, %v2
+  ret i64 %v3
+}
+
+; CHECK-LABEL: f15
+; CHECK: r1:0 &= rol(r3:2,#7)
+define dso_local i64 @f15(i64 %a0, i64 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i64 %a1, 7
+  %v1 = lshr i64 %a1, 57
+  %v2 = or i64 %v0, %v1
+  %v3 = and i64 %v2, %a0
+  ret i64 %v3
+}
+
+; CHECK-LABEL: f17
+; CHECK: r1:0 ^= rol(r3:2,#7)
+define dso_local i64 @f17(i64 %a0, i64 %a1) local_unnamed_addr #0 {
+b0:
+  %v0 = shl i64 %a1, 7
+  %v1 = lshr i64 %a1, 57
+  %v2 = or i64 %v0, %v1
+  %v3 = xor i64 %v2, %a0
+  ret i64 %v3
+}
+
+attributes #0 = { norecurse nounwind readnone "target-cpu"="hexagonv60" }
Index: test/CodeGen/Hexagon/rotl-i64.ll
===================================================================
--- test/CodeGen/Hexagon/rotl-i64.ll
+++ test/CodeGen/Hexagon/rotl-i64.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=hexagon < %s | FileCheck %s
-; CHECK: asl
+; CHECK: rol
 
 ; Function Attrs: nounwind
 define fastcc void @f0() #0 {
@@ -24,7 +24,7 @@
   br label %b3
 }
 
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "target-cpu"="hexagonv60" }
 
 !0 = !{!1, !1, i64 0}
 !1 = !{!"long long", !2}