Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -568,6 +568,16 @@ /// single-use) and if missed an empty SDValue is returned. SDValue distributeTruncateThroughAnd(SDNode *N); + /// Helper function to determine whether the target supports operation + /// given by \p Opcode for type \p VT, that is, whether the operation + /// is legal or custom before legalizing operations, and whether it is + /// legal (but not custom) after legalization. + bool hasOperation(unsigned Opcode, EVT VT) { + if (LegalOperations) + return TLI.isOperationLegal(Opcode, VT); + return TLI.isOperationLegalOrCustom(Opcode, VT); + } + public: /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -5033,8 +5043,8 @@ if (!TLI.isTypeLegal(VT)) return nullptr; // The target must have at least one rotate flavor. - bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); - bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + bool HasROTL = hasOperation(ISD::ROTL, VT); + bool HasROTR = hasOperation(ISD::ROTR, VT); if (!HasROTL && !HasROTR) return nullptr; // Check for truncated rotate. 
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3899,6 +3899,47 @@ ReplaceNode(SDValue(Node, 0), Result); break; } + case ISD::ROTL: + case ISD::ROTR: { + bool IsLeft = Node->getOpcode() == ISD::ROTL; + SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1); + EVT ResVT = Node->getValueType(0); + EVT OpVT = Op0.getValueType(); + assert(OpVT == ResVT && + "The result and the operand types of rotate should match"); + EVT ShVT = Op1.getValueType(); + SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT); + + // If a rotate in the other direction is legal, use it. + unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; + if (TLI.isOperationLegal(RevRot, ResVT)) { + SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); + Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub)); + break; + } + + // Otherwise, + // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1))) + // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1))) + // Masking with w-1 reduces c and -c modulo w (w must be a power of 2). + assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) && + "Expecting the type bitwidth to be a power of 2"); + unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; + unsigned HsOpc = IsLeft ? 
ISD::SRL : ISD::SHL; + SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT, + Width, DAG.getConstant(1, dl, ShVT)); + SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, + DAG.getConstant(0, dl, ShVT), Op1); + SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1); + SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1); + + SDValue Or = DAG.getNode(ISD::OR, dl, ResVT, + DAG.getNode(ShOpc, dl, ResVT, Op0, And0), + DAG.getNode(HsOpc, dl, ResVT, Op0, And1)); + Results.push_back(Or); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: Index: lib/Target/Hexagon/HexagonISelLowering.h =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.h +++ lib/Target/Hexagon/HexagonISelLowering.h @@ -159,6 +159,7 @@ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.cpp +++ lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1507,6 +1507,10 @@ // Subtarget-specific operation actions. 
// + if (Subtarget.hasV60TOps()) { + setOperationAction(ISD::ROTL, MVT::i32, Custom); + setOperationAction(ISD::ROTL, MVT::i64, Custom); + } if (Subtarget.hasV5TOps()) { setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FADD, MVT::f64, Expand); @@ -2092,6 +2096,16 @@ } SDValue +HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { + // Rotates by a constant amount are left as-is for instruction selection; + // for a variable amount, return an empty SDValue so that the legalizer + // falls back to the generic expansion. + if (isa<ConstantSDNode>(Op.getOperand(1).getNode())) + return Op; + return SDValue(); +} + +SDValue HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); @@ -2792,6 +2806,7 @@ case ISD::SRA: case ISD::SHL: case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG); + case ISD::ROTL: return LowerROTL(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); Index: lib/Target/Hexagon/HexagonPatterns.td =================================================================== --- lib/Target/Hexagon/HexagonPatterns.td +++ lib/Target/Hexagon/HexagonPatterns.td @@ -300,6 +300,7 @@ def Sub: pf2; def Or: pf2; def Srl: pf2; def Mul: pf2; def Xor: pf2; def Shl: pf2; +def Rol: pf2; // --(1) Immediate ------------------------------------------------------- // @@ -988,6 +989,10 @@ def: OpR_RR_pat; def: OpR_RR_pat; +let Predicates = [HasV60T] in { + def: OpR_RI_pat; + def: OpR_RI_pat; +} def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; @@ -1033,6 +1038,20 @@ def: AccRRI_pat, I64, u6_0ImmPred>; def: AccRRI_pat, I64, u6_0ImmPred>; def: AccRRI_pat, I64, u6_0ImmPred>; + + let Predicates = [HasV60T] in { + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + 
def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + } } let AddedComplexity = 100 in { Index: test/CodeGen/Hexagon/rotate.ll =================================================================== --- /dev/null +++ test/CodeGen/Hexagon/rotate.ll @@ -0,0 +1,209 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +target triple = "hexagon" + +; CHECK-LABEL: f0 +; CHECK: r0 = rol(r0,#7) +define i32 @f0(i32 %a0) #0 { +b0: + %v0 = shl i32 %a0, 7 + %v1 = lshr i32 %a0, 25 + %v2 = or i32 %v0, %v1 + ret i32 %v2 +} + +; CHECK-LABEL: f1 +; No variable-shift rotates. Check for the default expansion code. +; This is a rotate left by %a1(r1). +; CHECK: r[[R10:[0-9]+]] = sub(#0,r1) +; CHECK: r[[R11:[0-9]+]] = and(r1,#31) +; CHECK: r[[R12:[0-9]+]] = and(r[[R10]],#31) +; CHECK: r[[R13:[0-9]+]] = asl(r0,r[[R11]]) +; CHECK: r[[R13]] |= lsr(r0,r[[R12]]) +define i32 @f1(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a0, %a1 + %v1 = sub i32 32, %a1 + %v2 = lshr i32 %a0, %v1 + %v3 = or i32 %v2, %v0 + ret i32 %v3 +} + +; CHECK-LABEL: f2 +; CHECK: r0 = rol(r0,#25) +define i32 @f2(i32 %a0) #0 { +b0: + %v0 = lshr i32 %a0, 7 + %v1 = shl i32 %a0, 25 + %v2 = or i32 %v0, %v1 + ret i32 %v2 +} + +; CHECK-LABEL: f3 +; No variable-shift rotates. Check for the default expansion code. +; This is a rotate right by %a1(r1) that became a rotate left by 32-%a1. 
+; CHECK: r[[R30:[0-9]+]] = sub(#32,r1) +; CHECK: r[[R31:[0-9]+]] = and(r[[R30]],#31) +; CHECK: r[[R32:[0-9]+]] = sub(#0,r1) +; CHECK: r[[R33:[0-9]+]] = and(r[[R32]],#31) +; CHECK: r[[R34:[0-9]+]] = asl(r0,r[[R31]]) +; CHECK: r[[R34]] |= lsr(r0,r[[R33]]) +define i32 @f3(i32 %a0, i32 %a1) #0 { +b0: + %v0 = lshr i32 %a0, %a1 + %v1 = sub i32 32, %a1 + %v2 = shl i32 %a0, %v1 + %v3 = or i32 %v2, %v0 + ret i32 %v3 +} + +; CHECK-LABEL: f4 +; CHECK: r1:0 = rol(r1:0,#7) +define i64 @f4(i64 %a0) #0 { +b0: + %v0 = shl i64 %a0, 7 + %v1 = lshr i64 %a0, 57 + %v2 = or i64 %v0, %v1 + ret i64 %v2 +} + +; CHECK-LABEL: f5 +; No variable-shift rotates. Check for the default expansion code. +; This is a rotate left by %a1(r2). +; CHECK: r[[R50:[0-9]+]] = sub(#0,r2) +; CHECK: r[[R51:[0-9]+]] = and(r2,#63) +; CHECK: r[[R52:[0-9]+]] = and(r[[R50]],#63) +; CHECK: r[[R53:[0-9]+]]:[[R54:[0-9]+]] = asl(r1:0,r[[R51]]) +; CHECK: r[[R53]]:[[R54]] |= lsr(r1:0,r[[R52]]) +define i64 @f5(i64 %a0, i32 %a1) #0 { +b0: + %v0 = zext i32 %a1 to i64 + %v1 = shl i64 %a0, %v0 + %v2 = sub i32 64, %a1 + %v3 = zext i32 %v2 to i64 + %v4 = lshr i64 %a0, %v3 + %v5 = or i64 %v4, %v1 + ret i64 %v5 +} + +; CHECK-LABEL: f6 +; CHECK: r1:0 = rol(r1:0,#57) +define i64 @f6(i64 %a0) #0 { +b0: + %v0 = lshr i64 %a0, 7 + %v1 = shl i64 %a0, 57 + %v2 = or i64 %v0, %v1 + ret i64 %v2 +} + +; CHECK-LABEL: f7 +; No variable-shift rotates. Check for the default expansion code. +; This is a rotate right by %a1(r2) that became a rotate left by 64-%a1. 
+; CHECK: r[[R70:[0-9]+]] = sub(#64,r2) +; CHECK: r[[R71:[0-9]+]] = and(r[[R70]],#63) +; CHECK: r[[R72:[0-9]+]] = sub(#0,r[[R70]]) +; CHECK: r[[R73:[0-9]+]] = and(r[[R72]],#63) +; CHECK: r[[R74:[0-9]+]]:[[R75:[0-9]+]] = asl(r1:0,r[[R71]]) +; CHECK: r[[R74]]:[[R75]] |= lsr(r1:0,r[[R73]]) +define i64 @f7(i64 %a0, i32 %a1) #0 { +b0: + %v0 = zext i32 %a1 to i64 + %v1 = lshr i64 %a0, %v0 + %v2 = sub i32 64, %a1 + %v3 = zext i32 %v2 to i64 + %v4 = shl i64 %a0, %v3 + %v5 = or i64 %v4, %v1 + ret i64 %v5 +} + +; CHECK-LABEL: f8 +; CHECK: r0 += rol(r1,#7) +define i32 @f8(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a1, 7 + %v1 = lshr i32 %a1, 25 + %v2 = or i32 %v0, %v1 + %v3 = add i32 %v2, %a0 + ret i32 %v3 +} + +; CHECK-LABEL: f9 +; CHECK: r0 -= rol(r1,#7) +define i32 @f9(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a1, 7 + %v1 = lshr i32 %a1, 25 + %v2 = or i32 %v0, %v1 + %v3 = sub i32 %a0, %v2 + ret i32 %v3 +} + +; CHECK-LABEL: f10 +; CHECK: r0 &= rol(r1,#7) +define i32 @f10(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a1, 7 + %v1 = lshr i32 %a1, 25 + %v2 = or i32 %v0, %v1 + %v3 = and i32 %v2, %a0 + ret i32 %v3 +} + +; NOTE(review): the numbering skips f11 (and f16 below), and no test here +; covers a "|= rol(...)" form -- confirm that this omission is intentional. +; CHECK-LABEL: f12 +; CHECK: r0 ^= rol(r1,#7) +define i32 @f12(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a1, 7 + %v1 = lshr i32 %a1, 25 + %v2 = or i32 %v0, %v1 + %v3 = xor i32 %v2, %a0 + ret i32 %v3 +} + +; CHECK-LABEL: f13 +; CHECK: r1:0 += rol(r3:2,#7) +define i64 @f13(i64 %a0, i64 %a1) #0 { +b0: + %v0 = shl i64 %a1, 7 + %v1 = lshr i64 %a1, 57 + %v2 = or i64 %v0, %v1 + %v3 = add i64 %v2, %a0 + ret i64 %v3 +} + +; CHECK-LABEL: f14 +; CHECK: r1:0 -= rol(r3:2,#7) +define i64 @f14(i64 %a0, i64 %a1) #0 { +b0: + %v0 = shl i64 %a1, 7 + %v1 = lshr i64 %a1, 57 + %v2 = or i64 %v0, %v1 + %v3 = sub i64 %a0, %v2 + ret i64 %v3 +} + +; CHECK-LABEL: f15 +; CHECK: r1:0 &= rol(r3:2,#7) +define i64 @f15(i64 %a0, i64 %a1) #0 { +b0: + %v0 = shl i64 %a1, 7 + %v1 = lshr i64 %a1, 57 + %v2 = or i64 %v0, %v1 + %v3 = and i64 %v2, %a0 + ret i64 %v3 +} + +; CHECK-LABEL: f17 +; CHECK: 
r1:0 ^= rol(r3:2,#7) +define i64 @f17(i64 %a0, i64 %a1) #0 { +b0: + %v0 = shl i64 %a1, 7 + %v1 = lshr i64 %a1, 57 + %v2 = or i64 %v0, %v1 + %v3 = xor i64 %v2, %a0 + ret i64 %v3 +} + +attributes #0 = { norecurse nounwind readnone "target-cpu"="hexagonv60" "target-features"="-packets" } Index: test/CodeGen/Hexagon/rotl-i64.ll =================================================================== --- test/CodeGen/Hexagon/rotl-i64.ll +++ test/CodeGen/Hexagon/rotl-i64.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s -; CHECK: asl +; CHECK: rol ; Function Attrs: nounwind define fastcc void @f0() #0 { @@ -24,7 +24,7 @@ br label %b3 } -attributes #0 = { nounwind } +attributes #0 = { nounwind "target-cpu"="hexagonv60" } !0 = !{!1, !1, i64 0} !1 = !{!"long long", !2}