diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h
--- a/llvm/lib/Target/M68k/M68kISelLowering.h
+++ b/llvm/lib/Target/M68k/M68kISelLowering.h
@@ -135,6 +135,8 @@
 
   /// Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
 
   /// Return the entry encoding for a jump table in the current function.
   /// The returned value is a member of the MachineJumpTableInfo::JTEntryKind
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -101,6 +101,10 @@
     setOperationAction(OP, MVT::i32, Expand);
   }
 
+  for (auto OP : {ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}) {
+    setOperationAction(OP, MVT::i32, Custom); // Handled in LowerOperation.
+  }
+
   // Add/Sub overflow ops with MVT::Glues are lowered to CCR dependences.
   for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
     setOperationAction(ISD::ADDC, VT, Custom);
@@ -1315,6 +1319,97 @@
 // Custom Lower
 //===----------------------------------------------------------------------===//
 
+SDValue M68kTargetLowering::LowerShiftLeftParts(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Lo = Op.getOperand(0);    // Low part of the value to shift.
+  SDValue Hi = Op.getOperand(1);    // High part of the value to shift.
+  SDValue Shamt = Op.getOperand(2); // Shift amount.
+  EVT VT = Lo.getValueType();
+
+  // Expand SHL_PARTS; XLEN is the width of one part, hard-coded to 32 below.
+  //   if Shamt-XLEN < 0: // Shamt < XLEN
+  //     Lo = Lo << Shamt
+  //     Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
+  //   else: Hi = Lo << (Shamt-XLEN); Lo = 0
+  // With Shamt < XLEN, the two-step >>u keeps both shift amounts below XLEN.
+
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue One = DAG.getConstant(1, DL, VT);
+  SDValue MinusXLen = DAG.getConstant(-32, DL, VT);
+  SDValue XLenMinus1 = DAG.getConstant(32 - 1, DL, VT);
+  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
+  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+
+  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
+  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
+  SDValue ShiftRightLo =
+      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
+  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
+  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
+  // NOTE(review): SETCC result type is VT here; confirm vs getSetCCResultType.
+  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
+
+  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
+  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+  SDValue Parts[2] = {Lo, Hi}; // Results are returned in (Lo, Hi) order.
+  return DAG.getMergeValues(Parts, DL);
+}
+
+SDValue M68kTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+                                                 bool IsSRA) const {
+  SDLoc DL(Op);
+  SDValue Lo = Op.getOperand(0);    // Low part of the value to shift.
+  SDValue Hi = Op.getOperand(1);    // High part of the value to shift.
+  SDValue Shamt = Op.getOperand(2); // Shift amount.
+  EVT VT = Lo.getValueType();
+
+  // Expand SRA_PARTS (IsSRA == true) or SRL_PARTS (IsSRA == false). XLEN is
+  // the width of one part, hard-coded to 32 below.
+  //
+  // SRA expansion:
+  //   if Shamt-XLEN < 0: // Shamt < XLEN
+  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
+  //     Hi = Hi >>s Shamt
+  //   else:
+  //     Lo = Hi >>s (Shamt-XLEN)
+  //     Hi = Hi >>s (XLEN-1)   // every bit becomes a copy of the sign bit
+  //
+  // SRL expansion: identical, except that every >>s becomes >>u and the else
+  // branch sets Hi = 0.
+  //
+  // With Shamt < XLEN, the two-step << keeps both shift amounts below XLEN.
+
+  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
+
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue One = DAG.getConstant(1, DL, VT);
+  SDValue MinusXLen = DAG.getConstant(-32, DL, VT);
+  SDValue XLenMinus1 = DAG.getConstant(32 - 1, DL, VT);
+  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
+  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+
+  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
+  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
+  SDValue ShiftLeftHi =
+      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
+  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
+  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
+  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
+  SDValue HiFalse =
+      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
+  // NOTE(review): SETCC result type is VT here; confirm vs getSetCCResultType.
+  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
+
+  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
+  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+  SDValue Parts[2] = {Lo, Hi}; // Results are returned in (Lo, Hi) order.
+  return DAG.getMergeValues(Parts, DL);
+}
+
 SDValue M68kTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
@@ -1354,6 +1449,12 @@
     return LowerVASTART(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::SHL_PARTS:
+    return LowerShiftLeftParts(Op, DAG);
+  case ISD::SRA_PARTS:
+    return LowerShiftRightParts(Op, DAG, /*IsSRA=*/true);
+  case ISD::SRL_PARTS:
+    return LowerShiftRightParts(Op, DAG, /*IsSRA=*/false);
   }
 }
 
diff --git a/llvm/test/CodeGen/M68k/Arith/bitwise.ll b/llvm/test/CodeGen/M68k/Arith/bitwise.ll
--- a/llvm/test/CodeGen/M68k/Arith/bitwise.ll
+++ b/llvm/test/CodeGen/M68k/Arith/bitwise.ll
@@ -230,3 +230,124 @@
   %1 = xor i32 %a, 305419896
   ret i32 %1
 }
+
+define i64 @lshr64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: lshr64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    suba.l #16, %sp
+; CHECK-NEXT:    movem.l %d2-%d4, (4,%sp) ; 16-byte Folded Spill
+; CHECK-NEXT:    move.l (32,%sp), %d3
+; CHECK-NEXT:    move.l (20,%sp), %d2
+; CHECK-NEXT:    move.l %d3, %d1
+; CHECK-NEXT:    add.l #-32, %d1
+; CHECK-NEXT:    move.w %ccr, (2,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT:    bmi .LBB18_1
+; CHECK-NEXT:  ; %bb.2:
+; CHECK-NEXT:    move.l #0, %d0
+; CHECK-NEXT:    move.w (2,%sp), %ccr ; 1-byte Folded Reload
+; CHECK-NEXT:    bpl .LBB18_5
+; CHECK-NEXT:  .LBB18_4:
+; CHECK-NEXT:    move.l #31, %d4
+; CHECK-NEXT:    sub.l %d3, %d4
+; CHECK-NEXT:    lsl.l #1, %d2
+; CHECK-NEXT:    move.l (24,%sp), %d1
+; CHECK-NEXT:    lsl.l %d4, %d2
+; CHECK-NEXT:    lsr.l %d3, %d1
+; CHECK-NEXT:    or.l %d2, %d1
+; CHECK-NEXT:    bra .LBB18_6
+; CHECK-NEXT:  .LBB18_1:
+; CHECK-NEXT:    move.l %d2, %d0
+; CHECK-NEXT:    lsr.l %d3, %d0
+; CHECK-NEXT:    move.w (2,%sp), %ccr ; 1-byte Folded Reload
+; CHECK-NEXT:    bmi .LBB18_4
+; CHECK-NEXT:  .LBB18_5:
+; CHECK-NEXT:    lsr.l %d1, %d2
+; CHECK-NEXT:    move.l %d2, %d1
+; CHECK-NEXT:  .LBB18_6:
+; CHECK-NEXT:    movem.l (4,%sp), %d2-%d4 ; 16-byte Folded Reload
+; CHECK-NEXT:    adda.l #16, %sp
+; CHECK-NEXT:    rts
+  %1 = lshr i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @ashr64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: ashr64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    suba.l #16, %sp
+; CHECK-NEXT:    movem.l %d2-%d4, (4,%sp) ; 16-byte Folded Spill
+; CHECK-NEXT:    move.l (32,%sp), %d2
+; CHECK-NEXT:    move.l #31, %d3
+; CHECK-NEXT:    move.l (20,%sp), %d0
+; CHECK-NEXT:    move.l %d2, %d4
+; CHECK-NEXT:    add.l #-32, %d4
+; CHECK-NEXT:    move.w %ccr, (2,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT:    bmi .LBB19_1
+; CHECK-NEXT:  ; %bb.2:
+; CHECK-NEXT:    move.l %d0, %d1
+; CHECK-NEXT:    asr.l %d4, %d1
+; CHECK-NEXT:    move.w (2,%sp), %ccr ; 1-byte Folded Reload
+; CHECK-NEXT:    bpl .LBB19_5
+; CHECK-NEXT:  .LBB19_4:
+; CHECK-NEXT:    asr.l %d2, %d0
+; CHECK-NEXT:    bra .LBB19_6
+; CHECK-NEXT:  .LBB19_1:
+; CHECK-NEXT:    move.l %d3, %d1
+; CHECK-NEXT:    sub.l %d2, %d1
+; CHECK-NEXT:    move.l %d0, %d4
+; CHECK-NEXT:    lsl.l #1, %d4
+; CHECK-NEXT:    lsl.l %d1, %d4
+; CHECK-NEXT:    move.l (24,%sp), %d1
+; CHECK-NEXT:    lsr.l %d2, %d1
+; CHECK-NEXT:    or.l %d4, %d1
+; CHECK-NEXT:    move.w (2,%sp), %ccr ; 1-byte Folded Reload
+; CHECK-NEXT:    bmi .LBB19_4
+; CHECK-NEXT:  .LBB19_5:
+; CHECK-NEXT:    asr.l %d3, %d0
+; CHECK-NEXT:  .LBB19_6:
+; CHECK-NEXT:    movem.l (4,%sp), %d2-%d4 ; 16-byte Folded Reload
+; CHECK-NEXT:    adda.l #16, %sp
+; CHECK-NEXT:    rts
+  %1 = ashr i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @shl64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: shl64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    suba.l #16, %sp
+; CHECK-NEXT:    movem.l %d2-%d4, (4,%sp) ; 16-byte Folded Spill
+; CHECK-NEXT:    move.l (32,%sp), %d3
+; CHECK-NEXT:    move.l (24,%sp), %d2
+; CHECK-NEXT:    move.l %d3, %d0
+; CHECK-NEXT:    add.l #-32, %d0
+; CHECK-NEXT:    move.w %ccr, (2,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT:    bmi .LBB20_1
+; CHECK-NEXT:  ; %bb.2:
+; CHECK-NEXT:    move.l #0, %d1
+; CHECK-NEXT:    move.w (2,%sp), %ccr ; 1-byte Folded Reload
+; CHECK-NEXT:    bpl .LBB20_5
+; CHECK-NEXT:  .LBB20_4:
+; CHECK-NEXT:    move.l #31, %d4
+; CHECK-NEXT:    sub.l %d3, %d4
+; CHECK-NEXT:    lsr.l #1, %d2
+; CHECK-NEXT:    move.l (20,%sp), %d0
+; CHECK-NEXT:    lsr.l %d4, %d2
+; CHECK-NEXT:    lsl.l %d3, %d0
+; CHECK-NEXT:    or.l %d2, %d0
+; CHECK-NEXT:    bra .LBB20_6
+; CHECK-NEXT:  .LBB20_1:
+; CHECK-NEXT:    move.l %d2, %d1
+; CHECK-NEXT:    lsl.l %d3, %d1
+; CHECK-NEXT:    move.w (2,%sp), %ccr ; 1-byte Folded Reload
+; CHECK-NEXT:    bmi .LBB20_4
+; CHECK-NEXT:  .LBB20_5:
+; CHECK-NEXT:    lsl.l %d0, %d2
+; CHECK-NEXT:    move.l %d2, %d0
+; CHECK-NEXT:  .LBB20_6:
+; CHECK-NEXT:    movem.l (4,%sp), %d2-%d4 ; 16-byte Folded Reload
+; CHECK-NEXT:    adda.l #16, %sp
+; CHECK-NEXT:    rts
+  %1 = shl i64 %a, %b
+  ret i64 %1
+}