Index: lib/Target/Mips/Mips64InstrInfo.td =================================================================== --- lib/Target/Mips/Mips64InstrInfo.td +++ lib/Target/Mips/Mips64InstrInfo.td @@ -316,10 +316,11 @@ let TwoOperandAliasConstraint = "$rd = $rs"; } -class ExtsCins: - InstSE<(outs GPR64Opnd:$rt), (ins GPR64Opnd:$rs, uimm5:$pos, uimm5:$lenm1), - !strconcat(opstr, " $rt, $rs, $pos, $lenm1"), - [(set GPR64Opnd:$rt, (Op GPR64Opnd:$rs, imm:$pos, imm:$lenm1))], +class ExtsCins: + InstSE<(outs RO:$rt), (ins RO:$rs, uimm5:$pos, uimm5:$lenm1), + !strconcat(opstr, "\t$rt, $rs, $pos, $lenm1"), + [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$lenm1))], NoItinerary, FrmR, opstr> { let TwoOperandAliasConstraint = "$rt = $rs"; } @@ -380,12 +381,16 @@ ADD_FM<0x1c, 0x03>; // Extract a signed bit field /+32 -def EXTS : ExtsCins<"exts">, EXTS_FM<0x3a>; -def EXTS32: ExtsCins<"exts32">, EXTS_FM<0x3b>; +def EXTS : ExtsCins<"exts", GPR64Opnd, MipsExtS>, EXTS_FM<0x3a>; +def EXTS32: ExtsCins<"exts32", GPR64Opnd, MipsExtS32>, EXTS_FM<0x3b>; +let isCodeGenOnly = 1 in + def EXTS_i32 : ExtsCins<"exts", GPR32Opnd, MipsExtS>, EXTS_FM<0x3a>; // Clear and insert a bit field /+32 -def CINS : ExtsCins<"cins">, EXTS_FM<0x32>; -def CINS32: ExtsCins<"cins32">, EXTS_FM<0x33>; +def CINS : ExtsCins<"cins", GPR64Opnd, MipsCIns>, EXTS_FM<0x32>; +def CINS32: ExtsCins<"cins32", GPR64Opnd, MipsCIns32>, EXTS_FM<0x33>; +let isCodeGenOnly = 1 in + def CINS_i32 : ExtsCins<"cins", GPR32Opnd, MipsCIns>, EXTS_FM<0x32>; // Move to multiplier/product register def MTM0 : MoveToLOHI<"mtm0", GPR64Opnd, [MPL0, P0, P1, P2]>, MTMR_FM<0x08>; Index: lib/Target/Mips/MipsISelLowering.h =================================================================== --- lib/Target/Mips/MipsISelLowering.h +++ lib/Target/Mips/MipsISelLowering.h @@ -104,6 +104,12 @@ Ext, Ins, + // Octeon nodes for cins/cins32/exts/exts32. + CIns, + CIns32, + ExtS, + ExtS32, + // EXTR.W instrinsic nodes. 
EXTP, EXTPDP, Index: lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsISelLowering.cpp +++ lib/Target/Mips/MipsISelLowering.cpp @@ -147,6 +147,10 @@ case MipsISD::Sync: return "MipsISD::Sync"; case MipsISD::Ext: return "MipsISD::Ext"; case MipsISD::Ins: return "MipsISD::Ins"; + case MipsISD::CIns: return "MipsISD::CIns"; + case MipsISD::CIns32: return "MipsISD::CIns32"; + case MipsISD::ExtS: return "MipsISD::ExtS"; + case MipsISD::ExtS32: return "MipsISD::ExtS32"; case MipsISD::LWL: return "MipsISD::LWL"; case MipsISD::LWR: return "MipsISD::LWR"; case MipsISD::SWL: return "MipsISD::SWL"; @@ -432,6 +436,8 @@ setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRA); setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2); @@ -694,19 +700,23 @@ // Pattern match EXT. // $dst = and ((sra or srl) $src , pos), (2**size - 1) // => ext $dst, $src, size, pos + // If HasCnMips: pattern match CINS/CINS32 + // $dst = and (shl $src, pos), (2**size-1)<<pos + // => cins $src, pos, size (if pos < 32) + // cins32 $src, pos-32, size (otherwise) if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert()) return SDValue(); - SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1); - unsigned ShiftRightOpc = ShiftRight.getOpcode(); + SDValue Shift = N->getOperand(0), Mask = N->getOperand(1); + unsigned ShiftOpc = Shift.getOpcode(); - // Op's first operand must be a shift right. - if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL) + // Op's first operand must be a shift. + if (ShiftOpc != ISD::SRA && ShiftOpc != ISD::SRL && ShiftOpc != ISD::SHL) return SDValue(); // The second operand of the shift must be an immediate. 
ConstantSDNode *CN; - if (!(CN = dyn_cast(ShiftRight.getOperand(1)))) + if (!(CN = dyn_cast(Shift.getOperand(1)))) return SDValue(); uint64_t Pos = CN->getZExtValue(); @@ -717,17 +727,45 @@ !isShiftedMask(CN->getZExtValue(), SMPos, SMSize)) return SDValue(); - // Return if the shifted mask does not start at bit 0 or the sum of its size - // and Pos exceeds the word's size. EVT ValTy = N->getValueType(0); - if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits()) - return SDValue(); - SDLoc DL(N); - return DAG.getNode(MipsISD::Ext, DL, ValTy, - ShiftRight.getOperand(0), - DAG.getConstant(Pos, DL, MVT::i32), - DAG.getConstant(SMSize, DL, MVT::i32)); + // Op's first operand must be a shift right. + if (ShiftOpc == ISD::SRA || ShiftOpc == ISD::SRL) { + // Return if the shifted mask does not start at bit 0 or the sum of its size + // and Pos exceeds the word's size. + if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits()) + return SDValue(); + + SDLoc DL(N); + return DAG.getNode(MipsISD::Ext, DL, ValTy, + Shift.getOperand(0), + DAG.getConstant(Pos, DL, MVT::i32), + DAG.getConstant(SMSize, DL, MVT::i32)); + } + // Op's first operand must be a shift left and cnmips must be enabled. + else if (ShiftOpc == ISD::SHL && Subtarget.hasCnMips()) { + // Return if Size exceeds 32 bit or if the sum of Size and Pos + // exceeds the word's size. 
+ if (SMPos != Pos || Pos >= ValTy.getSizeInBits() || SMSize == 0 + || SMSize >= 32 || Pos + SMSize > ValTy.getSizeInBits()) + return SDValue(); + + unsigned Opc; + if (Pos >= 32) { + Opc = MipsISD::CIns32; + Pos -= 32; + } + else + Opc = MipsISD::CIns; + + SDLoc DL(N); + return DAG.getNode(Opc, DL, ValTy, + Shift.getOperand(0), + DAG.getConstant(Pos, DL, MVT::i32), + DAG.getConstant(SMSize-1, DL, MVT::i32)); + } + + return SDValue(); } static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, @@ -813,6 +851,115 @@ return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo); } +static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + // If HasCnMips: pattern match CINS/CINS32 + // $dst = shl (and $src, 2**size-1), pos + // => cins $src, pos, size (if pos < 32) + // cins32 $src, pos-32, size (otherwise) + if (DCI.isBeforeLegalizeOps() || !Subtarget.hasCnMips()) + return SDValue(); + + SDValue And = N->getOperand(0); + EVT ValTy = N->getValueType(0); + + // The first operand must be an and + if (And.getOpcode() != ISD::AND) + return SDValue(); + + // The second operand of the left shift must be an immediate less than the + // word's size. + ConstantSDNode *ShiftAmtNode; + uint64_t Pos; + if (!(ShiftAmtNode = dyn_cast(N->getOperand(1))) || + (Pos = ShiftAmtNode->getZExtValue()) >= ValTy.getSizeInBits()) + return SDValue(); + + // The second operand of the and must be an immediate which is a mask. + ConstantSDNode *AndMaskNode; + uint64_t Mask; + if (!(AndMaskNode = dyn_cast(And->getOperand(1))) || + !isMask_64(Mask = AndMaskNode->getZExtValue())) + return SDValue(); + + // The size must fit in 5 bits. 
+ uint64_t Size = Log2_64_Ceil(Mask + 1); + if (Size >= 32) + return SDValue(); + unsigned Opc; + if (Pos >= 32) { + Opc = MipsISD::CIns32; + Pos -= 32; + } + else + Opc = MipsISD::CIns; + + SDLoc DL(N); + return DAG.getNode(Opc, DL, ValTy, + And.getOperand(0), + DAG.getConstant(Pos, DL, MVT::i32), + DAG.getConstant(Size - 1, DL, MVT::i32)); +} + +static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + // If HasCnMips: pattern match EXTS/EXTS32 + // $dst = sra (shl $src, v1), v2 + // => exts $src, pos, size (if pos < 32) + // exts32 $src, pos-32, size (otherwise) + if (DCI.isBeforeLegalizeOps() || !Subtarget.hasCnMips()) + return SDValue(); + + SDValue ShiftLeft = N->getOperand(0); + EVT ValTy = N->getValueType(0); + + // The first operand must be a left shift + if (ShiftLeft.getOpcode() != ISD::SHL) + return SDValue(); + + // The second operand of the right shift must be an immediate. + ConstantSDNode *ShiftRightValNode; + if (!(ShiftRightValNode = dyn_cast(N->getOperand(1)))) + return SDValue(); + + // The second operand of the left shift must be an immediate. + ConstantSDNode *ShiftLeftValNode; + if (!(ShiftLeftValNode = dyn_cast(ShiftLeft->getOperand(1)))) + return SDValue(); + + uint64_t Right = ShiftRightValNode->getZExtValue(); + uint64_t Left = ShiftLeftValNode->getZExtValue(); + + // The shift amounts must not exceed the word size. + // The left shift amount must not exceed the right shift amount. + if (Right >= ValTy.getSizeInBits() || Left >= ValTy.getSizeInBits() + || Left > Right) + return SDValue(); + + uint64_t Len = ValTy.getSizeInBits() - Right; + uint64_t Pos = Right - Left; + + // The length must fit in 5 bits. 
+ if (Len >= 32) + return SDValue(); + + unsigned Opc; + if (Pos >= 32) { + Opc = MipsISD::ExtS32; + Pos -= 32; + } + else + Opc = MipsISD::ExtS; + + SDLoc DL(N); + return DAG.getNode(Opc, DL, ValTy, + ShiftLeft.getOperand(0), + DAG.getConstant(Pos, DL, MVT::i32), + DAG.getConstant(Len - 1, DL, MVT::i32)); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -834,6 +981,10 @@ return performORCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return performADDCombine(N, DAG, DCI, Subtarget); + case ISD::SHL: + return performSHLCombine(N, DAG, DCI, Subtarget); + case ISD::SRA: + return performSRACombine(N, DAG, DCI, Subtarget); } return SDValue(); Index: lib/Target/Mips/MipsInstrInfo.td =================================================================== --- lib/Target/Mips/MipsInstrInfo.td +++ lib/Target/Mips/MipsInstrInfo.td @@ -126,6 +126,12 @@ def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>; def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>; +// Octeon nodes for cins/cins32/exts/exts32 +def MipsCIns : SDNode<"MipsISD::CIns", SDT_Ext>; +def MipsCIns32 : SDNode<"MipsISD::CIns32", SDT_Ext>; +def MipsExtS : SDNode<"MipsISD::ExtS", SDT_Ext>; +def MipsExtS32 : SDNode<"MipsISD::ExtS32", SDT_Ext>; + def MipsLWL : SDNode<"MipsISD::LWL", SDTMipsLoadLR, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def MipsLWR : SDNode<"MipsISD::LWR", SDTMipsLoadLR, Index: lib/Target/Mips/MipsSEISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsSEISelLowering.cpp +++ lib/Target/Mips/MipsSEISelLowering.cpp @@ -1082,9 +1082,11 @@ case ISD::MUL: return performMULCombine(N, DAG, DCI, this); case ISD::SHL: - return performSHLCombine(N, DAG, DCI, Subtarget); + Val = performSHLCombine(N, DAG, DCI, Subtarget); + break; case ISD::SRA: - return performSRACombine(N, DAG, DCI, Subtarget); + Val = performSRACombine(N, DAG, DCI, Subtarget); + break; case ISD::SRL: return 
performSRLCombine(N, DAG, DCI, Subtarget); case ISD::VSELECT: Index: test/CodeGen/Mips/octeon.ll =================================================================== --- test/CodeGen/Mips/octeon.ll +++ test/CodeGen/Mips/octeon.ll @@ -155,3 +155,107 @@ endif: ret i64 12 } + +define i64 @cins(i64 %a) nounwind { +; ALL-LABEL: cins: +; OCTEON: jr $ra +; OCTEON: cins $2, $4, 5, 1 +; MIPS64: dsll $[[T0:[0-9]+]], $4, 5 +; MIPS64: jr $ra +; MIPS64: andi $2, $[[T0]], 96 + %1 = shl i64 %a, 5 + %2 = and i64 %1, 96 + ret i64 %2 +} + +define i64 @cins2(i64 %a) nounwind { +; ALL-LABEL: cins2: +; OCTEON: jr $ra +; OCTEON: cins $2, $4, 5, 1 +; MIPS64: andi $[[T0:[0-9]+]], $4, 3 +; MIPS64: jr $ra +; MIPS64: dsll $2, $[[T0]], 5 + %1 = and i64 %a, 3 + %2 = shl i64 %1, 5 + ret i64 %2 +} + +define i32 @cins_i32(i32 %a) nounwind { +; ALL-LABEL: cins_i32: +; OCTEON: sll $[[T0:[0-9]+]], $4, 0 +; OCTEON: jr $ra +; OCTEON: cins $2, $[[T0]], 5, 1 +; MIPS64: sll $[[T0:[0-9]+]], $4, 0 +; MIPS64: sll $[[T1:[0-9]+]], $[[T0]], 5 +; MIPS64: jr $ra +; MIPS64: andi $2, $[[T1]], 96 + %1 = shl i32 %a, 5 + %2 = and i32 %1, 96 + ret i32 %2 +} + +define i32 @cins2_i32(i32 %a) nounwind { +; ALL-LABEL: cins2_i32: +; OCTEON: sll $[[T0:[0-9]+]], $4, 0 +; OCTEON: jr $ra +; OCTEON: cins $2, $[[T0]], 5, 1 +; MIPS64: sll $[[T0:[0-9]+]], $4, 0 +; MIPS64: andi $[[T1:[0-9]+]], $[[T0]], 3 +; MIPS64: jr $ra +; MIPS64: sll $2, $[[T1]], 5 + %1 = and i32 %a, 3 + %2 = shl i32 %1, 5 + ret i32 %2 +} + +define i64 @cins32(i64 %a) nounwind { +; ALL-LABEL: cins32: +; OCTEON: jr $ra +; OCTEON: cins32 $2, $4, 10, 3 +; MIPS64: dsll $[[T0:[0-9]+]], $4, 42 +; MIPS64: daddiu $[[T1:[0-9]+]], $zero, 15 +; MIPS64: dsll $[[T2:[0-9]+]], $[[T1]], 42 +; MIPS64: jr $ra +; MIPS64: and $2, $[[T0]], $[[T2]] + %1 = shl i64 %a, 42 + %2 = and i64 %1, 65970697666560 + ret i64 %2 +} + +define i64 @exts(i64 %a) nounwind { +; ALL-LABEL: exts: +; OCTEON: jr $ra +; OCTEON: exts $2, $4, 2, 2 +; MIPS64: dsll $[[T0:[0-9]+]], $4, 59 +; MIPS64: jr $ra +; 
MIPS64: dsra $2, $[[T0]], 61 + %1 = shl i64 %a, 59 + %2 = ashr i64 %1, 61 + ret i64 %2 +} + +define i32 @exts_i32(i32 %a) nounwind { +; ALL-LABEL: exts_i32: +; OCTEON: sll $[[T0:[0-9]+]], $4, 0 +; OCTEON: jr $ra +; OCTEON: exts $2, $[[T0]], 2, 2 +; MIPS64: sll $[[T0:[0-9]+]], $4, 0 +; MIPS64: sll $[[T1:[0-9]+]], $[[T0]], 27 +; MIPS64: jr $ra +; MIPS64: sra $2, $[[T1]], 29 + %1 = shl i32 %a, 27 + %2 = ashr i32 %1, 29 + ret i32 %2 +} + +define i64 @exts32(i64 %a) nounwind { +; ALL-LABEL: exts32: +; OCTEON: jr $ra +; OCTEON: exts32 $2, $4, 0, 3 +; MIPS64: dsll $[[T0:[0-9]+]], $4, 28 +; MIPS64: jr $ra +; MIPS64: dsra $2, $[[T0]], 60 + %1 = shl i64 %a, 28 + %2 = ashr i64 %1, 60 + ret i64 %2 +}