Index: lib/Target/R600/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -389,6 +389,41 @@ Ops); } + case ISD::SIGN_EXTEND_INREG: { + if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) + break; + + if (N->getValueType(0) != MVT::i64) + break; + + // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it + // might not be worth the effort, and will need to expand to shifts when + // fixing SGPR copies. + + SDLoc SL(N); + unsigned Bits = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); + assert(Bits == 8 || Bits == 16); + unsigned SExtOp = Bits == 8 ? AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16; + + SDValue Lo32 = CurDAG->getTargetExtractSubreg(AMDGPU::sub0, SL, + MVT::i32, N->getOperand(0)); + // The high half must replicate the sign bit of the extended low half; a + // constant -1 is only correct when that sign bit is set. + SDNode *SExtLo = CurDAG->getMachineNode(SExtOp, SL, MVT::i32, Lo32); + SDNode *SignHi + = CurDAG->getMachineNode(AMDGPU::S_ASHR_I32, SL, MVT::i32, + SDValue(SExtLo, 0), + CurDAG->getTargetConstant(31, MVT::i32)); + const SDValue Ops[5] = { + CurDAG->getTargetConstant(AMDGPU::SSrc_64RegClassID, MVT::i32), + SDValue(SExtLo, 0), + CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), + SDValue(SignHi, 0), + CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32) + }; + + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL, MVT::i64, Ops); + } case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) Index: lib/Target/R600/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/R600/AMDGPUISelLowering.cpp +++ lib/Target/R600/AMDGPUISelLowering.cpp @@ -212,15 +212,21 @@ setOperationAction(ISD::SELECT, VT, Expand); } - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); + if (!Subtarget->hasBFE()) + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); 
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom); + if (!Subtarget->hasBFE()) + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom); + if (!Subtarget->hasBFE()) + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); @@ -1029,81 +1035,22 @@ MVT VT = Op.getSimpleValueType(); MVT ScalarVT = VT.getScalarType(); - unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits(); - unsigned DestBits = ScalarVT.getSizeInBits(); - unsigned BitsDiff = DestBits - SrcBits; - - if (!Subtarget->hasBFE()) - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); + if (!VT.isVector()) + return SDValue(); SDValue Src = Op.getOperand(0); - if (VT.isVector()) { - SDLoc DL(Op); - // Need to scalarize this, and revisit each of the scalars later. - // TODO: Don't scalarize on Evergreen? - unsigned NElts = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Args; - ExtractVectorElements(Src, DAG, Args, 0, NElts); - - SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); - for (unsigned I = 0; I < NElts; ++I) - Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); - - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size()); - } - - if (SrcBits == 32) { - SDLoc DL(Op); - - // If the source is 32-bits, this is really half of a 2-register pair, and - // we need to discard the unused half of the pair. - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src); - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc); - } - - unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1; - - // TODO: Match 64-bit BFE. 
SI has a 64-bit BFE, but it's scalar only so it - // might not be worth the effort, and will need to expand to shifts when - // fixing SGPR copies. - if (SrcBits < 32 && DestBits <= 32) { - SDLoc DL(Op); - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); - - if (DestBits != 32) - Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src); - - // FIXME: This should use TargetConstant, but that hits assertions for - // Evergreen. - SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT, - Op.getOperand(0), // Operand - DAG.getConstant(0, ExtVT), // Offset - DAG.getConstant(SrcBits, ExtVT)); // Width - - // Truncate to the original type if necessary. - if (ScalarVT == MVT::i32) - return Ext; - return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext); - } - - // For small types, extend to 32-bits first. - if (SrcBits < 32) { - SDLoc DL(Op); - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); + SDLoc DL(Op); - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src); - SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32, - DL, - ExtVT, - TruncSrc, // Operand - DAG.getConstant(0, ExtVT), // Offset - DAG.getConstant(SrcBits, ExtVT)); // Width + // TODO: Don't scalarize on Evergreen? + unsigned NElts = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Args; + ExtractVectorElements(Src, DAG, Args, 0, NElts); - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32); - } + SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); + for (unsigned I = 0; I < NElts; ++I) + Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); - // For everything else, use the standard bitshift expansion. 
- return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size()); } //===----------------------------------------------------------------------===// Index: lib/Target/R600/EvergreenInstructions.td =================================================================== --- lib/Target/R600/EvergreenInstructions.td +++ lib/Target/R600/EvergreenInstructions.td @@ -286,6 +286,13 @@ VecALU >; +def : Pat<(i32 (sext_inreg i32:$src, i1)), + (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>; +def : Pat<(i32 (sext_inreg i32:$src, i8)), + (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>; +def : Pat<(i32 (sext_inreg i32:$src, i16)), + (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>; + defm : BFIPatterns ; def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT", Index: lib/Target/R600/SIISelLowering.cpp =================================================================== --- lib/Target/R600/SIISelLowering.cpp +++ lib/Target/R600/SIISelLowering.cpp @@ -147,9 +147,6 @@ setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); Index: lib/Target/R600/SIInstructions.td =================================================================== --- lib/Target/R600/SIInstructions.td +++ lib/Target/R600/SIInstructions.td @@ -71,6 +71,7 @@ def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", [(set i32:$dst, (sext_inreg i32:$src0, i16))] >; + ////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>; ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>; ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>; @@ -125,21 +126,6 @@ >; */ -// Handle 
sext_inreg in i64 -def : Pat < - (i64 (sext_inreg i64:$src, i8)), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0), - (S_MOV_B32 -1), sub1) ->; - -def : Pat < - (i64 (sext_inreg i64:$src, i16)), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0), - (S_MOV_B32 -1), sub1) ->; - let isCompare = 1 in { def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; @@ -2267,6 +2253,13 @@ >; //===----------------------------------------------------------------------===// +// Conversion Patterns +//===----------------------------------------------------------------------===// + +def : Pat<(i32 (sext_inreg i32:$src, i1)), + (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16 + +//===----------------------------------------------------------------------===// // Miscellaneous Patterns //===----------------------------------------------------------------------===//