Index: lib/Target/R600/SIISelLowering.cpp =================================================================== --- lib/Target/R600/SIISelLowering.cpp +++ lib/Target/R600/SIISelLowering.cpp @@ -147,6 +147,9 @@ setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); Index: lib/Target/R600/SIInstrInfo.h =================================================================== --- lib/Target/R600/SIInstrInfo.h +++ lib/Target/R600/SIInstrInfo.h @@ -47,6 +47,7 @@ void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> & Worklist, MachineInstr *Inst, unsigned Opcode) const; + void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const; public: explicit SIInstrInfo(AMDGPUTargetMachine &tm); Index: lib/Target/R600/SIInstrInfo.cpp =================================================================== --- lib/Target/R600/SIInstrInfo.cpp +++ lib/Target/R600/SIInstrInfo.cpp @@ -536,6 +536,8 @@ case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; + case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; + case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; } } @@ -895,8 +897,10 @@ MachineBasicBlock *MBB = Inst->getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Opcode = Inst->getOpcode(); + // Handle some special cases - switch(Inst->getOpcode()) { + switch (Opcode) { case AMDGPU::S_MOV_B64: { DebugLoc DL = Inst->getDebugLoc(); @@ -968,27 +972,29 @@ Inst->RemoveOperand(i); } - // Add the implict and explicit register definitions. 
- if (NewDesc.ImplicitUses) { - for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { - unsigned Reg = NewDesc.ImplicitUses[i]; - Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); - } + if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { + // We are converting these to a BFE, so we need to add the missing + // operands for the size and offset. + unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(Size)); + + // XXX - Other pointless operands. There are 4, but it seems you only need + // 3 to not hit an assertion later in MCInstLower. + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(0)); } - if (NewDesc.ImplicitDefs) { - for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { - unsigned Reg = NewDesc.ImplicitDefs[i]; - Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); - } - } + addDescImplicitUseDef(NewDesc, Inst); legalizeOperands(Inst); // Update the destination register class. const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0); - switch (Inst->getOpcode()) { + switch (Opcode) { // For target instructions, getOpRegClass just returns the virtual // register class associated with the operand, so we need to find an // equivalent VGPR register class in order to move the instruction to the @@ -1097,6 +1103,24 @@ Worklist.push_back(HiHalf); } +void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, + MachineInstr *Inst) const { + // Add the implicit register uses and definitions. 
+ if (NewDesc.ImplicitUses) { + for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { + unsigned Reg = NewDesc.ImplicitUses[i]; + Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); + } + } + + if (NewDesc.ImplicitDefs) { + for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { + unsigned Reg = NewDesc.ImplicitDefs[i]; + Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } +} + MachineInstrBuilder SIInstrInfo::buildIndirectWrite( MachineBasicBlock *MBB, MachineBasicBlock::iterator I, Index: lib/Target/R600/SIInstructions.td =================================================================== --- lib/Target/R600/SIInstructions.td +++ lib/Target/R600/SIInstructions.td @@ -65,8 +65,12 @@ //def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>; def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; //def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; -//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>; -//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>; +def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", + [(set i32:$dst, (sext_inreg i32:$src0, i8))] +>; +def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", + [(set i32:$dst, (sext_inreg i32:$src0, i16))] +>; ////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>; ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>; ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>; @@ -121,6 +125,21 @@ >; */ +// Handle sext_inreg in i64 +def : Pat < + (i64 (sext_inreg i64:$src, i8)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0), + (S_MOV_B32 -1), sub1) +>; + +def : Pat < + (i64 (sext_inreg i64:$src, i16)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0), + (S_MOV_B32 -1), sub1) +>; + let isCompare = 1 in { def S_CMPK_LG_I32 : SOPK_32 <0x00000004, 
"S_CMPK_LG_I32", []>; def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; Index: test/CodeGen/R600/sext-in-reg.ll =================================================================== --- test/CodeGen/R600/sext-in-reg.ll +++ test/CodeGen/R600/sext-in-reg.ll @@ -19,8 +19,9 @@ ; FUNC-LABEL: @sext_in_reg_i8_to_i32 ; SI: S_ADD_I32 [[VAL:s[0-9]+]], -; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8 -; SI: BUFFER_STORE_DWORD [[EXTRACT]], +; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]] +; SI: BUFFER_STORE_DWORD [[VEXTRACT]], ; EG: BFE_INT define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -33,8 +34,9 @@ ; FUNC-LABEL: @sext_in_reg_i16_to_i32 ; SI: S_ADD_I32 [[VAL:s[0-9]+]], -; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 16 -; SI: BUFFER_STORE_DWORD [[EXTRACT]], +; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]] +; SI: BUFFER_STORE_DWORD [[VEXTRACT]], ; EG: BFE_INT define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -47,8 +49,9 @@ ; FUNC-LABEL: @sext_in_reg_i8_to_v1i32 ; SI: S_ADD_I32 [[VAL:s[0-9]+]], -; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8 -; SI: BUFFER_STORE_DWORD [[EXTRACT]], +; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]] +; SI: BUFFER_STORE_DWORD [[VEXTRACT]], ; EG: BFE_INT define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind { @@ -60,9 +63,10 @@ } ; FUNC-LABEL: @sext_in_reg_i8_to_i64 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, -; SI: BUFFER_STORE_DWORD +; SI: S_ADD_I32 [[VAL:s[0-9]+]], +; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: S_MOV_B32 {{s[0-9]+}}, -1 +; SI: BUFFER_STORE_DWORDX2 ; EG: BFE_INT ; EG: ASHR @@ -75,9 +79,10 @@ } ; FUNC-LABEL: @sext_in_reg_i16_to_i64 -; SI: V_BFE_I32 {{v[0-9]+}}, 
{{s[0-9]+}}, 0, 16 -; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, -; SI: BUFFER_STORE_DWORD +; SI: S_ADD_I32 [[VAL:s[0-9]+]], +; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: S_MOV_B32 {{s[0-9]+}}, -1 +; SI: BUFFER_STORE_DWORDX2 ; EG: BFE_INT ; EG: ASHR @@ -180,8 +185,8 @@ } ; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} ; SI: BUFFER_STORE_DWORDX2 ; EG: BFE @@ -195,10 +200,10 @@ } ; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} ; SI: BUFFER_STORE_DWORDX4 ; EG: BFE @@ -214,16 +219,16 @@ } ; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 +; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}} ; SI: BUFFER_STORE_DWORDX2 ; EG: BFE ; EG: BFE define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind { %c = add <2 x i32> %a, %b ; add to prevent folding into extload - %shl = shl <2 x i32> %c, <i32 24, i32 24> - %ashr = ashr <2 x i32> %shl, <i32 24, i32 24> + %shl = shl <2 x i32> %c, <i32 16, i32 16> + %ashr = ashr <2 x i32> %shl, <i32 16, i32 16> store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8 ret void }