AMDGPU: give S_BITSET0/S_BITSET1 a tied destination input.

SOPInstructions.td: SOP1_32 and SOP1_64_32 gain an optional `tied_in` bit.
When it is set, the pseudo takes an extra $sdst_in register input that is
constrained to be the same register as $sdst.  The four S_BITSET* defs
enable it.

SIShrinkInstructions.cpp: when an instruction is rewritten to
S_BITSET0_B32/S_BITSET1_B32, operand 2 is no longer removed; it is turned
into a use of the destination register and tied to operand 0, matching the
new operand list.

NOTE(review): line breaks, indentation and blank context lines were
reconstructed from a whitespace-collapsed copy of this patch, and the
TableGen template parameter lists were restored from the names the class
bodies use (opName, pattern).  Confirm against the tree, or apply with
whitespace-insensitive matching (e.g. `patch -l`).

Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp
===================================================================
--- lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -276,7 +276,9 @@
         if (Opc == AMDGPU::S_BITSET0_B32 ||
             Opc == AMDGPU::S_BITSET1_B32) {
           Src0->ChangeToImmediate(NewImm);
-          MI.RemoveOperand(2);
+          // Remove the immediate and add the tied input.
+          MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
+          MI.tieOperands(0, 2);
         } else {
           SrcImm->setImm(NewImm);
         }
Index: lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/SOPInstructions.td
+++ lib/Target/AMDGPU/SOPInstructions.td
@@ -76,10 +76,13 @@
   let Inst{31-23} = 0x17d; //encoding;
 }
 
-class SOP1_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0),
-  "$sdst, $src0", pattern
->;
+class SOP1_32 <string opName, list<dag> pattern=[], bit tied_in = 0> : SOP1_Pseudo <
+  opName, (outs SReg_32:$sdst),
+  !if(tied_in, (ins SSrc_b32:$src0, SReg_32:$sdst_in),
+               (ins SSrc_b32:$src0)),
+  "$sdst, $src0", pattern> {
+  let Constraints = !if(tied_in, "$sdst = $sdst_in", "");
+}
 
 // 32-bit input, no output.
 class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
@@ -106,10 +109,13 @@
 >;
 
 // 32-bit input, 64-bit output.
-class SOP1_64_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs SReg_64:$sdst), (ins SSrc_b32:$src0),
-  "$sdst, $src0", pattern
->;
+class SOP1_64_32 <string opName, list<dag> pattern=[], bit tied_in = 0> : SOP1_Pseudo <
+  opName, (outs SReg_64:$sdst),
+  !if(tied_in, (ins SSrc_b32:$src0, SReg_64:$sdst_in),
+               (ins SSrc_b32:$src0)),
+  "$sdst, $src0", pattern> {
+  let Constraints = !if(tied_in, "$sdst = $sdst_in", "");
+}
 
 // no input, 64-bit output.
 class SOP1_64_0 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
@@ -189,10 +195,10 @@
   [(set i32:$sdst, (sext_inreg i32:$src0, i16))]
 >;
 
-def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32">;
-def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64">;
-def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">;
-def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">;
+def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32", [], 1>;
+def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64", [], 1>;
+def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32", [], 1>;
+def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64", [], 1>;
 def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64",
   [(set i64:$sdst, (int_amdgcn_s_getpc))]
 >;