Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -193,6 +193,12 @@
   "GFX10 bug, inst_offset ignored in flat segment"
 >;
 
+def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug",
+  "HasOffset3fBug",
+  "true",
+  "Branch offset of 0x3f hardware bug"
+>;
+
 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
   "ldsbankcount"#Value,
   "LDSBankCount",
@@ -767,6 +773,7 @@
     FeatureVcmpxExecWARHazard,
     FeatureLdsBranchVmemWARHazard,
     FeatureNSAtoVMEMBug,
+    FeatureOffset3fBug,
     FeatureFlatSegmentOffsetBug
   ];
 }
@@ -1068,6 +1075,9 @@
 def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
   AssemblerPredicate<"FeatureDot6Insts">;
 
+def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">,
+  AssemblerPredicate<"FeatureOffset3fBug">;
+
 def EnableLateCFGStructurize : Predicate<
   "EnableLateStructurizeCFG">;
 
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -368,6 +368,7 @@
   bool HasVcmpxExecWARHazard;
   bool HasLdsBranchVmemWARHazard;
   bool HasNSAtoVMEMBug;
+  bool HasOffset3fBug;
   bool HasFlatSegmentOffsetBug;
 
   // Dummy feature to use for assembler in tablegen.
@@ -855,6 +856,10 @@
     return HasR128A16;
   }
 
+  bool hasOffset3fBug() const {
+    return HasOffset3fBug;
+  }
+
   bool hasNSAEncoding() const {
     return HasNSAEncoding;
   }
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -261,6 +261,7 @@
     HasVcmpxExecWARHazard(false),
     HasLdsBranchVmemWARHazard(false),
     HasNSAtoVMEMBug(false),
+    HasOffset3fBug(false),
     HasFlatSegmentOffsetBug(false),
 
     FeatureDisable(false),
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -37,17 +37,13 @@
                   const MCSubtargetInfo *STI) const override;
   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                             const MCRelaxableFragment *DF,
-                            const MCAsmLayout &Layout) const override {
-    return false;
-  }
+                            const MCAsmLayout &Layout) const override;
+
   void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                        MCInst &Res) const override {
-    llvm_unreachable("Not implemented");
-  }
+                        MCInst &Res) const override;
+
   bool mayNeedRelaxation(const MCInst &Inst,
-                         const MCSubtargetInfo &STI) const override {
-    return false;
-  }
+                         const MCSubtargetInfo &STI) const override;
 
   unsigned getMinimumNopSize() const override;
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
@@ -57,6 +53,77 @@
 } //End anonymous namespace
 
+static unsigned getRelaxedOpcode(const MCInst &Inst) {
+  unsigned Op = Inst.getOpcode();
+  switch (Op) {
+  default:
+    return Op;
+  case AMDGPU::S_BRANCH:
+    return AMDGPU::S_BRANCH_64;
+  case AMDGPU::S_CBRANCH_SCC0:
+    return AMDGPU::S_CBRANCH_SCC0_64;
+  case AMDGPU::S_CBRANCH_SCC1:
+    return AMDGPU::S_CBRANCH_SCC1_64;
+  case AMDGPU::S_CBRANCH_VCCZ:
+    return AMDGPU::S_CBRANCH_VCCZ_64;
+  case AMDGPU::S_CBRANCH_VCCNZ:
+    return AMDGPU::S_CBRANCH_VCCNZ_64;
+  case AMDGPU::S_CBRANCH_EXECZ:
+    return AMDGPU::S_CBRANCH_EXECZ_64;
+  case AMDGPU::S_CBRANCH_EXECNZ:
+    return AMDGPU::S_CBRANCH_EXECNZ_64;
+  case AMDGPU::S_CBRANCH_CDBGSYS:
+    return AMDGPU::S_CBRANCH_CDBGSYS_64;
+  case AMDGPU::S_CBRANCH_CDBGSYS_AND_USER:
+    return AMDGPU::S_CBRANCH_CDBGSYS_AND_USER_64;
+  case AMDGPU::S_CBRANCH_CDBGSYS_OR_USER:
+    return AMDGPU::S_CBRANCH_CDBGSYS_OR_USER_64;
+  case AMDGPU::S_CBRANCH_CDBGUSER:
+    return AMDGPU::S_CBRANCH_CDBGUSER_64;
+  }
+}
+
+void AMDGPUAsmBackend::relaxInstruction(const MCInst &Inst,
+                                        const MCSubtargetInfo &STI,
+                                        MCInst &Res) const {
+  unsigned RelaxedOpcode = getRelaxedOpcode(Inst);
+  Res.setOpcode(RelaxedOpcode);
+  Res.addOperand(Inst.getOperand(0));
+}
+
+bool AMDGPUAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+                                            uint64_t Value,
+                                            const MCRelaxableFragment *DF,
+                                            const MCAsmLayout &Layout) const {
+  // If the branch target has an offset of 0x3f, the branch must be relaxed
+  // so that an s_nop 0 follows it, effectively incrementing the offset.
+  // This works around a hardware bug in gfx1010.
+  return ((int64_t(Value) / 4) - 1) == 0x3f;
+}
+
+bool AMDGPUAsmBackend::mayNeedRelaxation(const MCInst &Inst,
+                                         const MCSubtargetInfo &STI) const {
+  if (!STI.getFeatureBits()[AMDGPU::FeatureOffset3fBug])
+    return false;
+
+  switch (Inst.getOpcode()) {
+  case AMDGPU::S_BRANCH:
+  case AMDGPU::S_CBRANCH_SCC0:
+  case AMDGPU::S_CBRANCH_SCC1:
+  case AMDGPU::S_CBRANCH_VCCZ:
+  case AMDGPU::S_CBRANCH_VCCNZ:
+  case AMDGPU::S_CBRANCH_EXECZ:
+  case AMDGPU::S_CBRANCH_EXECNZ:
+  case AMDGPU::S_CBRANCH_CDBGSYS:
+  case AMDGPU::S_CBRANCH_CDBGSYS_AND_USER:
+  case AMDGPU::S_CBRANCH_CDBGSYS_OR_USER:
+  case AMDGPU::S_CBRANCH_CDBGUSER:
+    return true;
+  }
+
+  return false;
+}
+
 static unsigned getFixupKindNumBytes(unsigned Kind) {
   switch (Kind) {
   case AMDGPU::fixup_si_sopp_br:
     return 2;
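For reference, a standalone sketch of the arithmetic behind the new AMDGPUAsmBackend::fixupNeedsRelaxation() check (illustration only, not part of the patch; it assumes Value is the byte distance from the branch to its target, which is how fixup_si_sopp_br values are treated):

    // Plain C++, no LLVM dependencies. needsRelaxation() is the same
    // expression the patch adds to fixupNeedsRelaxation().
    #include <cstdint>
    #include <cstdio>

    static bool needsRelaxation(uint64_t Value) {
      return ((int64_t(Value) / 4) - 1) == 0x3f;
    }

    int main() {
      // A forward branch whose target is 0x100 bytes ahead encodes the SOPP
      // word offset (0x100 / 4) - 1 = 0x3f, the value gfx1010 mishandles.
      std::printf("0x100 bytes -> relax? %d\n", needsRelaxation(0x100)); // 1
      // One dword nearer or further away is fine.
      std::printf("0x0fc bytes -> relax? %d\n", needsRelaxation(0x0fc)); // 0
      std::printf("0x104 bytes -> relax? %d\n", needsRelaxation(0x104)); // 0
      return 0;
    }

When the check fires, relaxInstruction() swaps the branch for its 8-byte *_64 form; the s_nop 0 dword baked into that encoding pushes everything after the branch one dword further out, so after re-layout the offset lands on 0x40 or beyond instead of 0x3f.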
Index: lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/SOPInstructions.td
+++ lib/Target/AMDGPU/SOPInstructions.td
@@ -918,6 +918,31 @@
   let Inst{31-23} = 0x17f; // encoding
 }
 
+class SOPPe64 <bits <7> op> : Enc64 {
+  bits <16> simm16;
+
+  let Inst{15-0} = simm16;
+  let Inst{22-16} = op;
+  let Inst{31-23} = 0x17f; // encoding
+  let Inst{47-32} = 0x0000; // simm16 of the trailing s_nop
+  let Inst{54-48} = 0x00;   // SOPP opcode 0 = s_nop
+  let Inst{63-55} = 0x17f;  // encoding
+}
+
+class SOPP64 <bits <7> op, dag ins, string asm, list <dag> pattern = []> :
+  InstSI <(outs), ins, asm, pattern >, SOPPe64 <op> {
+
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let SALU = 1;
+  let SOPP = 1;
+  let Size = 8;
+  let SchedRW = [WriteSALU];
+
+  let UseNamedOperandTable = 1;
+}
+
 class SOPP <bits <7> op, dag ins, string asm, list <dag> pattern = []> :
   InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
 
@@ -932,7 +957,6 @@
 
   let UseNamedOperandTable = 1;
 }
-
 def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
 
 let isTerminator = 1 in {
@@ -969,6 +993,11 @@
   [(br bb:$simm16)]> {
   let isBarrier = 1;
 }
+def S_BRANCH_64 : SOPP64 <
+  0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
+  [(br bb:$simm16)]> {
+  let isBarrier = 1;
+}
 
 let Uses = [SCC] in {
 def S_CBRANCH_SCC0 : SOPP <
@@ -979,6 +1008,14 @@
   0x00000005, (ins sopp_brtarget:$simm16),
   "s_cbranch_scc1 $simm16"
 >;
+def S_CBRANCH_SCC0_64 : SOPP64 <
+  0x00000004, (ins sopp_brtarget:$simm16),
+  "s_cbranch_scc0 $simm16"
+>;
+def S_CBRANCH_SCC1_64 : SOPP64 <
+  0x00000005, (ins sopp_brtarget:$simm16),
+  "s_cbranch_scc1 $simm16"
+>;
 } // End Uses = [SCC]
 
 let Uses = [VCC] in {
@@ -990,6 +1027,14 @@
   0x00000007, (ins sopp_brtarget:$simm16),
   "s_cbranch_vccnz $simm16"
 >;
+def S_CBRANCH_VCCZ_64 : SOPP64 <
+  0x00000006, (ins sopp_brtarget:$simm16),
+  "s_cbranch_vccz $simm16"
+>;
+def S_CBRANCH_VCCNZ_64 : SOPP64 <
+  0x00000007, (ins sopp_brtarget:$simm16),
+  "s_cbranch_vccnz $simm16"
+>;
 } // End Uses = [VCC]
 
 let Uses = [EXEC] in {
@@ -1001,27 +1046,51 @@
   0x00000009, (ins sopp_brtarget:$simm16),
   "s_cbranch_execnz $simm16"
 >;
+def S_CBRANCH_EXECZ_64 : SOPP64 <
+  0x00000008, (ins sopp_brtarget:$simm16),
+  "s_cbranch_execz $simm16"
+>;
+def S_CBRANCH_EXECNZ_64 : SOPP64 <
+  0x00000009, (ins sopp_brtarget:$simm16),
+  "s_cbranch_execnz $simm16"
+>;
 } // End Uses = [EXEC]
 
 def S_CBRANCH_CDBGSYS : SOPP <
   0x00000017, (ins sopp_brtarget:$simm16),
   "s_cbranch_cdbgsys $simm16"
 >;
+def S_CBRANCH_CDBGSYS_64 : SOPP64 <
+  0x00000017, (ins sopp_brtarget:$simm16),
+  "s_cbranch_cdbgsys $simm16"
+>;
 
 def S_CBRANCH_CDBGSYS_AND_USER : SOPP <
   0x0000001A, (ins sopp_brtarget:$simm16),
   "s_cbranch_cdbgsys_and_user $simm16"
 >;
+def S_CBRANCH_CDBGSYS_AND_USER_64 : SOPP64 <
+  0x0000001A, (ins sopp_brtarget:$simm16),
+  "s_cbranch_cdbgsys_and_user $simm16"
+>;
 
 def S_CBRANCH_CDBGSYS_OR_USER : SOPP <
   0x00000019, (ins sopp_brtarget:$simm16),
   "s_cbranch_cdbgsys_or_user $simm16"
 >;
+def S_CBRANCH_CDBGSYS_OR_USER_64 : SOPP64 <
+  0x00000019, (ins sopp_brtarget:$simm16),
+  "s_cbranch_cdbgsys_or_user $simm16"
+>;
 
 def S_CBRANCH_CDBGUSER : SOPP <
   0x00000018, (ins sopp_brtarget:$simm16),
   "s_cbranch_cdbguser $simm16"
 >;
+def S_CBRANCH_CDBGUSER_64 : SOPP64 <
+  0x00000018, (ins sopp_brtarget:$simm16),
+  "s_cbranch_cdbguser $simm16"
+>;
 } // End isBranch = 1
 
 } // End isTerminator = 1
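A quick way to sanity-check the SOPPe64 layout above: the low dword is an ordinary SOPP branch encoding and the high dword is exactly s_nop 0 (SOPP opcode 0, simm16 0). The following standalone C++ sketch (illustration only, not part of the patch; encodeSOPP is a made-up helper) packs both dwords and reproduces the bytes the new MC test checks for:

    #include <cstdint>
    #include <cstdio>

    // Pack one SOPP dword the way SOPPe/SOPPe64 do: Inst{15-0} = simm16,
    // Inst{22-16} = op, Inst{31-23} = 0x17f.
    static uint32_t encodeSOPP(uint32_t Op, uint16_t Simm16) {
      return (0x17fu << 23) | ((Op & 0x7f) << 16) | Simm16;
    }

    int main() {
      // s_cbranch_execz (opcode 0x8) with the relaxed offset 0x40, followed
      // by the s_nop 0 dword that SOPPe64 appends.
      std::printf("%08X %08X\n", encodeSOPP(0x8, 0x0040), encodeSOPP(0x0, 0x0000));
      // Prints: BF880040 BF800000. The first dword matches the BIN check for
      // s_cbranch_execz in test/MC/AMDGPU/offsetbug.s below.
      return 0;
    }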
Index: test/MC/AMDGPU/offsetbug.s
===================================================================
--- /dev/null
+++ test/MC/AMDGPU/offsetbug.s
@@ -0,0 +1,122 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -filetype=obj %s | llvm-objdump -disassemble -mcpu=gfx1010 - | FileCheck %s --check-prefix=BIN
+  s_getpc_b64 s[0:1]
+  v_add_nc_u32_e32 v4, s6, v0
+  s_mov_b64 s[16:17], s[0:1]
+  s_mov_b64 s[18:19], s[0:1]
+  s_mov_b64 s[24:25], s[0:1]
+  s_mov_b32 s0, s5
+  s_mov_b32 s18, s4
+  s_mov_b32 s16, s3
+  s_mov_b32 s24, s2
+  s_load_dwordx4 s[8:11], s[0:1], 0x10
+  s_load_dwordx4 s[12:15], s[0:1], 0x0
+  s_load_dwordx4 s[4:7], s[18:19], 0x0
+  s_load_dwordx4 s[20:23], s[16:17], 0x0
+  s_load_dwordx4 s[0:3], s[24:25], 0x0
+  s_waitcnt lgkmcnt(0)
+  tbuffer_load_format_x v0, v4, s[8:11], format:22, 0 idxen offset:4
+  tbuffer_load_format_xyzw v[9:12], v4, s[8:11], format:56, 0 idxen offset:8
+  tbuffer_load_format_xyzw v[13:16], v4, s[8:11], format:56, 0 idxen offset:12
+  s_waitcnt vmcnt(1)
+  s_cbranch_vccnz BB0_2
+// GFX10: s_cbranch_vccnz BB0_2 ; encoding: [A,A,0x87,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_2, kind: fixup_si_sopp_br
+// BIN: s_cbranch_vccnz BB0_2 // 00000000006C: BF870060
+  tbuffer_load_format_xyzw v[8:11], v4, s[8:11], format:56, 0 idxen offset:16
+  tbuffer_load_format_x v1, v4, s[8:11], format:22, 0 idxen offset:20
+  tbuffer_load_format_x v2, v4, s[8:11], format:22, 0 idxen offset:24
+  tbuffer_load_format_x v3, v4, s[8:11], format:22, 0 idxen
+  tbuffer_load_format_xyzw v[4:7], v4, s[12:15], format:74, 0 idxen
+  s_buffer_load_dword s62, s[4:7], 0x0
+  v_nop
+  s_buffer_load_dwordx8 s[12:19], s[20:23], 0x0
+  s_buffer_load_dwordx4 s[8:11], s[20:23], 0x20
+  s_waitcnt lgkmcnt(0)
+  s_and_b64 vcc, exec, s[28:29]
+  s_cbranch_vccnz BB0_1
+// GFX10: s_cbranch_vccnz BB0_1 ; encoding: [A,A,0x87,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_1, kind: fixup_si_sopp_br
+// BIN: s_cbranch_vccnz BB0_1 // 0000000000BC: BF870041
+  s_nop 0
+  s_cbranch_execz BB0_3
+// GFX10: s_cbranch_execz BB0_3 ; encoding: [A,A,0x88,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_3, kind: fixup_si_sopp_br
+// BIN: s_cbranch_execz BB0_3 // 0000000000C8: BF880040
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_nop 0
+  s_buffer_load_dword s26, s[0:3], 0x48
+  s_waitcnt lgkmcnt(0)
+  s_and_b64 vcc, exec, s[28:29]
+BB0_1:
+  s_buffer_load_dword s28, s[4:7], 0x10
+BB0_3:
+  s_waitcnt lgkmcnt(0)
+  exp param0 v3, v0, v1, v2
+  exp param1 v4, v4, v4, off
+  s_cbranch_vccnz BB0_2
+// GFX10: s_cbranch_vccnz BB0_2 ; encoding: [A,A,0x87,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_2, kind: fixup_si_sopp_br
+// BIN: s_cbranch_vccnz BB0_2 // 0000000001E0: BF870003
+  s_nop 0
+  s_nop 0
+  s_nop 0
+BB0_2:
+  s_nop 0
+  s_nop 0
+  s_endpgm
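Since FeatureOffset3fBug is included in the gfx1010 feature set, running the test with -mcpu=gfx1010 is enough to enable the workaround; it should also be possible to toggle it explicitly with -mattr=+offset-3f-bug or -mattr=-offset-3f-bug when comparing against the unworked-around output. In the BIN checks above, the two branches that would otherwise land on the problematic 0x3f encoding (s_cbranch_vccnz BB0_1 and s_cbranch_execz BB0_3) come out re-encoded with offsets 0x41 and 0x40, each followed by the s_nop 0 dword of the relaxed 64-bit form.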