AMDGPU: add an IsNeverUniform TSFlags bit and use it in getInstructionUniformity

Summary of the hunks below:
  * SIDefines.h / SIInstrFormats.td: new IsNeverUniform flag, wired to TSFlags
    bit 61 (the enum and the .td field must stay in sync).
  * SIInstrInfo.h: isNeverUniform(MI) tests that bit on the instruction
    descriptor.
  * SIInstrInfo.cpp: getInstructionUniformity() first returns NeverUniform for
    flagged instructions, then AlwaysUniform for V_READLANE_B32 and
    V_READFIRSTLANE_B32; the later per-opcode checks (including the
    V_WRITELANE_B32 special case) are removed.
  * VOP2Instructions.td: V_WRITELANE_B32 now sets IsNeverUniform = 1.
  * MIR tests: the RUN lines gain -mcpu=gfx900.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation and blank context lines were reconstructed from the hunk line
counts; verify against the tree (or apply with --ignore-whitespace).
NOTE(review): the "Uses = [] in {" context line may have lost a type suffix
during extraction; confirm against VOP2Instructions.td.

diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -133,6 +133,9 @@
 
   // Whether tied sources will be read.
   TiedSourceNotRead = UINT64_C(1) << 60,
+
+  // Whether the instruction is never uniform (always divergent).
+  IsNeverUniform = UINT64_C(1) << 61,
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -153,6 +153,9 @@
   // This bit indicates that tied source will not be read.
   field bit TiedSourceNotRead = 0;
 
+  // This bit indicates that the instruction is never uniform (always divergent).
+  field bit IsNeverUniform = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
   let TSFlags{1} = VALU;
@@ -234,6 +237,8 @@
 
   let TSFlags{60} = TiedSourceNotRead;
 
+  let TSFlags{61} = IsNeverUniform;
+
   let SchedRW = [Write32Bit];
 
   let AsmVariantName = AMDGPUAsmVariants.Default;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -781,6 +781,10 @@
     return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
   }
 
+  static bool isNeverUniform(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
+  }
+
   static bool doesNotReadTiedSource(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8414,7 +8414,13 @@
 
 InstructionUniformity
 SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
+  if (isNeverUniform(MI))
+    return InstructionUniformity::NeverUniform;
+
   unsigned opcode = MI.getOpcode();
+  if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
+    return InstructionUniformity::AlwaysUniform;
+
   if (MI.isCopy()) {
     const MachineOperand &srcOp = MI.getOperand(1);
     if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
@@ -8456,12 +8462,6 @@
     return InstructionUniformity::Default;
   }
 
-  if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
-    return InstructionUniformity::AlwaysUniform;
-
-  if (opcode == AMDGPU::V_WRITELANE_B32)
-    return InstructionUniformity::NeverUniform;
-
   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
   const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo();
 
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -764,11 +764,10 @@
 let isConvergent = 1, Uses = [] in {
 def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
   [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
-
-let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
 def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
   [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
-} // End $vdst = $vdst_in, DisableEncoding $vdst_in
+} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
 } // End isConvergent = 1
 
 let isReMaterializable = 1 in {
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
@@ -1,4 +1,5 @@
-# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+
 # readlane, readfirstlane is always uniform
 
 ---
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
 # loads from flat non uniform
 ---
 name: flatloads