Index: llvm/lib/Target/AMDGPU/SIDefines.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIDefines.h
+++ llvm/lib/Target/AMDGPU/SIDefines.h
@@ -133,6 +133,12 @@
 
   // Whether tied sources will be read.
   TiedSourceNotRead = UINT64_C(1) << 60,
+
+  // Is source of divergence.
+  IsSourceOfDivergence = UINT64_C(1) << 61,
+
+  // Is always uniform.
+  IsAlwaysUniform = UINT64_C(1) << 62,
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
Index: llvm/lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -153,6 +153,12 @@
   // This bit indicates that tied source will not be read.
   field bit TiedSourceNotRead = 0;
 
+  // This bit indicates that the instruction is a source of divergence.
+  field bit IsSourceOfDivergence = 0;
+
+  // This bit indicates that the instruction is always uniform.
+  field bit IsAlwaysUniform = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
   let TSFlags{1} = VALU;
@@ -234,6 +240,10 @@
 
   let TSFlags{60} = TiedSourceNotRead;
 
+  let TSFlags{61} = IsSourceOfDivergence;
+
+  let TSFlags{62} = IsAlwaysUniform;
+
   let SchedRW = [Write32Bit];
 
   let AsmVariantName = AMDGPUAsmVariants.Default;
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -781,6 +781,14 @@
     return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
   }
 
+  static bool isSourceOfDivergence(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::IsSourceOfDivergence;
+  }
+
+  static bool isAlwaysUniform(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::IsAlwaysUniform;
+  }
+
   static bool doesNotReadTiedSource(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
   }
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8414,7 +8414,13 @@
 
 InstructionUniformity
 SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
-  unsigned opcode = MI.getOpcode();
+  // The TSFlags classification is checked before any operand-based analysis.
+  if (isSourceOfDivergence(MI))
+    return InstructionUniformity::NeverUniform;
+
+  if (isAlwaysUniform(MI))
+    return InstructionUniformity::AlwaysUniform;
+
   if (MI.isCopy()) {
     const MachineOperand &srcOp = MI.getOperand(1);
     if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
@@ -8456,12 +8462,6 @@
     return InstructionUniformity::Default;
   }
 
-  if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
-    return InstructionUniformity::AlwaysUniform;
-
-  if (opcode == AMDGPU::V_WRITELANE_B32)
-    return InstructionUniformity::NeverUniform;
-
   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
   const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo();
 
Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -246,6 +246,7 @@
   let VALU = 1;
   let Uses = [EXEC];
   let isConvergent = 1;
+  let IsAlwaysUniform = 1;
 
   bits<8> vdst;
   bits<9> src0;
Index: llvm/lib/Target/AMDGPU/VOP2Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -762,10 +762,12 @@
 
 // These are special and do not read the exec mask.
 let isConvergent = 1, Uses = [] in {
+let IsAlwaysUniform = 1 in {
 def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
   [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
+} // End IsAlwaysUniform = 1
 
-let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+let IsSourceOfDivergence = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
 def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
   [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
 } // End $vdst = $vdst_in, DisableEncoding $vdst_in
Index: llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
===================================================================
--- llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
+++ llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
 
 # readlane, readfirstlane is always uniform
 ---
Index: llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
===================================================================
--- llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
+++ llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
 # loads from flat non uniform
 ---
 name: flatloads