Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -54,6 +54,14 @@
 class PHINode;
 class TargetLibraryInfo;
 class Value;
+
+// Forward declarations sufficient to name UniformityInfo without including
+// llvm/Analysis/UniformityAnalysis.h from this header.
+template <typename> class GenericSSAContext;
+using SSAContext = GenericSSAContext<Function>;
+template <typename> class GenericUniformityInfo;
+using UniformityInfo = GenericUniformityInfo<SSAContext>;
+
 /// A private "module" namespace for types and utilities used by GVN. These
 /// are implementation details and should not be used by clients.
 namespace gvn LLVM_LIBRARY_VISIBILITY {
@@ -221,6 +229,7 @@
   MemoryDependenceResults *MD = nullptr;
   DominatorTree *DT = nullptr;
   const TargetLibraryInfo *TLI = nullptr;
+  const UniformityInfo *UI = nullptr;
   AssumptionCache *AC = nullptr;
   SetVector<BasicBlock *> DeadBlocks;
   OptimizationRemarkEmitter *ORE = nullptr;
@@ -260,8 +269,8 @@
   using UnavailBlkVect = SmallVector<BasicBlock *, 64>;
 
   bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
-               const TargetLibraryInfo &RunTLI, AAResults &RunAA,
-               MemoryDependenceResults *RunMD, LoopInfo *LI,
+               const TargetLibraryInfo &RunTLI, const UniformityInfo &RunUI,
+               AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI,
                OptimizationRemarkEmitter *ORE, MemorySSA *MSSA = nullptr);
 
   /// Push a new Value to the LeaderTable onto the list for its value number.
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -747,19 +748,21 @@
   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  auto &UI = AM.getResult<UniformityInfoAnalysis>(F);
   auto &AA = AM.getResult<AAManager>(F);
   auto *MemDep =
       isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
   auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
+  bool Changed = runImpl(F, AC, DT, TLI, UI, AA, MemDep, LI, &ORE,
                          MSSA ? &MSSA->getMSSA() : nullptr);
   if (!Changed)
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
   PA.preserve<DominatorTreeAnalysis>();
   PA.preserve<TargetLibraryAnalysis>();
+  PA.preserve<UniformityInfoAnalysis>();
   if (MSSA)
     PA.preserve<MemorySSAAnalysis>();
   if (LI)
@@ -2304,12 +2307,16 @@
     uint32_t LVN = VN.lookupOrAdd(LHS);
     if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
         (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
-      // Move the 'oldest' value to the right-hand side, using the value number
-      // as a proxy for age.
-      uint32_t RVN = VN.lookupOrAdd(RHS);
-      if (LVN < RVN) {
-        std::swap(LHS, RHS);
-        LVN = RVN;
+      // LHS is the value that gets replaced by RHS below, so a uniform value
+      // must never sit opposite a non-uniform replacement. When uniformity
+      // differs, put the uniform value on the right-hand side; otherwise move
+      // the 'oldest' value there, using the value number as a proxy for age.
+      const bool LHSUniform = UI->isUniform(LHS);
+      const bool RHSUniform = UI->isUniform(RHS);
+      uint32_t RVN = VN.lookupOrAdd(RHS);
+      if ((LHSUniform != RHSUniform) ? LHSUniform : (LVN < RVN)) {
+        std::swap(LHS, RHS);
+        LVN = RVN;
       }
     }
 
@@ -2562,13 +2569,15 @@
 
 /// runOnFunction - This is the main transformation entry point for a function.
 bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
-                      const TargetLibraryInfo &RunTLI, AAResults &RunAA,
+                      const TargetLibraryInfo &RunTLI,
+                      const UniformityInfo &RunUI, AAResults &RunAA,
                       MemoryDependenceResults *RunMD, LoopInfo *LI,
                       OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
   AC = &RunAC;
   DT = &RunDT;
   VN.setDomTree(DT);
   TLI = &RunTLI;
+  UI = &RunUI;
   VN.setAliasAnalysis(&RunAA);
   MD = RunMD;
   ImplicitControlFlowTracking ImplicitCFT;
@@ -3168,6 +3177,7 @@
         F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
         getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
         getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
+        getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(),
         getAnalysis<AAResultsWrapperPass>().getAAResults(),
         Impl.isMemDepEnabled()
             ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
@@ -3181,6 +3191,7 @@
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<UniformityInfoWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
     if (Impl.isMemDepEnabled())
       AU.addRequired<MemoryDependenceWrapperPass>();
@@ -3188,6 +3199,7 @@
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addPreserved<TargetLibraryInfoWrapperPass>();
+    AU.addPreserved<UniformityInfoWrapperPass>();
     AU.addPreserved<LoopInfoWrapperPass>();
     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
     AU.addPreserved<MemorySSAWrapperPass>();
@@ -3204,6 +3216,7 @@
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
Index: llvm/test/Transforms/GVN/AMDGPU/lit.local.cfg
===================================================================
--- /dev/null
+++ llvm/test/Transforms/GVN/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
Index: llvm/test/Transforms/GVN/AMDGPU/uniform-values.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/GVN/AMDGPU/uniform-values.ll
@@ -0,0 +1,90 @@
+; RUN: opt < %s -mtriple=amdgcn -passes=gvn -S | FileCheck %s
+
+; CHECK-LABEL: @no_replace_uniform_with_non0
+define void @no_replace_uniform_with_non0(ptr %buf) {
+  %v1 = load i32, ptr %buf
+  %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1)
+  %cond = icmp eq i32 %v1, %v2
+  br i1 %cond, label %matches, label %end
+
+; Make sure we don't replace %v2 with %v1 when %v2 is known to be uniform
+; CHECK-LABEL: matches:
+; CHECK: call void @opaque(i32 %v2)
+matches:
+  call void @opaque(i32 %v2)
+  br label %end
+
+; CHECK-LABEL: end:
+; CHECK: call void @opaque(i32 %v1)
+end:
+  call void @opaque(i32 %v1)
+  ret void
+}
+
+; CHECK-LABEL: @no_replace_uniform_with_non1
+define void @no_replace_uniform_with_non1(ptr %buf) {
+  %v1 = load i32, ptr %buf
+  %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1)
+  %v3 = add i32 %v2, 1
+  %cond = icmp eq i32 %v1, %v3
+  br i1 %cond, label %matches, label %end
+
+; CHECK-LABEL: matches:
+; CHECK: call void @opaque(i32 %v3)
+matches:
+  call void @opaque(i32 %v3)
+  br label %end
+
+; CHECK-LABEL: end:
+; CHECK: call void @opaque(i32 %v1)
+end:
+  call void @opaque(i32 %v1)
+  ret void
+}
+
+; Same as @no_replace_uniform_with_non0 with the compare operands reversed, so
+; the uniform value is on the left-hand side and is the younger of the two.
+; CHECK-LABEL: @no_replace_uniform_with_non2
+define void @no_replace_uniform_with_non2(ptr %buf) {
+  %v1 = load i32, ptr %buf
+  %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1)
+  %cond = icmp eq i32 %v2, %v1
+  br i1 %cond, label %matches, label %end
+
+; CHECK-LABEL: matches:
+; CHECK: call void @opaque(i32 %v2)
+matches:
+  call void @opaque(i32 %v2)
+  br label %end
+
+; CHECK-LABEL: end:
+; CHECK: call void @opaque(i32 %v1)
+end:
+  call void @opaque(i32 %v1)
+  ret void
+}
+
+; CHECK-LABEL: @replace_non_uniform_with_uniform
+define void @replace_non_uniform_with_uniform(ptr %buf, ptr %buf2) {
+  %v1 = load i32, ptr %buf
+  %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1)
+  %v3 = load i32, ptr %buf2
+  %cond = icmp eq i32 %v2, %v3
+  br i1 %cond, label %matches, label %end
+
+; CHECK-LABEL: matches:
+; CHECK: call void @opaque(i32 %v2)
+matches:
+  call void @opaque(i32 %v3)
+  br label %end
+
+; CHECK-LABEL: end:
+; CHECK: call void @opaque(i32 %v3)
+end:
+  call void @opaque(i32 %v3)
+  ret void
+}
+
+
+declare i32 @llvm.amdgcn.readfirstlane(i32) nounwind readnone
+declare void @opaque(i32)