diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp --- a/llvm/lib/IR/ReplaceConstant.cpp +++ b/llvm/lib/IR/ReplaceConstant.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/NoFolder.h" +#include "llvm/IR/ValueMap.h" namespace llvm { @@ -33,7 +34,8 @@ Instruction *I, std::map>> &CEPaths, SmallPtrSetImpl *Insts) { - SmallPtrSet Visited; + ValueMap Visited; + for (Use &U : I->operands()) { // The operand U is either not a constant expression operand or the // constant expression paths do not belong to U, ignore U. @@ -48,23 +50,47 @@ BI = &(*(BB->getFirstInsertionPt())); } - // Go through the paths associated with operand U, and convert all the - // constant expressions along all paths to corresponding instructions. + // Go through all the paths associated with operand U, and convert all the + // constant expressions along all the paths to corresponding instructions. auto *II = I; auto &Paths = CEPaths[&U]; for (auto &Path : Paths) { for (auto *CE : Path) { - if (!Visited.insert(CE).second) - continue; - auto *NI = CE->getAsInstruction(BI); + // Instruction which is equivalent to CE. + Instruction *NI = nullptr; + + if (!Visited.count(CE)) { + // CE is seen for the first time, convert it into a corresponding + // instruction NI, and appropriately insert NI before the parent + // instruction. + NI = CE->getAsInstruction(BI); + + // Mark CE as visited by mapping CE to NI. + Visited[CE] = NI; + + // If required, collect NI. + if (Insts) + Insts->insert(NI); + } else { + // CE was seen before and its corresponding instruction already + // exists, use it to replace CE. + NI = Visited[CE]; + } + + assert(NI && "Expected an instruction corresponding to constant " + "expression."); + + // Replace all uses of constant expression CE by the corresponding + // instruction NI within the current parent instruction.
II->replaceUsesOfWith(CE, NI); - CE->removeDeadConstantUsers(); BI = II = NI; - if (Insts) - Insts->insert(NI); } } } + + // Remove the now-dead constant users of all converted constant expressions. + for (auto Item : Visited) + Item.first->removeDeadConstantUsers(); } void collectConstantExprPaths( diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll @@ -10,6 +10,7 @@ ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i8] } ; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i8] } ; CHECK: %llvm.amdgcn.kernel.k5.lds.t = type { [505 x i32] } +; CHECK: %llvm.amdgcn.kernel.k6.lds.t = type { [4 x i32] } ; Use constant from different kernels ;. @@ -19,6 +20,7 @@ ; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 16 ; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 2 ; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t undef, align 16 +; CHECK: @llvm.amdgcn.kernel.k6.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k6.lds.t undef, align 16 ;. define amdgpu_kernel void @k0(i64 %x) { ; CHECK-LABEL: @k0( @@ -112,3 +114,19 @@ call void undef(i32* getelementptr inbounds ([505 x i32], [505 x i32]* addrspacecast ([505 x i32] addrspace(3)* @lds.4 to [505 x i32]*), i64 0, i64 0), i32* getelementptr inbounds ([505 x i32], [505 x i32]* addrspacecast ([505 x i32] addrspace(3)* @lds.4 to [505 x i32]*), i64 0, i64 0)) ret void } + +@lds.5 = internal addrspace(3) global [4 x i32] undef, align 4 + +; Both the *value* and *pointer* operands of the store instruction are constant expressions, and +; both of these constant expression paths use the same lds - @lds.5.
Hence both of these constant +; expression operands of store should be replaced by corresponding instruction sequence. +define amdgpu_kernel void @k6() { +; CHECK-LABEL: @k6( +; CHECK-NEXT: %1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k6.lds.t, %llvm.amdgcn.kernel.k6.lds.t addrspace(3)* @llvm.amdgcn.kernel.k6.lds, i32 0, i32 0), i32 0, i32 2 +; CHECK-NEXT: %2 = ptrtoint i32 addrspace(3)* %1 to i32 +; CHECK-NEXT: store i32 %2, i32 addrspace(3)* %1, align 8 +; CHECK-NEXT: ret void +; + store i32 ptrtoint (i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @lds.5, i32 0, i32 2) to i32), i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @lds.5, i32 0, i32 2) + ret void +}