Index: llvm/lib/IR/ReplaceConstant.cpp
===================================================================
--- llvm/lib/IR/ReplaceConstant.cpp
+++ llvm/lib/IR/ReplaceConstant.cpp
@@ -84,6 +84,7 @@
     Instruction *I,
     std::map<Use *, std::vector<std::vector<ConstantExpr *>>> &CEPaths,
     SmallPtrSetImpl<Instruction *> *Insts) {
+  SmallPtrSet<ConstantExpr *, 8> Visited;
   for (Use &U : I->operands()) {
     // The operand U is either not a constant expression operand or the
     // constant expression paths do not belong to U, ignore U.
@@ -102,7 +103,6 @@
     // constant expressions along all paths to corresponding instructions.
     auto *II = I;
     auto &Paths = CEPaths[&U];
-    SmallPtrSet<ConstantExpr *, 8> Visited;
     for (auto &Path : Paths) {
       for (auto *CE : Path) {
         if (!Visited.insert(CE).second)
Index: llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll
+++ llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll
@@ -9,6 +9,7 @@
 ; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i32 }
 ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i8] }
 ; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i8] }
+; CHECK: %llvm.amdgcn.kernel.k5.lds.t = type { [505 x i32] }
 
 ; Use constant from different kernels
 ;.
@@ -17,6 +18,7 @@
 ; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 4
 ; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 16
 ; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 2
+; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t undef, align 16
 ;.
 define amdgpu_kernel void @k0(i64 %x) {
 ; CHECK-LABEL: @k0(
@@ -97,3 +99,16 @@
   store i8 1, i8 addrspace(0)* %ptr, align 1
   ret void
 }
+
+@lds.4 = internal unnamed_addr addrspace(3) global [505 x i32] undef, align 4
+
+; Multiple constexpr uses in the same instruction.
+define amdgpu_kernel void @k5() {
+; CHECK-LABEL: @k5(
+; CHECK-NEXT:    %1 = addrspacecast [505 x i32] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k5.lds.t, %llvm.amdgcn.kernel.k5.lds.t addrspace(3)* @llvm.amdgcn.kernel.k5.lds, i32 0, i32 0) to [505 x i32]*
+; CHECK-NEXT:    %2 = getelementptr inbounds [505 x i32], [505 x i32]* %1, i64 0, i64 0
+; CHECK-NEXT:    call void undef(i32* %2, i32* %2)
+;
+  call void undef(i32* getelementptr inbounds ([505 x i32], [505 x i32]* addrspacecast ([505 x i32] addrspace(3)* @lds.4 to [505 x i32]*), i64 0, i64 0), i32* getelementptr inbounds ([505 x i32], [505 x i32]* addrspacecast ([505 x i32] addrspace(3)* @lds.4 to [505 x i32]*), i64 0, i64 0))
+  ret void
+}