Index: llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -373,11 +373,12 @@ if (auto *I = dyn_cast(U)) { if (AliasScope && I->mayReadOrWriteMemory()) { MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope); - AS = MDNode::concatenate(AS, AliasScope); + AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope) + : AliasScope); I->setMetadata(LLVMContext::MD_alias_scope, AS); MDNode *NA = I->getMetadata(LLVMContext::MD_noalias); - NA = MDNode::concatenate(NA, NoAlias); + NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias); I->setMetadata(LLVMContext::MD_noalias, NA); } } Index: llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll @@ -0,0 +1,44 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s + +%vec_type = type { %vec_base } +%vec_base = type { %union.anon } +%union.anon = type { %"vec_base::n_vec_" } +%"vec_base::n_vec_" = type { [3 x i8] } + +$_f1 = comdat any +$_f2 = comdat any +@_f1 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type undef, comdat, align 1 +@_f2 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type undef, comdat, align 1 + +; GCN-LABEL: @test +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1 +; GCN-NEXT: global_store_byte v{{[0-9]+}}, [[REG]] + +; CHECK-LABEL: @test +; CHECK: store i8 3, i8 addrspace(3)* %0, align 4, !alias.scope !0, !noalias !3 +; CHECK: tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) %2, i8 addrspace(3)* noundef align 1 dereferenceable(3) %1, i64 3, i1 false), !alias.scope !6, !noalias !7 +; CHECK: %4 = load i8, i8 addrspace(3)* %3, align 4, !alias.scope !8, !noalias !9 +; CHECK: tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) %7, i8 addrspace(3)* noundef align 1 dereferenceable(3) %6, i64 3, i1 false), !alias.scope !6, !noalias !7 +; CHECK: %9 = load i8, i8 addrspace(3)* %8, align 4, !alias.scope !8, !noalias !9 + +define protected amdgpu_kernel void @test(i8 addrspace(1)* nocapture %ptr.coerce) local_unnamed_addr #0 { +entry: + store i8 3, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1 + tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 3, i1 false) + %0 = load i8, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1 + %cmp.i.i = icmp eq i8 %0, 3 + store i8 2, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1 + tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 3, i1 false) + %1 = load i8, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1 + %cmp.i.i19 = icmp eq i8 %1, 2 + %2 = and i1 %cmp.i.i19, %cmp.i.i + %frombool8 = zext i1 %2 to i8 + store i8 %frombool8, i8 addrspace(1)* %ptr.coerce, align 1 + ret void +} + +declare void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i64, i1 immarg) #1 + + Index: llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll +++ llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll @@ -5,19 +5,19 @@ @b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4 ; CHECK-LABEL: @no_clobber_ds_load_stores_x2_preexisting_aa -; CHECK: store i32 1, i32 addrspace(3)* %0, align 16, !tbaa !0, !alias.scope !5, !noalias !10 -; CHECK: %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !tbaa !0, !alias.scope !5, !noalias !10 -; CHECK: store i32 2, i32 addrspace(3)* %1, align 16, !tbaa !0, !alias.scope !10, !noalias !5 -; CHECK: %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !tbaa !0, !alias.scope !10, !noalias !5 +; CHECK: store i32 1, i32 addrspace(3)* %0, align 16, !tbaa !0, !noalias !5 +; CHECK: %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !tbaa !0, !noalias !5 +; CHECK: store i32 2, i32 addrspace(3)* %1, align 16, !tbaa !0, !noalias !5 +; CHECK: %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !tbaa !0, !noalias !5 define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(i32 addrspace(1)* %arg, i32 %i) { bb: store i32 1, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 0), align 4, !alias.scope !0, !noalias !3, !tbaa !5 %gep.a = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 %i - %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !alias.scope !0, !noalias !3, !tbaa !5 - store i32 2, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 0), align 4, !alias.scope !3, !noalias !0, !tbaa !5 + %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !alias.scope !0, !noalias !3, !tbaa !5 + store i32 2, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 0), align 4, !alias.scope !3, !noalias !0, !tbaa !5 %gep.b = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 %i - %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !alias.scope !3, !noalias !0, !tbaa !5 + %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !alias.scope !3, !noalias !0, !tbaa !5 %val = add i32 %val.a, %val.b store i32 %val, i32 addrspace(1)* %arg, align 4 ret void @@ -39,11 +39,4 @@ ; CHECK:!2 = !{!"int", !3, i64 0} ; CHECK:!3 = !{!"omnipotent char", !4, i64 0} ; CHECK:!4 = !{!"Simple C++ TBAA"} -; CHECK:!5 = !{!6, !8} -; CHECK:!6 = distinct !{!6, !7} -; CHECK:!7 = distinct !{!7} -; CHECK:!8 = distinct !{!8, !9} -; CHECK:!9 = distinct !{!9} -; CHECK:!10 = !{!11, !12} -; CHECK:!11 = distinct !{!11, !7} -; CHECK:!12 = distinct !{!12, !9} +; CHECK:!5 = !{}