Index: llvm/lib/IR/Value.cpp =================================================================== --- llvm/lib/IR/Value.cpp +++ llvm/lib/IR/Value.cpp @@ -712,10 +712,16 @@ CanBeNull = false; if (const Argument *A = dyn_cast<Argument>(this)) { DerefBytes = A->getDereferenceableBytes(); - if (DerefBytes == 0 && (A->hasByValAttr() || A->hasStructRetAttr())) { - Type *PT = cast<PointerType>(A->getType())->getElementType(); - DerefBytes = DL.getTypeStoreSize(PT).getKnownMinSize(); + if (DerefBytes == 0) { + // Handle byval/byref/inalloca/preallocated arguments + if (Type *ArgMemTy = A->getPointeeInMemoryValueType()) { + if (ArgMemTy->isSized()) { + // FIXME: Why isn't this the type alloc size? + DerefBytes = DL.getTypeStoreSize(ArgMemTy).getKnownMinSize(); + } + } } + if (DerefBytes == 0) { DerefBytes = A->getDereferenceableOrNullBytes(); CanBeNull = true; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -1277,7 +1277,7 @@ ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from %ir.in.byref, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 @@ -1290,7 +1290,7 @@ ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: 
[[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 @@ -1310,7 +1310,7 @@ ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from %ir.in.byref, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 @@ -1323,7 +1323,7 @@ ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 @@ -1346,7 +1346,7 @@ ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], 
[[C2]](s64) ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in.byref, addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 @@ -1362,7 +1362,7 @@ ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 @@ -1385,7 +1385,7 @@ ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from %ir.in.byref, addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4) ; HSA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) 
:: (volatile store 4 into %ir.out.cast, addrspace 1) ; HSA-VI: S_ENDPGM 0 @@ -1401,7 +1401,7 @@ ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.cast, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 @@ -1425,7 +1425,7 @@ ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in.byref, addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 @@ -1441,7 +1441,7 @@ ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: 
[[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 @@ -1464,7 +1464,7 @@ ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 64 from %ir.in.byref, addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4) ; HSA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 @@ -1480,7 +1480,7 @@ ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 64 from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 @@ -1504,7 +1504,7 @@ ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: 
[[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.1, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load 4 from %ir.1, addrspace 1) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg @@ -1518,7 +1518,7 @@ ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.1, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load 4 from %ir.1, addrspace 1) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(1)* %in.byref @@ -1537,7 +1537,7 @@ ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (load 4 from %ir.in.byref) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg @@ -1550,7 +1550,7 @@ ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; 
LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (load 4 from %ir.in.byref) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32* %in.byref @@ -1569,7 +1569,7 @@ ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (load 4 from %ir.in.byref, addrspace 6) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg @@ -1582,7 +1582,7 @@ ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (load 4 from %ir.in.byref, addrspace 6) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(6)* %in.byref @@ -1601,7 +1601,7 @@ ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (load 4 from 
%ir.in.byref, addrspace 999) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg @@ -1614,7 +1614,7 @@ ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (load 4 from %ir.in.byref, addrspace 999) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(999)* %in.byref @@ -1634,7 +1634,7 @@ ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (load 4 from %ir.in.byref, addrspace 3) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg @@ -1647,7 +1647,7 @@ ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (load 4 from %ir.in.byref, addrspace 3) + ; 
LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(3)* %in.byref @@ -1670,8 +1670,8 @@ ; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in0.byref, addrspace 4) - ; HSA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 4 from %ir.in1.byref, addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4) + ; HSA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4) ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; HSA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) @@ -1690,8 +1690,8 @@ ; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in0.byref, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 4 from %ir.in1.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4) ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) @@ -1712,7 +1712,7 @@ ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from %ir.in.byref, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0 @@ -1722,7 +1722,7 @@ ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(4)* %in.byref Index: llvm/test/Transforms/Attributor/readattrs.ll =================================================================== --- llvm/test/Transforms/Attributor/readattrs.ll +++ llvm/test/Transforms/Attributor/readattrs.ll @@ -106,12 +106,12 @@ define void @test7_1(i32* inalloca %a) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; 
IS__TUNIT____-LABEL: define {{[^@]+}}@test7_1 -; IS__TUNIT____-SAME: (i32* inalloca nocapture nofree writeonly [[A:%.*]]) [[ATTR1]] { +; IS__TUNIT____-SAME: (i32* inalloca nocapture nofree nonnull writeonly dereferenceable(4) [[A:%.*]]) [[ATTR1]] { ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test7_1 -; IS__CGSCC____-SAME: (i32* inalloca nocapture nofree writeonly [[A:%.*]]) [[ATTR1]] { +; IS__CGSCC____-SAME: (i32* inalloca nocapture nofree nonnull writeonly dereferenceable(4) [[A:%.*]]) [[ATTR1]] { ; IS__CGSCC____-NEXT: ret void ; ret void @@ -163,11 +163,17 @@ ; CHECK-NOT: readnone ; CHECK-NOT: readonly define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) { -; CHECK: Function Attrs: nounwind willreturn -; CHECK-LABEL: define {{[^@]+}}@test9 -; CHECK-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) [[ATTR4:#.*]] { -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef ) [[ATTR11:#.*]] -; CHECK-NEXT: ret void +; IS__TUNIT____: Function Attrs: nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test9 +; IS__TUNIT____-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) [[ATTR4:#.*]] { +; IS__TUNIT____-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef ) [[ATTR11:#.*]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@test9 +; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) [[ATTR4:#.*]] { +; IS__CGSCC____-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef ) [[ATTR12:#.*]] +; IS__CGSCC____-NEXT: ret void ; call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1>) ret 
void @@ -176,11 +182,17 @@ ; CHECK: declare <4 x i32> @llvm.masked.gather declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) define <4 x i32> @test10(<4 x i32*> %ptrs) { -; CHECK: Function Attrs: nounwind readonly willreturn -; CHECK-LABEL: define {{[^@]+}}@test10 -; CHECK-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR5:#.*]] { -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) [[ATTR12:#.*]] -; CHECK-NEXT: ret <4 x i32> [[RES]] +; IS__TUNIT____: Function Attrs: nounwind readonly willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@test10 +; IS__TUNIT____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR5:#.*]] { +; IS__TUNIT____-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) [[ATTR12:#.*]] +; IS__TUNIT____-NEXT: ret <4 x i32> [[RES]] +; +; IS__CGSCC____: Function Attrs: nounwind readonly willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@test10 +; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR5:#.*]] { +; IS__CGSCC____-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) [[ATTR13:#.*]] +; IS__CGSCC____-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1>, <4 x i32>undef) ret <4 x i32> %res @@ -202,11 +214,17 @@ declare <4 x i32> @test12_1(<4 x i32*>) argmemonly nounwind ; CHECK-NOT: readnone define <4 x i32> @test12_2(<4 x i32*> %ptrs) { -; CHECK: Function Attrs: argmemonly nounwind -; CHECK-LABEL: define {{[^@]+}}@test12_2 -; CHECK-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR7:#.*]] { -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) [[ATTR13:#.*]] -; CHECK-NEXT: ret <4 x i32> [[RES]] +; IS__TUNIT____: Function Attrs: argmemonly nounwind +; IS__TUNIT____-LABEL: define 
{{[^@]+}}@test12_2 +; IS__TUNIT____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR7:#.*]] { +; IS__TUNIT____-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) [[ATTR13:#.*]] +; IS__TUNIT____-NEXT: ret <4 x i32> [[RES]] +; +; IS__CGSCC____: Function Attrs: argmemonly nounwind +; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_2 +; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR7:#.*]] { +; IS__CGSCC____-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) [[ATTR14:#.*]] +; IS__CGSCC____-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @test12_1(<4 x i32*> %ptrs) ret <4 x i32> %res Index: llvm/test/Transforms/Attributor/value-simplify.ll =================================================================== --- llvm/test/Transforms/Attributor/value-simplify.ll +++ llvm/test/Transforms/Attributor/value-simplify.ll @@ -332,12 +332,12 @@ define internal i32* @test_inalloca(i32* inalloca %a) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_inalloca -; IS__TUNIT____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] { +; IS__TUNIT____-SAME: (i32* inalloca noalias nofree noundef nonnull returned writeonly align 536870912 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] { ; IS__TUNIT____-NEXT: ret i32* [[A]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_inalloca -; IS__CGSCC____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] { +; IS__CGSCC____-SAME: (i32* inalloca noalias nofree noundef nonnull returned writeonly align 536870912 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] { ; IS__CGSCC____-NEXT: ret i32* [[A]] ; ret i32* %a @@ -349,17 +349,10 @@ ; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* 
@test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) [[ATTR1]] ; IS__TUNIT____-NEXT: ret i32* [[CALL]] ; -; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_inalloca -; IS__CGSCC_OPM-SAME: () [[ATTR1:#.*]] { -; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) [[ATTR5:#.*]] -; IS__CGSCC_OPM-NEXT: ret i32* [[CALL]] -; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_inalloca -; IS__CGSCC_NPM-SAME: () [[ATTR1:#.*]] { -; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) [[ATTR4:#.*]] -; IS__CGSCC_NPM-NEXT: ret i32* [[CALL]] +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@complicated_args_inalloca +; IS__CGSCC____-SAME: () [[ATTR1]] { +; IS__CGSCC____-NEXT: unreachable ; %call = call i32* @test_inalloca(i32* null) ret i32* %call @@ -368,44 +361,30 @@ define internal i32* @test_preallocated(i32* preallocated(i32) %a) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_preallocated -; IS__TUNIT____-SAME: (i32* noalias nofree noundef returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] { +; IS__TUNIT____-SAME: (i32* noalias nofree noundef nonnull returned writeonly preallocated(i32) align 536870912 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] { ; IS__TUNIT____-NEXT: ret i32* [[A]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_preallocated -; IS__CGSCC____-SAME: (i32* noalias nofree noundef 
returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] { +; IS__CGSCC____-SAME: (i32* noalias nofree noundef nonnull returned writeonly preallocated(i32) align 536870912 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] { ; IS__CGSCC____-NEXT: ret i32* [[A]] ; ret i32* %a } define i32* @complicated_args_preallocated() { -; IS__TUNIT_OPM: Function Attrs: nounwind -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@complicated_args_preallocated -; IS__TUNIT_OPM-SAME: () [[ATTR0:#.*]] { -; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) -; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] -; IS__TUNIT_OPM-NEXT: ret i32* [[CALL]] -; -; IS__TUNIT_NPM: Function Attrs: nounwind -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@complicated_args_preallocated -; IS__TUNIT_NPM-SAME: () [[ATTR0:#.*]] { -; IS__TUNIT_NPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) -; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR4:#.*]] [ "preallocated"(token [[C]]) ] -; IS__TUNIT_NPM-NEXT: ret i32* [[CALL]] -; -; IS__CGSCC_OPM: Function Attrs: nounwind -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_preallocated -; IS__CGSCC_OPM-SAME: () [[ATTR0:#.*]] { -; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) -; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR6:#.*]] [ "preallocated"(token [[C]]) ] -; IS__CGSCC_OPM-NEXT: ret i32* [[CALL]] -; -; IS__CGSCC_NPM: Function Attrs: nounwind -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_preallocated -; IS__CGSCC_NPM-SAME: 
() [[ATTR0:#.*]] { -; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) -; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] -; IS__CGSCC_NPM-NEXT: ret i32* [[CALL]] +; IS________OPM: Function Attrs: nounwind +; IS________OPM-LABEL: define {{[^@]+}}@complicated_args_preallocated +; IS________OPM-SAME: () [[ATTR0:#.*]] { +; IS________OPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) +; IS________OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] +; IS________OPM-NEXT: ret i32* [[CALL]] +; +; IS________NPM: Function Attrs: nounwind +; IS________NPM-LABEL: define {{[^@]+}}@complicated_args_preallocated +; IS________NPM-SAME: () [[ATTR0:#.*]] { +; IS________NPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) +; IS________NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR4:#.*]] [ "preallocated"(token [[C]]) ] +; IS________NPM-NEXT: ret i32* [[CALL]] ; %c = call token @llvm.call.preallocated.setup(i32 1) %call = call i32* @test_preallocated(i32* preallocated(i32) null) ["preallocated"(token %c)] @@ -475,14 +454,14 @@ ; ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_nest -; IS__CGSCC_OPM-SAME: () [[ATTR1]] { -; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) [[ATTR5]] +; IS__CGSCC_OPM-SAME: () [[ATTR1:#.*]] { +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef 
readnone align 536870912 null) [[ATTR6:#.*]] ; IS__CGSCC_OPM-NEXT: ret %struct.X* [[CALL]] ; ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_nest -; IS__CGSCC_NPM-SAME: () [[ATTR1]] { -; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) [[ATTR4]] +; IS__CGSCC_NPM-SAME: () [[ATTR1:#.*]] { +; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) [[ATTR5:#.*]] ; IS__CGSCC_NPM-NEXT: ret %struct.X* [[CALL]] ; %call = call %struct.X* @test_nest(%struct.X* null) Index: llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll =================================================================== --- llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll +++ llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll @@ -23,6 +23,47 @@ ret i8 %load } +; Simple memcpy to alloca from byref constant address space argument. 
+define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias readonly align 4 byref([32 x i8]) %arg, i8 addrspace(1)* %out, i32 %idx) { +; CHECK-LABEL: @memcpy_constant_byref_arg_ptr_to_alloca( +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1 +; CHECK-NEXT: store i8 [[LOAD]], i8 addrspace(1)* [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; + %alloca = alloca [32 x i8], align 4, addrspace(5) + %alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)* + %arg.cast = bitcast [32 x i8] addrspace(4)* %arg to i8 addrspace(4)* + call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 32, i1 false) + %gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx + %load = load i8, i8 addrspace(5)* %gep + store i8 %load, i8 addrspace(1)* %out + ret void +} + +; Simple memcpy to alloca from byref constant address space argument, but not enough bytes are dereferenceable +define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca_too_many_bytes([31 x i8] addrspace(4)* noalias readonly align 4 byref([31 x i8]) %arg, i8 addrspace(1)* %out, i32 %idx) { +; CHECK-LABEL: @memcpy_constant_byref_arg_ptr_to_alloca_too_many_bytes( +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5) +; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 0 +; CHECK-NEXT: [[ARG_CAST:%.*]] = getelementptr inbounds [31 x i8], [31 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 dereferenceable(31) [[ALLOCA_CAST]], i8 addrspace(4)* align 4 dereferenceable(31) [[ARG_CAST]], i64 31, i1 false) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr 
inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[GEP]], align 1 +; CHECK-NEXT: store i8 [[LOAD]], i8 addrspace(1)* [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; + %alloca = alloca [32 x i8], align 4, addrspace(5) + %alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)* + %arg.cast = bitcast [31 x i8] addrspace(4)* %arg to i8 addrspace(4)* + call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 31, i1 false) + %gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx + %load = load i8, i8 addrspace(5)* %gep + store i8 %load, i8 addrspace(1)* %out + ret void +} + ; Simple memcpy to alloca from constant address space intrinsic call define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(i8 addrspace(1)* %out, i32 %idx) { ; CHECK-LABEL: @memcpy_constant_intrinsic_ptr_to_alloca( @@ -44,18 +85,18 @@ } ; Alloca is written through a flat pointer -define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) { +define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat([31 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) { ; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat( ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [31 x i8], [31 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]] ; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1 ; CHECK-NEXT: ret i8 [[LOAD]] ; %alloca = alloca [32 x i8], align 4, addrspace(5) %alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)* %alloca.cast.asc = addrspacecast i8 addrspace(5)* 
%alloca.cast to i8* - %arg.cast = bitcast [32 x i8] addrspace(4)* %arg to i8 addrspace(4)* - call void @llvm.memcpy.p0i8.p4i8.i64(i8* %alloca.cast.asc, i8 addrspace(4)* %arg.cast, i64 32, i1 false) + %arg.cast = bitcast [31 x i8] addrspace(4)* %arg to i8 addrspace(4)* + call void @llvm.memcpy.p0i8.p4i8.i64(i8* %alloca.cast.asc, i8 addrspace(4)* %arg.cast, i64 31, i1 false) %gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx %load = load i8, i8 addrspace(5)* %gep ret i8 %load