Index: llvm/lib/CodeGen/MachineFunction.cpp =================================================================== --- llvm/lib/CodeGen/MachineFunction.cpp +++ llvm/lib/CodeGen/MachineFunction.cpp @@ -485,9 +485,11 @@ ? commonAlignment(MMO->getBaseAlign(), Offset) : MMO->getBaseAlign(); + // Do not preserve ranges, since we don't necessarily know what the high bits + // are anymore. return new (Allocator) MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size, - Alignment, AAMDNodes(), nullptr, MMO->getSyncScopeID(), + Alignment, MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); } Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2373,7 +2373,6 @@ if (WideMemSize == ValSize) { MachineFunction &MF = B.getMF(); - // FIXME: This is losing AA metadata MachineMemOperand *WideMMO = MF.getMachineMemOperand(MMO, 0, WideMemSize / 8); Observer.changingInstr(MI); @@ -2388,7 +2387,6 @@ LLT WideTy = widenToNextPowerOf2(ValTy); - // FIXME: This is losing AA metadata Register WideLoad; if (!WideTy.isVector()) { WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir @@ -41,7 +41,7 @@ liveins: $vgpr0_vgpr1 ; SI-LABEL: name: widen_load_range0_tbaa ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI: [[COPY1:%[0-9]+]]:_(s32) = 
COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] @@ -61,7 +61,7 @@ liveins: $vgpr0_vgpr1 ; SI-LABEL: name: widen_load_range1_tbaa ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !range !0, !tbaa !1) @@ -75,7 +75,7 @@ liveins: $vgpr0_vgpr1 ; SI-LABEL: name: widen_load_tbaa0 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] @@ -95,7 +95,7 @@ liveins: $vgpr0_vgpr1 ; SI-LABEL: name: widen_load_tbaa1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !tbaa !1) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir @@ -0,0 +1,68 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=regbankselect %s -o - | FileCheck -check-prefix=SI %s + +--- | + + define amdgpu_ps i96 @split_smrd_load_range(i96 addrspace(4)* %ptr) { + %load = load i96, i96 addrspace(4)* %ptr, !range !0 + ret i96 
%load + } + + define amdgpu_ps <3 x i32> @split_smrd_load_tbaa(<3 x i32> addrspace(4)* %ptr) { + %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, !tbaa !1 + ret <3 x i32> %load + } + + !0 = !{i96 0, i96 9223372036854775808} + !1 = !{!"omnipotent char", !2} + !2 = !{!"Simple C/C++ TBAA"} +... + +# Make sure range metadata is not preserved when splitting loads, but +# tbaa is. + +--- +name: split_smrd_load_range +legalized: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; SI-LABEL: name: split_smrd_load_range + ; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 + 8, align 8, addrspace 4) + ; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 + ; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 + ; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 8, addrspace 4, !range !0) + $sgpr0_sgpr1_sgpr2 = COPY %1 + +... 
+ +--- +name: split_smrd_load_tbaa +legalized: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; SI-LABEL: name: split_smrd_load_tbaa + ; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, !tbaa !2, addrspace 4) + ; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 + 8, align 8, !tbaa !2, addrspace 4) + ; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 + ; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 + ; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 8, addrspace 4, !tbaa !1) + $sgpr0_sgpr1_sgpr2 = COPY %1 + +...