diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -941,9 +941,13 @@ // // Only 4-byte alignment is really needed to access anything. Transformations // on the pointer value itself may rely on the alignment / known low bits of - // the pointer. Set this to something above the minimum to avoid needing - // dynamic realignment in common cases. - Align getStackAlignment() const { return Align(16); } + // the pointer. Set this to something above the minimum for HSA to avoid + // needing dynamic realignment in common cases. + Align getStackAlignment() const { + if (isAmdHsaOS()) + return Align(16); + return Align(4); + } bool enableMachineScheduler() const override { return true; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -266,7 +266,7 @@ ; GFX900: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; GFX900: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; GFX900: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GFX900: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, addrspace 5) ; GFX900: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; GFX900: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; GFX900: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -351,7 +351,7 @@ ; GFX908: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; GFX908: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; GFX908: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GFX908: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, addrspace 5) ; GFX908: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; GFX908: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; GFX908: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -480,7 +480,7 @@ ; GFX900: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GFX900: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32) - ; GFX900: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GFX900: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, addrspace 5) ; GFX900: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) ; GFX900: $sgpr4_sgpr5 = COPY [[COPY26]](p4) @@ -605,7 +605,7 @@ ; GFX908: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GFX908: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32) - ; GFX908: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GFX908: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, addrspace 5) ; GFX908: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) ; GFX908: $sgpr4_sgpr5 = COPY [[COPY26]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -2389,7 +2389,7 @@ ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2481,7 +2481,7 @@ ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5) @@ -2574,7 +2574,7 @@ ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5) @@ -3349,7 +3349,7 @@ ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -3444,7 +3444,7 @@ ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5) @@ -3546,13 +3546,13 @@ ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32) ; 
CHECK: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32) - ; CHECK: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 8, addrspace 5) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32) ; CHECK: G_STORE [[ANYEXT1]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 12, addrspace 5) @@ -3654,13 +3654,13 @@ ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32) ; CHECK: G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32) - ; CHECK: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store 4 into stack + 8, addrspace 5) ; CHECK: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -3788,7 +3788,7 @@ ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32) - ; CHECK: G_STORE [[FRAME_INDEX]](p5), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[FRAME_INDEX]](p5), [[PTR_ADD2]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -4270,13 +4270,13 @@ ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C5]](s32) ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 4, addrspace 5) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 8, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -4392,19 +4392,19 @@ ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) - ; 
CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, addrspace 5) ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) @@ -4533,19 +4533,19 @@ ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, addrspace 5) ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) @@ -4666,31 +4666,31 @@ ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], 
[[C17]](s32) ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, addrspace 5) ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store 4 into stack + 20, addrspace 5) ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) - ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store 4 into stack + 24, align 8, addrspace 5) + ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store 4 into stack + 24, addrspace 5) ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store 4 into stack + 28, addrspace 5) ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) - ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store 4 into stack + 32, align 16, addrspace 5) + ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store 4 into stack + 32, addrspace 5) ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) @@ -4807,31 +4807,31 @@ ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, 
addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, addrspace 5) ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store 4 into stack + 20, addrspace 5) ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) - ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store 4 into stack + 24, align 8, addrspace 5) + ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store 4 into stack + 24, addrspace 5) ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store 4 into stack + 28, addrspace 5) ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) - ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store 4 into stack + 32, align 16, addrspace 5) + ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store 4 into stack + 32, addrspace 5) ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -759,7 +759,7 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[LOAD]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1154,7 +1154,7 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x 
s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) @@ -1590,7 +1590,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 1 from %ir.arg0, align 4, addrspace 5) @@ -1613,9 +1613,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1650,9 +1650,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) @@ -1706,11 +1706,11 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) @@ -1766,11 +1766,11 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.3, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 1 from %fixed-stack.2, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.1, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.0, align 4, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 @@ -1832,7 +1832,7 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; 
CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 @@ -1888,11 +1888,11 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.3, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 @@ -1950,7 +1950,7 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 @@ -2006,19 +2006,19 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, addrspace 5) ; 
CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 @@ -2080,19 +2080,19 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, addrspace 5) ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 @@ -2150,35 +2150,35 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.15, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.15, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.14, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.13, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.13, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.12, addrspace 5) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.11, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.11, addrspace 5) ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.10, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.9, align 8, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.9, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.8, addrspace 5) ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.7, addrspace 5) ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.5, addrspace 5) ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.3, addrspace 5) ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = 
G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 @@ -2236,67 +2236,67 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.31, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.31, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.30, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.29, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.29, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.28, addrspace 5) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.27, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.27, addrspace 5) ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.26, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.25, align 8, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.25, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.24, addrspace 5) ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.23, align 16, addrspace 5) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.23, addrspace 5) ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.22, addrspace 5) ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.21, align 8, addrspace 5) + ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.21, addrspace 5) ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.20, addrspace 5) ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = 
G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.19, align 16, addrspace 5) + ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.19, addrspace 5) ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.18, addrspace 5) ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.17, align 8, addrspace 5) + ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.17, addrspace 5) ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.16, addrspace 5) ; CHECK: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.15, align 16, addrspace 5) + ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.15, addrspace 5) ; CHECK: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load 4 from %fixed-stack.14, addrspace 5) ; CHECK: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load 4 from %fixed-stack.13, align 8, addrspace 5) + ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load 4 from %fixed-stack.13, addrspace 5) ; CHECK: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load 4 from %fixed-stack.12, addrspace 5) ; CHECK: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load 4 from %fixed-stack.11, align 16, addrspace 5) + ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load 4 from %fixed-stack.11, addrspace 5) ; CHECK: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load 4 from %fixed-stack.10, addrspace 5) ; CHECK: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load 4 from %fixed-stack.9, align 8, addrspace 5) + ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load 4 from %fixed-stack.9, addrspace 5) ; CHECK: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load 4 from %fixed-stack.8, addrspace 5) ; CHECK: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load 4 from %fixed-stack.7, addrspace 5) ; CHECK: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 
5) ; CHECK: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load 4 from %fixed-stack.5, addrspace 5) ; CHECK: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) ; CHECK: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load 4 from %fixed-stack.3, addrspace 5) ; CHECK: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 @@ -2471,35 +2471,35 @@ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 2 from %fixed-stack.15, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 2 from %fixed-stack.15, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 2 from %fixed-stack.14, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.13, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.13, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.12, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 2 from %fixed-stack.11, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 2 from %fixed-stack.11, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 ; CHECK: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 2 from %fixed-stack.10, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 2 from %fixed-stack.9, align 8, addrspace 5) + ; 
CHECK: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 2 from %fixed-stack.9, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 ; CHECK: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 2 from %fixed-stack.8, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 2 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 2 from %fixed-stack.7, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 ; CHECK: [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 2 from %fixed-stack.6, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 2 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 2 from %fixed-stack.5, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; CHECK: [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 2 from %fixed-stack.4, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 2 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 2 from %fixed-stack.3, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK: [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 2 from %fixed-stack.2, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 2 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 2 from %fixed-stack.1, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 2 from %fixed-stack.0, align 4, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir @@ -1,8 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE64 %s -# XUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE64 %s -# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE32 %s -# XUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -march=amdgcn 
-mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE64-NONHSA %s +# XUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE64-NONHSA %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE32-NONHSA %s +# XUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE32-NONHSA %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE64-HSA %s +# XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE64-HSA %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE32-HSA %s +# XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE32-HSA %s --- @@ -16,20 +20,34 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align1 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTR_ADD]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align1 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align1 + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align1 + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align1 + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align1 + 
; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 1 S_ENDPGM 0, implicit %1 @@ -46,20 +64,34 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align2 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTR_ADD]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align2 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align2 + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align2 + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align2 + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align2 + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 2 S_ENDPGM 0, implicit %1 @@ -76,20 +108,34 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align4 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: 
[[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTR_ADD]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align4 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align4 + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align4 + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align4 + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align4 + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 4 S_ENDPGM 0, implicit %1 @@ -106,20 +152,38 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align8 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTR_ADD]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align8 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align8 + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NONHSA: 
[[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -512 + ; WAVE64-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align8 + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -256 + ; WAVE32-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align8 + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align8 + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 8 S_ENDPGM 0, implicit %1 @@ -136,20 +200,38 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align16 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTR_ADD]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align16 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align16 + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 + ; WAVE64-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align16 + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = 
G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -512 + ; WAVE32-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align16 + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align16 + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 16 S_ENDPGM 0, implicit %1 @@ -166,24 +248,42 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align32 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 - ; WAVE64: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTRMASK]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align32 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 - ; WAVE32: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align32 + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 + ; WAVE64-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align32 + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], 
[[SHL]](s32) + ; WAVE32-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 + ; WAVE32-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align32 + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 + ; WAVE64-HSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align32 + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 + ; WAVE32-HSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 32 S_ENDPGM 0, implicit %1 @@ -200,24 +300,42 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align64 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 - ; WAVE64: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTRMASK]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align64 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 - ; WAVE32: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align64 + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 + ; WAVE64-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align64 + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg 
+ ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 + ; WAVE32-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align64 + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 + ; WAVE64-HSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align64 + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 + ; WAVE32-HSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 64 S_ENDPGM 0, implicit %1 @@ -234,24 +352,42 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_align128 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -8192 - ; WAVE64: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTRMASK]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align128 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) - ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 - ; WAVE32: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align128 + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -8192 + ; WAVE64-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_align128 + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL 
[[COPY]], [[C]](s32) + ; WAVE32-NONHSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 + ; WAVE32-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align128 + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE64-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -8192 + ; WAVE64-HSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_align128 + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) + ; WAVE32-HSA: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 + ; WAVE32-HSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 128 S_ENDPGM 0, implicit %1 @@ -267,20 +403,34 @@ body: | bb.0: - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTR_ADD]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE64-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE32-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) 
= G_CONSTANT i32 32 + ; WAVE64-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE32-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 4 S_ENDPGM 0, implicit %1 @@ -297,20 +447,38 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTR_ADD]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE64-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NONHSA: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -512 + ; WAVE64-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE32-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NONHSA: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -256 + ; WAVE32-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE64-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 + ; WAVE32-HSA: 
[[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE32-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 8 S_ENDPGM 0, implicit %1 @@ -327,20 +495,38 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTR_ADD]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE64-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NONHSA: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 + ; WAVE64-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE32-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NONHSA: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -512 + ; WAVE32-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE64-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE32-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 16 S_ENDPGM 0, 
implicit %1 @@ -357,24 +543,42 @@ bb.0: liveins: $sgpr0 - ; WAVE64-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32 - ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 - ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) - ; WAVE64: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 - ; WAVE64: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE64: S_ENDPGM 0, implicit [[PTRMASK]](p5) - ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32 - ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 - ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 - ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) - ; WAVE32: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 - ; WAVE32: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) - ; WAVE32: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32 + ; WAVE64-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE64-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-NONHSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NONHSA: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 + ; WAVE64-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) + ; WAVE64-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-NONHSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32 + ; WAVE32-NONHSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE32-NONHSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-NONHSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-NONHSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-NONHSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NONHSA: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 + ; WAVE32-NONHSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) + ; WAVE32-NONHSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE64-HSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32 + ; WAVE64-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE64-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; WAVE64-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE64-HSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE64-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-HSA: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 + ; WAVE64-HSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) + ; WAVE64-HSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) + ; WAVE32-HSA-LABEL: name: test_dyn_stackalloc_sgpr_constant_align32 + ; WAVE32-HSA: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 + ; WAVE32-HSA: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 + ; WAVE32-HSA: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) + ; WAVE32-HSA: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg + ; WAVE32-HSA: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-HSA: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 + ; WAVE32-HSA: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) 
+ ; WAVE32-HSA: S_ENDPGM 0, implicit [[PTRMASK]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 32 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -653,8 +653,8 @@ ; GCN-LABEL: {{^}}test_call_external_void_func_byval_struct_i8_i32: ; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3 ; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8 -; MESA-DAG: buffer_store_byte [[VAL0]], off, s[36:39], 0 offset:8 -; MESA-DAG: buffer_store_dword [[VAL1]], off, s[36:39], 0 offset:12 +; MESA-DAG: buffer_store_byte [[VAL0]], off, s[36:39], 0 offset:4 +; MESA-DAG: buffer_store_dword [[VAL1]], off, s[36:39], 0 offset:8 ; HSA-DAG: buffer_store_byte [[VAL0]], off, s[0:3], 0 offset:8 ; HSA-DAG: buffer_store_dword [[VAL1]], off, s[0:3], 0 offset:12 @@ -662,10 +662,11 @@ ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], 0 offset:8 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], 0 offset:12 -; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], 0 offset:8 -; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], 0 offset:12 +; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], 0 offset:4 +; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], 0 offset:8 -; GCN-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x400{{$}} +; HSA-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x400{{$}} +; MESA-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x300{{$}} ; HSA-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]]{{$}} ; HSA-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:4 @@ -686,22 +687,29 @@ } ; GCN-LABEL: {{^}}test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: -; GCN-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x800{{$}} +; HSA-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x800{{$}} +; MESA-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x500{{$}} ; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3 ; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8 -; GCN-DAG: buffer_store_byte [[VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; GCN-DAG: buffer_store_dword [[VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; HSA-DAG: buffer_store_byte [[VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; HSA-DAG: buffer_store_dword [[VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; MESA-DAG: buffer_store_byte [[VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 +; MESA-DAG: buffer_store_dword [[VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; GCN-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; HSA-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; HSA-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; MESA-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 +; MESA-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; GCN-NOT: s_add_u32 [[SP]] ; GCN-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]]{{$}} ; GCN-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4 ; GCN: s_swappc_b64 -; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 -; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 
offset:20 +; HSA-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 +; HSA-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:20 +; MESA-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; MESA-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 ; GCN-NOT: s_sub_u32 [[SP]] ; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off @@ -743,15 +751,19 @@ } ; GCN-LABEL: {{^}}tail_call_byval_align16: -; GCN-NOT: s32 -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 -; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 +; HSA-NOT: s32 +; HSA: buffer_load_dword v32, off, s[0:3], s32 offset:12 +; HSA: buffer_load_dword v33, off, s[0:3], s32 offset:8 +; MESA: buffer_load_dword v32, off, s[0:3], s33 offset:12 +; MESA: buffer_load_dword v33, off, s[0:3], s33 offset:8 ; GCN: s_getpc_b64 -; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 -; GCN: buffer_store_dword v33, off, s[0:3], s32{{$}} -; GCN-NOT: s32 +; HSA: buffer_store_dword v32, off, s[0:3], s32 offset:4 +; HSA: buffer_store_dword v33, off, s[0:3], s32{{$}} +; MESA: buffer_store_dword v32, off, s[0:3], s34 offset:4 +; MESA: buffer_store_dword v33, off, s[0:3], s34{{$}} +; HSA-NOT: s32 ; GCN: s_setpc_b64 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -82,7 +82,7 @@ ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN: v_writelane_b32 [[CSR_VGPR]], s33, 2 ; GCN-DAG: s_mov_b32 s33, s32 -; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}} +; GCN-DAG: s_add_u32 s32, s32, 0x300{{$}} ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, @@ -94,7 +94,7 @@ ; GCN-DAG: v_readlane_b32 s5, [[CSR_VGPR]] ; GCN-DAG: v_readlane_b32 s4, [[CSR_VGPR]] -; GCN: s_sub_u32 s32, s32, 0x400{{$}} +; GCN: s_sub_u32 s32, s32, 0x300{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -120,7 +120,7 @@ ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-DAG: s_add_u32 s32, s32, 0x400 +; GCN-DAG: s_add_u32 s32, s32, 0x200 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s33, [[FP_SPILL_LANE:[0-9]+]] ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0 @@ -130,7 +130,7 @@ ; GCN-DAG: v_readlane_b32 s4, v40, 0 ; GCN-DAG: v_readlane_b32 s5, v40, 1 -; GCN: s_sub_u32 s32, s32, 0x400 +; GCN: s_sub_u32 s32, s32, 0x200 ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], [[FP_SPILL_LANE]] ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload @@ -447,10 +447,10 @@ ; GCN-LABEL: {{^}}ipra_call_with_stack: ; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 ; GCN: s_mov_b32 s33, s32 -; GCN: s_add_u32 s32, s32, 0x400 +; GCN: s_add_u32 s32, s32, 0x200 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}} ; GCN: s_swappc_b64 -; GCN: s_sub_u32 s32, s32, 0x400 +; GCN: s_sub_u32 s32, s32, 0x200 ; GCN: 
s_mov_b32 s33, [[FP_COPY:s[0-9]+]] define void @ipra_call_with_stack() #0 { %alloca = alloca i32, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll --- a/llvm/test/CodeGen/AMDGPU/function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args.ll @@ -544,9 +544,10 @@ } ; GCN-LABEL: {{^}}void_func_byval_i32_byval_i64: -; GCN-DAG: buffer_load_dword v[[ARG0_LOAD:[0-9]+]], off, s[0:3], s32{{$}} -; GCN-DAG: buffer_load_dword v[[ARG1_LOAD0:[0-9]+]], off, s[0:3], s32 offset:8{{$}} -; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s32 offset:12{{$}} +; GCN: s_mov_b32 s34, s32 +; GCN-DAG: buffer_load_dword v[[ARG0_LOAD:[0-9]+]], off, s[0:3], s34{{$}} +; GCN-DAG: buffer_load_dword v[[ARG1_LOAD0:[0-9]+]], off, s[0:3], s34 offset:8{{$}} +; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s34 offset:12{{$}} ; GCN-DAG: buffer_store_dword v[[ARG0_LOAD]], off ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ARG1_LOAD0]]:[[ARG1_LOAD1]]{{\]}}, off define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 addrspace(5)* byval %arg1) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -16,7 +16,7 @@ ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-DAG: v_writelane_b32 v40, s33, 2 ; GCN-DAG: s_mov_b32 s33, s32 -; GCN-DAG: s_add_u32 s32, s32, 0x400 +; GCN-DAG: s_add_u32 s32, s32, 0x200 ; GCN-DAG: v_writelane_b32 v40, s30, 0 ; GCN-DAG: v_writelane_b32 v40, s31, 1 @@ -25,7 +25,7 @@ ; GCN: v_readlane_b32 s4, v40, 0 ; GCN: v_readlane_b32 s5, v40, 1 -; GCN-NEXT: s_sub_u32 s32, s32, 0x400 +; GCN-NEXT: s_sub_u32 s32, s32, 0x200 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -40,10 +40,10 @@ ; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use: ; GCN: s_waitcnt ; GCN: s_mov_b32 s33, s32 -; GCN-DAG: s_add_u32 s32, s32, 0x1400{{$}} +; GCN-DAG: s_add_u32 s32, s32, 0x1200{{$}} ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset: ; GCN: s_swappc_b64 -; GCN: s_sub_u32 s32, s32, 0x1400{{$}} +; GCN: s_sub_u32 s32, s32, 0x1200{{$}} ; GCN: s_setpc_b64 define void @test_func_call_external_void_func_i32_imm_stack_use() #0 { %alloca = alloca [16 x i32], align 4, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll @@ -80,7 +80,7 @@ ; ASM: buffer_store_dword ; ASM: buffer_store_dword ; ASM: s_swappc_b64 -; ASM: ScratchSize: 16400 +; ASM: ScratchSize: 16396 define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) #0 { entry: %tmp = alloca [2 x i32], addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -39,7 +39,7 @@ ; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32 ; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9]+]] ; GFX6: NumSgprs: 48 -; GFX6: ScratchSize: 8624 +; GFX6: ScratchSize: 8608 define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) 
#0 { entry: %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PAL %s ; Check that we properly realign the stack. While 4-byte access is all ; that is ever needed, some transformations rely on the known bits from the alignment of the pointer (e.g. @@ -9,8 +10,10 @@ ; = 144 bytes with padding between them ; GCN-LABEL: {{^}}needs_align16_default_stack_align: +; PAL: s_and_b32 s33, s{{[0-9]+}}, 0xfffffc00 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, v0 -; GCN-DAG: v_lshrrev_b32_e64 [[FRAMEDIFF:v[0-9]+]], 6, s32 +; PAL-DAG: v_lshrrev_b32_e64 [[FRAMEDIFF:v[0-9]+]], 6, s33 +; HSA-DAG: v_lshrrev_b32_e64 [[FRAMEDIFF:v[0-9]+]], 6, s32 ; GCN: v_add_u32_e32 [[FI:v[0-9]+]], vcc, [[FRAMEDIFF]], [[SCALED_IDX]] ; GCN-NOT: s32 @@ -21,9 +24,10 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen -; GCN-NOT: s32 +; HSA-NOT: s32 -; GCN: ; ScratchSize: 144 +; HSA: ; ScratchSize: 144 +; PAL: ; ScratchSize: 160 define void @needs_align16_default_stack_align(i32 %idx) #0 { %alloca.align16 = alloca [8 x <4 x i32>], align 16, addrspace(5) %gep0 = getelementptr inbounds [8 x <4 x i32>], [8 x <4 x i32>] addrspace(5)* %alloca.align16, i32 0, i32 %idx @@ -76,12 +80,15 @@ ; GCN-LABEL: {{^}}force_realign4: ; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}} ; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffffff00 -; GCN: s_add_u32 s32, s32, 0xd00{{$}} +; HSA: s_add_u32 s32, s32, 0xd00{{$}} +; PAL: s_add_u32 s32, s32, 0xa00{{$}} ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen -; GCN: s_sub_u32 s32, s32, 0xd00 +; HSA: s_sub_u32 s32, s32, 0xd00 +; PAL: s_sub_u32 s32, s32, 0xa00 -; GCN: ; ScratchSize: 52 +; HSA: ; ScratchSize: 52 +; PAL: ; ScratchSize: 40 define void @force_realign4(i32 %idx) #1 { %alloca.align16 = alloca [8 x i32], align 4, addrspace(5) %gep0 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %alloca.align16, i32 0, i32 %idx @@ -90,7 +97,8 @@ } ; GCN-LABEL: {{^}}kernel_call_align16_from_8: -; GCN: s_movk_i32 s32, 0x400{{$}} +; HSA: s_movk_i32 s32, 0x400{{$}} +; PAL: s_movk_i32 s32, 0x200{{$}} ; GCN-NOT: s32 ; GCN: s_swappc_b64 define amdgpu_kernel void @kernel_call_align16_from_8() #0 { @@ -102,7 +110,8 @@ ; The call sequence should keep the stack on call aligned to 4 ; GCN-LABEL: {{^}}kernel_call_align16_from_5: -; GCN: s_movk_i32 s32, 0x400 +; HSA: s_movk_i32 s32, 0x400 +; PAL: s_movk_i32 s32, 0x200 ; GCN: s_swappc_b64 define amdgpu_kernel void @kernel_call_align16_from_5() { %alloca0 = alloca i8, align 1, addrspace(5) @@ -113,7 +122,8 @@ } ; GCN-LABEL: {{^}}kernel_call_align4_from_5: -; GCN: s_movk_i32 s32, 0x400 +; HSA: s_movk_i32 s32, 0x400 +; PAL: s_movk_i32 s32, 0x200 ; GCN: s_swappc_b64 define amdgpu_kernel void @kernel_call_align4_from_5() { %alloca0 = alloca i8, align 1, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++
b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1065,8 +1065,8 @@ ; GCN-NEXT: v_writelane_b32 v40, s33, 2 ; GCN: s_mov_b32 s33, s32 -; GFX1064: s_add_u32 s32, s32, 0x400 -; GFX1032: s_add_u32 s32, s32, 0x200 +; GFX1064: s_add_u32 s32, s32, 0x200 +; GFX1032: s_add_u32 s32, s32, 0x100 ; GCN-DAG: v_writelane_b32 v40, s30, 0 @@ -1076,8 +1076,8 @@ ; GCN-DAG: v_readlane_b32 s5, v40, 1 -; GFX1064: s_sub_u32 s32, s32, 0x400 -; GFX1032: s_sub_u32 s32, s32, 0x200 +; GFX1064: s_sub_u32 s32, s32, 0x200 +; GFX1032: s_sub_u32 s32, s32, 0x100 ; GCN: v_readlane_b32 s33, v40, 2 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
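
Note on the scaled constants in the updated checks: scratch (addrspace 5) offsets held in the stack pointer count bytes per wavefront, not per lane, so every per-lane size is multiplied by the wavefront size. That is why the wave64 MIR shifts the alloca size left by 6 and the wave32 MIR by 5, why the 16-byte per-lane frame in nested-calls.ll appears as s_add_u32 s32, s32, 0x400 on wave64 (16 * 64) and shrinks to 0x200 once the non-HSA default stack alignment reduces it to 8 bytes, and why the G_PTRMASK realignment masks are -(align * wavesize), e.g. -512 for align 8 on wave64 versus -256 on wave32. The standalone C++ sketch below reproduces that arithmetic; scaledStackSize and dynAllocaMask are illustrative names for this note, not functions in the LLVM tree.

#include <cassert>
#include <cstdint>

// Per-lane stack bytes -> the wave-scaled value added to the SP register
// (s32) around a call. Wave64 scales by 64 (shift 6), wave32 by 32 (shift 5).
constexpr uint32_t scaledStackSize(uint32_t PerLaneBytes, uint32_t WaveSize) {
  return PerLaneBytes * WaveSize;
}

// Mask operand of the G_PTRMASK emitted when a dynamic alloca requests more
// than the default stack alignment: -(align in bytes * wavefront size).
constexpr int32_t dynAllocaMask(uint32_t AlignBytes, uint32_t WaveSize) {
  return -static_cast<int32_t>(AlignBytes * WaveSize);
}

int main() {
  // nested-calls.ll: a 16-byte per-lane frame was 0x400 on wave64; the
  // 8-byte frame after this patch is 0x200.
  assert(scaledStackSize(16, 64) == 0x400);
  assert(scaledStackSize(8, 64) == 0x200);
  // wave32.ll (GFX1032): the same frames are 0x200 and 0x100 at wave32.
  assert(scaledStackSize(16, 32) == 0x200);
  assert(scaledStackSize(8, 32) == 0x100);
  // regbankselect-dyn-stackalloc.mir: with a 4-byte default stack alignment,
  // align 8 now needs realignment, mask -512 on wave64 and -256 on wave32.
  assert(dynAllocaMask(8, 64) == -512);
  assert(dynAllocaMask(8, 32) == -256);
  // align 32 realigns on HSA and non-HSA alike: -2048 (wave64), -1024 (wave32).
  assert(dynAllocaMask(32, 64) == -2048);
  assert(dynAllocaMask(32, 32) == -1024);
  return 0;
}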