Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -78,7 +78,7 @@ bool EnablePromoteAlloca; bool HasTrigReducedRange; unsigned MaxWavesPerEU; - int LocalMemorySize; + unsigned LocalMemorySize; char WavefrontSizeLog2; public: @@ -202,7 +202,7 @@ return WavefrontSizeLog2; } - int getLocalMemorySize() const { + unsigned getLocalMemorySize() const { return LocalMemorySize; } Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -11425,6 +11425,13 @@ Known.Zero.setHighBits(Size - getSubtarget()->getWavefrontSizeLog2()); break; } + case Intrinsic::amdgcn_groupstaticsize: { + // Bits above the subtarget's maximum LDS size are always zero. The + // actual static size can't be used for a tighter bound because it is + // not known to be accurate at any given point. + Known.Zero.setHighBits(countLeadingZeros(getSubtarget()->getLocalMemorySize())); + break; + } default: break; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --- name: remove_and_255_zextload @@ -180,3 +180,86 @@ $vgpr0 = COPY %and ... + +# Test that the known bits of groupstaticsize are bounded by the maximum LDS size: with -mcpu=gfx900 a 131071 mask is redundant, but a 65535 mask is not. 
+--- +name: remove_and_65535_groupstaticsize +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: remove_and_65535_groupstaticsize + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + ; CHECK: %mask:_(s32) = G_CONSTANT i32 65535 + ; CHECK: %and:_(s32) = G_AND %lds_size, %mask + ; CHECK: $vgpr0 = COPY %and(s32) + %ptr:_(p1) = COPY $vgpr0_vgpr1 + %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + %mask:_(s32) = G_CONSTANT i32 65535 + %and:_(s32) = G_AND %lds_size, %mask + $vgpr0 = COPY %and + +... + +--- +name: remove_and_131071_groupstaticsize +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: remove_and_131071_groupstaticsize + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + ; CHECK: $vgpr0 = COPY %lds_size(s32) + %ptr:_(p1) = COPY $vgpr0_vgpr1 + %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + %mask:_(s32) = G_CONSTANT i32 131071 + %and:_(s32) = G_AND %lds_size, %mask + $vgpr0 = COPY %and + +... + +--- +name: no_remove_and_65536_groupstaticsize +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: no_remove_and_65536_groupstaticsize + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + ; CHECK: %mask:_(s32) = G_CONSTANT i32 65536 + ; CHECK: %and:_(s32) = G_AND %lds_size, %mask + ; CHECK: $vgpr0 = COPY %and(s32) + %ptr:_(p1) = COPY $vgpr0_vgpr1 + %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + %mask:_(s32) = G_CONSTANT i32 65536 + %and:_(s32) = G_AND %lds_size, %mask + $vgpr0 = COPY %and + +... 
+ +--- +name: no_remove_and_32767_groupstaticsize +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: no_remove_and_32767_groupstaticsize + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + ; CHECK: %mask:_(s32) = G_CONSTANT i32 32767 + ; CHECK: %and:_(s32) = G_AND %lds_size, %mask + ; CHECK: $vgpr0 = COPY %and(s32) + %ptr:_(p1) = COPY $vgpr0_vgpr1 + %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + %mask:_(s32) = G_CONSTANT i32 32767 + %and:_(s32) = G_AND %lds_size, %mask + $vgpr0 = COPY %and + +...