Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -129,6 +129,10 @@
     return KB;
   }
 
+  MachineIRBuilder &getBuilder() const {
+    return Builder;
+  }
+
   const TargetLowering &getTargetLowering() const;
 
   /// \returns true if the combiner is running pre-legalization.
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -157,6 +157,22 @@
          [{ return Helper.matchSextInRegOfLoad(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>;
 
+def sext_inreg_to_zext_inreg : GICombineRule<
+  (defs root:$dst),
+  (match
+    (G_SEXT_INREG $dst, $src, $imm):$root,
+    [{
+      unsigned BitWidth = MRI.getType(${src}.getReg()).getScalarSizeInBits();
+      return Helper.getKnownBits()->maskedValueIsZero(${src}.getReg(),
+               APInt::getOneBitSet(BitWidth, ${imm}.getImm() - 1)); }]),
+  (apply [{
+    Helper.getBuilder().setInstrAndDebugLoc(*${root});
+    Helper.getBuilder().buildZExtInReg(${dst}, ${src}, ${imm}.getImm());
+    ${root}->eraseFromParent();
+    return true;
+  }])
+>;
+
 def combine_indexed_load_store : GICombineRule<
   (defs root:$root, indexed_load_store_matchdata:$matchinfo),
   (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
@@ -1027,7 +1043,8 @@
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
-  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits]>;
+  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+  sext_inreg_to_zext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
                                                narrow_binop_feeding_and]>;
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir
@@ -0,0 +1,111 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: sext_inreg_i32_8_and_neg255
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_i32_8_and_neg255
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: %inreg:_(s32) = G_AND %load, [[C]]
+    ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+    %ptr:_(p1) = COPY $vgpr0_vgpr1
+    %load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4)
+    %mask:_(s32) = G_CONSTANT i32 -255
+    %and:_(s32) = G_AND %load, %mask
+    %inreg:_(s32) = G_SEXT_INREG %and, 8
+    $vgpr0 = COPY %inreg
+
+...
+
+---
+name: sext_inreg_i32_8_and_255
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_i32_8_and_255
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1)
+    ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: %and:_(s32) = G_AND %load, %mask
+    ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %and, 8
+    ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+    %ptr:_(p1) = COPY $vgpr0_vgpr1
+    %load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4)
+    %mask:_(s32) = G_CONSTANT i32 255
+    %and:_(s32) = G_AND %load, %mask
+    %inreg:_(s32) = G_SEXT_INREG %and, 8
+    $vgpr0 = COPY %inreg
+
+...
+
+---
+name: sext_inreg_v2i32_8_and_neg255
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_v2i32_8_and_neg255
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1)
+    ; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 -255
+    ; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32)
+    ; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: %inreg:_(<2 x s32>) = G_AND %and, [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>)
+    %ptr:_(p1) = COPY $vgpr0_vgpr1
+    %load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8)
+    %mask_elt:_(s32) = G_CONSTANT i32 -255
+    %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt
+    %and:_(<2 x s32>) = G_AND %load, %mask
+    %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
+    $vgpr0_vgpr1 = COPY %inreg
+
+...
+
+---
+name: sext_inreg_v2i32_8_and_255
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: sext_inreg_v2i32_8_and_255
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1)
+    ; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32)
+    ; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask
+    ; CHECK-NEXT: %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>)
+    %ptr:_(p1) = COPY $vgpr0_vgpr1
+    %load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8)
+    %mask_elt:_(s32) = G_CONSTANT i32 255
+    %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt
+    %and:_(<2 x s32>) = G_AND %load, %mask
+    %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
+    $vgpr0_vgpr1 = COPY %inreg
+
+...
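Note (not part of the patch): the new rule rewrites G_SEXT_INREG as a zero-extension whenever KnownBits proves that bit (Width - 1), the sign bit of the extended field, is already zero; buildZExtInReg then lowers that to a G_AND with the low-bits mask. The following standalone C++ sketch uses hypothetical helper names and no LLVM APIs; it only illustrates the equivalence on the pattern of the first MIR test above, where the input is pre-masked with -255 so bit 7 is known zero.

// Standalone illustration only; sextInReg/zextInReg are hypothetical helpers,
// not LLVM functions.
#include <cassert>
#include <cstdint>

// Sign-extend the low Width bits of X to 32 bits (what G_SEXT_INREG computes).
static int32_t sextInReg(uint32_t X, unsigned Width) {
  unsigned Shift = 32 - Width;
  return (int32_t)(X << Shift) >> Shift;
}

// Zero-extend the low Width bits of X to 32 bits; the combine's
// buildZExtInReg call emits the equivalent G_AND with this low-bits mask.
static uint32_t zextInReg(uint32_t X, unsigned Width) {
  return Width == 32 ? X : (X & ((1u << Width) - 1));
}

int main() {
  // Mirror sext_inreg_i32_8_and_neg255: AND with -255 (0xFFFFFF01) clears
  // bits 1-7, so bit 7 (the sign bit of the 8-bit field) is known zero and
  // sign- and zero-extension agree for every possible loaded value.
  for (uint32_t Load = 0; Load <= 0xFFFF; ++Load) {
    uint32_t X = Load & 0xFFFFFF01u;
    assert(sextInReg(X, 8) == (int32_t)zextInReg(X, 8));
  }
  return 0;
}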