Index: llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h +++ llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h @@ -34,6 +34,10 @@ /// Cache maintained during a computeKnownBits request. SmallDenseMap<Register, KnownBits, 16> ComputeKnownBitsCache; + void computeKnownBitsMin(Register Src0, Register Src1, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth = 0); + public: GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6); virtual ~GISelKnownBits() = default; Index: llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -94,6 +94,25 @@ << "\n"; } +/// Compute known bits for the intersection of \p Src0 and \p Src1 +void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1, +                                         KnownBits &Known, +                                         const APInt &DemandedElts, +                                         unsigned Depth) { +  // Test src1 first, since we canonicalize simpler expressions to the RHS. +  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth); + +  // If we don't know any bits, early out. +  if (Known.isUnknown()) +    return; + +  KnownBits Known2; +  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth); + +  // Only known if known in both the LHS and RHS. +  Known &= Known2; +} + void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth) { @@ -284,15 +303,16 @@ break; } case TargetOpcode::G_SELECT: { - computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts, - Depth + 1); - // If we don't know any bits, early out. - if (Known.isUnknown()) - break; - computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, - Depth + 1); - // Only known if known in both the LHS and RHS. 
- Known &= Known2; + computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(), + Known, DemandedElts, Depth + 1); + break; + } + case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMAX: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: { + computeKnownBitsMin(MI.getOperand(1).getReg(), MI.getOperand(2).getReg(), + Known, DemandedElts, Depth + 1); break; } case TargetOpcode::G_FCMP: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir @@ -20,3 +20,163 @@ $vgpr0 = COPY %and ... + +--- +name: remove_and_255_smin_zextload +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: remove_and_255_smin_zextload + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1 + ; CHECK: $vgpr0 = COPY %smin(s32) + %ptr0:_(p1) = COPY $vgpr0_vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %smin:_(s32) = G_SMIN %load0, %load1 + %mask:_(s32) = G_CONSTANT i32 255 + %and:_(s32) = G_AND %smin, %mask + $vgpr0 = COPY %and + +... 
+ +--- +name: remove_and_255_smax_zextload +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: remove_and_255_smax_zextload + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %smax:_(s32) = G_SMAX %load0, %load1 + ; CHECK: $vgpr0 = COPY %smax(s32) + %ptr0:_(p1) = COPY $vgpr0_vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %smax:_(s32) = G_SMAX %load0, %load1 + %mask:_(s32) = G_CONSTANT i32 255 + %and:_(s32) = G_AND %smax, %mask + $vgpr0 = COPY %and + +... + +--- +name: remove_and_255_umin_zextload +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: remove_and_255_umin_zextload + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %umin:_(s32) = G_UMIN %load0, %load1 + ; CHECK: $vgpr0 = COPY %umin(s32) + %ptr0:_(p1) = COPY $vgpr0_vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %umin:_(s32) = G_UMIN %load0, %load1 + %mask:_(s32) = G_CONSTANT i32 255 + %and:_(s32) = G_AND %umin, %mask + $vgpr0 = COPY %and + +... 
+ +--- +name: remove_and_255_umax_zextload +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: remove_and_255_umax_zextload + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %umax:_(s32) = G_UMAX %load0, %load1 + ; CHECK: $vgpr0 = COPY %umax(s32) + %ptr0:_(p1) = COPY $vgpr0_vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %umax:_(s32) = G_UMAX %load0, %load1 + %mask:_(s32) = G_CONSTANT i32 255 + %and:_(s32) = G_AND %umax, %mask + $vgpr0 = COPY %and + +... + +# Don't have enough known bits for lhs +--- +name: remove_and_255_smin_fail_lhs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: remove_and_255_smin_fail_lhs + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %load0:_(s32) = G_LOAD %ptr0(p1) :: (load 4, addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1 + ; CHECK: $vgpr0 = COPY %smin(s32) + %ptr0:_(p1) = COPY $vgpr0_vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %load0:_(s32) = G_LOAD %ptr0 :: (load 4, addrspace 1, align 4) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %smin:_(s32) = G_SMIN %load0, %load1 + %mask:_(s32) = G_CONSTANT i32 255 + %and:_(s32) = G_AND %smin, %mask + $vgpr0 = COPY %and + +... 
+ +# Don't have enough known bits for rhs +--- +name: remove_and_255_smin_fail_rhs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: remove_and_255_smin_fail_rhs + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) + ; CHECK: %load1:_(s32) = G_LOAD %ptr1(p1) :: (load 4, addrspace 1) + ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1 + ; CHECK: %mask:_(s32) = G_CONSTANT i32 255 + ; CHECK: %and:_(s32) = G_AND %smin, %mask + ; CHECK: $vgpr0 = COPY %and(s32) + %ptr0:_(p1) = COPY $vgpr0_vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) + %load1:_(s32) = G_LOAD %ptr1 :: (load 4, addrspace 1, align 4) + %smin:_(s32) = G_SMIN %load0, %load1 + %mask:_(s32) = G_CONSTANT i32 255 + %and:_(s32) = G_AND %smin, %mask + $vgpr0 = COPY %and + +...