Compute sign bits through G_SELECT in GISelKnownBits.

GISelKnownBits.h declares a new helper, computeNumSignBitsMin(Src0, Src1,
DemandedElts, Depth = 0), ahead of the class's public section.

GISelKnownBits.cpp defines that helper. It calls computeNumSignBits on Src1
first and returns 1 straight away when that result is 1, so Src0 is not
queried in that case; otherwise it returns the std::min of the two results.
computeNumSignBits gains a G_SELECT case that passes operands 2 and 3 of the
instruction (the two value inputs in the tests below, operand 1 being the s1
condition) to the helper with Depth + 1.

combine-sext-inreg.mir gains three tests built around
G_SEXT_INREG (G_SELECT cond, a, b), 8:
  sext_inreg_s32_select_sextload_from_1: both inputs are 1-byte G_SEXTLOADs;
    the expected output no longer contains the G_SEXT_INREG.
  ..._fail_lhs / ..._fail_rhs: one input is a 4-byte G_LOAD; the expected
    output still contains the G_SEXT_INREG.

NOTE(review): the line breaks and indentation of this patch were reconstructed
from a whitespace-collapsed copy. Confirm context-line whitespace against the
tree before applying, or apply with whitespace-insensitive matching.

Index: llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
+++ llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -38,6 +38,9 @@
                            const APInt &DemandedElts,
                            unsigned Depth = 0);
 
+  unsigned computeNumSignBitsMin(Register Src0, Register Src1,
+                                 const APInt &DemandedElts, unsigned Depth = 0);
+
 public:
   GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6);
   virtual ~GISelKnownBits() = default;
Index: llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -488,6 +488,17 @@
   ComputeKnownBitsCache[R] = Known;
 }
 
+/// Return the smaller of the sign bit counts of \p Src0 and \p Src1.
+unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
+                                               const APInt &DemandedElts,
+                                               unsigned Depth) {
+  // Test src1 first, since we canonicalize simpler expressions to the RHS.
+  unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
+  if (Src1SignBits == 1)
+    return 1;
+  return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
+}
+
 unsigned GISelKnownBits::computeNumSignBits(Register R,
                                             const APInt &DemandedElts,
                                             unsigned Depth) {
@@ -568,6 +579,11 @@
     return NumSrcSignBits - (NumSrcBits - DstTyBits);
     break;
   }
+  case TargetOpcode::G_SELECT: {
+    return computeNumSignBitsMin(MI.getOperand(2).getReg(),
+                                 MI.getOperand(3).getReg(), DemandedElts,
+                                 Depth + 1);
+  }
   case TargetOpcode::G_INTRINSIC:
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
   default: {
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir
@@ -242,3 +242,98 @@
     $vgpr0 = COPY %2
 
 ...
+
+---
+name: sext_inreg_s32_select_sextload_from_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
+
+    ; GCN-LABEL: name: sext_inreg_s32_select_sextload_from_1
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]]
+    ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[SEXTLOAD1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY1]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SEXTLOAD]], [[SEXTLOAD1]]
+    ; GCN: $vgpr0 = COPY [[SELECT]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(p1) = COPY $vgpr2_vgpr3
+    %2:_(s32) = COPY $vgpr4
+    %3:_(s32) = COPY $vgpr5
+    %4:_(s1) = G_ICMP intpred(eq), %2, %3
+    %5:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1)
+    %6:_(s32) = G_SEXTLOAD %1 :: (load 1, addrspace 1)
+    %7:_(s32) = G_SELECT %4, %5, %6
+    %8:_(s32) = G_SEXT_INREG %7, 8
+    $vgpr0 = COPY %8
+
+...
+
+---
+name: sext_inreg_s32_select_sextload_from_1_fail_lhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
+
+    ; GCN-LABEL: name: sext_inreg_s32_select_sextload_from_1_fail_lhs
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]]
+    ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY1]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[LOAD]], [[SEXTLOAD]]
+    ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT]], 8
+    ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(p1) = COPY $vgpr2_vgpr3
+    %2:_(s32) = COPY $vgpr4
+    %3:_(s32) = COPY $vgpr5
+    %4:_(s1) = G_ICMP intpred(eq), %2, %3
+    %5:_(s32) = G_LOAD %0 :: (load 4, addrspace 1)
+    %6:_(s32) = G_SEXTLOAD %1 :: (load 1, addrspace 1)
+    %7:_(s32) = G_SELECT %4, %5, %6
+    %8:_(s32) = G_SEXT_INREG %7, 8
+    $vgpr0 = COPY %8
+
+...
+
+---
+name: sext_inreg_s32_select_sextload_from_1_fail_rhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
+
+    ; GCN-LABEL: name: sext_inreg_s32_select_sextload_from_1_fail_rhs
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]]
+    ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (load 4, addrspace 1)
+    ; GCN: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SEXTLOAD]], [[LOAD]]
+    ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT]], 8
+    ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(p1) = COPY $vgpr2_vgpr3
+    %2:_(s32) = COPY $vgpr4
+    %3:_(s32) = COPY $vgpr5
+    %4:_(s1) = G_ICMP intpred(eq), %2, %3
+    %5:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1)
+    %6:_(s32) = G_LOAD %1 :: (load 4, addrspace 1)
+    %7:_(s32) = G_SELECT %4, %5, %6
+    %8:_(s32) = G_SEXT_INREG %7, 8
+    $vgpr0 = COPY %8
+
+...