Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -553,6 +553,14 @@
   /// or false constant based off of KnownBits information.
   bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo);
 
+  /// \returns true if a G_ICMP \p MI can be replaced with its LHS based off of
+  /// KnownBits information. On success, \p MatchInfo is set to a callback
+  /// which builds the replacement for \p MI; it is meant to be run through
+  /// applyBuildFn.
+  bool
+  matchICmpToLHSKnownBits(MachineInstr &MI,
+                          std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   bool matchBitfieldExtractFromSExtInReg(
       MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
   /// Match: and (lshr x, cst), mask -> ubfx x, cst, width
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -644,6 +644,12 @@
     [{ return Helper.matchICmpToTrueFalseKnownBits(*${d}, ${matchinfo}); }]),
   (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
 
+def icmp_to_lhs_known_bits : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_ICMP):$root,
+         [{ return Helper.matchICmpToLHSKnownBits(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
 def bitfield_extract_from_and : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
   (match (wip_match_opcode G_AND):$root,
@@ -702,7 +708,7 @@
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
-  zext_trunc_fold, icmp_to_true_false_known_bits]>;
+  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
                                                narrow_binop_feeding_and]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4118,6 +4118,57 @@
   return true;
 }
 
+/// Match a G_ICMP that merely re-tests a value already known to be 0 or 1
+/// (`ne %x, 0` or `eq %x, 1`) on a target where a true compare result is 1.
+/// On success, \p MatchInfo is set to a callback which defines the compare's
+/// destination register from %x using a COPY, G_TRUNC or G_ZEXT.
+bool CombinerHelper::matchICmpToLHSKnownBits(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+  // Given:
+  //
+  // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+  // %cmp = G_ICMP ne %x, 0
+  //
+  // Or:
+  //
+  // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+  // %cmp = G_ICMP eq %x, 1
+  //
+  // We can replace %cmp with %x assuming true is 1 on the target.
+  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+  if (!CmpInst::isEquality(Pred))
+    return false;
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  // %x can only stand in for the compare result if "true" is encoded as 1.
+  if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
+                     /* IsFP = */ false) != 1)
+    return false;
+  // The RHS must be the constant 1 for an eq compare and 0 for a ne compare.
+  int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
+  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
+    return false;
+  Register LHS = MI.getOperand(2).getReg();
+  // Only match when KnownBits reports a minimum of 0 and a maximum of 1.
+  auto KnownLHS = KB->getKnownBits(LHS);
+  if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
+    return false;
+  // Make sure replacing Dst with the LHS is a legal operation.
+  LLT LHSTy = MRI.getType(LHS);
+  const auto LHSSize = LHSTy.getSizeInBits();
+  const auto DstSize = DstTy.getSizeInBits();
+  // Same size: COPY. Dst narrower than the LHS: G_TRUNC. Dst wider: G_ZEXT.
+  unsigned Op = TargetOpcode::COPY;
+  if (DstSize != LHSSize)
+    Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
+  if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
+    return false;
+  // Capture by value: the callback is stored in MatchInfo for the apply step.
+  MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
+  return true;
+}
+
 /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool CombinerHelper::matchBitfieldExtractFromSExtInReg( MachineInstr &MI, std::function &MatchInfo) { Index: llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir @@ -0,0 +1,206 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py + +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="icmp_to_lhs_known_bits" -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +... +--- +name: apply_ne +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: apply_ne + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %one:_(s32) = G_CONSTANT i32 1 + ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one + ; CHECK: %cmp:_(s1) = G_TRUNC %known_zero_or_one(s32) + ; CHECK: %ext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK: $w0 = COPY %ext(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %one:_(s32) = G_CONSTANT i32 1 + %known_zero_or_one:_(s32) = G_AND %x, %one + %zero:_(s32) = G_CONSTANT i32 0 + %cmp:_(s1) = G_ICMP intpred(ne), %known_zero_or_one(s32), %zero + %ext:_(s32) = G_ZEXT %cmp(s1) + $w0 = COPY %ext(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: apply_eq +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: apply_eq + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %one:_(s32) = G_CONSTANT i32 1 + ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one + ; CHECK: %cmp:_(s1) = G_TRUNC %known_zero_or_one(s32) + ; CHECK: %ext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK: $w0 = COPY %ext(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %one:_(s32) = G_CONSTANT i32 1 + %known_zero_or_one:_(s32) = G_AND %x, %one + %cmp:_(s1) = G_ICMP intpred(eq), %known_zero_or_one(s32), %one + %ext:_(s32) = G_ZEXT %cmp(s1) + $w0 = COPY %ext(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_apply_wrong_cst_eq +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w0 + ; Wrong constant on the RHS of the compare. + + ; CHECK-LABEL: name: dont_apply_wrong_cst_eq + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %one:_(s32) = G_CONSTANT i32 1 + ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one + ; CHECK: %wrong_cst:_(s32) = G_CONSTANT i32 10 + ; CHECK: %cmp:_(s1) = G_ICMP intpred(eq), %known_zero_or_one(s32), %wrong_cst + ; CHECK: %ext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK: $w0 = COPY %ext(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %one:_(s32) = G_CONSTANT i32 1 + %known_zero_or_one:_(s32) = G_AND %x, %one + %wrong_cst:_(s32) = G_CONSTANT i32 10 + %cmp:_(s1) = G_ICMP intpred(eq), %known_zero_or_one(s32), %wrong_cst + %ext:_(s32) = G_ZEXT %cmp(s1) + $w0 = COPY %ext(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_apply_wrong_cst_ne +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w0 + ; Wrong constant on the RHS of the compare. 
+ + ; CHECK-LABEL: name: dont_apply_wrong_cst_ne + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %one:_(s32) = G_CONSTANT i32 1 + ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one + ; CHECK: %wrong_cst:_(s32) = G_CONSTANT i32 10 + ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %known_zero_or_one(s32), %wrong_cst + ; CHECK: %ext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK: $w0 = COPY %ext(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %one:_(s32) = G_CONSTANT i32 1 + %known_zero_or_one:_(s32) = G_AND %x, %one + %wrong_cst:_(s32) = G_CONSTANT i32 10 + %cmp:_(s1) = G_ICMP intpred(ne), %known_zero_or_one(s32), %wrong_cst + %ext:_(s32) = G_ZEXT %cmp(s1) + $w0 = COPY %ext(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_apply_vector +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0 + ; True is -1 for vectors on AArch64 so we don't want to combine. + + ; CHECK-LABEL: name: dont_apply_vector + ; CHECK: liveins: $x0 + ; CHECK: %x:_(<2 x s32>) = COPY $x0 + ; CHECK: %one:_(s32) = G_CONSTANT i32 1 + ; CHECK: %one_vec:_(<2 x s32>) = G_BUILD_VECTOR %one(s32), %one(s32) + ; CHECK: %vec_and:_(<2 x s32>) = G_AND %x, %one_vec + ; CHECK: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK: %zero_vec:_(<2 x s32>) = G_BUILD_VECTOR %zero(s32), %zero(s32) + ; CHECK: %cmp:_(<2 x s1>) = G_ICMP intpred(ne), %vec_and(<2 x s32>), %zero_vec + ; CHECK: %elt:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %zero(s32) + ; CHECK: %ext:_(s32) = G_ZEXT %elt(s1) + ; CHECK: $w0 = COPY %ext(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(<2 x s32>) = COPY $x0 + %one:_(s32) = G_CONSTANT i32 1 + %one_vec:_(<2 x s32>) = G_BUILD_VECTOR %one, %one + %vec_and:_(<2 x s32>) = G_AND %x, %one_vec + %zero:_(s32) = G_CONSTANT i32 0 + %zero_vec:_(<2 x s32>) = G_BUILD_VECTOR %zero, %zero + %cmp:_(<2 x s1>) = G_ICMP intpred(ne), %vec_and(<2 x s32>), %zero_vec + %elt:_(s1) = G_EXTRACT_VECTOR_ELT %cmp, %zero + %ext:_(s32) = G_ZEXT %elt(s1) + $w0 = COPY 
%ext(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: apply_no_zext_or_trunc +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: apply_no_zext_or_trunc + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %one:_(s32) = G_CONSTANT i32 1 + ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one + ; CHECK: %cmp:_(s32) = COPY %known_zero_or_one(s32) + ; CHECK: $w0 = COPY %cmp(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %one:_(s32) = G_CONSTANT i32 1 + %known_zero_or_one:_(s32) = G_AND %x, %one + %zero:_(s32) = G_CONSTANT i32 0 + %cmp:_(s32) = G_ICMP intpred(ne), %known_zero_or_one(s32), %zero + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: apply_wide_cmp +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w0 + ; Make sure we also apply the combine to a 64-bit compare. + + ; CHECK-LABEL: name: apply_wide_cmp + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s64) = COPY $x0 + ; CHECK: %one:_(s64) = G_CONSTANT i64 1 + ; CHECK: %known_zero_or_one:_(s64) = G_AND %x, %one + ; CHECK: %cmp:_(s64) = COPY %known_zero_or_one(s64) + ; CHECK: %trunc:_(s32) = G_TRUNC %cmp(s64) + ; CHECK: $w0 = COPY %trunc(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s64) = COPY $x0 + %one:_(s64) = G_CONSTANT i64 1 + %known_zero_or_one:_(s64) = G_AND %x, %one + %zero:_(s64) = G_CONSTANT i64 0 + %cmp:_(s64) = G_ICMP intpred(ne), %known_zero_or_one(s64), %zero + %trunc:_(s32) = G_TRUNC %cmp + $w0 = COPY %trunc(s32) + RET_ReallyLR implicit $w0