diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -697,6 +697,9 @@
   bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo);
 
+  /// Fold boolean selects to logical operations.
+  bool matchSelectToLogical(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -283,6 +283,13 @@
   (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }])
 >;
 
+def select_to_logical : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SELECT):$root,
+    [{ return Helper.matchSelectToLogical(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
 // Fold x op 0 -> x
 def right_identity_zero: GICombineRule<
   (defs root:$root),
@@ -864,7 +871,8 @@
 
 def phi_combines : GICombineGroup<[extend_through_phis]>;
 
-def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
+def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
+                                      select_to_logical]>;
 
 def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
                                        mul_by_neg_one]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5344,6 +5344,83 @@
   return false;
 }
 
+/// Match a boolean G_SELECT and rewrite it as a logical operation.
+///
+/// The condition must have the same type as the true value, and that type
+/// must have a scalar size of 1 bit. Recognised forms:
+///   select Cond, Cond, F --> or Cond, F
+///   select Cond, 1, F    --> or Cond, F
+///   select Cond, T, Cond --> and Cond, T
+///   select Cond, T, 0    --> and Cond, T
+///   select Cond, T, 1    --> or (not Cond), T
+///   select Cond, 0, F    --> and (not Cond), F
+///
+/// \returns true and sets \p MatchInfo to a callback that builds the
+/// replacement into the select's destination register; returns false and
+/// leaves \p MatchInfo untouched when no form applies.
+bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
+                                          BuildFnTy &MatchInfo) {
+  GSelect &Sel = cast<GSelect>(MI);
+  Register DstReg = Sel.getReg(0);
+  Register Cond = Sel.getCondReg();
+  Register TrueReg = Sel.getTrueReg();
+  Register FalseReg = Sel.getFalseReg();
+
+  // Cheap type test first: only a select whose condition and true value share
+  // a type with 1-bit scalars is treated as a boolean select.
+  const LLT CondTy = MRI.getType(Cond);
+  const LLT OpTy = MRI.getType(TrueReg);
+  if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
+    return false;
+
+  // Look through copies so constant (or constant-splat) operands are found.
+  auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
+  auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);
+  if (!TrueDef || !FalseDef)
+    return false;
+
+  // NOTE(review): G_SELECT presumably does not propagate poison from the
+  // operand it does not pick, while G_AND/G_OR would; confirm whether the
+  // non-condition operand needs a G_FREEZE in the rewrites below.
+
+  // select Cond, Cond, F --> or Cond, F
+  // select Cond, 1, F    --> or Cond, F
+  auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
+  if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildOr(DstReg, Cond, FalseReg);
+    };
+    return true;
+  }
+
+  // select Cond, T, Cond --> and Cond, T
+  // select Cond, T, 0    --> and Cond, T
+  auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
+  if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildAnd(DstReg, Cond, TrueReg);
+    };
+    return true;
+  }
+
+  // select Cond, T, 1 --> or (not Cond), T
+  if (MaybeCstFalse && MaybeCstFalse->isOne()) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
+    };
+    return true;
+  }
+
+  // select Cond, 0, F --> and (not Cond), F
+  if (MaybeCstTrue && MaybeCstTrue->isZero()) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
+    };
+    return true;
+  }
+  return false;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -1,6 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
 # RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# REQUIRES: asserts
 ---
 # select (c, x, x) -> x
 name: test_combine_select_same_res
@@ -92,3 +93,182 @@
     %3:_(<4 x s32>) = G_SELECT %condvec, %0, %1
     $q0 = COPY %3(<4 x s32>)
 ...
+---
+# select Cond, Cond, F --> or Cond, F
+name: bool_cond_cond_false
+body: |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: bool_cond_cond_false
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %c:_(s1) = G_TRUNC %0
+    %t:_(s1) = G_TRUNC %1
+    %f:_(s1) = G_TRUNC %2
+    %sel:_(s1) = G_SELECT %c, %c, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+--- +# select Cond, 1, F --> or Cond, F +name: bool_cond_one_false +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: bool_cond_one_false + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %one:_(s1) = G_CONSTANT i1 1 + %sel:_(s1) = G_SELECT %c, %one, %f + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... +--- +# select Cond, 1, F --> or Cond, F +name: bool_cond_one_false_vector +body: | + bb.1: + liveins: $d0, $d1, $d2 + ; CHECK-LABEL: name: bool_cond_one_false_vector + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d2 + ; CHECK-NEXT: %c:_(<2 x s1>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK-NEXT: %f:_(<2 x s1>) = G_TRUNC [[COPY1]](<2 x s32>) + ; CHECK-NEXT: %sel:_(<2 x s1>) = G_OR %c, %f + ; CHECK-NEXT: %ext:_(<2 x s32>) = G_ANYEXT %sel(<2 x s1>) + ; CHECK-NEXT: $d0 = COPY %ext(<2 x s32>) + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<2 x s32>) = COPY $d2 + %c:_(<2 x s1>) = G_TRUNC %0 + %t:_(<2 x s1>) = G_TRUNC %1 + %f:_(<2 x s1>) = G_TRUNC %2 + %one:_(s1) = G_CONSTANT i1 1 + %one_vec:_(<2 x s1>) = G_BUILD_VECTOR %one, %one + %sel:_(<2 x s1>) = G_SELECT %c, %one_vec, %f + %ext:_(<2 x s32>) = G_ANYEXT %sel + $d0 = COPY %ext(<2 x s32>) +... 
+--- +# select Cond, T, Cond --> and Cond, T +name: bool_cond_true_cond +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: bool_cond_true_cond + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %one:_(s1) = G_CONSTANT i1 1 + %sel:_(s1) = G_SELECT %c, %t, %c + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... +--- +# select Cond, T, 0 --> and Cond, T +name: bool_cond_true_zero +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: bool_cond_true_zero + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %zero:_(s1) = G_CONSTANT i1 0 + %sel:_(s1) = G_SELECT %c, %t, %zero + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... 
+--- +# select Cond, T, 1 --> or (not Cond), T +name: bool_cond_true_one +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: bool_cond_true_one + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %one:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, %one + ; CHECK-NEXT: %sel:_(s1) = G_OR [[XOR]], %t + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %one:_(s1) = G_CONSTANT i1 1 + %sel:_(s1) = G_SELECT %c, %t, %one + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... +--- +# select Cond, 0, F --> and (not Cond), F +name: bool_cond_zero_false +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: bool_cond_zero_false + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, [[C]] + ; CHECK-NEXT: %sel:_(s1) = G_AND [[XOR]], %f + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %c:_(s1) = G_TRUNC %0 + %t:_(s1) = G_TRUNC %1 + %f:_(s1) = G_TRUNC %2 + %zero:_(s1) = G_CONSTANT i1 0 + %sel:_(s1) = G_SELECT %c, %zero, %f + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... 
diff --git a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll --- a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll +++ b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll @@ -2159,8 +2159,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmn w0, w1 ; GISEL-NEXT: cset w8, vs -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) @@ -2195,8 +2195,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmn x0, x1 ; GISEL-NEXT: cset w8, vs -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) @@ -2231,8 +2231,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmn w0, w1 ; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) @@ -2267,8 +2267,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmn x0, x1 ; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) @@ -2303,8 +2303,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmp w0, w1 ; GISEL-NEXT: cset w8, vs -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) @@ -2339,8 +2339,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmp x0, x1 ; GISEL-NEXT: cset w8, vs -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret 
entry: %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) @@ -2375,8 +2375,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmp w0, w1 ; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) @@ -2411,8 +2411,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmp x0, x1 ; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) @@ -2451,7 +2451,8 @@ ; GISEL-NEXT: mul w9, w0, w1 ; GISEL-NEXT: asr x8, x8, #32 ; GISEL-NEXT: cmp w8, w9, asr #31 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: eor w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) @@ -2491,7 +2492,8 @@ ; GISEL-NEXT: mul x8, x0, x1 ; GISEL-NEXT: smulh x9, x0, x1 ; GISEL-NEXT: cmp x9, x8, asr #63 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: eor w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) @@ -2526,8 +2528,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmn x0, x0 ; GISEL-NEXT: cset w8, vs -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2) @@ -2565,7 +2567,8 @@ ; GISEL-NEXT: umull x8, w0, w1 ; GISEL-NEXT: lsr x8, x8, #32 ; GISEL-NEXT: cmp w8, #0 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: eor w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) @@ -2602,7 +2605,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: umulh x8, x0, x1 ; GISEL-NEXT: cmp x8, #0 -; 
GISEL-NEXT: cset w0, eq +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: eor w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) @@ -2637,8 +2641,8 @@ ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: cmn x0, x0 ; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2)