Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -777,6 +777,19 @@
   /// (X ^ Y) != X -> Y != 0
   bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Transform:
+  /// Given bw = scalar bitwidth of the type:
+  /// select (icmp slt x, 0), x, 0 --> x & (x >> bw-1)
+  /// select (icmp sle x, 0), x, 0 --> x & (x >> bw-1)
+  bool matchSimplifySelectToAndShift(MachineInstr &MI, BuildFnTy &MatchInfo);
+  /// Transform:
+  /// Given bw = scalar bitwidth of the type:
+  /// select (icmp sgt x, 0), x, 0 --> x & ((~x) >> bw-1)
+  /// select (icmp sge x, 0), x, 0 --> x & ((~x) >> bw-1)
+  /// \note Should only be used by targets which have a single-instruction
+  /// representation of and-not (e.g. AArch64 BIC).
+  bool matchSimplifySelectToAndNotShift(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -985,6 +985,19 @@
     [{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def select_to_and_shift: GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_SELECT):$root,
+         [{ return Helper.matchSimplifySelectToAndShift(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Only enable on targets with a single-instruction "and-not" (e.g. AArch64 BIC).
+def select_to_and_not_shift: GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_SELECT):$root,
+         [{ return Helper.matchSimplifySelectToAndNotShift(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -1023,7 +1036,7 @@
 def phi_combines : GICombineGroup<[extend_through_phis]>;
 
 def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
-                                      select_to_logical]>;
+                                      select_to_logical, select_to_and_shift]>;
 
 def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
                                        mul_by_neg_one]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6081,6 +6081,80 @@
   return CmpInst::isEquality(Pred) && Y.isValid();
 }
 
+bool CombinerHelper::matchSimplifySelectToAndShift(MachineInstr &MI,
+                                                   BuildFnTy &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+  // Given bw = scalar bitwidth of the type:
+  // select (icmp slt x, 0), x, 0 --> x & (x >> bw-1)
+  // select (icmp sle x, 0), x, 0 --> x & (x >> bw-1)
+  Register Dst = MI.getOperand(0).getReg();
+  CmpInst::Predicate Pred;
+  Register X1, X2;
+  // m_c_GICmp matches its operands commutatively but does *not* swap the
+  // predicate, which would treat (icmp slt 0, x) as (icmp slt x, 0). Match
+  // both operand orders explicitly instead and canonicalize the commuted
+  // form by swapping the predicate.
+  if (mi_match(Dst, MRI,
+               m_GISelect(m_GICmp(m_Pred(Pred), m_SpecificICstOrSplat(0),
+                                  m_Reg(X1)),
+                          m_Reg(X2), m_SpecificICstOrSplat(0))))
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  else if (!mi_match(Dst, MRI,
+                     m_GISelect(m_GICmp(m_Pred(Pred), m_Reg(X1),
+                                        m_SpecificICstOrSplat(0)),
+                                m_Reg(X2), m_SpecificICstOrSplat(0))))
+    return false;
+  if (X1 != X2)
+    return false;
+  if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SLE)
+    return false;
+  LLT Ty = MRI.getType(Dst);
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto BW = B.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
+    auto Shift = B.buildAShr(Ty, X1, BW);
+    B.buildAnd(Dst, X1, Shift);
+  };
+  return true;
+}
+
+bool CombinerHelper::matchSimplifySelectToAndNotShift(MachineInstr &MI,
+                                                      BuildFnTy &MatchInfo) {
+  // This combine should only be used by targets which have a single-instruction
+  // representation of "and-not".
+  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+  // Given bw = scalar bitwidth of the type:
+  // select (icmp sgt x, 0), x, 0 --> x & ((~x) >> bw-1)
+  // select (icmp sge x, 0), x, 0 --> x & ((~x) >> bw-1)
+  Register Dst = MI.getOperand(0).getReg();
+  CmpInst::Predicate Pred;
+  Register X1, X2;
+  // As above, match both operand orders explicitly: a commutative match
+  // would not swap the (non-commutative) predicate.
+  if (mi_match(Dst, MRI,
+               m_GISelect(m_GICmp(m_Pred(Pred), m_SpecificICstOrSplat(0),
+                                  m_Reg(X1)),
+                          m_Reg(X2), m_SpecificICstOrSplat(0))))
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  else if (!mi_match(Dst, MRI,
+                     m_GISelect(m_GICmp(m_Pred(Pred), m_Reg(X1),
+                                        m_SpecificICstOrSplat(0)),
+                                m_Reg(X2), m_SpecificICstOrSplat(0))))
+    return false;
+  if (X1 != X2)
+    return false;
+  // The SGT/SGE test below already implies a signed predicate.
+  if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SGE)
+    return false;
+  LLT Ty = MRI.getType(Dst);
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto BW = B.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
+    auto Shift = B.buildAShr(Ty, X1, BW);
+    Shift = B.buildNot(Ty, Shift);
+    B.buildAnd(Dst, X1, Shift);
+  };
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -35,6 +35,7 @@
 def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   "AArch64GenPreLegalizerCombinerHelper", [all_combines,
+                                           select_to_and_not_shift,
                                            fconstant_to_constant,
                                            icmp_redundant_trunc,
                                            fold_global_offset]> {
@@ -233,8 +234,8 @@
                        mul_const, redundant_sext_inreg,
                        form_bitfield_extract, rotate_out_of_range,
                        icmp_to_true_false_known_bits, merge_unmerge,
-                       select_combines, fold_merge_to_zext,
-                       constant_fold, identity_combines,
+                       select_combines, select_to_and_not_shift,
+                       fold_merge_to_zext, constant_fold, identity_combines,
                        ptr_add_immed_chain, overlapping_and,
                        split_store_zero_128, undef_combines,
                        select_to_minmax]> {
Index: llvm/test/CodeGen/AArch64/combine-select-to-and-shift.mir 
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/combine-select-to-and-shift.mir
@@ -0,0 +1,290 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+
+# select (x <= 0 ? x : 0) --> x & (x >> bw-1), where bw = scalar bitwidth
+# select (x >= 0 ? x : 0) --> x & ((~x) >> bw-1), ~ being bitwise not
+
+...
+---
+name: sgt
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: sgt
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR %x, [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[C1]]
+    ; CHECK-NEXT: %select:_(s32) = G_AND %x, [[XOR]]
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(sgt), %x(s32), %zero
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: sgt_commuted
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: sgt_commuted
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR %x, [[C]](s32)
+    ; CHECK-NEXT: %select:_(s32) = G_AND %x, [[ASHR]]
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(sgt), %zero(s32), %x
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: sgt_vector
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: sgt_vector
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = G_ASHR %x, [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[ASHR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: %select:_(<4 x s32>) = G_AND %x, [[XOR]]
+    ; CHECK-NEXT: $q0 = COPY %select(<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %x:_(<4 x s32>) = COPY $q0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %bv_zero:_(<4 x s32>) = G_BUILD_VECTOR %zero(s32), %zero(s32), %zero(s32), %zero(s32)
+    %cmp:_(<4 x s1>) = G_ICMP intpred(sgt), %x(<4 x s32>), %bv_zero
+    %select:_(<4 x s32>) = G_SELECT %cmp(<4 x s1>), %x, %bv_zero
+    $q0 = COPY %select(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: sge
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: sge
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR %x, [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[C1]]
+    ; CHECK-NEXT: %select:_(s32) = G_AND %x, [[XOR]]
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(sge), %x(s32), %zero
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: sge_commuted
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: sge_commuted
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR %x, [[C]](s32)
+    ; CHECK-NEXT: %select:_(s32) = G_AND %x, [[ASHR]]
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(sge), %zero(s32), %x
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: slt
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: slt
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR %x, [[C]](s32)
+    ; CHECK-NEXT: %select:_(s32) = G_AND %x, [[ASHR]]
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(slt), %x(s32), %zero
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: slt_commuted
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: slt_commuted
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR %x, [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[C1]]
+    ; CHECK-NEXT: %select:_(s32) = G_AND %x, [[XOR]]
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(slt), %zero(s32), %x
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: sle
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: sle
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR %x, [[C]](s32)
+    ; CHECK-NEXT: %select:_(s32) = G_AND %x, [[ASHR]]
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(sle), %x(s32), %zero
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: sle_commuted
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: sle_commuted
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR %x, [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[C1]]
+    ; CHECK-NEXT: %select:_(s32) = G_AND %x, [[XOR]]
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(sle), %zero(s32), %x
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: dont_combine_eq
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: dont_combine_eq
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+    ; CHECK-NEXT: %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: dont_combine_unsigned
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: dont_combine_unsigned
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(ugt), %x(s32), %zero
+    ; CHECK-NEXT: %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(ugt), %x(s32), %zero
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %zero
+    $w0 = COPY %select(s32)
+...
+---
+name: dont_combine_nonzero
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: dont_combine_nonzero
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sgt), %x(s32), %one
+    ; CHECK-NEXT: %select:_(s32) = G_SELECT %cmp(s1), %x, %one
+    ; CHECK-NEXT: $w0 = COPY %select(s32)
+    %x:_(s32) = COPY $w0
+    %one:_(s32) = G_CONSTANT i32 1
+    %cmp:_(s1) = G_ICMP intpred(sgt), %x(s32), %one
+    %select:_(s32) = G_SELECT %cmp(s1), %x, %one
+    $w0 = COPY %select(s32)