diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -228,6 +228,9 @@
   /// Optimize (cond ? x : x) -> x
   bool matchSelectSameVal(MachineInstr &MI);
 
+  /// Optimize (x op x) -> x
+  bool matchBinOpSameVal(MachineInstr &MI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -200,6 +200,14 @@
   (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
 >;
 
+// Fold (x op x) -> x
+def binop_same_val: GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_AND, G_OR):$root,
+         [{ return Helper.matchBinOpSameVal(*${root}); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -207,7 +215,8 @@
                                      propagate_undef_all_ops,
                                      propagate_undef_shuffle_mask]>;
 
-def identity_combines : GICombineGroup<[select_same_val, right_identity_zero]>;
+def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
+                                        binop_same_val]>;
 
 def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl]>;
 def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1574,6 +1574,12 @@
                         MRI);
 }
 
+bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
+  return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
+         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
+                       MRI);
+}
+
 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -153,7 +153,9 @@
 define void @nonpow2_or_narrowing() {
   %a = add i128 undef, undef
   %b = trunc i128 %a to i96
-  %dummy = or i96 %b, %b
+  %a2 = add i128 undef, undef
+  %b2 = trunc i128 %a2 to i96
+  %dummy = or i96 %b, %b2
   store i96 %dummy, i96* undef
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir
@@ -0,0 +1,96 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+name: or_same
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; Fold: x or x -> x
+    ; CHECK-LABEL: name: or_same
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:_(s64) = COPY $x0
+    ; CHECK: $x0 = COPY %copy(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:_(s64) = COPY $x0
+    %or:_(s64) = G_OR %copy, %copy
+    $x0 = COPY %or(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: and_same
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; Fold: x and x -> x
+
+    ; CHECK-LABEL: name: and_same
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:_(s64) = COPY $x0
+    ; CHECK: $x0 = COPY %copy(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:_(s64) = COPY $x0
+    %and:_(s64) = G_AND %copy, %copy
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: and_same2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; We can fold when the LHS and RHS are guaranteed to be identical.
+
+    ; CHECK-LABEL: name: and_same2
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %copy1:_(s64) = COPY $x0
+    ; CHECK: %copy2:_(s64) = COPY $x1
+    ; CHECK: %or:_(s64) = G_OR %copy1, %copy2
+    ; CHECK: $x0 = COPY %or(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy1:_(s64) = COPY $x0
+    %copy2:_(s64) = COPY $x1
+    %or:_(s64) = G_OR %copy1, %copy2
+    %same_as_or:_(s64) = COPY %or(s64)
+    %and:_(s64) = G_AND %or, %same_as_or
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: or_and_not_same
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+
+    ; None of the G_ORs or G_ANDs should be eliminated here, because their LHS
+    ; and RHS values are different.
+
+    ; CHECK-LABEL: name: or_and_not_same
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %copy1:_(s64) = COPY $x0
+    ; CHECK: %copy2:_(s64) = COPY $x1
+    ; CHECK: %copy3:_(s64) = COPY $x2
+    ; CHECK: %or1:_(s64) = G_OR %copy1, %copy2
+    ; CHECK: %or2:_(s64) = G_OR %copy1, %copy3
+    ; CHECK: %and:_(s64) = G_AND %or1, %or2
+    ; CHECK: $x0 = COPY %and(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy1:_(s64) = COPY $x0
+    %copy2:_(s64) = COPY $x1
+    %copy3:_(s64) = COPY $x2
+    %or1:_(s64) = G_OR %copy1, %copy2
+    %or2:_(s64) = G_OR %copy1, %copy3
+    %and:_(s64) = G_AND %or1, %or2
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
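
Note on the transform itself: binop_same_val relies on the idempotent laws x AND x = x and x OR x = x, which is why the apply step can simply replace the instruction's def with its first source operand (replaceSingleDefInstWithOperand(*${root}, 1)). Below is a minimal, self-contained C++ sketch of that fold on a toy SSA form; it is an illustration only, not LLVM's API, and names such as Inst and foldSameOperands are hypothetical. The real matchEqualDefs is also stronger than the literal same-value check shown here: it looks through COPYs, which is what the and_same2 test exercises.

// Toy sketch of the (x op x) -> x fold; all names here are hypothetical.
#include <cassert>
#include <cstdio>
#include <optional>

enum class Opcode { And, Or, Copy };

// One instruction in a toy SSA form: it defines value Def from up to two
// source values. In SSA, equal value numbers imply identical definitions.
struct Inst {
  Opcode Op;
  int Def;
  int Lhs = -1;
  int Rhs = -1;
};

// (x AND x) -> x and (x OR x) -> x hold because AND and OR are idempotent,
// so when both sources are the same SSA value the def can be replaced by it.
std::optional<int> foldSameOperands(const Inst &I) {
  if ((I.Op == Opcode::And || I.Op == Opcode::Or) && I.Lhs == I.Rhs)
    return I.Lhs; // all uses of I.Def may be rewritten to use I.Lhs
  return std::nullopt;
}

int main() {
  Inst Or{Opcode::Or, /*Def=*/1, /*Lhs=*/0, /*Rhs=*/0}; // %1 = or %0, %0
  if (auto Repl = foldSameOperands(Or))
    std::printf("replace %%%d with %%%d\n", Or.Def, *Repl); // replace %1 with %0

  Inst Mixed{Opcode::Or, /*Def=*/3, /*Lhs=*/0, /*Rhs=*/2}; // %3 = or %0, %2
  assert(!foldSameOperands(Mixed) && "different operands must not fold");
  return 0;
}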