Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -210,6 +210,16 @@
   /// Replace an instruction with a G_IMPLICIT_DEF.
   bool replaceInstWithUndef(MachineInstr &MI);
 
+  /// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
+  bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
+
+  /// Return true if \p MOP1 and \p MOP2 are register operands defined by
+  /// equivalent instructions.
+  bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
+
+  /// Optimize (cond ? x : x) -> x
+  bool matchSelectSameVal(MachineInstr &MI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -184,6 +184,14 @@
     [{ return Helper.matchUndefShuffleVectorMask(*${root}); }]),
   (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
 
+// Fold (cond ? x : x) -> x
+def select_same_val: GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_SELECT):$root,
+    [{ return Helper.matchSelectSameVal(*${root}); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -193,4 +201,5 @@
 
 def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl]>;
 def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
-    combines_for_extload, combine_indexed_load_store, undef_combines]>;
+    combines_for_extload, combine_indexed_load_store, undef_combines,
+    select_same_val]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1503,6 +1503,43 @@
   return all_of(Mask, [](int Elt) { return Elt < 0; });
 }
 
+bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
+                                    const MachineOperand &MOP2) {
+  if (!MOP1.isReg() || !MOP2.isReg())
+    return false;
+  MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
+  if (!I1)
+    return false;
+  MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
+  if (!I2)
+    return false;
+
+  // On the off-chance that there's some target instruction feeding into the
+  // select, let's use produceSameValue instead of isIdenticalTo.
+  return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
+}
+
+bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+                                                     unsigned OpIdx) {
+  assert(OpIdx < MI.getNumOperands() && "OpIdx is out of bounds");
+  assert(MI.getOperand(OpIdx).isReg() && "Expected register operand?");
+  assert(MI.getNumDefs() == 1 && "Expected one def?");
+  Register OldReg = MI.getOperand(0).getReg();
+  Register Replacement = MI.getOperand(OpIdx).getReg();
+  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+  MI.eraseFromParent();
+  replaceRegWith(MRI, OldReg, Replacement);
+  return true;
+}
+
+bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+  // Match (cond ? x : x)
+  return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
+         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
+                       MRI);
+}
+
 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-select.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-select.mir
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: self
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; Optimize (cond ? %a : %a) -> %a
+    ; CHECK-LABEL: name: self
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: $w0 = COPY %a(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %a:_(s32) = COPY $w0
+    %cond_wide:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    %select:_(s32) = G_SELECT %cond(s1), %a, %a
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: self_with_copy
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; Optimize (cond ? %a : %b) -> %a
+    ;
+    ; This shows that we are looking through copies correctly and can deduce
+    ; that %b is a copy from %a.
+    ;
+    ; CHECK-LABEL: name: self_with_copy
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: $w0 = COPY %a(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %a:_(s32) = COPY $w0
+    %b:_(s32) = COPY %a
+    %cond_wide:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    %select:_(s32) = G_SELECT %cond(s1), %a, %b
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: self_with_equivalent
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; Optimize (cond ? %a : %b) -> %a
+    ;
+    ; This shows that we can detect when %a == %b, even though they define
+    ; different virtual registers.
+    ;
+    ; CHECK-LABEL: name: self_with_equivalent
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: $w0 = COPY %a(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %a:_(s32) = COPY $w0
+    %b:_(s32) = COPY $w0
+    %cond_wide:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    %select:_(s32) = G_SELECT %cond(s1), %a, %b
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: self_not_equivalent
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; In this case, the copies are not equivalent, so there is no optimization.
+    ; CHECK-LABEL: name: self_not_equivalent
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: %b:_(s32) = COPY $w1
+    ; CHECK: %cond_wide:gpr(s32) = COPY $w1
+    ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    ; CHECK: %select:_(s32) = G_SELECT %cond(s1), %a, %b
+    ; CHECK: $w0 = COPY %select(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %a:_(s32) = COPY $w0
+    %b:_(s32) = COPY $w1
+    %cond_wide:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    %select:_(s32) = G_SELECT %cond(s1), %a, %b
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
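
For reference, this is the IR-level shape the new combine targets. The function below is a minimal, hypothetical reproducer and is not part of the patch: after IRTranslator, the select becomes roughly %sel:_(s32) = G_SELECT %cond(s1), %x, %x, which select_same_val should fold to a direct use of %x. One way to observe the fold is llc -mtriple aarch64 -global-isel -stop-after=aarch64-prelegalizer-combiner.

    define i32 @select_same_val_example(i1 %cond, i32 %x) {
      ; Both value operands are %x, so the eventual G_SELECT is redundant
      ; and the result can be replaced with %x outright.
      %sel = select i1 %cond, i32 %x, i32 %x
      ret i32 %sel
    }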