diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -756,6 +756,7 @@ bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo); bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo); bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo); + bool matchBitcastBitcastFold(MachineInstr &MI, Register &MatchInfo); /// Transform: /// (x + y) - y -> x diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -967,6 +967,12 @@ [{ return Helper.matchSubAddSameReg(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; +def bitcast_bitcast_fold : GICombineRule< + (defs root:$op, register_matchinfo:$matchinfo), + (match (wip_match_opcode G_BITCAST):$op, + [{ return Helper.matchBitcastBitcastFold(*${op}, ${matchinfo}); }]), + (apply [{ Helper.replaceSingleDefInstWithReg(*${op}, ${matchinfo}); }])>; + def select_to_minmax: GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_SELECT):$root, @@ -993,7 +999,8 @@ fneg_fneg_fold, right_identity_one, add_sub_reg, buildvector_identity_fold, trunc_buildvector_fold, - trunc_lshr_buildvector_fold]>; + trunc_lshr_buildvector_fold, + bitcast_bitcast_fold]>; def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p, overlapping_and, mulo_by_2, mulo_by_0, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5929,6 +5929,21 @@ MatchTy == MRI.getType(MI.getOperand(0).getReg()); } +bool 
CombinerHelper::matchBitcastBitcastFold(MachineInstr &MI, + Register &MatchInfo) { + // Matches two bitcasts in a row that cancel each other. + + Register Src = MI.getOperand(1).getReg(); + MachineInstr *SrcMI = getDefIgnoringCopies(Src, MRI); + + if (SrcMI->getOpcode() != TargetOpcode::G_BITCAST) + return false; + + Register Dst = MI.getOperand(0).getReg(); + MatchInfo = SrcMI->getOperand(1).getReg(); + return MRI.getType(MatchInfo) == MRI.getType(Dst); +} + unsigned CombinerHelper::getFPMinMaxOpcForSelect( CmpInst::Predicate Pred, LLT DstTy, SelectPatternNaNBehaviour VsNaNRetVal) const { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-redundant-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-redundant-bitcast.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-redundant-bitcast.mir @@ -0,0 +1,55 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: s32_bitcast +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: s32_bitcast + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>) + %src:_(<2 x s16>) = COPY $vgpr0 + %b1:_(s32) = G_BITCAST %src + %b2:_(<2 x s16>) = G_BITCAST %b1 + $vgpr0 = COPY %b2 +... + +--- +name: s64_bitcast +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: s64_bitcast + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %src(<2 x s32>) + %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %b1:_(s64) = G_BITCAST %src + %b2:_(<2 x s32>) = G_BITCAST %b1 + $vgpr0_vgpr1 = COPY %b2 +...
+ +--- +name: s64_bitcast_differenttypes_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: s64_bitcast_differenttypes_nofold + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %b1:_(s64) = G_BITCAST %src(<2 x s32>) + ; CHECK-NEXT: %b2:_(<4 x s16>) = G_BITCAST %b1(s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %b2(<4 x s16>) + %src:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %b1:_(s64) = G_BITCAST %src + %b2:_(<4 x s16>) = G_BITCAST %b1 + $vgpr0_vgpr1 = COPY %b2 +...