diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -755,6 +755,7 @@ bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo); bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo); + bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo); /// Transform: /// (x + y) - y -> x diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -950,6 +950,12 @@ [{ return Helper.matchTruncBuildVectorFold(*${op}, ${matchinfo}); }]), (apply [{ Helper.replaceSingleDefInstWithReg(*${op}, ${matchinfo}); }])>; +def trunc_lshr_buildvector_fold : GICombineRule< + (defs root:$op, register_matchinfo:$matchinfo), + (match (wip_match_opcode G_TRUNC):$op, + [{ return Helper.matchTruncLshrBuildVectorFold(*${op}, ${matchinfo}); }]), + (apply [{ Helper.replaceSingleDefInstWithReg(*${op}, ${matchinfo}); }])>; + // Transform: // (x + y) - y -> x // (x + y) - x -> y @@ -986,7 +992,8 @@ i2p_to_p2i, anyext_trunc_fold, fneg_fneg_fold, right_identity_one, add_sub_reg, buildvector_identity_fold, - trunc_buildvector_fold]>; + trunc_buildvector_fold, + trunc_lshr_buildvector_fold]>; def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p, overlapping_and, mulo_by_2, mulo_by_0, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5914,6 +5914,21 @@ return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg()); } +bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI, + Register &MatchInfo) { + // Replace 
(G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with + // y if K == size of vector element type + Optional<ValueAndVReg> ShiftAmt; + if (!mi_match(MI.getOperand(1).getReg(), MRI, + m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))), + m_GCst(ShiftAmt)))) + return false; + + LLT MatchTy = MRI.getType(MatchInfo); + return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() && + MatchTy == MRI.getType(MI.getOperand(0).getReg()); +} + unsigned CombinerHelper::getFPMinMaxOpcForSelect( CmpInst::Predicate Pred, LLT DstTy, SelectPatternNaNBehaviour VsNaNRetVal) const { diff --git a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll --- a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll +++ b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll @@ -118,8 +118,6 @@ } define i32 @f7() { -; FIXME: GlobalISel doesn't handle vectors well. -; ; CHECK-LABEL: f7: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, x3+108 @@ -128,12 +126,8 @@ ; ; GISEL-LABEL: f7: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: adrp x8, x3+88 -; GISEL-NEXT: add x8, x8, :lo12:x3+88 -; GISEL-NEXT: mov v0.d[1], x8 -; GISEL-NEXT: mov d0, v0.d[1] -; GISEL-NEXT: fmov x8, d0 -; GISEL-NEXT: ldr w0, [x8, #20] +; GISEL-NEXT: adrp x8, x3+108 +; GISEL-NEXT: ldr w0, [x8, :lo12:x3+108] ; GISEL-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir @@ -29,6 +29,73 @@ $vgpr0 = COPY %9 ... 
+--- +name: s16_trunc_v2s16_buildvector_shift8_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: s16_trunc_v2s16_buildvector_shift8_nofold + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 42 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C1]], [[TRUNC2]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 + %5:_(s32) = G_BITCAST %4 + %6:_(s32) = G_CONSTANT i32 8 + %7:_(s32) = G_LSHR %5, %6 + %8:_(s16) = G_TRUNC %7 + %9:_(s16) = G_CONSTANT i16 42 + %10:_(s16) = G_OR %9, %8 + %11:_(s32) = G_ZEXT %10 + $vgpr0 = COPY %11 +... 
+ +--- +name: s16_trunc_v2s16_buildvector_shift16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: s16_trunc_v2s16_buildvector_shift16 + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C]], [[TRUNC]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 + %5:_(s32) = G_BITCAST %4 + %6:_(s32) = G_CONSTANT i32 16 + %7:_(s32) = G_LSHR %5, %6 + %8:_(s16) = G_TRUNC %7 + %9:_(s16) = G_CONSTANT i16 42 + %10:_(s16) = G_OR %9, %8 + %11:_(s32) = G_ZEXT %10 + $vgpr0 = COPY %11 +... + --- name: s16_trunc_v2s32_buildvector_nofold tracksRegLiveness: true