Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -135,13 +135,23 @@
   (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
+// Lower non-constant splat G_BUILD_VECTORs to G_DUP.
+def build_vector_to_dup : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_BUILD_VECTOR):$root,
+          [{ return matchBuildVectorToDup(*${root}, MRI); }]),
+  (apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }])
+>;
+
+def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
 def AArch64PostLegalizerLoweringHelper
     : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
                        [shuffle_vector_lowering, vashr_vlshr_imm,
-                        icmp_lowering]> {
+                        icmp_lowering, build_vector_lowering]> {
   let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
 }
 
Index: llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -696,6 +696,35 @@
   return true;
 }
 
+/// Match a G_BUILD_VECTOR that splats a single non-constant register, i.e.
+/// every source operand is the same register and that register is not a
+/// known constant.
+static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+  // Operand 0 is the def and operand 1 is the candidate splat value; every
+  // remaining source operand has to be that same register.
+  const Register Reg = MI.getOperand(1).getReg();
+  auto RemainingSrcs = make_range(MI.operands_begin() + 2, MI.operands_end());
+  if (!all_of(RemainingSrcs,
+              [Reg](const MachineOperand &Op) { return Op.getReg() == Reg; }))
+    return false;
+  // Constant splats should be selected to MOV. Do the constant look-through
+  // last so that the plain operand comparison can reject non-splats first.
+  return !getConstantVRegValWithLookThrough(Reg, MRI);
+}
+
+/// Replace a G_BUILD_VECTOR accepted by matchBuildVectorToDup with a G_DUP of
+/// its first source operand, then erase the original instruction.
+static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                  MachineIRBuilder &B) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+  B.setInstrAndDebugLoc(MI);
+  B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
+               {MI.getOperand(1).getReg()});
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGILowering.inc"
 #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
Index: llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
@@ -0,0 +1,86 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            same_reg
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: same_reg
+    ; CHECK: liveins: $d0
+    ; CHECK: %r:_(s8) = G_IMPLICIT_DEF
+    ; CHECK: %build_vector:_(<8 x s8>) = G_DUP %r(s8)
+    ; CHECK: $d0 = COPY %build_vector(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %r:_(s8) = G_IMPLICIT_DEF
+    %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
+    $d0 = COPY %build_vector(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            dont_combine_different_reg
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $w0, $w1
+    ; CHECK-LABEL: name: dont_combine_different_reg
+    ; CHECK: liveins: $d0, $w0, $w1
+    ; CHECK: %r:_(s32) = COPY $w0
+    ; CHECK: %q:_(s32) = COPY $w1
+    ; CHECK: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32)
+    ; CHECK: $d0 = COPY %build_vector(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %r:_(s32) = COPY $w0
+    %q:_(s32) = COPY $w1
+    %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r, %q
+    $d0 = COPY %build_vector(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            dont_combine_constant_splat
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; Constant splats are better selected as a MOV, so no G_DUP is formed.
+
+    ; CHECK-LABEL: name: dont_combine_constant_splat
+    ; CHECK: liveins: $d0
+    ; CHECK: %cst:_(s32) = G_CONSTANT i32 0
+    ; CHECK: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
+    ; CHECK: $d0 = COPY %build_vector(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %cst:_(s32) = G_CONSTANT i32 0
+    %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst
+    $d0 = COPY %build_vector(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            dont_combine_constant_fp_splat
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; FP constant splats are better selected as a MOV, so no G_DUP is formed.
+
+    ; CHECK-LABEL: name: dont_combine_constant_fp_splat
+    ; CHECK: liveins: $d0
+    ; CHECK: %cst:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
+    ; CHECK: $d0 = COPY %build_vector(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %cst:_(s32) = G_FCONSTANT float 0.0
+    %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst
+    $d0 = COPY %build_vector(<2 x s32>)
+    RET_ReallyLR implicit $d0