Index: llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -232,11 +232,6 @@
     return false;
 
   Register Dst = MI.getOperand(0).getReg();
-  if (MRI.getType(Dst).getScalarSizeInBits() < 32) {
-    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
-    return false;
-  }
-
   MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, Dst,
                                   {InsMI->getOperand(2).getReg()});
   return true;
Index: llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
@@ -292,3 +292,51 @@
     %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(-1, 0, 0, 3)
     $q0 = COPY %4(<4 x s32>)
     RET_ReallyLR implicit $q0
+
+...
+---
+name: splat_4xi16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $h0
+    ; CHECK-LABEL: name: splat_4xi16
+    ; CHECK: liveins: $h0
+    ; CHECK: %copy:fpr(s16) = COPY $h0
+    ; CHECK: %splat:fpr(<4 x s16>) = G_DUP %copy(s16)
+    ; CHECK: $d0 = COPY %splat(<4 x s16>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:fpr(s16) = COPY $h0
+    %undef:fpr(<4 x s16>) = G_IMPLICIT_DEF
+    %cst:gpr(s32) = G_CONSTANT i32 0
+    %ins:fpr(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %copy(s16), %cst(s32)
+    %splat:fpr(<4 x s16>) = G_SHUFFLE_VECTOR %ins(<4 x s16>), %undef, shufflemask(0, 0, 0, 0)
+    $d0 = COPY %splat(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: splat_8xi8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: splat_8xi8
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr(s32) = COPY $w0
+    ; CHECK: %splat:fpr(<8 x s8>) = G_DUP %copy(s32)
+    ; CHECK: $d0 = COPY %splat(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:gpr(s32) = COPY $w0
+    %undef:fpr(<8 x s8>) = G_IMPLICIT_DEF
+    %cst:gpr(s32) = G_CONSTANT i32 0
+    %ins:fpr(<8 x s8>) = G_INSERT_VECTOR_ELT %undef, %copy(s32), %cst(s32)
+    %splat:fpr(<8 x s8>) = G_SHUFFLE_VECTOR %ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
+    $d0 = COPY %splat(<8 x s8>)
+    RET_ReallyLR implicit $d0
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -1,31 +1,31 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# GPR variants should not use INSERT_SUBREG. FPR variants (DUPlane) should.
 
 ...
 ---
-name: splat_4xi32
-alignment: 4
+name: DUPv4i32gpr
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.0.entry:
     liveins: $w0
-
-    ; CHECK-LABEL: name: splat_4xi32
+    ; CHECK-LABEL: name: DUPv4i32gpr
     ; CHECK: liveins: $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
-    ; CHECK: [[DUPv4i32gpr:%[0-9]+]]:fpr128 = DUPv4i32gpr [[COPY]]
-    ; CHECK: $q0 = COPY [[DUPv4i32gpr]]
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr128 = DUPv4i32gpr %copy
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:gpr(s32) = COPY $w0
-    %4:fpr(<4 x s32>) = G_DUP %0(s32)
-    $q0 = COPY %4(<4 x s32>)
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<4 x s32>) = G_DUP %copy(s32)
+    $q0 = COPY %dup(<4 x s32>)
     RET_ReallyLR implicit $q0
 
 ...
 ---
-name: splat_2xi64
+name: DUPv2i64gpr
 alignment: 4
 legalized: true
 regBankSelected: true
@@ -33,21 +33,20 @@
 body: |
   bb.0.entry:
     liveins: $x0
-
-    ; CHECK-LABEL: name: splat_2xi64
+    ; CHECK-LABEL: name: DUPv2i64gpr
     ; CHECK: liveins: $x0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK: [[DUPv2i64gpr:%[0-9]+]]:fpr128 = DUPv2i64gpr [[COPY]]
-    ; CHECK: $q0 = COPY [[DUPv2i64gpr]]
+    ; CHECK: %copy:gpr64 = COPY $x0
+    ; CHECK: %dup:fpr128 = DUPv2i64gpr %copy
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:gpr(s64) = COPY $x0
-    %4:fpr(<2 x s64>) = G_DUP %0(s64)
-    $q0 = COPY %4(<2 x s64>)
+    %copy:gpr(s64) = COPY $x0
+    %dup:fpr(<2 x s64>) = G_DUP %copy(s64)
+    $q0 = COPY %dup(<2 x s64>)
     RET_ReallyLR implicit $q0
 
 ...
 ---
-name: splat_2xi32
+name: DUPv2i32gpr
 alignment: 4
 legalized: true
 regBankSelected: true
@@ -55,21 +54,20 @@
 body: |
   bb.0.entry:
     liveins: $w0
-
-    ; CHECK-LABEL: name: splat_2xi32
+    ; CHECK-LABEL: name: DUPv2i32gpr
     ; CHECK: liveins: $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
-    ; CHECK: [[DUPv2i32gpr:%[0-9]+]]:fpr64 = DUPv2i32gpr [[COPY]]
-    ; CHECK: $d0 = COPY [[DUPv2i32gpr]]
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr64 = DUPv2i32gpr %copy
+    ; CHECK: $d0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $d0
-    %0:gpr(s32) = COPY $w0
-    %4:fpr(<2 x s32>) = G_DUP %0(s32)
-    $d0 = COPY %4(<2 x s32>)
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<2 x s32>) = G_DUP %copy(s32)
+    $d0 = COPY %dup(<2 x s32>)
     RET_ReallyLR implicit $d0
 
 ...
 ---
-name: splat_4xf32
+name: DUPv4i32lane
 alignment: 4
 legalized: true
 regBankSelected: true
@@ -78,22 +76,22 @@
   bb.0.entry:
     liveins: $s0
 
-    ; CHECK-LABEL: name: splat_4xf32
+    ; CHECK-LABEL: name: DUPv4i32lane
     ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: %copy:fpr32 = COPY $s0
     ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
-    ; CHECK: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[INSERT_SUBREG]], 0
-    ; CHECK: $q0 = COPY [[DUPv4i32lane]]
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.ssub
+    ; CHECK: %dup:fpr128 = DUPv4i32lane [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s32) = COPY $s0
-    %4:fpr(<4 x s32>) = G_DUP %0(s32)
-    $q0 = COPY %4(<4 x s32>)
+    %copy:fpr(s32) = COPY $s0
+    %dup:fpr(<4 x s32>) = G_DUP %copy(s32)
+    $q0 = COPY %dup(<4 x s32>)
     RET_ReallyLR implicit $q0
 
 ...
 ---
-name: splat_2xf64
+name: DUPv2i64lane
 alignment: 4
 legalized: true
 regBankSelected: true
@@ -101,23 +99,22 @@
 body: |
   bb.0.entry:
     liveins: $d0
-
-    ; CHECK-LABEL: name: splat_2xf64
+    ; CHECK-LABEL: name: DUPv2i64lane
    ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
     ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
-    ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
-    ; CHECK: $q0 = COPY [[DUPv2i64lane]]
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.dsub
+    ; CHECK: %dup:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s64) = COPY $d0
-    %4:fpr(<2 x s64>) = G_DUP %0(s64)
-    $q0 = COPY %4(<2 x s64>)
+    %copy:fpr(s64) = COPY $d0
+    %dup:fpr(<2 x s64>) = G_DUP %copy(s64)
+    $q0 = COPY %dup(<2 x s64>)
     RET_ReallyLR implicit $q0
 
 ...
 ---
-name: splat_2xf32
+name: DUPv2i32lane
 alignment: 4
 legalized: true
 regBankSelected: true
@@ -125,40 +122,145 @@
 body: |
   bb.0.entry:
     liveins: $s0
-
-    ; CHECK-LABEL: name: splat_2xf32
+    ; CHECK-LABEL: name: DUPv2i32lane
     ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: %copy:fpr32 = COPY $s0
     ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
-    ; CHECK: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
-    ; CHECK: $d0 = COPY [[DUPv2i32lane]]
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.ssub
+    ; CHECK: %dup:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
+    ; CHECK: $d0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $d0
-    %0:fpr(s32) = COPY $s0
-    %4:fpr(<2 x s32>) = G_DUP %0(s32)
-    $d0 = COPY %4(<2 x s32>)
+    %copy:fpr(s32) = COPY $s0
+    %dup:fpr(<2 x s32>) = G_DUP %copy(s32)
+    $d0 = COPY %dup(<2 x s32>)
     RET_ReallyLR implicit $d0
+
 ...
 ---
-name: splat_2xf64_copies
+name: DUPv4i16lane
 alignment: 4
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.0.entry:
-    liveins: $d0
+    liveins: $h0
+    ; CHECK-LABEL: name: DUPv4i16lane
+    ; CHECK: liveins: $h0
+    ; CHECK: %copy:fpr16 = COPY $h0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.hsub
+    ; CHECK: %dup:fpr64 = DUPv4i16lane [[INSERT_SUBREG]], 0
+    ; CHECK: $d0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:fpr(s16) = COPY $h0
+    %dup:fpr(<4 x s16>) = G_DUP %copy(s16)
+    $d0 = COPY %dup(<4 x s16>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: DUPv4i16gpr
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv4i16gpr
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr64 = DUPv4i16gpr %copy
+    ; CHECK: $d0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<4 x s16>) = G_DUP %copy(s32)
+    $d0 = COPY %dup(<4 x s16>)
+    RET_ReallyLR implicit $d0
 
-    ; CHECK-LABEL: name: splat_2xf64_copies
-    ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+...
+---
+name: DUPv8i16lane
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $h0
+    ; CHECK-LABEL: name: DUPv8i16lane
+    ; CHECK: liveins: $h0
+    ; CHECK: %copy:fpr16 = COPY $h0
     ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
-    ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
-    ; CHECK: $q0 = COPY [[DUPv2i64lane]]
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.hsub
+    ; CHECK: %dup:fpr128 = DUPv8i16lane [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $q0
+    %copy:fpr(s16) = COPY $h0
+    %dup:fpr(<8 x s16>) = G_DUP %copy(s16)
+    $q0 = COPY %dup(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: DUPv8i16gpr
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv8i16gpr
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr128 = DUPv8i16gpr %copy
+    ; CHECK: $q0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $q0
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<8 x s16>) = G_DUP %copy(s32)
+    $q0 = COPY %dup(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: DUPv8i8gpr
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv8i8gpr
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr64 = DUPv8i8gpr %copy
+    ; CHECK: $d0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<8 x s8>) = G_DUP %copy(s32)
+    $d0 = COPY %dup(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: DUPv16i8gpr
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv16i8gpr
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr128 = DUPv16i8gpr %copy
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s64) = COPY $d0
-    %6:fpr(<2 x s64>) = G_DUP %0(s64)
-    $q0 = COPY %6(<2 x s64>)
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<16 x s8>) = G_DUP %copy(s32)
+    $q0 = COPY %dup(<16 x s8>)
     RET_ReallyLR implicit $q0
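
Note: the combiner change above removes the bailout that refused to form G_DUP for splat elements narrower than 32 bits, so the insert+shuffle splat idiom now collapses to G_DUP for s16 and s8 elements as well. A minimal before/after sketch in generic MIR, mirroring the tests in this patch (virtual register names are illustrative, not taken from the patch):

    ; Before the combine: a splat built by inserting the scalar into an
    ; undef vector and broadcasting lane 0 with a shuffle.
    %undef:fpr(<4 x s16>) = G_IMPLICIT_DEF
    %zero:gpr(s32) = G_CONSTANT i32 0
    %ins:fpr(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %val(s16), %zero(s32)
    %splat:fpr(<4 x s16>) = G_SHUFFLE_VECTOR %ins(<4 x s16>), %undef, shufflemask(0, 0, 0, 0)

    ; After the combine: a single AArch64 pseudo.
    %splat:fpr(<4 x s16>) = G_DUP %val(s16)

The select-dup.mir tests then pin down the two instruction-selection paths for the new element sizes: a GPR-sourced G_DUP selects directly to a DUP (general) instruction such as DUPv4i16gpr (dup v0.4h, w0), while an FPR-sourced G_DUP first places the scalar into a vector register with INSERT_SUBREG and selects a DUP (element) lane broadcast such as DUPv4i16lane (dup v0.4h, v0.h[0]), which is why only the lane variants carry INSERT_SUBREG checks.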