Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -283,6 +283,9 @@
 /// True iff the specified type indices are both the same bit size.
 LegalityPredicate sameSize(unsigned TypeIdx0, unsigned TypeIdx1);
 
+/// True iff the two specified type indices have identical types.
+LegalityPredicate sameType(unsigned TypeIdx0, unsigned TypeIdx1);
+
 /// True iff the first type index has a larger total bit size than second type
 /// index.
 LegalityPredicate largerThan(unsigned TypeIdx0, unsigned TypeIdx1);
Index: llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -174,6 +174,14 @@
   };
 }
 
+LegalityPredicate LegalityPredicates::sameType(unsigned TypeIdx0,
+                                               unsigned TypeIdx1) {
+  return [=](const LegalityQuery &Query) {
+    return Query.Types[TypeIdx0] ==
+           Query.Types[TypeIdx1];
+  };
+}
+
 LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
   return [=](const LegalityQuery &Query) {
     return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8);
Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -669,19 +669,9 @@
   getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
 
   getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
-      .legalIf([=](const LegalityQuery &Query) {
-        const LLT &DstTy = Query.Types[0];
-        const LLT &SrcTy = Query.Types[1];
-        // For now just support the TBL2 variant which needs the source vectors
-        // to be the same size as the dest.
-        if (DstTy != SrcTy)
-          return false;
-        for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) {
-          if (DstTy == Ty)
-            return true;
-        }
-        return false;
-      })
+      // For now just support the TBL2 variant which needs the source vectors
+      // to be the same size as the dest.
+      .legalIf(all(typeInSet(0, PackedVectorAllTypeList), sameType(0, 1)))
       // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
       // just want those lowered into G_BUILD_VECTOR
       .lowerIf([=](const LegalityQuery &Query) {
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -212,3 +212,64 @@
     RET_ReallyLR
 
 ...
+---
+name: shuffle_v4i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: shuffle_v4i16
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s16>), [[COPY1]], shufflemask(0, 0, 0, 0)
+    ; CHECK: $d0 = COPY [[SHUF]](<4 x s16>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<4 x s16>) = COPY $d0
+    %1:_(<4 x s16>) = COPY $d1
+    %2:_(<4 x s16>) = G_SHUFFLE_VECTOR %0(<4 x s16>), %1, shufflemask(0, 0, 0, 0)
+    $d0 = COPY %2(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: shuffle_v8i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: shuffle_v8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s8>), [[COPY1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
+    ; CHECK: $d0 = COPY [[SHUF]](<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
+    $d0 = COPY %2(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: shuffle_v2i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: shuffle_v2i32
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 0)
+    ; CHECK: $d0 = COPY [[SHUF]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(0, 0)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir
@@ -27,6 +27,9 @@
     ret <2 x i64> %shuf
   }
 
+  define void @shuffle_v4i16() { unreachable }
+  define void @shuffle_v8i8() { unreachable }
+  define void @shuffle_v2i32() { unreachable }
 ...
 ---
 name: shuffle_v2f32
@@ -39,15 +42,11 @@
     liveins: $d0, $d1
 
     ; CHECK-LABEL: name: shuffle_v2f32
-    ; CHECK: constants:
-    ; CHECK: - id: 0
-    ; CHECK: value: '<8 x i8> '
-    ; CHECK: alignment: 8
     ; CHECK: liveins: $d0, $d1
     ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
     ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load 8 from constant-pool)
     ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
     ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
     ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
@@ -77,15 +76,11 @@
     liveins: $q0, $q1
 
     ; CHECK-LABEL: name: shuffle_v4i32
-    ; CHECK: constants:
-    ; CHECK: value: '<16 x i8> '
-    ; CHECK: alignment: 16
-    ; CHECK: isTargetSpecific: false
     ; CHECK: liveins: $q0, $q1
     ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
     ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load 16 from constant-pool)
     ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE [[COPY]], %subreg.qsub0, [[COPY1]], %subreg.qsub1
     ; CHECK: [[TBLv16i8Two:%[0-9]+]]:fpr128 = TBLv16i8Two [[REG_SEQUENCE]], [[LDRQui]]
     ; CHECK: $q0 = COPY [[TBLv16i8Two]]
@@ -108,15 +103,11 @@
     liveins: $q0, $q1
 
     ; CHECK-LABEL: name: shuffle_tbl_v4i32
-    ; CHECK: constants:
-    ; CHECK: value: '<16 x i8> '
-    ; CHECK: alignment: 16
-    ; CHECK: isTargetSpecific: false
     ; CHECK: liveins: $q0, $q1
     ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
     ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load 16 from constant-pool)
     ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE [[COPY]], %subreg.qsub0, [[COPY1]], %subreg.qsub1
     ; CHECK: [[TBLv16i8Two:%[0-9]+]]:fpr128 = TBLv16i8Two [[REG_SEQUENCE]], [[LDRQui]]
     ; CHECK: $q0 = COPY [[TBLv16i8Two]]
@@ -139,15 +130,11 @@
     liveins: $q0, $q1
 
     ; CHECK-LABEL: name: shuffle_v2i64
-    ; CHECK: constants:
-    ; CHECK: value: '<16 x i8> '
-    ; CHECK: alignment: 16
-    ; CHECK: isTargetSpecific: false
     ; CHECK: liveins: $q0, $q1
     ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
     ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load 16 from constant-pool)
     ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE [[COPY]], %subreg.qsub0, [[COPY1]], %subreg.qsub1
     ; CHECK: [[TBLv16i8Two:%[0-9]+]]:fpr128 = TBLv16i8Two [[REG_SEQUENCE]], [[LDRQui]]
     ; CHECK: $q0 = COPY [[TBLv16i8Two]]
@@ -159,3 +146,101 @@
     RET_ReallyLR implicit $q0
 
 ...
+---
+name: shuffle_v4i16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: shuffle_v4i16
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load 8 from constant-pool)
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub
+    ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0
+    ; CHECK: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[LDRDui]], %subreg.dsub
+    ; CHECK: [[TBLv16i8One:%[0-9]+]]:fpr128 = TBLv16i8One [[INSvi64lane]], [[INSERT_SUBREG2]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[TBLv16i8One]].dsub
+    ; CHECK: $d0 = COPY [[COPY2]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<4 x s16>) = COPY $d0
+    %1:fpr(<4 x s16>) = COPY $d1
+    %2:fpr(<4 x s16>) = G_SHUFFLE_VECTOR %0(<4 x s16>), %1, shufflemask(0, 0, 0, 0)
+    $d0 = COPY %2(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: shuffle_v8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: shuffle_v8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load 8 from constant-pool)
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub
+    ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0
+    ; CHECK: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[LDRDui]], %subreg.dsub
+    ; CHECK: [[TBLv16i8One:%[0-9]+]]:fpr128 = TBLv16i8One [[INSvi64lane]], [[INSERT_SUBREG2]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[TBLv16i8One]].dsub
+    ; CHECK: $d0 = COPY [[COPY2]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<8 x s8>) = COPY $d0
+    %1:fpr(<8 x s8>) = COPY $d1
+    %2:fpr(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
+    $d0 = COPY %2(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: shuffle_v2i32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: shuffle_v2i32
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
+    ; CHECK: $d0 = COPY [[DUPv2i32lane]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %4:fpr(<2 x s32>) = G_IMPLICIT_DEF
+    %5:fpr(<4 x s32>) = G_CONCAT_VECTORS %0(<2 x s32>), %4(<2 x s32>)
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:fpr(<2 x s32>) = G_DUPLANE32 %5, %3(s64)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
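
Usage note: sameType compares the full LLT at two type indices, so v4s16 vs. v4s16 passes while v4s16 vs. v2s32 fails; the existing sameSize predicate would accept the latter because it only compares total bit widths. A minimal sketch of how the new predicate composes with the legality-rule DSL, assuming the usual LegalizerInfo-constructor context; the explicit two-element type list is illustrative and stands in for a target list such as AArch64's PackedVectorAllTypeList, and LLT::vector(NumElts, ScalarSizeInBits) is the constructor as of this patch:

  using namespace LegalityPredicates;
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s16 = LLT::vector(4, 16);
  // Legal only when the destination (type index 0) is one of the listed
  // vector types and the source (type index 1) is exactly the same type;
  // same-size-but-different-type pairs fall through to later rules.
  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf(all(typeInSet(0, {v2s32, v4s16}), sameType(0, 1)));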