diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -328,6 +328,9 @@
                                       unsigned TypeIdx, LLT NarrowTy);
 
+  /// Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
+  LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI);
+
   LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx,
                                       LLT NarrowTy);
 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4978,9 +4978,8 @@
   }
 }
 
-/// Expand source vectors to the size of destination vector.
-static LegalizerHelper::LegalizeResult
-equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
+LegalizerHelper::LegalizeResult
+LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
@@ -4991,10 +4990,24 @@
   Register DstReg = MI.getOperand(0).getReg();
   LLT DestEltTy = DstTy.getElementType();
 
-  // TODO: Normalize the shuffle vector since mask and vector length don't
-  // match.
-  if (MaskNumElts <= SrcNumElts) {
-    return LegalizerHelper::LegalizeResult::UnableToLegalize;
+  if (MaskNumElts == SrcNumElts)
+    return Legalized;
+
+  if (MaskNumElts < SrcNumElts) {
+    // Extend mask to match new destination vector size with
+    // undef values.
+    SmallVector<int, 16> NewMask(Mask);
+    for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
+      NewMask.push_back(-1);
+
+    moreElementsVectorDst(MI, SrcTy, 0);
+    MIRBuilder.setInstrAndDebugLoc(MI);
+    MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
+                                  MI.getOperand(1).getReg(),
+                                  MI.getOperand(2).getReg(), NewMask);
+    MI.eraseFromParent();
+
+    return Legalized;
   }
 
   unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
@@ -5055,8 +5068,8 @@
   unsigned WidenNumElts = MoreTy.getNumElements();
 
   if (DstTy.isVector() && Src1Ty.isVector() &&
-      DstTy.getNumElements() > Src1Ty.getNumElements()) {
-    return equalizeVectorShuffleLengths(MI, MIRBuilder);
+      DstTy.getNumElements() != Src1Ty.getNumElements()) {
+    return equalizeVectorShuffleLengths(MI);
   }
 
   if (TypeIdx != 0)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -730,7 +730,14 @@
                     changeTo(1, 0))
       .moreElementsToNextPow2(0)
       .clampNumElements(0, v4s32, v4s32)
-      .clampNumElements(0, v2s64, v2s64);
+      .clampNumElements(0, v2s64, v2s64)
+      .moreElementsIf(
+          [](const LegalityQuery &Query) {
+            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
+                   Query.Types[0].getNumElements() <
+                       Query.Types[1].getNumElements();
+          },
+          changeTo(0, 1));
 
   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
       .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -460,3 +460,144 @@
     RET_ReallyLR
 
 ...
+---
+name: shuffle_v3i32_v4i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: shuffle_v3i32_v4i32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, undef)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+    ; CHECK-NEXT: $w0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<3 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, 4)
+    %3:_(s32), %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %2(<3 x s32>)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: shuffle_v2i32_v4i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: shuffle_v2i32_v4i32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 4, undef, undef)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+    ; CHECK-NEXT: $d0 = COPY [[UV]](<2 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 4)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: shuffle_v4i16_v8i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: shuffle_v4i16_v8i16
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s16>), [[COPY1]], shufflemask(15, 14, 13, 4, undef, undef, undef, undef)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[SHUF]](<8 x s16>)
+    ; CHECK-NEXT: $d0 = COPY [[UV]](<4 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:_(<8 x s16>) = COPY $q0
+    %1:_(<8 x s16>) = COPY $q1
+    %2:_(<4 x s16>) = G_SHUFFLE_VECTOR %0(<8 x s16>), %1, shufflemask(15, 14, 13, 4)
+    $d0 = COPY %2(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: shuffle_v8i8_v16i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: shuffle_v8i8_v16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[COPY]](<16 x s8>), [[COPY1]], shufflemask(7, 13, 6, 4, 17, 3, 0, 0, undef, undef, undef, undef, undef, undef, undef, undef)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[SHUF]](<16 x s8>)
+    ; CHECK-NEXT: $d0 = COPY [[UV]](<8 x s8>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:_(<16 x s8>) = COPY $q0
+    %1:_(<16 x s8>) = COPY $q1
+    %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(7, 13, 6, 4, 17, 3, 0, 0)
+    $d0 = COPY %2(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: size_shuffle_v4i32_v6i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $s0, $s1, $s2, $s3, $s4, $s5
+
+    ; CHECK-LABEL: name: size_shuffle_v4i32_v6i32
+    ; CHECK: liveins: $s0, $s1, $s2, $s3, $s4, $s5
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $s2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $s3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $s4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $s5
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C2]](s64)
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[C1]](s64)
+    ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32)
+    ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR3]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %3:_(s32) = COPY $s0
+    %4:_(s32) = COPY $s1
+    %5:_(s32) = COPY $s2
+    %6:_(s32) = COPY $s3
+    %7:_(s32) = COPY $s4
+    %8:_(s32) = COPY $s5
+    %0:_(<6 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32), %7(s32), %8(s32)
+    %19:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<6 x s32>), %0, shufflemask(3, 4, 7, 0)
+    $q0 = COPY %19(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
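Reviewer note (not part of the patch): the new MaskNumElts < SrcNumElts path in equalizeVectorShuffleLengths pads the shuffle mask with -1 (undef) lanes up to the source vector length, widens the destination with moreElementsVectorDst, and rebuilds the G_SHUFFLE_VECTOR; the original narrow result is then recovered through G_UNMERGE_VALUES, as the shuffle_v3i32_v4i32 test above checks. A minimal standalone C++ sketch of just the mask-padding step follows; padShuffleMask and the use of std::vector are illustrative only and do not appear in the patch or in the LLVM API.

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// Illustration only: pad a shuffle mask with -1 ("undef") lanes so its
// length matches the source vector length, mirroring the
// MaskNumElts < SrcNumElts branch added in LegalizerHelper.cpp above.
static std::vector<int> padShuffleMask(const std::vector<int> &Mask,
                                       std::size_t SrcNumElts) {
  assert(Mask.size() <= SrcNumElts && "mask longer than source vector");
  std::vector<int> NewMask(Mask);
  for (std::size_t I = Mask.size(); I < SrcNumElts; ++I)
    NewMask.push_back(-1); // -1 denotes an undef lane.
  return NewMask;
}

int main() {
  // A <3 x s32> result shuffled from <4 x s32> sources, as in the
  // shuffle_v3i32_v4i32 test: mask (0, 1, 4) becomes (0, 1, 4, undef).
  for (int Elt : padShuffleMask({0, 1, 4}, 4))
    std::cout << Elt << ' ';
  std::cout << '\n'; // prints: 0 1 4 -1
}
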
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -47,13 +47,23 @@ ret <2 x i64> %tmp4 } +; FALLBACK-NOT: remark:{{.*}} sabdl2_8h define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: sabdl2_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ldr d1, [x1, #8] -; CHECK-NEXT: sabdl.8h v0, v0, v1 -; CHECK-NEXT: ret +; DAG-LABEL: sabdl2_8h: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: sabdl.8h v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: sabdl2_8h: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: sabdl.8h v0, v0, v1 +; GISEL-NEXT: ret %load1 = load <16 x i8>, ptr %A %load2 = load <16 x i8>, ptr %B %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> @@ -63,13 +73,23 @@ ret <8 x i16> %tmp4 } +; FALLBACK-NOT: remark:{{.*}} sabdl2_4s define <4 x i32> @sabdl2_4s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: sabdl2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ldr d1, [x1, #8] -; CHECK-NEXT: sabdl.4s v0, v0, v1 -; CHECK-NEXT: ret +; DAG-LABEL: sabdl2_4s: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: sabdl.4s v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: sabdl2_4s: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: sabdl.4s v0, v0, v1 +; GISEL-NEXT: ret %load1 = load <8 x i16>, ptr %A %load2 = load <8 x i16>, ptr %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> @@ -79,13 +99,23 @@ ret <4 x i32> %tmp4 } +; FALLBACK-NOT: remark:{{.*}} sabdl2_2d define <2 x i64> @sabdl2_2d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: sabdl2_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ldr d1, [x1, #8] -; CHECK-NEXT: sabdl.2d v0, v0, v1 -; CHECK-NEXT: ret +; DAG-LABEL: sabdl2_2d: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: sabdl.2d v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: sabdl2_2d: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: sabdl.2d v0, v0, v1 +; GISEL-NEXT: ret %load1 = load <4 x i32>, ptr %A %load2 = load <4 x i32>, ptr %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> @@ -140,13 +170,23 @@ ret <2 x i64> %tmp4 } +; FALLBACK-NOT: remark:{{.*}} uabdl2_8h define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: uabdl2_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ldr d1, [x1, #8] -; CHECK-NEXT: uabdl.8h v0, v0, v1 -; CHECK-NEXT: ret +; DAG-LABEL: uabdl2_8h: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: uabdl.8h v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: uabdl2_8h: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: uabdl.8h v0, v0, v1 +; GISEL-NEXT: ret %load1 = load <16 x i8>, ptr %A %load2 = load <16 x i8>, ptr %B %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> @@ -157,13 +197,23 @@ ret <8 x i16> %tmp4 } +; FALLBACK-NOT: 
remark:{{.*}} uabdl2_4s define <4 x i32> @uabdl2_4s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: uabdl2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ldr d1, [x1, #8] -; CHECK-NEXT: uabdl.4s v0, v0, v1 -; CHECK-NEXT: ret +; DAG-LABEL: uabdl2_4s: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: uabdl.4s v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: uabdl2_4s: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: uabdl.4s v0, v0, v1 +; GISEL-NEXT: ret %load1 = load <8 x i16>, ptr %A %load2 = load <8 x i16>, ptr %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> @@ -173,13 +223,23 @@ ret <4 x i32> %tmp4 } +; FALLBACK-NOT: remark:{{.*}} uabdl2_2d define <2 x i64> @uabdl2_2d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: uabdl2_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ldr d1, [x1, #8] -; CHECK-NEXT: uabdl.2d v0, v0, v1 -; CHECK-NEXT: ret +; DAG-LABEL: uabdl2_2d: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: uabdl.2d v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: uabdl2_2d: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: uabdl.2d v0, v0, v1 +; GISEL-NEXT: ret %load1 = load <4 x i32>, ptr %A %load2 = load <4 x i32>, ptr %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> @@ -971,14 +1031,25 @@ ret <2 x i64> %tmp5 } +; FALLBACK-NOT: remark:{{.*}} sabal2_8h define <8 x i16> @sabal2_8h(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: sabal2_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: sabal.8h v0, v1, v2 -; CHECK-NEXT: ret +; DAG-LABEL: sabal2_8h: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: sabal.8h v0, v1, v2 +; DAG-NEXT: ret +; +; GISEL-LABEL: sabal2_8h: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v2, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: ldr q0, [x2] +; GISEL-NEXT: sabal.8h v0, v2, v1 +; GISEL-NEXT: ret %load1 = load <16 x i8>, ptr %A %load2 = load <16 x i8>, ptr %B %tmp3 = load <8 x i16>, ptr %C @@ -990,14 +1061,25 @@ ret <8 x i16> %tmp5 } +; FALLBACK-NOT: remark:{{.*}} sabal2_4s define <4 x i32> @sabal2_4s(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: sabal2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: sabal.4s v0, v1, v2 -; CHECK-NEXT: ret +; DAG-LABEL: sabal2_4s: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: sabal.4s v0, v1, v2 +; DAG-NEXT: ret +; +; GISEL-LABEL: sabal2_4s: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v2, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: ldr q0, [x2] +; GISEL-NEXT: sabal.4s v0, v2, v1 +; GISEL-NEXT: ret %load1 = load <8 x i16>, ptr %A %load2 = load <8 x i16>, ptr %B %tmp3 = load <4 x i32>, ptr %C @@ -1009,14 +1091,25 @@ ret <4 x i32> %tmp5 } +; FALLBACK-NOT: remark:{{.*}} sabal2_2d define <2 x i64> @sabal2_2d(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: sabal2_2d: -; CHECK: // %bb.0: -; 
CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: sabal.2d v0, v1, v2 -; CHECK-NEXT: ret +; DAG-LABEL: sabal2_2d: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: sabal.2d v0, v1, v2 +; DAG-NEXT: ret +; +; GISEL-LABEL: sabal2_2d: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v2, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: ldr q0, [x2] +; GISEL-NEXT: sabal.2d v0, v2, v1 +; GISEL-NEXT: ret %load1 = load <4 x i32>, ptr %A %load2 = load <4 x i32>, ptr %B %tmp3 = load <2 x i64>, ptr %C @@ -1106,14 +1199,25 @@ ret <2 x i64> %tmp5 } +; FALLBACK-NOT: remark:{{.*}} uabal2_8h define <8 x i16> @uabal2_8h(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: uabal2_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: uabal.8h v0, v1, v2 -; CHECK-NEXT: ret +; DAG-LABEL: uabal2_8h: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: uabal.8h v0, v1, v2 +; DAG-NEXT: ret +; +; GISEL-LABEL: uabal2_8h: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v2, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: ldr q0, [x2] +; GISEL-NEXT: uabal.8h v0, v2, v1 +; GISEL-NEXT: ret %load1 = load <16 x i8>, ptr %A %load2 = load <16 x i8>, ptr %B %tmp3 = load <8 x i16>, ptr %C @@ -1125,14 +1229,25 @@ ret <8 x i16> %tmp5 } +; FALLBACK-NOT: remark:{{.*}} uabal2_4s define <4 x i32> @uabal2_4s(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: uabal2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: uabal.4s v0, v1, v2 -; CHECK-NEXT: ret +; DAG-LABEL: uabal2_4s: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: uabal.4s v0, v1, v2 +; DAG-NEXT: ret +; +; GISEL-LABEL: uabal2_4s: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v2, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: ldr q0, [x2] +; GISEL-NEXT: uabal.4s v0, v2, v1 +; GISEL-NEXT: ret %load1 = load <8 x i16>, ptr %A %load2 = load <8 x i16>, ptr %B %tmp3 = load <4 x i32>, ptr %C @@ -1144,14 +1259,25 @@ ret <4 x i32> %tmp5 } +; FALLBACK-NOT: remark:{{.*}} uabal2_2d define <2 x i64> @uabal2_2d(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: uabal2_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: uabal.2d v0, v1, v2 -; CHECK-NEXT: ret +; DAG-LABEL: uabal2_2d: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: uabal.2d v0, v1, v2 +; DAG-NEXT: ret +; +; GISEL-LABEL: uabal2_2d: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q1, [x1] +; GISEL-NEXT: ext.16b v2, v0, v0, #8 +; GISEL-NEXT: ext.16b v1, v1, v0, #8 +; GISEL-NEXT: ldr q0, [x2] +; GISEL-NEXT: uabal.2d v0, v2, v1 +; GISEL-NEXT: ret %load1 = load <4 x i32>, ptr %A %load2 = load <4 x i32>, ptr %B %tmp3 = load <2 x i64>, ptr %C @@ -1496,10 +1622,12 @@ declare float @llvm.fabs.f32(float) nounwind readnone declare double @llvm.fabs.f64(double) nounwind readnone +; FALLBACK-NOT: remark:{{.*}} uabdl_from_extract_dup define <2 x i64> 
@uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { ; CHECK-LABEL: uabdl_from_extract_dup: ; CHECK: // %bb.0: ; CHECK-NEXT: dup.2s v1, w0 +; GISEL-NEXT: ext.16b v0, v0, v0, #0 ; CHECK-NEXT: uabdl.2d v0, v0, v1 ; CHECK-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 @@ -1512,12 +1640,20 @@ ret <2 x i64> %res1 } +; FALLBACK-NOT: remark:{{.*}} uabdl2_from_extract_dup define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { -; CHECK-LABEL: uabdl2_from_extract_dup: -; CHECK: // %bb.0: -; CHECK-NEXT: dup.4s v1, w0 -; CHECK-NEXT: uabdl2.2d v0, v0, v1 -; CHECK-NEXT: ret +; DAG-LABEL: uabdl2_from_extract_dup: +; DAG: // %bb.0: +; DAG-NEXT: dup.4s v1, w0 +; DAG-NEXT: uabdl2.2d v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: uabdl2_from_extract_dup: +; GISEL: // %bb.0: +; GISEL-NEXT: dup.2s v1, w0 +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: uabdl.2d v0, v0, v1 +; GISEL-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 @@ -1528,10 +1664,12 @@ ret <2 x i64> %res1 } +; FALLBACK-NOT: remark:{{.*}} sabdl_from_extract_dup define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { ; CHECK-LABEL: sabdl_from_extract_dup: ; CHECK: // %bb.0: ; CHECK-NEXT: dup.2s v1, w0 +; GISEL-NEXT: ext.16b v0, v0, v0, #0 ; CHECK-NEXT: sabdl.2d v0, v0, v1 ; CHECK-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 @@ -1544,12 +1682,20 @@ ret <2 x i64> %res1 } +; FALLBACK-NOT: remark:{{.*}} sabdl2_from_extract_dup define <2 x i64> @sabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { -; CHECK-LABEL: sabdl2_from_extract_dup: -; CHECK: // %bb.0: -; CHECK-NEXT: dup.4s v1, w0 -; CHECK-NEXT: sabdl2.2d v0, v0, v1 -; CHECK-NEXT: ret +; DAG-LABEL: sabdl2_from_extract_dup: +; DAG: // %bb.0: +; DAG-NEXT: dup.4s v1, w0 +; DAG-NEXT: sabdl2.2d v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: sabdl2_from_extract_dup: +; GISEL: // %bb.0: +; GISEL-NEXT: dup.2s v1, w0 +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: sabdl.2d v0, v0, v1 +; GISEL-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -31,7 +31,8 @@ ; ; GISEL-LABEL: test_vcvt_high_f64_f32: ; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: fcvtl v0.2d, v0.2s ; GISEL-NEXT: ret %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double> @@ -79,7 +80,8 @@ ; ; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: fcvtl v0.2d, v0.2s ; GISEL-NEXT: ret %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> %bc2 = bitcast <2 x i32> %ext to <2 x float> @@ -95,7 +97,8 @@ ; ; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: fcvtl v0.2d, v0.2s ; GISEL-NEXT: ret %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> %bc2 = bitcast <4 x i16> %ext to <2 x float> @@ -111,7 +114,8 @@ ; ; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: 
fcvtl v0.2d, v0.2s ; GISEL-NEXT: ret %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> %bc2 = bitcast <8 x i8> %ext to <2 x float> @@ -143,7 +147,8 @@ ; ; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: fcvtl v0.4s, v0.4h ; GISEL-NEXT: ret %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> %bc2 = bitcast <2 x i32> %ext to <4 x half> @@ -159,7 +164,8 @@ ; ; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: fcvtl v0.4s, v0.4h ; GISEL-NEXT: ret %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> %bc2 = bitcast <4 x i16> %ext to <4 x half> @@ -175,7 +181,8 @@ ; ; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast: ; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h +; GISEL-NEXT: ext.16b v0, v0, v0, #8 +; GISEL-NEXT: fcvtl v0.4s, v0.4h ; GISEL-NEXT: ret %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> %bc2 = bitcast <8 x i8> %ext to <4 x half>