Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -900,17 +900,21 @@ // same sequence. Search for elements using findValueFromDefImpl. bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx, GUnmerge *Unmerge, unsigned UnmergeIdxStart, - unsigned NumElts, unsigned EltSize) { + unsigned NumElts, unsigned EltSize, + bool AllowUndef) { assert(MergeStartIdx + NumElts <= MI.getNumSources()); for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) { unsigned EltUnmergeIdx; GUnmerge *EltUnmerge = findUnmergeThatDefinesReg( MI.getSourceReg(i), EltSize, EltUnmergeIdx); // Check if source i comes from the same Unmerge. - if (!EltUnmerge || EltUnmerge != Unmerge) - return false; - // Check that source i's def has same index in sequence in Unmerge. - if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart) + if (EltUnmerge == Unmerge) { + // Check that source i's def has same index in sequence in Unmerge. + if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart) + return false; + } else if (!AllowUndef || + MRI.getVRegDef(MI.getSourceReg(i))->getOpcode() != + TargetOpcode::G_IMPLICIT_DEF) return false; } return true; @@ -944,8 +948,34 @@ // // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty) if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) { - if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize)) + bool AllowUndefShuffle = + DstTy.isVector() && + LI.isLegal({TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, DstTy}}); + if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize, + /*AllowUndef=*/AllowUndefShuffle)) return false; + + // If any of the operands was an undef, generate an identity shuffle + // with undef elements in the correct elements. + if (AllowUndefShuffle) { + SmallVector Mask(NumMIElts); + bool AnyUndef = false; + for (unsigned i = 0; i < NumMIElts; i++) { + bool IsUndef = MRI.getVRegDef(MI.getSourceReg(i))->getOpcode() == + TargetOpcode::G_IMPLICIT_DEF; + Mask[i] = IsUndef ? -1 : i; + AnyUndef |= IsUndef; + } + if (AnyUndef) { + MIB.setInstrAndDebugLoc(MI); + Register UndefReg = MIB.buildUndef(DstTy)->getOperand(0).getReg(); + MIB.buildShuffleVector(Dst, UnmergeSrc, UndefReg, Mask); + UpdatedDefs.push_back(Dst); + DeadInsts.push_back(&MI); + return true; + } + } + replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer); DeadInsts.push_back(&MI); return true; @@ -965,7 +995,7 @@ (Elt0UnmergeIdx % NumMIElts == 0) && getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) { if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts, - EltSize)) + EltSize, false)) return false; MIB.setInstrAndDebugLoc(MI); auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg()); @@ -998,7 +1028,8 @@ if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) || (EltUnmergeIdx != 0)) return false; - if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize)) + if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize, + false)) return false; ConcatSources.push_back(UnmergeI->getSourceReg()); } Index: llvm/test/CodeGen/AArch64/fabs.ll =================================================================== --- llvm/test/CodeGen/AArch64/fabs.ll +++ llvm/test/CodeGen/AArch64/fabs.ll @@ -115,25 +115,10 @@ } define <3 x float> @fabs_v3f32(<3 x float> %a) { -; CHECK-SD-LABEL: fabs_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fabs v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fabs_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: fabs v0.4s, v0.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fabs_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fabs v0.4s, v0.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a) ret <3 x float> %c @@ -213,32 +198,22 @@ ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: fabs v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] @@ -249,33 +224,7 @@ ; ; CHECK-GI-FP16-LABEL: fabs_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: fabs v0.8h, v0.8h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.fabs.v7f16(<7 x half> %a) Index: llvm/test/CodeGen/AArch64/fcvt.ll =================================================================== --- llvm/test/CodeGen/AArch64/fcvt.ll +++ llvm/test/CodeGen/AArch64/fcvt.ll @@ -115,25 +115,10 @@ } define <3 x float> @ceil_v3f32(<3 x float> %a) { -; CHECK-SD-LABEL: ceil_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: frintp v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ceil_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: frintp v0.4s, v0.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ceil_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frintp v0.4s, v0.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.ceil.v3f32(<3 x float> %a) ret <3 x float> %c @@ -213,32 +198,22 @@ ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: frintp v2.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: frintp v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: frintp v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: frintp v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] @@ -249,33 +224,7 @@ ; ; CHECK-GI-FP16-LABEL: ceil_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintp v0.8h, v0.8h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.ceil.v7f16(<7 x half> %a) @@ -622,25 +571,10 @@ } define <3 x float> @floor_v3f32(<3 x float> %a) { -; CHECK-SD-LABEL: floor_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: frintm v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: floor_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: frintm v0.4s, v0.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: floor_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frintm v0.4s, v0.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.floor.v3f32(<3 x float> %a) ret <3 x float> %c @@ -720,32 +654,22 @@ ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: frintm v2.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: frintm v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: frintm v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: frintm v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] @@ -756,33 +680,7 @@ ; ; CHECK-GI-FP16-LABEL: floor_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintm v0.8h, v0.8h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.floor.v7f16(<7 x half> %a) @@ -1129,25 +1027,10 @@ } define <3 x float> @nearbyint_v3f32(<3 x float> %a) { -; CHECK-SD-LABEL: nearbyint_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: frinti v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: nearbyint_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: frinti v0.4s, v0.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: nearbyint_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frinti v0.4s, v0.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.nearbyint.v3f32(<3 x float> %a) ret <3 x float> %c @@ -1227,32 +1110,22 @@ ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: frinti v2.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: frinti v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: frinti v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: frinti v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] @@ -1263,33 +1136,7 @@ ; ; CHECK-GI-FP16-LABEL: nearbyint_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frinti v0.8h, v0.8h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.nearbyint.v7f16(<7 x half> %a) @@ -1636,25 +1483,10 @@ } define <3 x float> @roundeven_v3f32(<3 x float> %a) { -; CHECK-SD-LABEL: roundeven_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: frintn v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: roundeven_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: frintn v0.4s, v0.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: roundeven_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frintn v0.4s, v0.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %a) ret <3 x float> %c @@ -1734,32 +1566,22 @@ ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: frintn v2.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: frintn v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: frintn v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: frintn v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] @@ -1770,33 +1592,7 @@ ; ; CHECK-GI-FP16-LABEL: roundeven_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintn v0.8h, v0.8h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.roundeven.v7f16(<7 x half> %a) @@ -2143,25 +1939,10 @@ } define <3 x float> @rint_v3f32(<3 x float> %a) { -; CHECK-SD-LABEL: rint_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: frintx v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: rint_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: frintx v0.4s, v0.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: rint_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.rint.v3f32(<3 x float> %a) ret <3 x float> %c @@ -2241,32 +2022,22 @@ ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: frintx v2.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: frintx v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: frintx v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: frintx v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] @@ -2277,33 +2048,7 @@ ; ; CHECK-GI-FP16-LABEL: rint_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintx v0.8h, v0.8h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.rint.v7f16(<7 x half> %a) @@ -2650,25 +2395,10 @@ } define <3 x float> @round_v3f32(<3 x float> %a) { -; CHECK-SD-LABEL: round_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: frinta v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: round_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: frinta v0.4s, v0.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: round_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frinta v0.4s, v0.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.round.v3f32(<3 x float> %a) ret <3 x float> %c @@ -2748,32 +2478,22 @@ ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: frinta v2.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: frinta v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: frinta v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: frinta v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] @@ -2784,33 +2504,7 @@ ; ; CHECK-GI-FP16-LABEL: round_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frinta v0.8h, v0.8h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.round.v7f16(<7 x half> %a) @@ -3157,25 +2851,10 @@ } define <3 x float> @trunc_v3f32(<3 x float> %a) { -; CHECK-SD-LABEL: trunc_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: frintz v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: trunc_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: frintz v0.4s, v0.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: trunc_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: frintz v0.4s, v0.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.trunc.v3f32(<3 x float> %a) ret <3 x float> %c @@ -3255,32 +2934,22 @@ ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: frintz v2.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: frintz v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: frintz v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: frintz v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] @@ -3291,33 +2960,7 @@ ; ; CHECK-GI-FP16-LABEL: trunc_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintz v0.8h, v0.8h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.trunc.v7f16(<7 x half> %a) Index: llvm/test/CodeGen/AArch64/fminimummaximum.ll =================================================================== --- llvm/test/CodeGen/AArch64/fminimummaximum.ll +++ llvm/test/CodeGen/AArch64/fminimummaximum.ll @@ -243,60 +243,20 @@ } define <3 x float> @min_v3f32(<3 x float> %a, <3 x float> %b) { -; CHECK-SD-LABEL: min_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmin v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: min_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: mov s4, v0.s[2] -; CHECK-GI-NEXT: mov s5, v1.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v4.s[0] -; CHECK-GI-NEXT: mov v1.s[2], v5.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NEXT: fmin v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: min_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b) ret <3 x float> %c } define <3 x float> @max_v3f32(<3 x float> %a, <3 x float> %b) { -; CHECK-SD-LABEL: max_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmax v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: max_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: mov s4, v0.s[2] -; CHECK-GI-NEXT: mov s5, v1.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v4.s[0] -; CHECK-GI-NEXT: mov v1.s[2], v5.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NEXT: fmax v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: max_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) ret <3 x float> %c @@ -686,42 +646,27 @@ ; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[5] ; CHECK-NOFP16-GI-NEXT: mov h4, v1.h[4] ; CHECK-NOFP16-GI-NEXT: mov h5, v1.h[5] -; CHECK-NOFP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-NOFP16-GI-NEXT: mov h7, v1.h[6] -; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v6.4s, v0.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v7.4s, v1.4h +; CHECK-NOFP16-GI-NEXT: mov h0, v0.h[6] +; CHECK-NOFP16-GI-NEXT: mov h1, v1.h[6] ; CHECK-NOFP16-GI-NEXT: mov v2.h[1], v3.h[0] ; CHECK-NOFP16-GI-NEXT: mov v4.h[1], v5.h[0] -; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v6.h[0] -; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v7.h[0] +; CHECK-NOFP16-GI-NEXT: fmin v3.4s, v6.4s, v7.4s +; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v0.h[0] +; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v1.h[0] +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v3.4s ; CHECK-NOFP16-GI-NEXT: mov v2.h[3], v0.h[0] -; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0] -; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h -; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v4.4h -; CHECK-NOFP16-GI-NEXT: mov s4, v2.s[1] -; CHECK-NOFP16-GI-NEXT: mov s6, v2.s[2] -; CHECK-NOFP16-GI-NEXT: mov s5, v3.s[1] -; CHECK-NOFP16-GI-NEXT: mov s7, v3.s[2] -; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v4.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[1], v5.s[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v6.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[2], v7.s[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NOFP16-GI-NEXT: fmin v2.4s, v2.4s, v3.4s -; CHECK-NOFP16-GI-NEXT: fmin v0.4s, v0.4s, v1.4s -; CHECK-NOFP16-GI-NEXT: mov s1, v2.s[1] -; CHECK-NOFP16-GI-NEXT: mov s3, v2.s[2] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v1.s[0] ; CHECK-NOFP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-NOFP16-GI-NEXT: mov h4, v0.h[2] +; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0] +; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[2] ; CHECK-NOFP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v3.s[0] +; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v4.4s, v4.4h ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v4.h[0] -; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v2.4s +; CHECK-NOFP16-GI-NEXT: fmin v1.4s, v2.4s, v4.4s +; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v3.h[0] +; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v1.4s ; CHECK-NOFP16-GI-NEXT: mov v0.h[3], v5.h[0] ; CHECK-NOFP16-GI-NEXT: mov h2, v1.h[1] ; CHECK-NOFP16-GI-NEXT: mov v0.h[4], v1.h[0] @@ -733,46 +678,7 @@ ; ; CHECK-FP16-GI-LABEL: min_v7f16: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h16, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[2] -; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[6] -; CHECK-FP16-GI-NEXT: mov v1.h[1], v16.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[2], v3.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[2], v17.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[3], v18.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[4], v5.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[4], v19.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[5], v6.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[5], v20.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[6], v7.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[6], v21.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[7], v0.h[0] ; CHECK-FP16-GI-NEXT: fmin v0.8h, v0.8h, v1.8h -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[2], v2.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[3], v3.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[5], v5.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[6], v6.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0] ; CHECK-FP16-GI-NEXT: ret entry: %c = call <7 x half> @llvm.minimum.v7f16(<7 x half> %a, <7 x half> %b) @@ -849,42 +755,27 @@ ; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[5] ; CHECK-NOFP16-GI-NEXT: mov h4, v1.h[4] ; CHECK-NOFP16-GI-NEXT: mov h5, v1.h[5] -; CHECK-NOFP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-NOFP16-GI-NEXT: mov h7, v1.h[6] -; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v6.4s, v0.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v7.4s, v1.4h +; CHECK-NOFP16-GI-NEXT: mov h0, v0.h[6] +; CHECK-NOFP16-GI-NEXT: mov h1, v1.h[6] ; CHECK-NOFP16-GI-NEXT: mov v2.h[1], v3.h[0] ; CHECK-NOFP16-GI-NEXT: mov v4.h[1], v5.h[0] -; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v6.h[0] -; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v7.h[0] +; CHECK-NOFP16-GI-NEXT: fmax v3.4s, v6.4s, v7.4s +; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v0.h[0] +; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v1.h[0] +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v3.4s ; CHECK-NOFP16-GI-NEXT: mov v2.h[3], v0.h[0] -; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0] -; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h -; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v4.4h -; CHECK-NOFP16-GI-NEXT: mov s4, v2.s[1] -; CHECK-NOFP16-GI-NEXT: mov s6, v2.s[2] -; CHECK-NOFP16-GI-NEXT: mov s5, v3.s[1] -; CHECK-NOFP16-GI-NEXT: mov s7, v3.s[2] -; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v4.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[1], v5.s[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v6.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[2], v7.s[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NOFP16-GI-NEXT: fmax v2.4s, v2.4s, v3.4s -; CHECK-NOFP16-GI-NEXT: fmax v0.4s, v0.4s, v1.4s -; CHECK-NOFP16-GI-NEXT: mov s1, v2.s[1] -; CHECK-NOFP16-GI-NEXT: mov s3, v2.s[2] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v1.s[0] ; CHECK-NOFP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-NOFP16-GI-NEXT: mov h4, v0.h[2] +; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0] +; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[2] ; CHECK-NOFP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v3.s[0] +; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v4.4s, v4.4h ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v4.h[0] -; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v2.4s +; CHECK-NOFP16-GI-NEXT: fmax v1.4s, v2.4s, v4.4s +; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v3.h[0] +; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v1.4s ; CHECK-NOFP16-GI-NEXT: mov v0.h[3], v5.h[0] ; CHECK-NOFP16-GI-NEXT: mov h2, v1.h[1] ; CHECK-NOFP16-GI-NEXT: mov v0.h[4], v1.h[0] @@ -896,46 +787,7 @@ ; ; CHECK-FP16-GI-LABEL: max_v7f16: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h16, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[2] -; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[6] -; CHECK-FP16-GI-NEXT: mov v1.h[1], v16.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[2], v3.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[2], v17.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[3], v18.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[4], v5.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[4], v19.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[5], v6.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[5], v20.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[6], v7.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[6], v21.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[7], v0.h[0] ; CHECK-FP16-GI-NEXT: fmax v0.8h, v0.8h, v1.8h -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[2], v2.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[3], v3.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[5], v5.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[6], v6.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0] ; CHECK-FP16-GI-NEXT: ret entry: %c = call <7 x half> @llvm.maximum.v7f16(<7 x half> %a, <7 x half> %b) Index: llvm/test/CodeGen/AArch64/fminmax.ll =================================================================== --- llvm/test/CodeGen/AArch64/fminmax.ll +++ llvm/test/CodeGen/AArch64/fminmax.ll @@ -243,60 +243,20 @@ } define <3 x float> @min_v3f32(<3 x float> %a, <3 x float> %b) { -; CHECK-SD-LABEL: min_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fminnm v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: min_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: mov s4, v0.s[2] -; CHECK-GI-NEXT: mov s5, v1.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v4.s[0] -; CHECK-GI-NEXT: mov v1.s[2], v5.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NEXT: fminnm v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: min_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.minnum.v3f32(<3 x float> %a, <3 x float> %b) ret <3 x float> %c } define <3 x float> @max_v3f32(<3 x float> %a, <3 x float> %b) { -; CHECK-SD-LABEL: max_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: max_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: mov s4, v0.s[2] -; CHECK-GI-NEXT: mov s5, v1.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v4.s[0] -; CHECK-GI-NEXT: mov v1.s[2], v5.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: max_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) ret <3 x float> %c @@ -686,42 +646,27 @@ ; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[5] ; CHECK-NOFP16-GI-NEXT: mov h4, v1.h[4] ; CHECK-NOFP16-GI-NEXT: mov h5, v1.h[5] -; CHECK-NOFP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-NOFP16-GI-NEXT: mov h7, v1.h[6] -; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v6.4s, v0.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v7.4s, v1.4h +; CHECK-NOFP16-GI-NEXT: mov h0, v0.h[6] +; CHECK-NOFP16-GI-NEXT: mov h1, v1.h[6] ; CHECK-NOFP16-GI-NEXT: mov v2.h[1], v3.h[0] ; CHECK-NOFP16-GI-NEXT: mov v4.h[1], v5.h[0] -; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v6.h[0] -; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v7.h[0] +; CHECK-NOFP16-GI-NEXT: fminnm v3.4s, v6.4s, v7.4s +; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v0.h[0] +; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v1.h[0] +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v3.4s ; CHECK-NOFP16-GI-NEXT: mov v2.h[3], v0.h[0] -; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0] -; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h -; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v4.4h -; CHECK-NOFP16-GI-NEXT: mov s4, v2.s[1] -; CHECK-NOFP16-GI-NEXT: mov s6, v2.s[2] -; CHECK-NOFP16-GI-NEXT: mov s5, v3.s[1] -; CHECK-NOFP16-GI-NEXT: mov s7, v3.s[2] -; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v4.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[1], v5.s[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v6.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[2], v7.s[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NOFP16-GI-NEXT: fminnm v2.4s, v2.4s, v3.4s -; CHECK-NOFP16-GI-NEXT: fminnm v0.4s, v0.4s, v1.4s -; CHECK-NOFP16-GI-NEXT: mov s1, v2.s[1] -; CHECK-NOFP16-GI-NEXT: mov s3, v2.s[2] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v1.s[0] ; CHECK-NOFP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-NOFP16-GI-NEXT: mov h4, v0.h[2] +; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0] +; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[2] ; CHECK-NOFP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v3.s[0] +; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v4.4s, v4.4h ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v4.h[0] -; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v2.4s +; CHECK-NOFP16-GI-NEXT: fminnm v1.4s, v2.4s, v4.4s +; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v3.h[0] +; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v1.4s ; CHECK-NOFP16-GI-NEXT: mov v0.h[3], v5.h[0] ; CHECK-NOFP16-GI-NEXT: mov h2, v1.h[1] ; CHECK-NOFP16-GI-NEXT: mov v0.h[4], v1.h[0] @@ -733,46 +678,7 @@ ; ; CHECK-FP16-GI-LABEL: min_v7f16: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h16, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[2] -; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[6] -; CHECK-FP16-GI-NEXT: mov v1.h[1], v16.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[2], v3.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[2], v17.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[3], v18.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[4], v5.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[4], v19.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[5], v6.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[5], v20.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[6], v7.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[6], v21.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[7], v0.h[0] ; CHECK-FP16-GI-NEXT: fminnm v0.8h, v0.8h, v1.8h -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[2], v2.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[3], v3.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[5], v5.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[6], v6.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0] ; CHECK-FP16-GI-NEXT: ret entry: %c = call <7 x half> @llvm.minnum.v7f16(<7 x half> %a, <7 x half> %b) @@ -849,42 +755,27 @@ ; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[5] ; CHECK-NOFP16-GI-NEXT: mov h4, v1.h[4] ; CHECK-NOFP16-GI-NEXT: mov h5, v1.h[5] -; CHECK-NOFP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-NOFP16-GI-NEXT: mov h7, v1.h[6] -; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v6.4s, v0.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v7.4s, v1.4h +; CHECK-NOFP16-GI-NEXT: mov h0, v0.h[6] +; CHECK-NOFP16-GI-NEXT: mov h1, v1.h[6] ; CHECK-NOFP16-GI-NEXT: mov v2.h[1], v3.h[0] ; CHECK-NOFP16-GI-NEXT: mov v4.h[1], v5.h[0] -; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v6.h[0] -; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v7.h[0] +; CHECK-NOFP16-GI-NEXT: fmaxnm v3.4s, v6.4s, v7.4s +; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v0.h[0] +; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v1.h[0] +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v3.4s ; CHECK-NOFP16-GI-NEXT: mov v2.h[3], v0.h[0] -; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0] -; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h -; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v4.4h -; CHECK-NOFP16-GI-NEXT: mov s4, v2.s[1] -; CHECK-NOFP16-GI-NEXT: mov s6, v2.s[2] -; CHECK-NOFP16-GI-NEXT: mov s5, v3.s[1] -; CHECK-NOFP16-GI-NEXT: mov s7, v3.s[2] -; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v4.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[1], v5.s[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v6.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[2], v7.s[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v3.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NOFP16-GI-NEXT: fmaxnm v2.4s, v2.4s, v3.4s -; CHECK-NOFP16-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-NOFP16-GI-NEXT: mov s1, v2.s[1] -; CHECK-NOFP16-GI-NEXT: mov s3, v2.s[2] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v1.s[0] ; CHECK-NOFP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-NOFP16-GI-NEXT: mov h4, v0.h[2] +; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0] +; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[2] ; CHECK-NOFP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v3.s[0] +; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h +; CHECK-NOFP16-GI-NEXT: fcvtl v4.4s, v4.4h ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v4.h[0] -; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v2.4s +; CHECK-NOFP16-GI-NEXT: fmaxnm v1.4s, v2.4s, v4.4s +; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v3.h[0] +; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v1.4s ; CHECK-NOFP16-GI-NEXT: mov v0.h[3], v5.h[0] ; CHECK-NOFP16-GI-NEXT: mov h2, v1.h[1] ; CHECK-NOFP16-GI-NEXT: mov v0.h[4], v1.h[0] @@ -896,46 +787,7 @@ ; ; CHECK-FP16-GI-LABEL: max_v7f16: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h16, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[2] -; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[6] -; CHECK-FP16-GI-NEXT: mov v1.h[1], v16.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[2], v3.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[2], v17.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[3], v18.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[4], v5.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[4], v19.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[5], v6.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[5], v20.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[6], v7.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[6], v21.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0] -; CHECK-FP16-GI-NEXT: mov v1.h[7], v0.h[0] ; CHECK-FP16-GI-NEXT: fmaxnm v0.8h, v0.8h, v1.8h -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[2], v2.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[3], v3.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[5], v5.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[6], v6.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0] ; CHECK-FP16-GI-NEXT: ret entry: %c = call <7 x half> @llvm.maxnum.v7f16(<7 x half> %a, <7 x half> %b) Index: llvm/test/CodeGen/AArch64/fpext.ll =================================================================== --- llvm/test/CodeGen/AArch64/fpext.ll +++ llvm/test/CodeGen/AArch64/fpext.ll @@ -180,26 +180,10 @@ } define <3 x float> @fpext_v3f16_v3f32(<3 x half> %a) { -; CHECK-SD-LABEL: fpext_v3f16_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fpext_v3f16_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] -; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fpext_v3f16_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: ret entry: %c = fpext <3 x half> %a to <3 x float> ret <3 x float> %c Index: llvm/test/CodeGen/AArch64/fptrunc.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptrunc.ll +++ llvm/test/CodeGen/AArch64/fptrunc.ll @@ -192,26 +192,10 @@ } define <3 x half> @fptrunc_v3f32_v3f16(<3 x float> %a) { -; CHECK-SD-LABEL: fptrunc_v3f32_v3f16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptrunc_v3f32_v3f16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptrunc_v3f32_v3f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret entry: %c = fptrunc <3 x float> %a to <3 x half> ret <3 x half> %c Index: llvm/test/CodeGen/AArch64/fsqrt.ll =================================================================== --- llvm/test/CodeGen/AArch64/fsqrt.ll +++ llvm/test/CodeGen/AArch64/fsqrt.ll @@ -115,25 +115,10 @@ } define <3 x float> @sqrt_v3f32(<3 x float> %a) { -; CHECK-SD-LABEL: sqrt_v3f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fsqrt v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: sqrt_v3f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: fsqrt v0.4s, v0.4s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov s2, v0.s[2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: sqrt_v3f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fsqrt v0.4s, v0.4s +; CHECK-NEXT: ret entry: %c = call <3 x float> @llvm.sqrt.v3f32(<3 x float> %a) ret <3 x float> %c @@ -212,32 +197,22 @@ ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: fsqrt v2.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0] ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s ; CHECK-GI-NOFP16-NEXT: fsqrt v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: fsqrt v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] -; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] -; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0] ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] @@ -248,33 +223,7 @@ ; ; CHECK-GI-FP16-LABEL: sqrt_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: fsqrt v0.8h, v0.8h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.sqrt.v7f16(<7 x half> %a)