Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -900,17 +900,21 @@
   // same sequence. Search for elements using findValueFromDefImpl.
   bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx,
                              GUnmerge *Unmerge, unsigned UnmergeIdxStart,
-                             unsigned NumElts, unsigned EltSize) {
+                             unsigned NumElts, unsigned EltSize,
+                             bool AllowUndef) {
     assert(MergeStartIdx + NumElts <= MI.getNumSources());
     for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) {
       unsigned EltUnmergeIdx;
       GUnmerge *EltUnmerge = findUnmergeThatDefinesReg(
           MI.getSourceReg(i), EltSize, EltUnmergeIdx);
       // Check if source i comes from the same Unmerge.
-      if (!EltUnmerge || EltUnmerge != Unmerge)
-        return false;
-      // Check that source i's def has same index in sequence in Unmerge.
-      if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart)
+      if (EltUnmerge == Unmerge) {
+        // Check that source i's def has the same index in the sequence in Unmerge.
+        if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart)
+          return false;
+      } else if (!AllowUndef ||
+                 MRI.getVRegDef(MI.getSourceReg(i))->getOpcode() !=
+                     TargetOpcode::G_IMPLICIT_DEF)
         return false;
     }
     return true;
   }
@@ -944,8 +948,10 @@
     //
     //   %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty)
     if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) {
-      if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize))
+      if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize,
+                                 /*AllowUndef=*/DstTy.isVector()))
         return false;
+
       replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer);
       DeadInsts.push_back(&MI);
       return true;
@@ -965,7 +971,7 @@
         (Elt0UnmergeIdx % NumMIElts == 0) &&
         getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
       if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
-                                 EltSize))
+                                 EltSize, /*AllowUndef=*/false))
         return false;
       MIB.setInstrAndDebugLoc(MI);
       auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg());
@@ -998,7 +1004,8 @@
       if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
           (EltUnmergeIdx != 0))
         return false;
-      if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize))
+      if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize,
+                                 /*AllowUndef=*/false))
         return false;
       ConcatSources.push_back(UnmergeI->getSourceReg());
     }
Index: llvm/test/CodeGen/AArch64/fabs.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fabs.ll
+++ llvm/test/CodeGen/AArch64/fabs.ll
@@ -115,25 +115,10 @@
 }
 
 define <3 x float> @fabs_v3f32(<3 x float> %a) {
-; CHECK-SD-LABEL: fabs_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fabs v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fabs_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: fabs v0.4s, v0.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fabs_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs v0.4s, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
   ret <3 x float> %c
@@ -213,32 +198,22 @@
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: fabs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
@@ -249,33 +224,7 @@
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v7f16:
 ; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: fabs v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.fabs.v7f16(<7 x half> %a)
Index: llvm/test/CodeGen/AArch64/fcvt.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fcvt.ll
+++ llvm/test/CodeGen/AArch64/fcvt.ll
@@ -115,25 +115,10 @@
 }
 
 define <3 x float> @ceil_v3f32(<3 x float> %a) {
-; CHECK-SD-LABEL: ceil_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: frintp v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ceil_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: frintp v0.4s, v0.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ceil_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintp v0.4s, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.ceil.v3f32(<3 x float> %a)
   ret <3 x float> %c
@@ -213,32 +198,22 @@
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintp v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: frintp v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: frintp v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frintp v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
@@ -249,33 +224,7 @@
 ;
 ; CHECK-GI-FP16-LABEL: ceil_v7f16:
 ; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: frintp v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.ceil.v7f16(<7 x half> %a)
@@ -622,25 +571,10 @@
 }
 
 define <3 x float> @floor_v3f32(<3 x float> %a) {
-; CHECK-SD-LABEL: floor_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: frintm v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: floor_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: frintm v0.4s, v0.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: floor_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintm v0.4s, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.floor.v3f32(<3 x float> %a)
   ret <3 x float> %c
@@ -720,32 +654,22 @@
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintm v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: frintm v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: frintm v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frintm v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
@@ -756,33 +680,7 @@
 ;
 ; CHECK-GI-FP16-LABEL: floor_v7f16:
 ; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: frintm v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.floor.v7f16(<7 x half> %a)
@@ -1129,25 +1027,10 @@
 }
 
 define <3 x float> @nearbyint_v3f32(<3 x float> %a) {
-; CHECK-SD-LABEL: nearbyint_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: frinti v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: nearbyint_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: frinti v0.4s, v0.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: nearbyint_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinti v0.4s, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.nearbyint.v3f32(<3 x float> %a)
   ret <3 x float> %c
@@ -1227,32 +1110,22 @@
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frinti v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: frinti v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: frinti v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frinti v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
@@ -1263,33 +1136,7 @@
 ;
 ; CHECK-GI-FP16-LABEL: nearbyint_v7f16:
 ; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: frinti v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.nearbyint.v7f16(<7 x half> %a)
@@ -1636,25 +1483,10 @@
 }
 
 define <3 x float> @roundeven_v3f32(<3 x float> %a) {
-; CHECK-SD-LABEL: roundeven_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: frintn v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: roundeven_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: frintn v0.4s, v0.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %a)
   ret <3 x float> %c
@@ -1734,32 +1566,22 @@
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintn v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: frintn v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: frintn v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frintn v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
@@ -1770,33 +1592,7 @@
 ;
 ; CHECK-GI-FP16-LABEL: roundeven_v7f16:
 ; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: frintn v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.roundeven.v7f16(<7 x half> %a)
@@ -2143,25 +1939,10 @@
 }
 
 define <3 x float> @rint_v3f32(<3 x float> %a) {
-; CHECK-SD-LABEL: rint_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: frintx v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rint_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: frintx v0.4s, v0.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rint_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintx v0.4s, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.rint.v3f32(<3 x float> %a)
   ret <3 x float> %c
@@ -2241,32 +2022,22 @@
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintx v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: frintx v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: frintx v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frintx v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
@@ -2277,33 +2048,7 @@
 ;
 ; CHECK-GI-FP16-LABEL: rint_v7f16:
 ; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: frintx v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.rint.v7f16(<7 x half> %a)
@@ -2650,25 +2395,10 @@
 }
 
 define <3 x float> @round_v3f32(<3 x float> %a) {
-; CHECK-SD-LABEL: round_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: frinta v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: round_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: frinta v0.4s, v0.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: round_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frinta v0.4s, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.round.v3f32(<3 x float> %a)
   ret <3 x float> %c
@@ -2748,32 +2478,22 @@
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frinta v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: frinta v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: frinta v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frinta v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
@@ -2784,33 +2504,7 @@
 ;
 ; CHECK-GI-FP16-LABEL: round_v7f16:
 ; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: frinta v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.round.v7f16(<7 x half> %a)
@@ -3157,25 +2851,10 @@
 }
 
 define <3 x float> @trunc_v3f32(<3 x float> %a) {
-; CHECK-SD-LABEL: trunc_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: frintz v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: trunc_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: frintz v0.4s, v0.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: trunc_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.trunc.v3f32(<3 x float> %a)
   ret <3 x float> %c
@@ -3255,32 +2934,22 @@
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: frintz v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: frintz v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: frintz v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: frintz v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
@@ -3291,33 +2960,7 @@
 ;
 ; CHECK-GI-FP16-LABEL: trunc_v7f16:
 ; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: frintz v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.trunc.v7f16(<7 x half> %a)
Index: llvm/test/CodeGen/AArch64/fminimummaximum.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fminimummaximum.ll
+++ llvm/test/CodeGen/AArch64/fminimummaximum.ll
@@ -243,60 +243,20 @@
 }
 
 define <3 x float> @min_v3f32(<3 x float> %a, <3 x float> %b) {
-; CHECK-SD-LABEL: min_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmin v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: min_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: mov s3, v1.s[1]
-; CHECK-GI-NEXT: mov s4, v0.s[2]
-; CHECK-GI-NEXT: mov s5, v1.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: mov v1.s[1], v3.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v4.s[0]
-; CHECK-GI-NEXT: mov v1.s[2], v5.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NEXT: fmin v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: min_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
   ret <3 x float> %c
 }
 
 define <3 x float> @max_v3f32(<3 x float> %a, <3 x float> %b) {
-; CHECK-SD-LABEL: max_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmax v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: max_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: mov s3, v1.s[1]
-; CHECK-GI-NEXT: mov s4, v0.s[2]
-; CHECK-GI-NEXT: mov s5, v1.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: mov v1.s[1], v3.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v4.s[0]
-; CHECK-GI-NEXT: mov v1.s[2], v5.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NEXT: fmax v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: max_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
   ret <3 x float> %c
@@ -686,42 +646,27 @@
 ; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[5]
 ; CHECK-NOFP16-GI-NEXT: mov h4, v1.h[4]
 ; CHECK-NOFP16-GI-NEXT: mov h5, v1.h[5]
-; CHECK-NOFP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-NOFP16-GI-NEXT: mov h7, v1.h[6]
-; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v6.4s, v0.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v7.4s, v1.4h
+; CHECK-NOFP16-GI-NEXT: mov h0, v0.h[6]
+; CHECK-NOFP16-GI-NEXT: mov h1, v1.h[6]
 ; CHECK-NOFP16-GI-NEXT: mov v2.h[1], v3.h[0]
 ; CHECK-NOFP16-GI-NEXT: mov v4.h[1], v5.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v6.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v7.h[0]
+; CHECK-NOFP16-GI-NEXT: fmin v3.4s, v6.4s, v7.4s
+; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v0.h[0]
+; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v1.h[0]
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v3.4s
 ; CHECK-NOFP16-GI-NEXT: mov v2.h[3], v0.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0]
-; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h
-; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v4.4h
-; CHECK-NOFP16-GI-NEXT: mov s4, v2.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s6, v2.s[2]
-; CHECK-NOFP16-GI-NEXT: mov s5, v3.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s7, v3.s[2]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v4.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[1], v5.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v6.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[2], v7.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-NOFP16-GI-NEXT: fmin v2.4s, v2.4s, v3.4s
-; CHECK-NOFP16-GI-NEXT: fmin v0.4s, v0.4s, v1.4s
-; CHECK-NOFP16-GI-NEXT: mov s1, v2.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s3, v2.s[2]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v1.s[0]
 ; CHECK-NOFP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-NOFP16-GI-NEXT: mov h4, v0.h[2]
+; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0]
+; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[2]
 ; CHECK-NOFP16-GI-NEXT: mov h5, v0.h[3]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v4.4s, v4.4h
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v4.h[0]
-; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v2.4s
+; CHECK-NOFP16-GI-NEXT: fmin v1.4s, v2.4s, v4.4s
+; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v1.4s
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[3], v5.h[0]
 ; CHECK-NOFP16-GI-NEXT: mov h2, v1.h[1]
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[4], v1.h[0]
@@ -733,46 +678,7 @@
 ;
 ; CHECK-FP16-GI-LABEL: min_v7f16:
 ; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h7, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov h16, v1.h[1]
-; CHECK-FP16-GI-NEXT: mov h17, v1.h[2]
-; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-FP16-GI-NEXT: mov h18, v1.h[3]
-; CHECK-FP16-GI-NEXT: mov h19, v1.h[4]
-; CHECK-FP16-GI-NEXT: mov h20, v1.h[5]
-; CHECK-FP16-GI-NEXT: mov h21, v1.h[6]
-; CHECK-FP16-GI-NEXT: mov v1.h[1], v16.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[2], v3.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[2], v17.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[3], v18.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[4], v5.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[4], v19.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[5], v6.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[5], v20.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[6], v7.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[6], v21.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[7], v0.h[0]
 ; CHECK-FP16-GI-NEXT: fmin v0.8h, v0.8h, v1.8h
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-FP16-GI-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.minimum.v7f16(<7 x half> %a, <7 x half> %b)
@@ -849,42 +755,27 @@
 ; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[5]
 ; CHECK-NOFP16-GI-NEXT: mov h4, v1.h[4]
 ; CHECK-NOFP16-GI-NEXT: mov h5, v1.h[5]
-; CHECK-NOFP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-NOFP16-GI-NEXT: mov h7, v1.h[6]
-; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v6.4s, v0.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v7.4s, v1.4h
+; CHECK-NOFP16-GI-NEXT: mov h0, v0.h[6]
+; CHECK-NOFP16-GI-NEXT: mov h1, v1.h[6]
 ; CHECK-NOFP16-GI-NEXT: mov v2.h[1], v3.h[0]
 ; CHECK-NOFP16-GI-NEXT: mov v4.h[1], v5.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v6.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v7.h[0]
+; CHECK-NOFP16-GI-NEXT: fmax v3.4s, v6.4s, v7.4s
+; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v0.h[0]
+; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v1.h[0]
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v3.4s
 ; CHECK-NOFP16-GI-NEXT: mov v2.h[3], v0.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0]
-; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h
-; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v4.4h
-; CHECK-NOFP16-GI-NEXT: mov s4, v2.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s6, v2.s[2]
-; CHECK-NOFP16-GI-NEXT: mov s5, v3.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s7, v3.s[2]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v4.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[1], v5.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v6.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[2], v7.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-NOFP16-GI-NEXT: fmax v2.4s, v2.4s, v3.4s
-; CHECK-NOFP16-GI-NEXT: fmax v0.4s, v0.4s, v1.4s
-; CHECK-NOFP16-GI-NEXT: mov s1, v2.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s3, v2.s[2]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v1.s[0]
 ; CHECK-NOFP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-NOFP16-GI-NEXT: mov h4, v0.h[2]
+; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0]
+; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[2]
 ; CHECK-NOFP16-GI-NEXT: mov h5, v0.h[3]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v4.4s, v4.4h
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v4.h[0]
-; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v2.4s
+; CHECK-NOFP16-GI-NEXT: fmax v1.4s, v2.4s, v4.4s
+; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v1.4s
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[3], v5.h[0]
 ; CHECK-NOFP16-GI-NEXT: mov h2, v1.h[1]
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[4], v1.h[0]
@@ -896,46 +787,7 @@
 ;
 ; CHECK-FP16-GI-LABEL: max_v7f16:
 ; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h7, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov h16, v1.h[1]
-; CHECK-FP16-GI-NEXT: mov h17, v1.h[2]
-; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-FP16-GI-NEXT: mov h18, v1.h[3]
-; CHECK-FP16-GI-NEXT: mov h19, v1.h[4]
-; CHECK-FP16-GI-NEXT: mov h20, v1.h[5]
-; CHECK-FP16-GI-NEXT: mov h21, v1.h[6]
-; CHECK-FP16-GI-NEXT: mov v1.h[1], v16.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[2], v3.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[2], v17.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[3], v18.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[4], v5.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[4], v19.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[5], v6.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[5], v20.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[6], v7.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[6], v21.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[7], v0.h[0]
 ; CHECK-FP16-GI-NEXT: fmax v0.8h, v0.8h, v1.8h
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-FP16-GI-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.maximum.v7f16(<7 x half> %a, <7 x half> %b)
Index: llvm/test/CodeGen/AArch64/fminmax.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fminmax.ll
+++ llvm/test/CodeGen/AArch64/fminmax.ll
@@ -243,60 +243,20 @@
 }
 
 define <3 x float> @min_v3f32(<3 x float> %a, <3 x float> %b) {
-; CHECK-SD-LABEL: min_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fminnm v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: min_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: mov s3, v1.s[1]
-; CHECK-GI-NEXT: mov s4, v0.s[2]
-; CHECK-GI-NEXT: mov s5, v1.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: mov v1.s[1], v3.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v4.s[0]
-; CHECK-GI-NEXT: mov v1.s[2], v5.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NEXT: fminnm v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: min_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.minnum.v3f32(<3 x float> %a, <3 x float> %b)
   ret <3 x float> %c
 }
 
 define <3 x float> @max_v3f32(<3 x float> %a, <3 x float> %b) {
-; CHECK-SD-LABEL: max_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: max_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: mov s3, v1.s[1]
-; CHECK-GI-NEXT: mov s4, v0.s[2]
-; CHECK-GI-NEXT: mov s5, v1.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: mov v1.s[1], v3.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v4.s[0]
-; CHECK-GI-NEXT: mov v1.s[2], v5.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: max_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b)
   ret <3 x float> %c
@@ -686,42 +646,27 @@
 ; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[5]
 ; CHECK-NOFP16-GI-NEXT: mov h4, v1.h[4]
 ; CHECK-NOFP16-GI-NEXT: mov h5, v1.h[5]
-; CHECK-NOFP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-NOFP16-GI-NEXT: mov h7, v1.h[6]
-; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v6.4s, v0.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v7.4s, v1.4h
+; CHECK-NOFP16-GI-NEXT: mov h0, v0.h[6]
+; CHECK-NOFP16-GI-NEXT: mov h1, v1.h[6]
 ; CHECK-NOFP16-GI-NEXT: mov v2.h[1], v3.h[0]
 ; CHECK-NOFP16-GI-NEXT: mov v4.h[1], v5.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v6.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v7.h[0]
+; CHECK-NOFP16-GI-NEXT: fminnm v3.4s, v6.4s, v7.4s
+; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v0.h[0]
+; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v1.h[0]
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v3.4s
 ; CHECK-NOFP16-GI-NEXT: mov v2.h[3], v0.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0]
-; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h
-; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v4.4h
-; CHECK-NOFP16-GI-NEXT: mov s4, v2.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s6, v2.s[2]
-; CHECK-NOFP16-GI-NEXT: mov s5, v3.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s7, v3.s[2]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v4.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[1], v5.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v6.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[2], v7.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-NOFP16-GI-NEXT: fminnm v2.4s, v2.4s, v3.4s
-; CHECK-NOFP16-GI-NEXT: fminnm v0.4s, v0.4s, v1.4s
-; CHECK-NOFP16-GI-NEXT: mov s1, v2.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s3, v2.s[2]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v1.s[0]
 ; CHECK-NOFP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-NOFP16-GI-NEXT: mov h4, v0.h[2]
+; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0]
+; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[2]
 ; CHECK-NOFP16-GI-NEXT: mov h5, v0.h[3]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v4.4s, v4.4h
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v4.h[0]
-; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v2.4s
+; CHECK-NOFP16-GI-NEXT: fminnm v1.4s, v2.4s, v4.4s
+; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v1.4s
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[3], v5.h[0]
 ; CHECK-NOFP16-GI-NEXT: mov h2, v1.h[1]
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[4], v1.h[0]
@@ -733,46 +678,7 @@
 ;
 ; CHECK-FP16-GI-LABEL: min_v7f16:
 ; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h7, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov h16, v1.h[1]
-; CHECK-FP16-GI-NEXT: mov h17, v1.h[2]
-; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-FP16-GI-NEXT: mov h18, v1.h[3]
-; CHECK-FP16-GI-NEXT: mov h19, v1.h[4]
-; CHECK-FP16-GI-NEXT: mov h20, v1.h[5]
-; CHECK-FP16-GI-NEXT: mov h21, v1.h[6]
-; CHECK-FP16-GI-NEXT: mov v1.h[1], v16.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[2], v3.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[2], v17.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[3], v18.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[4], v5.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[4], v19.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[5], v6.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[5], v20.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[6], v7.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[6], v21.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[7], v0.h[0]
 ; CHECK-FP16-GI-NEXT: fminnm v0.8h, v0.8h, v1.8h
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-FP16-GI-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.minnum.v7f16(<7 x half> %a, <7 x half> %b)
@@ -849,42 +755,27 @@
 ; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[5]
 ; CHECK-NOFP16-GI-NEXT: mov h4, v1.h[4]
 ; CHECK-NOFP16-GI-NEXT: mov h5, v1.h[5]
-; CHECK-NOFP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-NOFP16-GI-NEXT: mov h7, v1.h[6]
-; CHECK-NOFP16-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v6.4s, v0.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v7.4s, v1.4h
+; CHECK-NOFP16-GI-NEXT: mov h0, v0.h[6]
+; CHECK-NOFP16-GI-NEXT: mov h1, v1.h[6]
 ; CHECK-NOFP16-GI-NEXT: mov v2.h[1], v3.h[0]
 ; CHECK-NOFP16-GI-NEXT: mov v4.h[1], v5.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v6.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v7.h[0]
+; CHECK-NOFP16-GI-NEXT: fmaxnm v3.4s, v6.4s, v7.4s
+; CHECK-NOFP16-GI-NEXT: mov v2.h[2], v0.h[0]
+; CHECK-NOFP16-GI-NEXT: mov v4.h[2], v1.h[0]
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v3.4s
 ; CHECK-NOFP16-GI-NEXT: mov v2.h[3], v0.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0]
-; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h
-; CHECK-NOFP16-GI-NEXT: fcvtl v3.4s, v4.4h
-; CHECK-NOFP16-GI-NEXT: mov s4, v2.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s6, v2.s[2]
-; CHECK-NOFP16-GI-NEXT: mov s5, v3.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s7, v3.s[2]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v4.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[1], v5.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v6.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[2], v7.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v3.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-NOFP16-GI-NEXT: fmaxnm v2.4s, v2.4s, v3.4s
-; CHECK-NOFP16-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
-; CHECK-NOFP16-GI-NEXT: mov s1, v2.s[1]
-; CHECK-NOFP16-GI-NEXT: mov s3, v2.s[2]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-NOFP16-GI-NEXT: mov v2.s[1], v1.s[0]
 ; CHECK-NOFP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-NOFP16-GI-NEXT: mov h4, v0.h[2]
+; CHECK-NOFP16-GI-NEXT: mov v4.h[3], v0.h[0]
+; CHECK-NOFP16-GI-NEXT: mov h3, v0.h[2]
 ; CHECK-NOFP16-GI-NEXT: mov h5, v0.h[3]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-NOFP16-GI-NEXT: fcvtl v2.4s, v2.4h
+; CHECK-NOFP16-GI-NEXT: fcvtl v4.4s, v4.4h
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-NOFP16-GI-NEXT: mov v2.s[3], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v4.h[0]
-; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v2.4s
+; CHECK-NOFP16-GI-NEXT: fmaxnm v1.4s, v2.4s, v4.4s
+; CHECK-NOFP16-GI-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NOFP16-GI-NEXT: fcvtn v1.4h, v1.4s
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[3], v5.h[0]
 ; CHECK-NOFP16-GI-NEXT: mov h2, v1.h[1]
 ; CHECK-NOFP16-GI-NEXT: mov v0.h[4], v1.h[0]
@@ -896,46 +787,7 @@
 ;
 ; CHECK-FP16-GI-LABEL: max_v7f16:
 ; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h7, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov h16, v1.h[1]
-; CHECK-FP16-GI-NEXT: mov h17, v1.h[2]
-; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-FP16-GI-NEXT: mov h18, v1.h[3]
-; CHECK-FP16-GI-NEXT: mov h19, v1.h[4]
-; CHECK-FP16-GI-NEXT: mov h20, v1.h[5]
-; CHECK-FP16-GI-NEXT: mov h21, v1.h[6]
-; CHECK-FP16-GI-NEXT: mov v1.h[1], v16.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[2], v3.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[2], v17.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[3], v18.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[4], v5.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[4], v19.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[5], v6.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[5], v20.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[6], v7.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[6], v21.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0]
-; CHECK-FP16-GI-NEXT: mov v1.h[7], v0.h[0]
 ; CHECK-FP16-GI-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
-; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
-; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
-; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
-; CHECK-FP16-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-FP16-GI-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-FP16-GI-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.maxnum.v7f16(<7 x half> %a, <7 x half> %b)
Index: llvm/test/CodeGen/AArch64/fpext.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fpext.ll
+++ llvm/test/CodeGen/AArch64/fpext.ll
@@ -180,26 +180,10 @@
 }
 
 define <3 x float> @fpext_v3f16_v3f32(<3 x half> %a) {
-; CHECK-SD-LABEL: fpext_v3f16_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fpext_v3f16_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
-; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fpext_v3f16_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: ret
 entry:
   %c = fpext <3 x half> %a to <3 x float>
   ret <3 x float> %c
Index: llvm/test/CodeGen/AArch64/fptrunc.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fptrunc.ll
+++ llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -192,26 +192,10 @@
 }
 
 define <3 x half> @fptrunc_v3f32_v3f16(<3 x float> %a) {
-; CHECK-SD-LABEL: fptrunc_v3f32_v3f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptrunc_v3f32_v3f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptrunc_v3f32_v3f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = fptrunc <3 x float> %a to <3 x half>
   ret <3 x half> %c
Index: llvm/test/CodeGen/AArch64/fsqrt.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fsqrt.ll
+++ llvm/test/CodeGen/AArch64/fsqrt.ll
@@ -115,25 +115,10 @@
 }
 
 define <3 x float> @sqrt_v3f32(<3 x float> %a) {
-; CHECK-SD-LABEL: sqrt_v3f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fsqrt v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sqrt_v3f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: fsqrt v0.4s, v0.4s
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sqrt_v3f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fsqrt v0.4s, v0.4s
+; CHECK-NEXT: ret
 entry:
   %c = call <3 x float> @llvm.sqrt.v3f32(<3 x float> %a)
   ret <3 x float> %c
@@ -212,32 +197,22 @@
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: fsqrt v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT: fsqrt v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: fsqrt v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
@@ -248,33 +223,7 @@
 ;
 ; CHECK-GI-FP16-LABEL: sqrt_v7f16:
 ; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: fsqrt v0.8h, v0.8h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
-; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
 ; CHECK-GI-FP16-NEXT: ret
 entry:
   %c = call <7 x half> @llvm.sqrt.v7f16(<7 x half> %a)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
@@ -692,16 +692,15 @@
 ; GFX6-NEXT: s_mov_b32 s0, -1
 ; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
+; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
 ; GFX6-NEXT: s_mov_b32 s1, 0xffff
 ; GFX6-NEXT: s_or_b32 s6, s5, s6
 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
-; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
-; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
 ; GFX6-NEXT: s_or_b32 s2, s2, s3
 ; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
-; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
 ; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
 ; GFX6-NEXT: s_lshr_b32 s2, s0, 16
 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
@@ -746,14 +745,13 @@
 ; GFX6-NEXT: s_mov_b32 s0, -1
 ; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
+; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
 ; GFX6-NEXT: s_mov_b32 s1, 0xffff
 ; GFX6-NEXT: s_or_b32 s6, s5, s6
 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
-; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
-; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
-; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
 ; GFX6-NEXT: s_or_b32 s2, s2, s3
 ; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
 ; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
@@ -797,31 +795,29 @@
 ; GFX6-LABEL: s_andn2_v3i16_multi_use:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
-; GFX6-NEXT: s_mov_b32 s0, s2
-; GFX6-NEXT: s_mov_b32 s1, s3
-; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: s_mov_b32 s0, -1
 ; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_mov_b32 s3, 0xffff
+; GFX6-NEXT: s_mov_b32 s1, 0xffff
 ; GFX6-NEXT: s_or_b32 s6, s5, s6
 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
-; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
-; GFX6-NEXT: s_xor_b64 s[2:3], s[6:7], s[2:3]
-; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
+; GFX6-NEXT: s_and_b32 s1, s3, 0xffff
+; GFX6-NEXT: s_and_b32 s0, s2, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT: s_or_b32 s0, s0, s1
 ; GFX6-NEXT: s_and_b32 s1, s4, 0xffff
-; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
-; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
-; GFX6-NEXT: s_lshr_b32 s4, s0, 16
-; GFX6-NEXT: s_lshr_b32 s5, s2, 16
+; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
+; GFX6-NEXT: s_lshr_b32 s2, s0, 16
+; GFX6-NEXT: s_lshr_b32 s5, s6, 16
 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX6-NEXT: s_lshl_b32 s4, s4, 16
-; GFX6-NEXT: s_or_b32 s0, s0, s4
-; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX6-NEXT: s_lshl_b32 s4, s5, 16
;
GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_and_b32 s2, s6, 0xffff +; GFX6-NEXT: s_lshl_b32 s3, s5, 16 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff -; GFX6-NEXT: s_or_b32 s2, s2, s4 +; GFX6-NEXT: s_or_b32 s2, s2, s3 +; GFX6-NEXT: s_and_b32 s3, s7, 0xffff ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_andn2_v3i16_multi_use: @@ -875,16 +871,15 @@ ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5 ; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3 -; GFX6-NEXT: v_xor_b32_e32 v4, 0xfff5, v4 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_xor_b32_e32 v4, 0xfff5, v4 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2 -; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4 ; GFX6-NEXT: v_and_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_and_b32_e32 v2, v1, v2 +; GFX6-NEXT: v_and_b32_e32 v2, v1, v4 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll @@ -609,11 +609,9 @@ ; GFX8-LABEL: v_bswap_v3i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX8-NEXT: s_mov_b32 s4, 0x2030001 -; GFX8-NEXT: v_perm_b32 v1, 0, v1, s4 ; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4 -; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX8-NEXT: v_perm_b32 v1, 0, v1, s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_bswap_v3i16: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll @@ -145,8 +145,7 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; GFX8-NEXT: v_mov_b32_e32 v4, 0x80008000 -; GFX8-NEXT: v_xor_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3 @@ -176,8 +175,7 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX8-NEXT: v_mov_b32_e32 v4, 0x80008000 -; GFX8-NEXT: v_xor_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX8-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 ; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2 ; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -4390,7 +4390,6 @@ ; GFX8-NEXT: s_lshr_b32 s4, s6, s4 ; GFX8-NEXT: s_or_b32 s2, s2, s4 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s3 -; GFX8-NEXT: s_and_b32 s5, s5, 0xffff ; GFX8-NEXT: s_lshl_b32 s1, s1, 1 ; GFX8-NEXT: s_lshr_b32 s4, s4, 15 ; GFX8-NEXT: s_or_b32 s1, s1, s4 @@ -4640,8 +4639,7 @@ ; GFX8-NEXT: 
v_lshlrev_b16_e32 v1, 1, v1 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v3 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: v_mov_b32_e32 v4, -1 -; GFX8-NEXT: v_xor_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 ; GFX8-NEXT: v_and_b32_e32 v5, 15, v4 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -451,42 +451,30 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND2]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 @@ -535,70 +523,60 @@ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND2]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL3]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[AND7]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND5]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST13]], [[C]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]] - ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]] - ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x 
s16>), [[UV13]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -22,6 +22,7 @@ ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) ; SI-NEXT: $vgpr0 = COPY [[FSHR1]](s32) + ; ; VI-LABEL: name: test_fshl_s32_s32 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -35,6 +36,7 @@ ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) ; VI-NEXT: $vgpr0 = COPY [[FSHR1]](s32) + ; ; GFX9-LABEL: name: test_fshl_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -82,6 +84,7 @@ ; SI-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fshl_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -103,6 +106,7 @@ ; VI-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fshl_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -163,6 +167,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fshl_s16_s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -184,6 +189,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fshl_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -274,6 +280,7 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_fshl_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -316,6 +323,7 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fshl_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -368,6 +376,7 @@ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_fshl_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -387,6 +396,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_fshl_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ 
-442,6 +452,7 @@ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_fshl_s8_s8 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -453,24 +464,25 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND2]](s16) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C3]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[AND6]](s16) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C3]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND6]], [[AND5]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_fshl_s8_s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -482,20 +494,20 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND2]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C3]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) 
= G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[AND6]](s16) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C3]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND6]], [[AND5]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -559,6 +571,7 @@ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_fshl_s24_s24 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -601,6 +614,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_fshl_s24_s24 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -750,6 +764,7 @@ ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; ; VI-LABEL: name: test_fshl_v3s16_v3s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; VI-NEXT: {{ $}} @@ -826,6 +841,7 @@ ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fshl_v3s16_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} @@ -835,55 +851,45 @@ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY4]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY4]], [[BUILD_VECTOR]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = 
G_CONSTANT i16 -1 - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BUILD_VECTOR4]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[AND]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY2]], [[BUILD_VECTOR5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY2]], [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]] - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR6]] - ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR7]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR6]] - ; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[AND2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR1]], [[BUILD_VECTOR8]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY5]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY1]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY3]], [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR2]], [[AND3]](<2 x s16>) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]] - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C3]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR10]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR9]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -1009,6 +1015,7 @@ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fshl_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1086,6 +1093,7 @@ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fshl_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -19,6 +19,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) ; SI-NEXT: $vgpr0 = COPY [[FSHR]](s32) + ; ; VI-LABEL: name: test_fshr_s32_s32 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -27,6 +28,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) ; VI-NEXT: $vgpr0 = COPY [[FSHR]](s32) + ; ; GFX9-LABEL: name: test_fshr_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -61,6 +63,7 @@ ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fshr_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -74,6 +77,7 
@@ ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fshr_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -125,6 +129,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fshr_s16_s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -146,6 +151,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fshr_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -279,6 +285,7 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; ; VI-LABEL: name: test_fshr_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -347,6 +354,7 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fshr_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -399,6 +407,7 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_fshr_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -418,6 +427,7 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_fshr_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -472,6 +482,7 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_fshr_s8_s8 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -485,21 +496,22 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C4]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND2]](s16) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND2]](s16) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND3]](s16) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], 
[[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_fshr_s8_s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -586,6 +598,7 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_fshr_s24_s24 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -627,6 +640,7 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_fshr_s24_s24 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -701,150 +715,145 @@ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C6]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND2]](s16) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[ZEXT1]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; 
SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY6]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[ZEXT3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL5]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST6]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL4]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], 
[[BITCAST4]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C4]] - ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C5]] - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C1]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY11]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND10]](s16) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY11]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C4]] - ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C5]] - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY12]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[ZEXT7]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: 
[[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY12]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) - ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[COPY13]](s32) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND18]](s16) - ; SI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND20]], [[ZEXT9]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] + ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY13]](s32) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) + ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C5]] + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT9]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL10]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST5]], [[BITCAST8]] - ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) - ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; SI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C4]] - ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C5]] - ; SI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) - ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[SHL9]], [[C1]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[COPY17]](s32) - 
; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND22]](s16) - ; SI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C1]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND24]], [[ZEXT11]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL9]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) + ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C2]] + ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C3]] + ; SI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) + ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[SHL8]], [[C5]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) + ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT11]](s32) ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[TRUNC13]], [[TRUNC14]] + ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC13]], [[TRUNC14]] ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL12]] - ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL13]] - ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C1]] - ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND26]], [[SHL14]] - ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST12]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST13]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST14]](<2 x s16>) + ; 
SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C]](s32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL11]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C]](s32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL12]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C5]] + ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C]](s32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]] + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x s16>) + ; ; VI-LABEL: name: test_fshr_v3s16_v3s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; VI-NEXT: {{ $}} @@ -867,101 +876,96 @@ ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND1]](s16) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND2]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND3]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND4]](s16) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR5]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C3]](s16) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY6]], 
[[SHL5]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST6]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR5]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C1]](s16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST4]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C4]] - ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C5]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND5]](s16) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C3]](s16) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND6]](s16) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR8]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C4]] - ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C5]] - ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[OR2]], [[AND7]](s16) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[SHL4]], [[C3]](s16) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[LSHR9]], [[AND8]](s16) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[SHL7]], [[LSHR10]] - ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND9]](s16) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[LSHR11]], 
[[AND10]](s16) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR12]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL10]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST5]], [[BITCAST8]] - ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]] - ; VI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]] - ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[OR6]], [[AND11]](s16) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[SHL9]], [[C3]](s16) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[LSHR13]], [[AND12]](s16) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[SHL11]], [[LSHR14]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C1]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL5]], [[LSHR8]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C1]](s16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[LSHR9]], [[AND7]](s16) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR10]] + ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] + ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND8]](s16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[LSHR11]], [[AND9]](s16) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[SHL7]], [[LSHR12]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C1]](s16) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL9]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C2]] + ; VI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C3]] + ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[OR5]], [[AND10]](s16) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[SHL8]], [[C1]](s16) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[LSHR13]], [[AND11]](s16) + ; 
VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[SHL10]], [[LSHR14]] ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL12]] - ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL13]] - ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C1]] - ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL14]] - ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST12]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST13]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST14]](<2 x s16>) + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL11]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C4]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL12]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C4]] + ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C4]] + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL13]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fshr_v3s16_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} @@ -971,55 +975,45 @@ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY4]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY4]], [[BUILD_VECTOR]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BUILD_VECTOR4]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR5]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY2]], [[AND]](<2 x s16>) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR]] - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR6]] - ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR7]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR6]] - ; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR8]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY5]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BUILD_VECTOR4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY1]], [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x 
s16>) = G_LSHR [[BUILD_VECTOR1]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY3]], [[AND2]](<2 x s16>) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR1]] - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C3]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR10]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR9]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -1230,6 +1224,7 @@ ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fshr_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1359,6 +1354,7 @@ ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fshr_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll =================================================================== --- 
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -23,6 +23,7 @@ ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16) ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -75,6 +76,7 @@ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -132,6 +134,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_v3f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -199,6 +202,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -247,6 +251,7 @@ ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_tfe_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -306,6 +311,7 @@ ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_tfe_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -372,6 +378,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_tfe_v3f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -393,17 +400,9 @@ ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 - ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -448,6 +447,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_tfe_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -490,6 +490,7 @@ ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -530,6 +531,7 @@ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -563,6 +565,7 @@ ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -609,6 +612,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -668,6 +672,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -717,6 +722,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -774,6 +780,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; 
PACKED-LABEL: name: image_load_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -832,6 +839,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -887,6 +895,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -924,6 +933,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -962,6 +972,7 @@ ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -1021,6 +1032,7 @@ ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -1081,6 +1093,7 @@ ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -1146,6 +1159,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -1215,6 +1229,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -1284,6 +1299,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -1356,6 +1372,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -1423,6 +1440,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -1488,6 +1506,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 @@ -1553,6 +1572,7 @@ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -514,10 +514,10 @@ ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) + ; GCN-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr1 = COPY [[UV3]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -546,10 +546,10 @@ ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, 
[[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) + ; GCN-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr1 = COPY [[UV3]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -578,11 +578,11 @@ ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) + ; GCN-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV3]](s32) + ; GCN-NEXT: $vgpr1 = COPY [[UV4]](s32) + ; GCN-NEXT: $vgpr2 = COPY [[UV5]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -677,12 +677,12 @@ ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = COPY [[UV4]](s32) + ; GCN-NEXT: $vgpr1 = COPY [[UV5]](s32) + ; GCN-NEXT: $vgpr2 = COPY [[UV6]](s32) + ; GCN-NEXT: $vgpr3 = COPY [[UV7]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -26,6 +26,7 @@ ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 + ; ; GFX81-LABEL: name: image_store_f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 @@ -46,6 +47,7 @@ ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 8) ; GFX81-NEXT: S_ENDPGM 0 + ; ; GFX9-LABEL: name: image_store_f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 @@ -66,6 +68,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 + ; ; GFX10-LABEL: name: image_store_f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 @@ -114,6 +117,7 @@ ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32) ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 + ; ; GFX81-LABEL: name: image_store_v2f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 @@ -136,6 +140,7 @@ ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 8) ; GFX81-NEXT: S_ENDPGM 0 + ; ; GFX9-LABEL: name: image_store_v2f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 @@ -155,6 +160,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x 
s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 + ; ; GFX10-LABEL: name: image_store_v2f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 @@ -204,6 +210,7 @@ ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 + ; ; GFX81-LABEL: name: image_store_v3f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -243,6 +250,7 @@ ; GFX81-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST5]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 8) ; GFX81-NEXT: S_ENDPGM 0 + ; ; GFX9-LABEL: name: image_store_v3f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -260,20 +268,11 @@ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 + ; ; GFX10-LABEL: name: image_store_v3f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -291,18 +290,8 @@ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x 
s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -336,6 +325,7 @@ ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32) ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 8) ; UNPACKED-NEXT: S_ENDPGM 0 + ; ; GFX81-LABEL: name: image_store_v4f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -361,6 +351,7 @@ ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 8) ; GFX81-NEXT: S_ENDPGM 0 + ; ; GFX9-LABEL: name: image_store_v4f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -382,6 +373,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 8) ; GFX9-NEXT: S_ENDPGM 0 + ; ; GFX10-LABEL: name: image_store_v4f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -454,42 +454,30 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = 
COPY $vgpr0_vgpr1_vgpr2
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>)
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST3]](<2 x s16>)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR2]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
 %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
@@ -538,70 +526,60 @@
 ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST5]](<2 x s16>)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR2]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
 ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
 ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL3]]
 ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
- ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL5]]
- ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>)
- ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR7]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR5]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
 ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST13]], [[C]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+ ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
 ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]]
- ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL7]]
- ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
- ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
- ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]]
- ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[UV13]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>)
 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
 %0:_(<5 x s16>) = G_IMPLICIT_DEF
 %1:_(<5 x s16>) = G_IMPLICIT_DEF
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -399,42 +399,30 @@
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]]
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>)
 ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
 ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C2]]
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
 %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
@@ -161,11 +161,7 @@
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[DEF]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](<2 x s32>)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
 %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
@@ -47,6 +47,7 @@
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]]
 ; GFX8-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>)
 ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
 ; GFX9-LABEL: name: and_v2i16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -104,27 +105,18 @@
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
 ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
 ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
 ; GFX9-LABEL: name: add_v3i16
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
 ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[COPY]], [[COPY2]]
- ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[COPY1]], [[COPY3]]
 ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x s16>)
- ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR2]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[ADD1]](<2 x s16>)
 ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
 %3:_(<2 x s16>) = COPY $vgpr0
 %4:_(<2 x s16>) = COPY $vgpr1
@@ -188,6 +180,7 @@
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
 ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
 ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
 ; GFX9-LABEL: name: shl_v3i16
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -283,6 +276,7 @@
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
 ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
 ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
 ; GFX9-LABEL: name: fma_v4f16
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
 ; GFX9-NEXT: {{ $}}
@@ -383,37 +377,28 @@
 ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
 ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ;
 ; GFX9-LABEL: name: maxnum_v5i16
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
 ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]]
 ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY3]]
 ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
 ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]]
 ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY4]]
 ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY2]]
+ ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY5]]
 ; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
 ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>)
 ; GFX9-NEXT: $vgpr1 = COPY [[FMAXNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr2 = COPY [[FMAXNUM_IEEE2]](<2 x s16>)
 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
 %2:_(<2 x s16>) = COPY $vgpr0
 %3:_(<2 x s16>) = COPY $vgpr1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -454,42 +454,30 @@
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>)
 ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
 ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
 ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
 %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
@@ -537,70 +525,60 @@
 ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST5]](<2 x s16>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
 ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
 ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL3]]
 ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
- ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL5]]
- ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
 ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
 ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
 ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST13]], [[C]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
 ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]]
- ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL7]]
- ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
- ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
- ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]]
- ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[UV13]](<2 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>)
 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
 %0:_(<5 x s16>) = G_IMPLICIT_DEF
 %1:_(<5 x s16>) = G_IMPLICIT_DEF
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
@@ -691,16 +691,15 @@
 ; GFX6-NEXT: s_mov_b32 s0, -1
 ; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
+; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
 ; GFX6-NEXT: s_mov_b32 s1, 0xffff
 ; GFX6-NEXT: s_or_b32 s6, s5, s6
 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
-; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
-; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
 ; GFX6-NEXT: s_or_b32 s2, s2, s3
 ; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
-; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
 ; GFX6-NEXT: s_lshr_b32 s2, s0, 16
 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
@@ -745,14 +744,13 @@
 ; GFX6-NEXT: s_mov_b32 s0, -1
 ; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
+; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
 ; GFX6-NEXT: s_mov_b32 s1, 0xffff
 ; GFX6-NEXT: s_or_b32 s6, s5, s6
 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
-; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
-; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
-; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
 ; GFX6-NEXT: s_or_b32 s2, s2, s3
 ; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
@@ -796,31 +794,29 @@
 ; GFX6-LABEL: s_orn2_v3i16_multi_use:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
-; GFX6-NEXT: s_mov_b32 s0, s2
-; GFX6-NEXT: s_mov_b32 s1, s3
-; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: s_mov_b32 s0, -1
 ; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_mov_b32 s3, 0xffff
+; GFX6-NEXT: s_mov_b32 s1, 0xffff
 ; GFX6-NEXT: s_or_b32 s6, s5, s6
 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
-; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
-; GFX6-NEXT: s_xor_b64 s[2:3], s[6:7], s[2:3]
-; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
+; GFX6-NEXT: s_and_b32 s1, s3, 0xffff
+; GFX6-NEXT: s_and_b32 s0, s2, 0xffff
 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT: s_or_b32 s0, s0, s1
 ; GFX6-NEXT: s_and_b32 s1, s4, 0xffff
-; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
-; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
-; GFX6-NEXT: s_lshr_b32 s4, s0, 16
-; GFX6-NEXT: s_lshr_b32 s5, s2, 16
+; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7]
+; GFX6-NEXT: s_lshr_b32 s2, s0, 16
+; GFX6-NEXT: s_lshr_b32 s5, s6, 16
 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX6-NEXT: s_lshl_b32 s4, s4, 16
-; GFX6-NEXT: s_or_b32 s0, s0, s4
-; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX6-NEXT: s_lshl_b32 s4, s5, 16
+; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_or_b32 s0, s0, s2
+; GFX6-NEXT: s_and_b32 s2, s6, 0xffff
+; GFX6-NEXT: s_lshl_b32 s3, s5, 16
 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
-; GFX6-NEXT: s_or_b32 s2, s2, s4
+; GFX6-NEXT: s_or_b32 s2, s2, s3
+; GFX6-NEXT: s_and_b32 s3, s7, 0xffff
 ; GFX6-NEXT: ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_orn2_v3i16_multi_use:
@@ -874,16 +870,15 @@
 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
-; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5
 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5
 ; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3
-; GFX6-NEXT: v_xor_b32_e32 v4, 0xfff5, v4
 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT: v_xor_b32_e32 v4, 0xfff5, v4
 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
-; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4
 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_or_b32_e32 v2, v1, v2
+; GFX6-NEXT: v_or_b32_e32 v2, v1, v4
 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
;