diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -23720,10 +23720,6 @@ continue; } - // Profitability check: only deal with extractions from the first subvector. - if (OpSubvecIdx != 0) - return SDValue(); - const std::pair DemandedSubvector = std::make_pair(Op, OpSubvecIdx); @@ -23753,6 +23749,14 @@ if (DemandedSubvectors.empty()) return DAG.getUNDEF(NarrowVT); + // Profitability check: only deal with extractions from the first subvector + // unless the mask becomes an identity mask with no undef elements. + if (!ShuffleVectorInst::isIdentityMask(NewMask) || + any_of(NewMask, [](int M) { return M < 0; })) + for (auto &DemandedSubvector : DemandedSubvectors) + if (DemandedSubvector.second != 0) + return SDValue(); + // We still perform the exact same EXTRACT_SUBVECTOR, just on different // operand[s]/index[es], so there is no point in checking for it's legality. 
diff --git a/llvm/test/CodeGen/AArch64/add-extract.ll b/llvm/test/CodeGen/AArch64/add-extract.ll --- a/llvm/test/CodeGen/AArch64/add-extract.ll +++ b/llvm/test/CodeGen/AArch64/add-extract.ll @@ -83,9 +83,9 @@ define i64 @add_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-LABEL: add_i64_ext_ext_test1: ; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: add d0, d0, d1 -; CHECK-NEXT: dup v1.2d, v1.d[1] -; CHECK-NEXT: add d0, d0, d1 +; CHECK-NEXT: add d0, d0, d2 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %a = extractelement <1 x i64> %A, i32 0 @@ -99,9 +99,9 @@ define i64 @sub_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-LABEL: sub_i64_ext_ext_test1: ; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: sub d0, d0, d1 -; CHECK-NEXT: dup v1.2d, v1.d[1] -; CHECK-NEXT: sub d0, d0, d1 +; CHECK-NEXT: sub d0, d0, d2 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %a = extractelement <1 x i64> %A, i32 0 diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll --- a/llvm/test/CodeGen/AArch64/shuffles.ll +++ b/llvm/test/CodeGen/AArch64/shuffles.ll @@ -262,3 +262,16 @@ %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> ret <8 x half> %r } + +define <4 x i32> @extract_shuffle(<8 x i16> %j, <4 x i16> %k) { +; CHECK-LABEL: extract_shuffle: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll2 v0.4s, v0.8h, #3 +; CHECK-NEXT: ret + %a = shufflevector <8 x i16> %j, <8 x i16> poison, <8 x i32> + %b = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> + %c = zext <4 x i16> %b to <4 x i32> + %d = shl <4 x i32> %c, + ret <4 x i32> %d +} + diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll @@ -56,36 +56,36 @@ ; CHECK-NEXT: mov v1.b[5], w10 ; CHECK-NEXT: umov w10, v0.b[14] ; 
CHECK-NEXT: mov v2.b[5], w8 -; CHECK-NEXT: mov x8, #16 +; CHECK-NEXT: mov x8, #16 // =0x10 ; CHECK-NEXT: mov v1.b[6], w9 -; CHECK-NEXT: mov x9, #24 +; CHECK-NEXT: mov x9, #24 // =0x18 ; CHECK-NEXT: ld1w { z4.s }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: mov v2.b[6], w10 ; CHECK-NEXT: umov w10, v0.b[15] ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16 ; CHECK-NEXT: ld1w { z5.s }, p0/z, [x0, x9, lsl #2] -; CHECK-NEXT: dup v3.2d, v0.d[1] +; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: mov v1.b[7], w11 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: mov v2.b[7], w10 +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 +; CHECK-NEXT: mov x11, #8 // =0x8 ; CHECK-NEXT: uunpklo z3.h, z3.b +; CHECK-NEXT: and z0.s, z0.s, #0x1 ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: mov x11, #8 -; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 ; CHECK-NEXT: lsl z3.s, z3.s, #31 -; CHECK-NEXT: asr z0.s, z0.s, #31 -; CHECK-NEXT: asr z3.s, z3.s, #31 ; CHECK-NEXT: uunpklo z1.h, z1.b +; CHECK-NEXT: asr z0.s, z3.s, #31 ; CHECK-NEXT: uunpklo z2.h, z2.b ; CHECK-NEXT: and z0.s, z0.s, #0x1 -; CHECK-NEXT: and z3.s, z3.s, #0x1 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x11, lsl #2] -; CHECK-NEXT: cmpne p2.s, p0/z, z3.s, #0 -; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, x11, lsl #2] +; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: lsl z1.s, z1.s, #31 ; CHECK-NEXT: lsl z2.s, z2.s, #31 ; CHECK-NEXT: asr z1.s, z1.s, #31 @@ -96,12 +96,12 @@ ; CHECK-NEXT: mov z5.s, p2/m, #0 // =0x0 ; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0 ; CHECK-NEXT: cmpne p2.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0 ; CHECK-NEXT: 
st1w { z4.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: st1w { z5.s }, p0, [x0, x9, lsl #2] -; CHECK-NEXT: st1w { z0.s }, p0, [x0, x11, lsl #2] -; CHECK-NEXT: st1w { z3.s }, p0, [x0] +; CHECK-NEXT: st1w { z3.s }, p0, [x0, x11, lsl #2] +; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: .LBB1_2: // %exit ; CHECK-NEXT: ret %broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer