diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13213,6 +13213,26 @@ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } + // Fold (iM_signext_inreg + // (extract_subvector (zext|anyext|sext iN_v to _) _) + // from iN) + // -> (extract_subvector (signext iN_v to iM)) + if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && + ISD::isExtOpcode(N0.getOperand(0).getOpcode())) { + SDValue InnerExt = N0.getOperand(0); + EVT InnerExtVT = InnerExt->getValueType(0); + SDValue Extendee = InnerExt->getOperand(0); + + if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() && + (!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) { + SDValue SignExtExtendee = + DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee, + N0.getOperand(1)); + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll --- a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll +++ b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll @@ -54,9 +54,8 @@ define <2 x i32> @sext_extract_zext_idx0(<4 x i16> %vec) nounwind { ; CHECK-LABEL: sext_extract_zext_idx0: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %zext = zext <4 x i16> %vec to <4 x i32> %extract = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %zext, i64 0) @@ -65,6 +64,21 @@ ret <2 x i32> %sext_inreg } +; Negative test, combine should not fire if sign extension is for a different width. +define <2 x i32> @sext_extract_zext_idx0_negtest(<4 x i16> %vec) nounwind { +; CHECK-LABEL: sext_extract_zext_idx0_negtest: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v0.2s, v0.2s, #17 +; CHECK-NEXT: sshr v0.2s, v0.2s, #17 +; CHECK-NEXT: ret + %zext = zext <4 x i16> %vec to <4 x i32> + %extract = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %zext, i64 0) + %sext_inreg_step0 = shl <2 x i32> %extract, + %sext_inreg = ashr <2 x i32> %sext_inreg_step0, + ret <2 x i32> %sext_inreg +} + define <4 x i16> @sext_extract_sext_idx0(<8 x i8> %vec) nounwind { ; CHECK-LABEL: sext_extract_sext_idx0: ; CHECK: // %bb.0: @@ -81,10 +95,9 @@ define <2 x i32> @sext_extract_zext_idx2(<4 x i16> %vec) nounwind { ; CHECK-LABEL: sext_extract_zext_idx2: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %zext = zext <4 x i16> %vec to <4 x i32> %extract = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %zext, i64 2) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -575,9 +575,7 @@ ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: ldr q1, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -17,15 +17,11 @@ ; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: mov v0.h[0], w8 ; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -538,9 +538,7 @@ ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0 ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 +; CHECK-NEXT: sshll v2.4s, v2.4h, #0 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] ; CHECK-NEXT: ldr q2, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -17,9 +17,7 @@ ; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 +; CHECK-NEXT: sshll v2.4s, v2.4h, #0 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] ; CHECK-NEXT: mov v0.h[0], w8 diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll --- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll @@ -326,26 +326,16 @@ define <8 x double> @sitofp_i16_double(<8 x i16> %a) { ; CHECK-LABEL: sitofp_i16_double: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v2.2s, v1.2s, #16 -; CHECK-NEXT: shl v3.2s, v0.2s, #16 -; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: sshr v3.2s, v3.2s, #16 -; CHECK-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: scvtf v2.2d, v2.2d -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: sshll v3.2d, v3.2s, #0 +; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v2.2d, v1.4s, #0 +; CHECK-NEXT: sshll2 v3.2d, v0.4s, #0 ; CHECK-NEXT: sshll v4.2d, v1.2s, #0 -; CHECK-NEXT: sshll v1.2d, v0.2s, #0 -; CHECK-NEXT: scvtf v0.2d, v3.2d -; CHECK-NEXT: scvtf v1.2d, v1.2d -; CHECK-NEXT: scvtf v3.2d, v4.2d +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: scvtf v1.2d, v3.2d +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: scvtf v3.2d, v2.2d +; CHECK-NEXT: scvtf v2.2d, v4.2d ; CHECK-NEXT: ret %1 = sitofp <8 x i16> %a to <8 x double> ret <8 x double> %1