diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15036,6 +15036,54 @@
                      ExtractSubvector->getOperand(1));
 }
 
+// Fold a sign_extend_inreg of a single-use extract_subvector of an any_extend
+// into an extract_subvector of a real sign_extend:
+//
+//   (sign_extend_inreg (extract_subvector (anyext v) Idx), Ty)
+//     => (extract_subvector (sign_extend v) Idx)
+//
+// This is only sound when Ty is at least as wide as v's element type. If Ty
+// is narrower, the in-register extension starts below v's top bit, so a plain
+// sign_extend of v would replicate the wrong bit. If Ty is wider, the bits
+// between v's top bit and Ty's top bit are left undefined by the anyext, so
+// filling them with copies of v's sign bit is a valid choice.
+//
+// Returns the replacement value, or an empty SDValue when the pattern does
+// not match.
+static SDValue pushSignExtendThroughExtractSubvector(SDNode *N,
+                                                     SelectionDAG &DAG) {
+  // The fold reads the in-register type from operand 1, so only accept the
+  // node kind it is written for.
+  if (N->getOpcode() != ISD::SIGN_EXTEND_INREG)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT.isScalableVector())
+    return SDValue();
+
+  SDValue ExtractSubvector = N->getOperand(0);
+  if (ExtractSubvector->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+      !ExtractSubvector->hasOneUse())
+    return SDValue();
+
+  SDValue AnyExt = ExtractSubvector->getOperand(0);
+  if (AnyExt->getOpcode() != ISD::ANY_EXTEND)
+    return SDValue();
+
+  SDValue Extendee = AnyExt->getOperand(0);
+
+  // Bail out if the in-register extension starts below Extendee's sign bit:
+  // sign-extending all of Extendee would then produce a different value.
+  EVT InRegVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  if (InRegVT.getScalarSizeInBits() < Extendee.getScalarValueSizeInBits())
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue SignExt =
+      DAG.getNode(ISD::SIGN_EXTEND, DL, AnyExt->getValueType(0), Extendee);
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExt,
+                     ExtractSubvector->getOperand(1));
+}
+
 static SDValue performANDCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
   SelectionDAG &DAG = DCI.DAG;
@@ -19253,6 +19301,9 @@
     return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
   }
 
+  if (SDValue R = pushSignExtendThroughExtractSubvector(N, DAG))
+    return R;
+
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -575,9 +575,7 @@
 ; CHECK-NEXT: movi d0, #0000000000000000
 ; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: 
fcmeq v1.4h, v1.4h, #0.0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: ldr q1, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -17,15 +17,11 @@ ; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: mov v0.h[0], w8 ; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -538,9 +538,7 @@ ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0 ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 +; CHECK-NEXT: sshll v2.4s, v2.4h, #0 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] ; CHECK-NEXT: ldr q2, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -17,9 +17,7 @@ ; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 +; CHECK-NEXT: sshll v2.4s, v2.4h, #0 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] ; CHECK-NEXT: mov v0.h[0], w8 diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll --- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll @@ -194,16 +194,11 @@ define <4 x double> @sitofp_v4i8_double(<4 x i8> %a) { ; CHECK-LABEL: sitofp_v4i8_double: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sshr v0.2s, v0.2s, #24 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v1.2d, v0.4s, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: sshr v1.2s, v1.2s, #24 -; CHECK-NEXT: scvtf v0.2d, v0.2d -; CHECK-NEXT: sshll v1.2d, v1.2s, #0 ; CHECK-NEXT: scvtf v1.2d, v1.2d +; CHECK-NEXT: scvtf v0.2d, v0.2d ; CHECK-NEXT: ret %1 = sitofp <4 x i8> %a to <4 x double> ret <4 x double> %1 @@ -212,27 +207,17 @@ define <8 x double> @sitofp_v8i8_double(<8 x i8> %a) { ; CHECK-LABEL: sitofp_v8i8_double: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v2.2s, v1.2s, #24 -; CHECK-NEXT: shl v3.2s, v0.2s, #24 -; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: sshr v2.2s, v2.2s, #24 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: sshr v3.2s, v3.2s, #24 -; CHECK-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: scvtf v2.2d, v2.2d -; CHECK-NEXT: sshr v1.2s, v1.2s, #24 -; CHECK-NEXT: sshr v0.2s, 
v0.2s, #24 -; CHECK-NEXT: sshll v3.2d, v3.2s, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v2.2d, v1.4s, #0 +; CHECK-NEXT: sshll2 v3.2d, v0.4s, #0 ; CHECK-NEXT: sshll v4.2d, v1.2s, #0 -; CHECK-NEXT: sshll v1.2d, v0.2s, #0 -; CHECK-NEXT: scvtf v0.2d, v3.2d -; CHECK-NEXT: scvtf v1.2d, v1.2d -; CHECK-NEXT: scvtf v3.2d, v4.2d +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: scvtf v1.2d, v3.2d +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: scvtf v3.2d, v2.2d +; CHECK-NEXT: scvtf v2.2d, v4.2d ; CHECK-NEXT: ret %1 = sitofp <8 x i8> %a to <8 x double> ret <8 x double> %1 @@ -241,48 +226,28 @@ define <16 x double> @sitofp_v16i8_double(<16 x i8> %a) { ; CHECK-LABEL: sitofp_v16i8_double: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0 -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0 -; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v5.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v6.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: shl v2.2s, v2.2s, #24 -; CHECK-NEXT: shl v3.2s, v3.2s, #24 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v4.2s, v4.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: shl v5.2s, v5.2s, #24 -; CHECK-NEXT: shl v6.2s, v6.2s, #24 -; CHECK-NEXT: shl v7.2s, v7.2s, #24 -; CHECK-NEXT: sshr v2.2s, v2.2s, #24 -; CHECK-NEXT: sshr v3.2s, v3.2s, #24 -; CHECK-NEXT: sshr v1.2s, v1.2s, #24 -; CHECK-NEXT: sshr v4.2s, v4.2s, #24 -; CHECK-NEXT: sshr v0.2s, v0.2s, #24 -; CHECK-NEXT: sshr v5.2s, v5.2s, #24 -; CHECK-NEXT: sshr v6.2s, v6.2s, #24 -; CHECK-NEXT: sshr v7.2s, v7.2s, #24 +; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0 +; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0 +; CHECK-NEXT: sshll 
v1.4s, v1.4h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v6.2d, v2.4s, #0 +; CHECK-NEXT: sshll2 v4.2d, v3.4s, #0 +; CHECK-NEXT: sshll2 v5.2d, v1.4s, #0 ; CHECK-NEXT: sshll v16.2d, v2.2s, #0 -; CHECK-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-NEXT: sshll2 v2.2d, v0.4s, #0 +; CHECK-NEXT: sshll v7.2d, v3.2s, #0 ; CHECK-NEXT: sshll v17.2d, v1.2s, #0 -; CHECK-NEXT: sshll v18.2d, v4.2s, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: sshll v3.2d, v5.2s, #0 -; CHECK-NEXT: sshll v5.2d, v6.2s, #0 -; CHECK-NEXT: sshll v1.2d, v7.2s, #0 +; CHECK-NEXT: scvtf v1.2d, v2.2d ; CHECK-NEXT: scvtf v0.2d, v0.2d -; CHECK-NEXT: scvtf v1.2d, v1.2d -; CHECK-NEXT: scvtf v2.2d, v2.2d -; CHECK-NEXT: scvtf v3.2d, v3.2d -; CHECK-NEXT: scvtf v4.2d, v17.2d +; CHECK-NEXT: scvtf v3.2d, v4.2d +; CHECK-NEXT: scvtf v2.2d, v7.2d ; CHECK-NEXT: scvtf v5.2d, v5.2d +; CHECK-NEXT: scvtf v4.2d, v17.2d +; CHECK-NEXT: scvtf v7.2d, v6.2d ; CHECK-NEXT: scvtf v6.2d, v16.2d -; CHECK-NEXT: scvtf v7.2d, v18.2d ; CHECK-NEXT: ret %1 = sitofp <16 x i8> %a to <16 x double> ret <16 x double> %1 @@ -291,26 +256,16 @@ define <8 x double> @sitofp_i16_double(<8 x i16> %a) { ; CHECK-LABEL: sitofp_i16_double: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v2.2s, v1.2s, #16 -; CHECK-NEXT: shl v3.2s, v0.2s, #16 -; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: sshr v3.2s, v3.2s, #16 -; CHECK-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: scvtf v2.2d, v2.2d -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: sshll v3.2d, v3.2s, #0 +; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v2.2d, v1.4s, #0 +; CHECK-NEXT: sshll2 v3.2d, v0.4s, #0 ; CHECK-NEXT: sshll v4.2d, v1.2s, #0 -; CHECK-NEXT: sshll 
v1.2d, v0.2s, #0 -; CHECK-NEXT: scvtf v0.2d, v3.2d -; CHECK-NEXT: scvtf v1.2d, v1.2d -; CHECK-NEXT: scvtf v3.2d, v4.2d +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: scvtf v1.2d, v3.2d +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: scvtf v3.2d, v2.2d +; CHECK-NEXT: scvtf v2.2d, v4.2d ; CHECK-NEXT: ret %1 = sitofp <8 x i16> %a to <8 x double> ret <8 x double> %1