diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3742,9 +3742,18 @@
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
     return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
-  case ISD::SIGN_EXTEND_INREG:
+  case ISD::SIGN_EXTEND_INREG: {
+    // Only custom lower when ExtraVT has a legal byte based element type;
+    // otherwise return an empty SDValue so the node is not lowered here.
+    EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    EVT ExtraEltVT = ExtraVT.getVectorElementType();
+    if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
+        (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
+      return SDValue();
+
     return LowerToPredicatedOp(Op, DAG,
                                AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
+  }
   case ISD::TRUNCATE:
     return LowerTRUNCATE(Op, DAG);
   case ISD::LOAD:
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
@@ -20,6 +20,48 @@
 ; Don't use SVE when its registers are no bigger than NEON.
 ; NO_SVE-NOT: z{0-9}
 
+;
+; sext i1 -> i32
+;
+
+; NOTE: Covers the scenario where a SIGN_EXTEND_INREG is required, whose inreg
+; type's element type is not byte based and thus cannot be lowered directly to
+; an SVE instruction.
+define void @sext_v8i1_v8i32(<8 x i1> %a, <8 x i32>* %out) #0 {
+; CHECK-LABEL: sext_v8i1_v8i32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
+; CHECK-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, z0.b
+; CHECK-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
+; CHECK-NEXT: lsl [[A_WORDS]].s, [[PG]]/m, [[A_WORDS]].s, #31
+; CHECK-NEXT: asr [[A_WORDS]].s, [[PG]]/m, [[A_WORDS]].s, #31
+; CHECK-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %b = sext <8 x i1> %a to <8 x i32>
+  store <8 x i32> %b, <8 x i32>* %out
+  ret void
+}
+
+;
+; sext i3 -> i64
+;
+
+; NOTE: Covers the scenario where a SIGN_EXTEND_INREG is required, whose inreg
+; type's element type is not power-of-2 based and thus cannot be lowered
+; directly to an SVE instruction.
+define void @sext_v4i3_v4i64(<4 x i3> %a, <4 x i64>* %out) #0 {
+; CHECK-LABEL: sext_v4i3_v4i64:
+; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
+; CHECK-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, z0.h
+; CHECK-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
+; CHECK-NEXT: lsl [[A_DWORDS]].d, [[PG]]/m, [[A_DWORDS]].d, #61
+; CHECK-NEXT: asr [[A_DWORDS]].d, [[PG]]/m, [[A_DWORDS]].d, #61
+; CHECK-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %b = sext <4 x i3> %a to <4 x i64>
+  store <4 x i64> %b, <4 x i64>* %out
+  ret void
+}
+
 ;
 ; sext i8 -> i16
 ;