Index: llvm/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -687,6 +687,7 @@
     // These operations default to expand for vector types.
     if (VT.isVector()) {
       setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
       setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -180,7 +180,6 @@
   setOperationAction(ISD::SELECT, VT, Expand);
   setOperationAction(ISD::SELECT_CC, VT, Expand);
   setOperationAction(ISD::VSELECT, VT, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
   if (VT.isInteger()) {
     setOperationAction(ISD::SHL, VT, Custom);
     setOperationAction(ISD::SRA, VT, Custom);
@@ -366,6 +365,13 @@
   addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
   addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
 
+  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
+
   // Some truncating stores are legal too.
   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
   setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
Index: llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1535,6 +1535,10 @@
   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
 
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+
   // Types natively supported:
   for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
                        MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
Index: llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -17,7 +17,10 @@
 static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
 static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
 static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
-
+static const MVT InRegV64[] = { MVT::v32i8, MVT::v32i16, MVT::v16i8,
+                                MVT::v16i16, MVT::v16i32 };
+static const MVT InRegV128[] = { MVT::v64i8, MVT::v64i16, MVT::v32i8,
+                                 MVT::v32i16, MVT::v32i32 };
 
 void HexagonTargetLowering::initializeHVXLowering() {
@@ -58,6 +61,7 @@
   bool Use64b = Subtarget.useHVX64BOps();
   ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
   ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
+  ArrayRef<MVT> InRegV = Use64b ? InRegV64 : InRegV128;
   MVT ByteV = Use64b ?  MVT::v64i8 : MVT::v128i8;
   MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
@@ -194,6 +198,9 @@
     setOperationAction(ISD::XOR, BoolV, Legal);
   }
 
+  for (MVT T : InRegV)
+    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
+
   setTargetDAGCombine(ISD::VSELECT);
 }
Index: llvm/test/CodeGen/ARM/signext-inreg.ll
===================================================================
--- llvm/test/CodeGen/ARM/signext-inreg.ll
+++ llvm/test/CodeGen/ARM/signext-inreg.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=armv8 -mattr=+mve | FileCheck %s
+; RUN: llc < %s -mtriple=armv8 | FileCheck %s
 define <4 x i32> @test(<4 x i32> %m) {
 ; CHECK-LABEL: test:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov d1, r2, r3
-; CHECK-NEXT:    vmov d0, r0, r1
-; CHECK-NEXT:    vshl.i32 q0, q0, #24
-; CHECK-NEXT:    vshr.s32 q0, q0, #24
-; CHECK-NEXT:    vmov r0, r1, d0
-; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vshl.i32 q8, q8, #24
+; CHECK-NEXT:    vshr.s32 q8, q8, #24
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
 entry:
   %shl = shl <4 x i32> %m, <i32 24, i32 24, i32 24, i32 24>
Index: llvm/test/CodeGen/Hexagon/signext-inreg.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Hexagon/signext-inreg.ll
@@ -0,0 +1,272 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=hexagon | FileCheck %s
+; RUN: llc < %s -march=hexagon -mattr=+hvx,hvx-length64b | FileCheck %s --check-prefix=CHECK-64B
+; RUN: llc < %s -march=hexagon -mattr=+hvx,hvx-length128b | FileCheck %s --check-prefix=CHECK-128B
+define <2 x i32> @test1(<2 x i32> %m) {
+; CHECK-LABEL: test1:
+; CHECK:       .cfi_startproc
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1 = extract(r1,#8,#0)
+; CHECK-NEXT:     r0 = sxtb(r0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+;
+; CHECK-64B-LABEL: test1:
+; CHECK-64B:       .cfi_startproc
+; CHECK-64B-NEXT:  // %bb.0: // %entry
+; CHECK-64B-NEXT:    {
+; CHECK-64B-NEXT:     r1 = extract(r1,#8,#0)
+; CHECK-64B-NEXT:     r0 = sxtb(r0)
+; CHECK-64B-NEXT:     jumpr r31
+; CHECK-64B-NEXT:    }
+;
+; CHECK-128B-LABEL: test1:
+; CHECK-128B:       .cfi_startproc
+; CHECK-128B-NEXT:  // %bb.0: // %entry
+; CHECK-128B-NEXT:    {
+; CHECK-128B-NEXT:     r1 = extract(r1,#8,#0)
+; CHECK-128B-NEXT:     r0 = sxtb(r0)
+; CHECK-128B-NEXT:     jumpr r31
+; CHECK-128B-NEXT:    }
+entry:
+  %shl = shl <2 x i32> %m, <i32 24, i32 24>
+  %shr = ashr exact <2 x i32> %shl, <i32 24, i32 24>
+  ret <2 x i32> %shr
+}
+
+define <16 x i32> @test2(<16 x i32> %m) {
+; CHECK-LABEL: test2:
+; CHECK:       .cfi_startproc
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r3 = extract(r3,#8,#0)
+; CHECK-NEXT:     r29 = add(r29,#-8)
+; CHECK-NEXT:     r2 = sxtb(r2)
+; CHECK-NEXT:     r4 = sxtb(r4)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r5 = extract(r5,#8,#0)
+; CHECK-NEXT:     r13:12 = memd(r29+#48)
+; CHECK-NEXT:     memd(r29+#0) = r17:16
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r13 = extract(r13,#8,#0)
+; CHECK-NEXT:     r12 = sxtb(r12)
+; CHECK-NEXT:     r15:14 = memd(r29+#40)
+; CHECK-NEXT:     r9:8 = memd(r29+#32)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r9 = extract(r9,#8,#0)
+; CHECK-NEXT:     r8 = sxtb(r8)
+; CHECK-NEXT:     r11:10 = memd(r29+#24)
+; CHECK-NEXT:     r7:6 = memd(r29+#16)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r11 = extract(r11,#8,#0)
+; CHECK-NEXT:     r10 = sxtb(r10)
+; CHECK-NEXT:     r14 = sxtb(r14)
+; CHECK-NEXT:     r17:16 = memd(r29+#8)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r15 = extract(r15,#8,#0)
+; CHECK-NEXT:     r17 = extract(r17,#8,#0)
+; CHECK-NEXT:     r16 = sxtb(r16)
+; CHECK-NEXT:     r6 = sxtb(r6)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r7 = extract(r7,#8,#0)
+; CHECK-NEXT:     memd(r0+#56) = r13:12
+; CHECK-NEXT:     memd(r0+#48) = r15:14
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     memd(r0+#40) = r9:8
+; CHECK-NEXT:     memd(r0+#32) = r11:10
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     memd(r0+#24) = r7:6
+; CHECK-NEXT:     memd(r0+#16) = r17:16
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     memd(r0+#8) = r5:4
+; CHECK-NEXT:     memd(r0+#0) = r3:2
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r29 = add(r29,#8)
+; CHECK-NEXT:     r17:16 = memd(r29+#0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    } // 8-byte Folded Reload
+;
+; CHECK-64B-LABEL: test2:
+; CHECK-64B:       .cfi_startproc
+; CHECK-64B-NEXT:  // %bb.0: // %entry
+; CHECK-64B-NEXT:    {
+; CHECK-64B-NEXT:     r0 = #24
+; CHECK-64B-NEXT:    }
+; CHECK-64B-NEXT:    {
+; CHECK-64B-NEXT:     v0.w = vasl(v0.w,r0)
+; CHECK-64B-NEXT:    }
+; CHECK-64B-NEXT:    {
+; CHECK-64B-NEXT:     v0.w = vasr(v0.w,r0)
+; CHECK-64B-NEXT:     jumpr r31
+; CHECK-64B-NEXT:    }
+;
+; CHECK-128B-LABEL: test2:
+; CHECK-128B:       .cfi_startproc
+; CHECK-128B-NEXT:  // %bb.0: // %entry
+; CHECK-128B-NEXT:    {
+; CHECK-128B-NEXT:     r0 = #24
+; CHECK-128B-NEXT:    }
+; CHECK-128B-NEXT:    {
+; CHECK-128B-NEXT:     v0.w = vasl(v0.w,r0)
+; CHECK-128B-NEXT:    }
+; CHECK-128B-NEXT:    {
+; CHECK-128B-NEXT:     v0.w = vasr(v0.w,r0)
+; CHECK-128B-NEXT:     jumpr r31
+; CHECK-128B-NEXT:    }
entry:
+  %shl = shl <16 x i32> %m, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  %shr = ashr exact <16 x i32> %shl, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  ret <16 x i32> %shr
+}
+
+define <64 x i16> @test3(<64 x i16> %m) {
+; CHECK-LABEL: test3:
+; CHECK:       .cfi_startproc
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r3:2 = vaslh(r3:2,#8)
+; CHECK-NEXT:     r5:4 = vaslh(r5:4,#8)
+; CHECK-NEXT:     r9:8 = memd(r29+#96)
+; CHECK-NEXT:     r11:10 = memd(r29+#88)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r13:12 = vaslh(r9:8,#8)
+; CHECK-NEXT:     r11:10 = vaslh(r11:10,#8)
+; CHECK-NEXT:     r9:8 = memd(r29+#80)
+; CHECK-NEXT:     r7:6 = memd(r29+#104)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r15:14 = vaslh(r7:6,#8)
+; CHECK-NEXT:     r9:8 = vaslh(r9:8,#8)
+; CHECK-NEXT:     r7:6 = memd(r29+#72)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r15:14 = vasrh(r15:14,#8)
+; CHECK-NEXT:     r13:12 = vasrh(r13:12,#8)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r11:10 = vasrh(r11:10,#8)
+; CHECK-NEXT:     r9:8 = vasrh(r9:8,#8)
+; CHECK-NEXT:     r15:14 = memd(r29+#64)
+; CHECK-NEXT:     memd(r0+#120) = r15:14
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r7:6 = vaslh(r7:6,#8)
+; CHECK-NEXT:     r15:14 = vaslh(r15:14,#8)
+; CHECK-NEXT:     r13:12 = memd(r29+#56)
+; CHECK-NEXT:     memd(r0+#112) = r13:12
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r13:12 = vaslh(r13:12,#8)
+; CHECK-NEXT:     r7:6 = vasrh(r7:6,#8)
+; CHECK-NEXT:     r11:10 = memd(r29+#48)
+; CHECK-NEXT:     memd(r0+#104) = r11:10
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r11:10 = vaslh(r11:10,#8)
+; CHECK-NEXT:     r15:14 = vasrh(r15:14,#8)
+; CHECK-NEXT:     r9:8 = memd(r29+#40)
+; CHECK-NEXT:     memd(r0+#96) = r9:8
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r9:8 = vaslh(r9:8,#8)
+; CHECK-NEXT:     r13:12 = vasrh(r13:12,#8)
+; CHECK-NEXT:     r7:6 = memd(r29+#32)
+; CHECK-NEXT:     memd(r0+#88) = r7:6
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r11:10 = vasrh(r11:10,#8)
+; CHECK-NEXT:     r9:8 = vasrh(r9:8,#8)
+; CHECK-NEXT:     r15:14 = memd(r29+#0)
+; CHECK-NEXT:     memd(r0+#80) = r15:14
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r7:6 = vaslh(r7:6,#8)
+; CHECK-NEXT:     r15:14 = vaslh(r15:14,#8)
+; CHECK-NEXT:     r13:12 = memd(r29+#16)
+; CHECK-NEXT:     memd(r0+#72) = r13:12
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r13:12 = vaslh(r13:12,#8)
+; CHECK-NEXT:     r7:6 = vasrh(r7:6,#8)
+; CHECK-NEXT:     r11:10 = memd(r29+#24)
+; CHECK-NEXT:     memd(r0+#64) = r11:10
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r11:10 = vaslh(r11:10,#8)
+; CHECK-NEXT:     r3:2 = vasrh(r3:2,#8)
+; CHECK-NEXT:     r9:8 = memd(r29+#8)
+; CHECK-NEXT:     memd(r0+#56) = r9:8
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r9:8 = vaslh(r9:8,#8)
+; CHECK-NEXT:     r13:12 = vasrh(r13:12,#8)
+; CHECK-NEXT:     memd(r0+#48) = r7:6
+; CHECK-NEXT:     memd(r0+#0) = r3:2
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r11:10 = vasrh(r11:10,#8)
+; CHECK-NEXT:     r7:6 = vasrh(r15:14,#8)
+; CHECK-NEXT:     memd(r0+#32) = r13:12
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r9:8 = vasrh(r9:8,#8)
+; CHECK-NEXT:     r5:4 = vasrh(r5:4,#8)
+; CHECK-NEXT:     memd(r0+#40) = r11:10
+; CHECK-NEXT:     memd(r0+#16) = r7:6
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:     memd(r0+#24) = r9:8
+; CHECK-NEXT:     memd(r0+#8) = r5:4
+; CHECK-NEXT:    }
+;
+; CHECK-64B-LABEL: test3:
+; CHECK-64B:       .cfi_startproc
+; CHECK-64B-NEXT:  // %bb.0: // %entry
+; CHECK-64B-NEXT:    {
+; CHECK-64B-NEXT:     r0 = #8
+; CHECK-64B-NEXT:    }
+; CHECK-64B-NEXT:    {
+; CHECK-64B-NEXT:     v0.h = vasl(v0.h,r0)
+; CHECK-64B-NEXT:    }
+; CHECK-64B-NEXT:    {
+; CHECK-64B-NEXT:     v1.h = vasl(v1.h,r0)
+; CHECK-64B-NEXT:    }
+; CHECK-64B-NEXT:    {
+; CHECK-64B-NEXT:     v0.h = vasr(v0.h,r0)
+; CHECK-64B-NEXT:    }
+; CHECK-64B-NEXT:    {
+; CHECK-64B-NEXT:     v1.h = vasr(v1.h,r0)
+; CHECK-64B-NEXT:     jumpr r31
+; CHECK-64B-NEXT:    }
+;
+; CHECK-128B-LABEL: test3:
+; CHECK-128B:       .cfi_startproc
+; CHECK-128B-NEXT:  // %bb.0: // %entry
+; CHECK-128B-NEXT:    {
+; CHECK-128B-NEXT:     r0 = #8
+; CHECK-128B-NEXT:    }
+; CHECK-128B-NEXT:    {
+; CHECK-128B-NEXT:     v0.h = vasl(v0.h,r0)
+; CHECK-128B-NEXT:    }
+; CHECK-128B-NEXT:    {
+; CHECK-128B-NEXT:     v0.h = vasr(v0.h,r0)
+; CHECK-128B-NEXT:     jumpr r31
+; CHECK-128B-NEXT:    }
+entry:
+  %shl = shl <64 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %shr = ashr exact <64 x i16> %shl, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  ret <64 x i16> %shr
+}
Index: llvm/test/CodeGen/Thumb2/mve-sext.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-sext.ll
+++ llvm/test/CodeGen/Thumb2/mve-sext.ll
@@ -1,6 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32(<4 x i32> %m) {
+; CHECK-LABEL: sext_v4i32_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.i32 q0, q0, #31
+; CHECK-NEXT:    vshr.s32 q0, q0, #31
+; CHECK-NEXT:    bx lr
+entry:
+  %shl = shl <4 x i32> %m, <i32 31, i32 31, i32 31, i32 31>
+  %shr = ashr exact <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
+  ret <4 x i32> %shr
+}
+
 define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) {
 ; CHECK-LABEL: sext_v8i8_v8i16:
 ; CHECK:       @ %bb.0: @ %entry
@@ -332,13 +344,13 @@
 define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
 ; CHECK-LABEL: zext_v2i32_v2i64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adr r0, .LCPI13_0
+; CHECK-NEXT:    adr r0, .LCPI14_0
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
 ; CHECK-NEXT:    vand q0, q0, q1
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI13_0:
+; CHECK-NEXT:  .LCPI14_0:
 ; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
 ; CHECK-NEXT:    .long 0 @ 0x0
 ; CHECK-NEXT:    .long 4294967295 @ 0xffffffff