diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -926,6 +926,16 @@
     if (Subtarget.hasP9Altivec()) {
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+
+      // Keep vector SIGN_EXTEND_INREG intact through legalization so that the
+      // sext_inreg patterns in PPCInstrAltivec.td can select vexts* for it.
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
     }
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1410,6 +1410,13 @@
   def VEXTSW2Ds : VX_VT5_EO5_VB5s<1538, 26, "vextsw2d", []>;
 }
 
+// Select vector sext_inreg to the matching vexts[bhw]2[wd] instruction.
+def : Pat<(v4i32 (sext_inreg v4i32:$VRB, v4i8)), (v4i32 (VEXTSB2W $VRB))>;
+def : Pat<(v4i32 (sext_inreg v4i32:$VRB, v4i16)), (v4i32 (VEXTSH2W $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i8)), (v2i64 (VEXTSB2D $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i16)), (v2i64 (VEXTSH2D $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i32)), (v2i64 (VEXTSW2D $VRB))>;
+
 // Vector Integer Negate
 def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw",
                            [(set v4i32:$vD,
diff --git a/llvm/test/CodeGen/PowerPC/vector-extend-sign.ll b/llvm/test/CodeGen/PowerPC/vector-extend-sign.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vector-extend-sign.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -mattr=-altivec < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-P9-NOALTIVEC
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-P8
+
+define <4 x i32> @test_vextsh2w(<4 x i32> %m) {
+; CHECK-P9-LABEL: test_vextsh2w:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    vextsh2w 2, 2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-NOALTIVEC-LABEL: test_vextsh2w:
+; CHECK-P9-NOALTIVEC:       # %bb.0: # %entry
+; CHECK-P9-NOALTIVEC-NEXT:    extsh 6, 6
+; CHECK-P9-NOALTIVEC-NEXT:    extsh 5, 5
+; CHECK-P9-NOALTIVEC-NEXT:    extsh 4, 4
+; CHECK-P9-NOALTIVEC-NEXT:    extsh 3, 3
+; CHECK-P9-NOALTIVEC-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_vextsh2w:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    vspltisw 3, 8
+; CHECK-P8-NEXT:    vadduwm 3, 3, 3
+; CHECK-P8-NEXT:    vslw 2, 2, 3
+; CHECK-P8-NEXT:    vsraw 2, 2, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %shl = shl <4 x i32> %m, <i32 16, i32 16, i32 16, i32 16>
+  %shr = ashr exact <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
+  ret <4 x i32> %shr
+}
+
+define <4 x i32> @test_vextsb2w(<4 x i32> %m) {
+; CHECK-P9-LABEL: test_vextsb2w:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    vextsb2w 2, 2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-NOALTIVEC-LABEL: test_vextsb2w:
+; CHECK-P9-NOALTIVEC:       # %bb.0: # %entry
+; CHECK-P9-NOALTIVEC-NEXT:    extsb 6, 6
+; CHECK-P9-NOALTIVEC-NEXT:    extsb 5, 5
+; CHECK-P9-NOALTIVEC-NEXT:    extsb 4, 4
+; CHECK-P9-NOALTIVEC-NEXT:    extsb 3, 3
+; CHECK-P9-NOALTIVEC-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_vextsb2w:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    vspltisw 3, 12
+; CHECK-P8-NEXT:    vadduwm 3, 3, 3
+; CHECK-P8-NEXT:    vslw 2, 2, 3
+; CHECK-P8-NEXT:    vsraw 2, 2, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %shl = shl <4 x i32> %m, <i32 24, i32 24, i32 24, i32 24>
+  %shr = ashr exact <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
+  ret <4 x i32> %shr
+}
+
+define <2 x i64> @test_vextsb2d(<2 x i64> %m) {
+; CHECK-P9-LABEL: test_vextsb2d:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    vextsb2d 2, 2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-NOALTIVEC-LABEL: test_vextsb2d:
+; CHECK-P9-NOALTIVEC:       # %bb.0: # %entry
+; CHECK-P9-NOALTIVEC-NEXT:    extsb 3, 3
+; CHECK-P9-NOALTIVEC-NEXT:    extsb 4, 4
+; CHECK-P9-NOALTIVEC-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_vextsb2d:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-P8-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; CHECK-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-P8-NEXT:    xxswapd 35, 0
+; CHECK-P8-NEXT:    vsld 2, 2, 3
+; CHECK-P8-NEXT:    vsrad 2, 2, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %shl = shl <2 x i64> %m, <i64 56, i64 56>
+  %shr = ashr exact <2 x i64> %shl, <i64 56, i64 56>
+  ret <2 x i64> %shr
+}
+
+define <2 x i64> @test_vextsh2d(<2 x i64> %m) {
+; CHECK-P9-LABEL: test_vextsh2d:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    vextsh2d 2, 2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-NOALTIVEC-LABEL: test_vextsh2d:
+; CHECK-P9-NOALTIVEC:       # %bb.0: # %entry
+; CHECK-P9-NOALTIVEC-NEXT:    extsh 3, 3
+; CHECK-P9-NOALTIVEC-NEXT:    extsh 4, 4
+; CHECK-P9-NOALTIVEC-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_vextsh2d:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; CHECK-P8-NEXT:    addi 3, 3, .LCPI3_0@toc@l
+; CHECK-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-P8-NEXT:    xxswapd 35, 0
+; CHECK-P8-NEXT:    vsld 2, 2, 3
+; CHECK-P8-NEXT:    vsrad 2, 2, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %shl = shl <2 x i64> %m, <i64 48, i64 48>
+  %shr = ashr exact <2 x i64> %shl, <i64 48, i64 48>
+  ret <2 x i64> %shr
+}
+
+define <2 x i64> @test_vextsw2d(<2 x i64> %m) {
+; CHECK-P9-LABEL: test_vextsw2d:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    vextsw2d 2, 2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-NOALTIVEC-LABEL: test_vextsw2d:
+; CHECK-P9-NOALTIVEC:       # %bb.0: # %entry
+; CHECK-P9-NOALTIVEC-NEXT:    extsw 3, 3
+; CHECK-P9-NOALTIVEC-NEXT:    extsw 4, 4
+; CHECK-P9-NOALTIVEC-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_vextsw2d:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-P8-NEXT:    addi 3, 3, .LCPI4_0@toc@l
+; CHECK-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-P8-NEXT:    xxswapd 35, 0
+; CHECK-P8-NEXT:    vsld 2, 2, 3
+; CHECK-P8-NEXT:    vsrad 2, 2, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %shl = shl <2 x i64> %m, <i64 32, i64 32>
+  %shr = ashr exact <2 x i64> %shl, <i64 32, i64 32>
+  ret <2 x i64> %shr
+}
+
+; Negative test: a 48-bit sign extension has no sext_inreg pattern in this
+; patch, so the shl/ashr pair is expected to remain as vector shifts.
+define <2 x i64> @test_none(<2 x i64> %m) {
+; CHECK-P9-LABEL: test_none:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; CHECK-P9-NEXT:    addi 3, 3, .LCPI5_0@toc@l
+; CHECK-P9-NEXT:    lxvx 35, 0, 3
+; CHECK-P9-NEXT:    vsld 2, 2, 3
+; CHECK-P9-NEXT:    vsrad 2, 2, 3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-NOALTIVEC-LABEL: test_none:
+; CHECK-P9-NOALTIVEC:       # %bb.0: # %entry
+; CHECK-P9-NOALTIVEC-NEXT:    sldi 3, 3, 16
+; CHECK-P9-NOALTIVEC-NEXT:    sldi 4, 4, 16
+; CHECK-P9-NOALTIVEC-NEXT:    sradi 3, 3, 16
+; CHECK-P9-NOALTIVEC-NEXT:    sradi 4, 4, 16
+; CHECK-P9-NOALTIVEC-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_none:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; CHECK-P8-NEXT:    addi 3, 3, .LCPI5_0@toc@l
+; CHECK-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-P8-NEXT:    xxswapd 35, 0
+; CHECK-P8-NEXT:    vsld 2, 2, 3
+; CHECK-P8-NEXT:    vsrad 2, 2, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %shl = shl <2 x i64> %m, <i64 16, i64 16>
+  %shr = ashr exact <2 x i64> %shl, <i64 16, i64 16>
+  ret <2 x i64> %shr
+}