diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -567,6 +567,12 @@
   }
 
   if (Subtarget.hasAltivec()) {
+    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+      setOperationAction(ISD::SADDSAT, VT, Legal);
+      setOperationAction(ISD::SSUBSAT, VT, Legal);
+      setOperationAction(ISD::UADDSAT, VT, Legal);
+      setOperationAction(ISD::USUBSAT, VT, Legal);
+    }
     // First set operation action for all vector types to expand. Then we
     // will selectively turn on ones that can be effectively codegen'd.
     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -869,6 +869,20 @@
 def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
           (v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
 
+// Saturating adds/subtracts.
+def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
+def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
+def : Pat<(v8i16 (saddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDSHS $vA, $vB))>;
+def : Pat<(v8i16 (uaddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDUHS $vA, $vB))>;
+def : Pat<(v4i32 (saddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDSWS $vA, $vB))>;
+def : Pat<(v4i32 (uaddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDUWS $vA, $vB))>;
+def : Pat<(v16i8 (ssubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBSBS $vA, $vB))>;
+def : Pat<(v16i8 (usubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBUBS $vA, $vB))>;
+def : Pat<(v8i16 (ssubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBSHS $vA, $vB))>;
+def : Pat<(v8i16 (usubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBUHS $vA, $vB))>;
+def : Pat<(v4i32 (ssubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBSWS $vA, $vB))>;
+def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>;
+
 // Loads.
 def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
diff --git a/llvm/test/CodeGen/PowerPC/saturating-intrinsics.ll b/llvm/test/CodeGen/PowerPC/saturating-intrinsics.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/saturating-intrinsics.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names \
+; RUN:   -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s
+define dso_local <16 x i8 > @vectorsaddb(<16 x i8 > %a, <16 x i8 > %b) {
+; CHECK-LABEL: vectorsaddb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vaddsbs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %call
+}
+
+define dso_local <16 x i8 > @vectorssubb(<16 x i8 > %a, <16 x i8 > %b) {
+; CHECK-LABEL: vectorssubb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubsbs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %call
+}
+
+define dso_local <16 x i8 > @vectoruaddb(<16 x i8 > %a, <16 x i8 > %b) {
+; CHECK-LABEL: vectoruaddb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vaddubs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %call
+}
+
+define dso_local <16 x i8 > @vectorusubb(<16 x i8 > %a, <16 x i8 > %b) {
+; CHECK-LABEL: vectorusubb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsububs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %call
+}
+
+define dso_local <8 x i16 > @vectorsaddh(<8 x i16 > %a, <8 x i16 > %b) {
+; CHECK-LABEL: vectorsaddh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vaddshs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %call
+}
+
+define dso_local <8 x i16 > @vectorssubh(<8 x i16 > %a, <8 x i16 > %b) {
+; CHECK-LABEL: vectorssubh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubshs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %call
+}
+
+define dso_local <8 x i16 > @vectoruaddh(<8 x i16 > %a, <8 x i16 > %b) {
+; CHECK-LABEL: vectoruaddh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vadduhs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %call
+}
+
+define dso_local <8 x i16 > @vectorusubh(<8 x i16 > %a, <8 x i16 > %b) {
+; CHECK-LABEL: vectorusubh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubuhs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %call
+}
+
+define dso_local <4 x i32 > @vectorsaddw(<4 x i32 > %a, <4 x i32 > %b) {
+; CHECK-LABEL: vectorsaddw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vaddsws v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %call
+}
+
+define dso_local <4 x i32 > @vectorssubw(<4 x i32 > %a, <4 x i32 > %b) {
+; CHECK-LABEL: vectorssubw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubsws v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %call
+}
+
+define dso_local <4 x i32 > @vectoruaddw(<4 x i32 > %a, <4 x i32 > %b) {
+; CHECK-LABEL: vectoruaddw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vadduws v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %call
+}
+
+define dso_local <4 x i32 > @vectorusubw(<4 x i32 > %a, <4 x i32 > %b) {
+; CHECK-LABEL: vectorusubw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubuws v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %call
+}
+
+declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)