Mark the PowerPC AltiVec VSCR accessors (mfvscr/mtvscr) and the multiply-add,
multiply-sum, vector-sum and vector-pack intrinsics changed below as
IntrHasSideEffects, set hasSideEffects = 1 on the matching instruction
definitions, and add a codegen test (sat-register-clobber.ll).

NOTE(review): line breaks, indentation and blank context lines were
reconstructed from the hunk line counts; if context does not match the target
tree byte-for-byte, apply with `git apply --ignore-whitespace`.
NOTE(review): the context line `Deprecated<DeprecatedDST>;` was restored on the
assumption that its template argument had been stripped -- confirm against
PPCInstrAltivec.td.

diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -285,9 +285,9 @@
 
   // VSCR access.
   def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">,
-              Intrinsic<[llvm_v8i16_ty], [], [IntrReadMem]>;
+              Intrinsic<[llvm_v8i16_ty], [], [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">,
-              Intrinsic<[], [llvm_v4i32_ty], []>;
+              Intrinsic<[], [llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>;
 
 
   // Loads. These don't map directly to GCC builtins because they represent the
@@ -757,10 +757,12 @@
   // Saturating multiply-adds.
   def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
-                       llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+                       llvm_v8i16_ty, llvm_v8i16_ty],
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
-                       llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+                       llvm_v8i16_ty, llvm_v8i16_ty],
+                      [IntrNoMem, IntrHasSideEffects]>;
 
   def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">,
             Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
@@ -778,7 +780,7 @@
                        llvm_v4i32_ty], [IntrNoMem]>;
   def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                       llvm_v4i32_ty], [IntrNoMem]>;
+                       llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
                        llvm_v4i32_ty], [IntrNoMem]>;
@@ -790,7 +792,7 @@
                        llvm_v1i128_ty], [IntrNoMem]>;
   def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                       llvm_v4i32_ty], [IntrNoMem]>;
+                       llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">,
             Intrinsic<[llvm_v1i128_ty],
                       [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>;
@@ -841,19 +843,19 @@
   // Vector Sum Instructions.
   def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
 
   // Vector Sign Extension Instructions
   def int_ppc_altivec_vextsb2w : GCCBuiltin<"__builtin_altivec_vextsb2w">,
@@ -880,34 +882,34 @@
                       [IntrNoMem]>;
   def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">,
             Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">,
             Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vpksdss : GCCBuiltin<"__builtin_altivec_vpksdss">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   def int_ppc_altivec_vpksdus : GCCBuiltin<"__builtin_altivec_vpksdus">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   // vpkuhum is lowered to a shuffle.
   def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">,
             Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   // vpkuwum is lowered to a shuffle.
   def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
   // vpkudum is lowered to a shuffle.
   def int_ppc_altivec_vpkudus : GCCBuiltin<"__builtin_altivec_vpkudus">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, IntrHasSideEffects]>;
 
   // Unpacks.
   def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -404,12 +404,14 @@
                         Deprecated<DeprecatedDST>;
 }
 
-def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
-                      "mfvscr $vD", IIC_LdStStore,
-                      [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
-def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
-                      "mtvscr $vB", IIC_LdStLoad,
-                      [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+let hasSideEffects = 1 in {
+  def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
+                        "mfvscr $vD", IIC_LdStStore,
+                        [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
+  def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
+                        "mtvscr $vB", IIC_LdStLoad,
+                        [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+}
 
 let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
 def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),
@@ -469,10 +471,11 @@
                        "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP,
                        [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
                                                   (fneg v4f32:$vB))))]>;
-
-def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
-def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
-                             v8i16>;
+let hasSideEffects = 1 in {
+  def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+  def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+                               v8i16>;
+}
 def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
 } // isCommutable
 
@@ -608,14 +611,16 @@
                             v4i32, v16i8, v4i32>;
 def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
                             v4i32, v8i16, v4i32>;
-def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
-                            v4i32, v8i16, v4i32>;
 def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
                             v4i32, v16i8, v4i32>;
 def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
                             v4i32, v8i16, v4i32>;
-def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
-                            v4i32, v8i16, v4i32>;
+let hasSideEffects = 1 in {
+  def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+                              v4i32, v8i16, v4i32>;
+  def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+                              v4i32, v8i16, v4i32>;
+}
 
 let isCommutable = 1 in {
 def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
@@ -665,15 +670,17 @@
 def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
 def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
 
-def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
-def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+let hasSideEffects = 1 in {
+  def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+  def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
 
-def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
-                          v4i32, v16i8, v4i32>;
-def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
-                          v4i32, v8i16, v4i32>;
-def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
-                          v4i32, v16i8, v4i32>;
+  def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+                            v4i32, v16i8, v4i32>;
+  def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+                            v4i32, v8i16, v4i32>;
+  def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+                            v4i32, v16i8, v4i32>;
+}
 
 def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                     "vnor $vD, $vA, $vB", IIC_VecFP,
@@ -742,26 +749,28 @@
 // Vector Pack.
 def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
                         v8i16, v4i32>;
-def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
-                          v16i8, v8i16>;
-def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
-                          v16i8, v8i16>;
-def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
-                          v8i16, v4i32>;
-def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
-                          v8i16, v4i32>;
+let hasSideEffects = 1 in {
+  def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+                            v16i8, v8i16>;
+  def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+                            v16i8, v8i16>;
+  def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+                            v8i16, v4i32>;
+  def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+                            v8i16, v4i32>;
+  def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+                            v16i8, v8i16>;
+  def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+                            v8i16, v4i32>;
+}
 def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                        "vpkuhum $vD, $vA, $vB", IIC_VecFP,
                        [(set v16i8:$vD,
                          (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
-                          v16i8, v8i16>;
 def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                        "vpkuwum $vD, $vA, $vB", IIC_VecFP,
                        [(set v16i8:$vD,
                          (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
-                          v8i16, v4i32>;
 
 // Vector Unpack.
 def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
@@ -1322,16 +1331,18 @@
                            int_ppc_altivec_crypto_vpermxor, v16i8>;
 
 // Vector doubleword integer pack and unpack.
-def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss,
-                          v4i32, v2i64>;
-def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus,
-                          v4i32, v2i64>;
+let hasSideEffects = 1 in {
+  def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss,
+                            v4i32, v2i64>;
+  def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus,
+                            v4i32, v2i64>;
+  def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
+                            v4i32, v2i64>;
+}
 def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                        "vpkudum $vD, $vA, $vB", IIC_VecFP,
                        [(set v16i8:$vD,
                          (vpkudum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
-                          v4i32, v2i64>;
 def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
                           v2i64, v4i32>;
 def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
diff --git a/llvm/test/CodeGen/PowerPC/sat-register-clobber.ll b/llvm/test/CodeGen/PowerPC/sat-register-clobber.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/sat-register-clobber.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   %s -o - -verify-machineinstrs -mcpu=pwr9 | FileCheck %s
+
+define <4 x i32> @test(<4 x i32> %a, <4 x i32> %b, <4 x i32> %aa, <8 x i16>* %FromVSCR) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsumsws v5, v2, v3
+; CHECK-NEXT:    xxlxor vs32, vs32, vs32
+; CHECK-NEXT:    mtvscr v0
+; CHECK-NEXT:    vadduwm v0, v3, v2
+; CHECK-NEXT:    vpkswus v2, v2, v3
+; CHECK-NEXT:    mfvscr v1
+; CHECK-NEXT:    stxv vs33, 0(r9)
+; CHECK-NEXT:    vpkswus v3, v3, v4
+; CHECK-NEXT:    vadduwm v4, v0, v5
+; CHECK-NEXT:    vadduwm v2, v4, v2
+; CHECK-NEXT:    vadduwm v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <4 x i32> @llvm.ppc.altivec.vsumsws(<4 x i32> %a, <4 x i32> %b)
+  tail call void @llvm.ppc.altivec.mtvscr(<4 x i32> zeroinitializer)
+  %add = add <4 x i32> %b, %a
+  %1 = tail call <8 x i16> @llvm.ppc.altivec.vpkswus(<4 x i32> %a, <4 x i32> %b)
+  %2 = bitcast <8 x i16> %1 to <4 x i32>
+  %3 = tail call <8 x i16> @llvm.ppc.altivec.mfvscr()
+  store <8 x i16> %3, <8 x i16>* %FromVSCR, align 16
+  %4 = tail call <8 x i16> @llvm.ppc.altivec.vpkswus(<4 x i32> %b, <4 x i32> %aa)
+  %5 = bitcast <8 x i16> %4 to <4 x i32>
+  %add1 = add <4 x i32> %add, %0
+  %add2 = add <4 x i32> %add1, %2
+  %add3 = add <4 x i32> %add2, %5
+  ret <4 x i32> %add3
+}
+
+declare <4 x i32> @llvm.ppc.altivec.vsumsws(<4 x i32>, <4 x i32>) #1
+
+declare void @llvm.ppc.altivec.mtvscr(<4 x i32>) #1
+
+declare <8 x i16> @llvm.ppc.altivec.vpkswus(<4 x i32>, <4 x i32>) #1
+
+declare <8 x i16> @llvm.ppc.altivec.mfvscr() #1
+