diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -275,9 +275,9 @@ // VSCR access. def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">, - Intrinsic<[llvm_v8i16_ty], [], [IntrReadMem]>; + Intrinsic<[llvm_v8i16_ty], [], [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">, - Intrinsic<[], [llvm_v4i32_ty], []>; + Intrinsic<[], [llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; // Loads. These don't map directly to GCC builtins because they represent the @@ -747,10 +747,12 @@ // Saturating multiply-adds. def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, @@ -768,7 +770,7 @@ llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v4i32_ty], [IntrNoMem]>; + llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty], [IntrNoMem]>; @@ -780,7 +782,7 @@ llvm_v1i128_ty], [IntrNoMem]>; def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v4i32_ty], [IntrNoMem]>; + llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">, Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>; @@ -831,19 +833,19 @@ // Vector Sum Instructions. def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; // Vector Sign Extension Instructions def int_ppc_altivec_vextsb2w : GCCBuiltin<"__builtin_altivec_vextsb2w">, @@ -870,34 +872,34 @@ [IntrNoMem]>; def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpksdss : GCCBuiltin<"__builtin_altivec_vpksdss">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpksdus : GCCBuiltin<"__builtin_altivec_vpksdus">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; // vpkuhum is lowered to a shuffle. def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; // vpkuwum is lowered to a shuffle. def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; // vpkudum is lowered to a shuffle. def int_ppc_altivec_vpkudus : GCCBuiltin<"__builtin_altivec_vpkudus">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; // Unpacks. def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">, diff --git a/llvm/test/CodeGen/PowerPC/sat-register-clobber.ll b/llvm/test/CodeGen/PowerPC/sat-register-clobber.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/sat-register-clobber.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: %s -o - -verify-machineinstrs -mcpu=pwr9 | FileCheck %s + +define <4 x i32> @test(<4 x i32> %a, <4 x i32> %b, <4 x i32> %aa, <8 x i16>* %FromVSCR) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsumsws v5, v2, v3 +; CHECK-NEXT: xxlxor vs32, vs32, vs32 +; CHECK-NEXT: mtvscr v0 +; CHECK-NEXT: vadduwm v0, v3, v2 +; CHECK-NEXT: vpkswus v2, v2, v3 +; CHECK-NEXT: mfvscr v1 +; CHECK-NEXT: stxv vs33, 0(r9) +; CHECK-NEXT: vpkswus v3, v3, v4 +; CHECK-NEXT: vadduwm v4, v0, v5 +; CHECK-NEXT: vadduwm v2, v4, v2 +; CHECK-NEXT: vadduwm v2, v2, v3 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.altivec.vsumsws(<4 x i32> %a, <4 x i32> %b) + tail call void @llvm.ppc.altivec.mtvscr(<4 x i32> zeroinitializer) + %add = add <4 x i32> %b, %a + %1 = tail call <8 x i16> @llvm.ppc.altivec.vpkswus(<4 x i32> %a, <4 x i32> %b) + %2 = bitcast <8 x i16> %1 to <4 x i32> + %3 = tail call <8 x i16> @llvm.ppc.altivec.mfvscr() + store <8 x i16> %3, <8 x i16>* %FromVSCR, align 16 + %4 = tail call <8 x i16> @llvm.ppc.altivec.vpkswus(<4 x i32> %b, <4 x i32> %aa) + %5 = bitcast <8 x i16> %4 to <4 x i32> + %add1 = add <4 x i32> %add, %0 + %add2 = add <4 x i32> %add1, %2 + %add3 = add <4 x i32> %add2, %5 + ret <4 x i32> %add3 +} + +declare <4 x i32> @llvm.ppc.altivec.vsumsws(<4 x i32>, <4 x i32>) #1 + +declare void @llvm.ppc.altivec.mtvscr(<4 x i32>) #1 + +declare <8 x i16> @llvm.ppc.altivec.vpkswus(<4 x i32>, <4 x i32>) #1 + +declare <8 x i16> @llvm.ppc.altivec.mfvscr() #1 +