[PowerPC] Select stxvr[bhwd]x for element-0 stores on little endian only

The ISA 3.1 patterns that select STXVRBX/STXVRHX/STXVRWX/STXVRDX for a
store of vector element 0 are now predicated on IsLittleEndian in addition
to IsISA3_1, and v4f32/v2f64 variants are added alongside the integer ones.

builtins-ppc-p10vsx.ll is regenerated with per-configuration prefixes
(CHECK-LE, CHECK-BE, CHECK-O0, plus a shared CHECK), and
store-rightmost-vector-elt.ll is added to cover element-0 stores for every
legal vector type on both endiannesses.

NOTE(review): presumably the restriction is needed because element 0 is the
rightmost element of the register only on little endian -- confirm against
the Power ISA 3.1 description of the stxvr*x instructions.

diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2554,16 +2554,21 @@
             (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
 }
 
-let AddedComplexity = 400, Predicates = [IsISA3_1] in {
-  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
-            (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
-  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$rS, 0)), xoaddr:$src),
-            (STXVRHX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
-  def : Pat<(store (i32 (vector_extract v4i32:$rS, 0)), xoaddr:$src),
-            (STXVRWX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
-  def : Pat<(store (i64 (vector_extract v2i64:$rS, 0)), xoaddr:$src),
-            (STXVRDX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
-}
+let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
+  // Store element 0 of a VSX register to memory
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst),
+            (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst),
+            (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst),
+            (STXVRWX (COPY_TO_REGCLASS v4i32:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst),
+            (STXVRWX (COPY_TO_REGCLASS v4f32:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst),
+            (STXVRDX (COPY_TO_REGCLASS v2i64:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst),
+            (STXVRDX (COPY_TO_REGCLASS v2f64:$src, VSRC), xoaddr:$dst)>;
+}
 
 class xxevalPattern <dag pattern, bits<8> imm> :
   Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-BE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s --check-prefix=CHECK-O0
+; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-O0
 
 ; These test cases aims to test the builtins for the Power10 VSX vector
 ; instructions introduced in ISA 3.1.
@@ -22,14 +22,6 @@
 ; CHECK-NEXT:    srwi r3, r3, 31
 ; CHECK-NEXT:    extsw r3, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    xvtlsbb cr0, v2
-; CHECK-O0-NEXT:    mfocrf r3, 128
-; CHECK-O0-NEXT:    srwi r3, r3, 31
-; CHECK-O0-NEXT:    extsw r3, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 1)
   ret i32 %0
@@ -43,24 +35,22 @@
 ; CHECK-NEXT:    rlwinm r3, r3, 3, 31, 31
 ; CHECK-NEXT:    extsw r3, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    xvtlsbb cr0, v2
-; CHECK-O0-NEXT:    mfocrf r3, 128
-; CHECK-O0-NEXT:    rlwinm r3, r3, 3, 31, 31
-; CHECK-O0-NEXT:    extsw r3, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 0)
   ret i32 %0
 }
 
 define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_sc:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stxvrbx v2, r6, r5
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_sc:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrbx v2, r6, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_sc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 9
+; CHECK-BE-NEXT:    stxsibx v2, r6, r5
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_sc:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -79,10 +69,16 @@
 }
 
 define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_uc:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stxvrbx v2, r6, r5
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_uc:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrbx v2, r6, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_uc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 9
+; CHECK-BE-NEXT:    stxsibx v2, r6, r5
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_uc:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -101,11 +97,18 @@
 }
 
 define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ss:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 1
-; CHECK-NEXT:    stxvrhx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_ss:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 1
+; CHECK-LE-NEXT:    stxvrhx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ss:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    sldi r3, r5, 1
+; CHECK-BE-NEXT:    stxsihx v2, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_ss:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -125,11 +128,18 @@
 }
 
 define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_us:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 1
-; CHECK-NEXT:    stxvrhx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_us:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 1
+; CHECK-LE-NEXT:    stxvrhx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_us:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    sldi r3, r5, 1
+; CHECK-BE-NEXT:    stxsihx v2, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_us:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -149,11 +159,18 @@
 }
 
 define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_si:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 2
-; CHECK-NEXT:    stxvrwx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_si:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 2
+; CHECK-LE-NEXT:    stxvrwx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_si:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    sldi r3, r5, 2
+; CHECK-BE-NEXT:    stfiwx f0, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_si:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -173,11 +190,18 @@
 }
 
 define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ui:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 2
-; CHECK-NEXT:    stxvrwx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_ui:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 2
+; CHECK-LE-NEXT:    stxvrwx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ui:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    sldi r3, r5, 2
+; CHECK-BE-NEXT:    stfiwx f0, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_ui:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -197,11 +221,17 @@
 }
 
 define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_sll:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 3
-; CHECK-NEXT:    stxvrdx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_sll:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 3
+; CHECK-LE-NEXT:    stxvrdx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_sll:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r5, 3
+; CHECK-BE-NEXT:    stxsdx v2, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_sll:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -219,11 +249,17 @@
 }
 
 define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ull:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 3
-; CHECK-NEXT:    stxvrdx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_ull:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 3
+; CHECK-LE-NEXT:    stxvrdx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ull:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r5, 3
+; CHECK-BE-NEXT:    stxsdx v2, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_ull:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -245,11 +281,6 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxvrbx v2, r4, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: vec_xl_zext:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    lxvrbx v2, r4, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
   %0 = load i8, i8* %add.ptr, align 1
@@ -264,12 +295,6 @@
 ; CHECK-NEXT:    sldi r3, r3, 1
 ; CHECK-NEXT:    lxvrhx v2, r4, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: vec_xl_zext_short:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    sldi r3, r3, 1
-; CHECK-O0-NEXT:    lxvrhx v2, r4, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
   %0 = load i16, i16* %add.ptr, align 2
@@ -284,12 +309,6 @@
 ; CHECK-NEXT:    sldi r3, r3, 2
 ; CHECK-NEXT:    lxvrwx v2, r4, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: vec_xl_zext_word:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    sldi r3, r3, 2
-; CHECK-O0-NEXT:    lxvrwx v2, r4, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
   %0 = load i32, i32* %add.ptr, align 4
@@ -304,12 +323,6 @@
 ; CHECK-NEXT:    sldi r3, r3, 3
 ; CHECK-NEXT:    lxvrdx v2, r4, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: vec_xl_zext_dw:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    sldi r3, r3, 3
-; CHECK-O0-NEXT:    lxvrdx v2, r4, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
   %0 = load i64, i64* %add.ptr, align 8
@@ -319,13 +332,21 @@
 }
 
 define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
-; CHECK-LABEL: vec_xl_sext_b:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lbzx r3, r4, r3
-; CHECK-NEXT:    extsb r3, r3
-; CHECK-NEXT:    sradi r4, r3, 63
-; CHECK-NEXT:    mtvsrdd v2, r4, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xl_sext_b:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    lbzx r3, r4, r3
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    sradi r4, r3, 63
+; CHECK-LE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_b:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lbzx r3, r4, r3
+; CHECK-BE-NEXT:    extsb r3, r3
+; CHECK-BE-NEXT:    sradi r4, r3, 63
+; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xl_sext_b:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -343,13 +364,21 @@
 }
 
 define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) {
-; CHECK-LABEL: vec_xl_sext_h:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r3, 1
-; CHECK-NEXT:    lhax r3, r4, r3
-; CHECK-NEXT:    sradi r4, r3, 63
-; CHECK-NEXT:    mtvsrdd v2, r4, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xl_sext_h:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r3, 1
+; CHECK-LE-NEXT:    lhax r3, r4, r3
+; CHECK-LE-NEXT:    sradi r4, r3, 63
+; CHECK-LE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_h:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r3, 1
+; CHECK-BE-NEXT:    lhax r3, r4, r3
+; CHECK-BE-NEXT:    sradi r4, r3, 63
+; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xl_sext_h:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -367,13 +396,21 @@
 }
 
 define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) {
-; CHECK-LABEL: vec_xl_sext_w:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r3, 2
-; CHECK-NEXT:    lwax r3, r4, r3
-; CHECK-NEXT:    sradi r4, r3, 63
-; CHECK-NEXT:    mtvsrdd v2, r4, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xl_sext_w:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r3, 2
+; CHECK-LE-NEXT:    lwax r3, r4, r3
+; CHECK-LE-NEXT:    sradi r4, r3, 63
+; CHECK-LE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_w:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r3, 2
+; CHECK-BE-NEXT:    lwax r3, r4, r3
+; CHECK-BE-NEXT:    sradi r4, r3, 63
+; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xl_sext_w:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -391,13 +428,21 @@
 }
 
 define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) {
-; CHECK-LABEL: vec_xl_sext_d:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r3, 3
-; CHECK-NEXT:    ldx r3, r4, r3
-; CHECK-NEXT:    sradi r4, r3, 63
-; CHECK-NEXT:    mtvsrdd v2, r4, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xl_sext_d:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r3, 3
+; CHECK-LE-NEXT:    ldx r3, r4, r3
+; CHECK-LE-NEXT:    sradi r4, r3, 63
+; CHECK-LE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_d:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r3, 3
+; CHECK-BE-NEXT:    ldx r3, r4, r3
+; CHECK-BE-NEXT:    sradi r4, r3, 63
+; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xl_sext_d:
 ; CHECK-O0:       # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll b/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:   < %s | FileCheck %s --check-prefix=CHECK-LE
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:   < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @test1(<4 x i32> %A, i32* %a) {
+; CHECK-LE-LABEL: test1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrwx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    stfiwx f0, 0, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %A, i32 0
+  store i32 %vecext, i32* %a, align 4
+  ret void
+}
+
+define void @test2(<4 x float> %A, float* %a) {
+; CHECK-LE-LABEL: test2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrwx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    stfiwx f0, 0, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x float> %A, i32 0
+  store float %vecext, float* %a, align 4
+  ret void
+}
+
+define void @test3(<2 x double> %A, double* %a) {
+; CHECK-LE-LABEL: test3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrdx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test3:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    stxsd v2, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <2 x double> %A, i32 0
+  store double %vecext, double* %a, align 8
+  ret void
+}
+
+define void @test4(<2 x i64> %A, i64* %a) {
+; CHECK-LE-LABEL: test4:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrdx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    stxsd v2, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <2 x i64> %A, i32 0
+  store i64 %vecext, i64* %a, align 8
+  ret void
+}
+
+define void @test5(<8 x i16> %A, i16* %a) {
+; CHECK-LE-LABEL: test5:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrhx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test5:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    stxsihx v2, 0, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %A, i32 0
+  store i16 %vecext, i16* %a, align 2
+  ret void
+}
+
+define void @test6(<16 x i8> %A, i8* %a) {
+; CHECK-LE-LABEL: test6:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrbx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test6:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 9
+; CHECK-BE-NEXT:    stxsibx v2, 0, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %A, i32 0
+  store i8 %vecext, i8* %a, align 1
+  ret void
+}
+