Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -3307,7 +3307,27 @@ def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), (v2f64 (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; - } + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), ixaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + ixaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>; + } // IsLittleEndian, HasP9Vector let Predicates = [IsBigEndian, HasP9Vector] in { def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), @@ -3319,7 +3339,27 @@ (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; - } + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG 
(XXPERMDI $A, $A, 2), + sub_64), xaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), ixaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), ixaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>; + } // IsBigEndian, HasP9Vector } let Predicates = [IsBigEndian, HasP9Vector] in { @@ -3834,8 +3874,33 @@ def : Pat; + def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + foreach Idx = [ [0,3], [2,1], [3,2] ] in { + def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; + } } + let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in { + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + 
xoaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + xoaddr:$src)>; + } + // Big endian, available on all targets with VSX let Predicates = [IsBigEndian, HasVSX] in { def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), @@ -3871,8 +3936,33 @@ def : Pat; + def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + foreach Idx = [ [0,2], [1,1], [3,3] ] in { + def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; + } } + let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in { + def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + xoaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + xoaddr:$src)>; + def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + } + let Predicates = [IsLittleEndian, HasVSX] in { // Little endian, available on all targets with VSX def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), Index: test/CodeGen/PowerPC/extract-and-store.ll =================================================================== --- test/CodeGen/PowerPC/extract-and-store.ll +++ test/CodeGen/PowerPC/extract-and-store.ll @@ -12,20 +12,19 @@ ; CHECK-LABEL: testll0: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
xxswapd vs0, vs34 -; CHECK-NEXT: mfvsrd r3, f0 -; CHECK-NEXT: std r3, 24(r7) +; CHECK-NEXT: stfd f0, 24(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testll0: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mfvsrd r3, vs34 -; CHECK-BE-NEXT: std r3, 24(r7) +; CHECK-BE-NEXT: addi r3, r7, 24 +; CHECK-BE-NEXT: stxsdx vs34, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testll0: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mfvsrld r3, vs34 -; CHECK-P9-NEXT: std r3, 24(r7) +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 24(r7) ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <2 x i64> %a, i32 0 @@ -38,21 +37,19 @@ define <2 x i64> @testll1(<2 x i64> returned %a, i64 %b, i64* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testll1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mfvsrd r3, vs34 -; CHECK-NEXT: std r3, 24(r6) +; CHECK-NEXT: addi r3, r6, 24 +; CHECK-NEXT: stxsdx vs34, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testll1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: mfvsrd r3, f0 -; CHECK-BE-NEXT: std r3, 24(r6) +; CHECK-BE-NEXT: stfd f0, 24(r6) ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testll1: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mfvsrd r3, vs34 -; CHECK-P9-NEXT: std r3, 24(r6) +; CHECK-P9-NEXT: stxsd v2, 24(r6) ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <2 x i64> %a, i32 1 @@ -116,22 +113,23 @@ define <4 x float> @testf0(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf0: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvspdpn f0, vs34 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-BE-NEXT: addi r3, r7, 
12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf0: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 0 @@ -144,23 +142,22 @@ define <4 x float> @testf1(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stxsiwx vs34, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf1: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxswapd vs0, vs34 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 1 @@ -173,23 +170,21 @@ define <4 x float> @testf2(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stxsiwx vs34, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; 
CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stxsiwx vs34, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 2 @@ -202,21 +197,23 @@ define <4 x float> @testf3(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscvspdpn f0, vs34 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xscvspdpn f0, vs34 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 3 @@ -229,23 +226,23 @@ define <4 x i32> @testi0(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testi0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: mfvsrwz r3, f0 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi0: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 -; CHECK-BE-NEXT: 
stw r3, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi0: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 0 @@ -259,21 +256,21 @@ ; CHECK-LABEL: testi1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-NEXT: mfvsrwz r3, f0 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mfvsrwz r3, vs34 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stxsiwx vs34, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi1: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 1 @@ -286,21 +283,21 @@ define <4 x i32> @testi2(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testi2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mfvsrwz r3, vs34 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stxsiwx vs34, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: mfvsrwz r3, f0 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mfvsrwz r3, vs34 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: addi r3, 
r7, 12 +; CHECK-P9-NEXT: stxsiwx vs34, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 2 @@ -314,22 +311,22 @@ ; CHECK-LABEL: testi3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: mfvsrwz r3, f0 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: mfvsrwz r3, f0 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r3, 12 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 3 Index: test/CodeGen/PowerPC/scalar_vector_test_2.ll =================================================================== --- test/CodeGen/PowerPC/scalar_vector_test_2.ll +++ test/CodeGen/PowerPC/scalar_vector_test_2.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ @@ -15,11 +16,10 @@ ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P9LE-NEXT: xvaddsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P9LE-NEXT: xscvspdpn f0, vs0 -; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P9LE-NEXT: stfiwx f0, 0, r5 ; P9LE-NEXT: blr - +; ; P9BE-LABEL: test_liwzx1: ; P9BE: # %bb.0: ; P9BE-NEXT: lfiwzx f0, 0, r3 @@ -27,10 
+27,10 @@ ; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P9BE-NEXT: xvaddsp vs0, vs0, vs1 -; P9BE-NEXT: xscvspdpn f0, vs0 -; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9BE-NEXT: stfiwx f0, 0, r5 ; P9BE-NEXT: blr - +; ; P8LE-LABEL: test_liwzx1: ; P8LE: # %bb.0: ; P8LE-NEXT: lfiwzx f0, 0, r3 @@ -38,11 +38,10 @@ ; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P8LE-NEXT: xvaddsp vs0, vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8LE-NEXT: xscvspdpn f0, vs0 -; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P8LE-NEXT: stfiwx f0, 0, r5 ; P8LE-NEXT: blr - +; ; P8BE-LABEL: test_liwzx1: ; P8BE: # %bb.0: ; P8BE-NEXT: lfiwzx f0, 0, r3 @@ -50,9 +49,12 @@ ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P8BE-NEXT: xvaddsp vs0, vs0, vs1 -; P8BE-NEXT: xscvspdpn f0, vs0 -; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8BE-NEXT: stfiwx f0, 0, r5 ; P8BE-NEXT: blr + + + %a = load <1 x float>, <1 x float>* %A %b = load <1 x float>, <1 x float>* %B %X = fadd <1 x float> %a, %b @@ -69,11 +71,10 @@ ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P9LE-NEXT: xvsubsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P9LE-NEXT: xscvspdpn f0, vs0 -; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P9LE-NEXT: stfiwx f0, 0, r5 ; P9LE-NEXT: blr - +; ; P9BE-LABEL: test_liwzx2: ; P9BE: # %bb.0: ; P9BE-NEXT: lfiwzx f0, 0, r3 @@ -82,10 +83,10 @@ ; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P9BE-NEXT: xvsubsp vs0, vs0, vs1 -; P9BE-NEXT: xscvspdpn f0, vs0 -; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9BE-NEXT: stfiwx f0, 0, r5 ; P9BE-NEXT: blr - +; ; P8LE-LABEL: test_liwzx2: ; P8LE: # %bb.0: ; P8LE-NEXT: lfiwzx f0, 0, r3 @@ -94,11 +95,10 @@ ; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P8LE-NEXT: xvsubsp vs0, 
vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8LE-NEXT: xscvspdpn f0, vs0 -; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P8LE-NEXT: stfiwx f0, 0, r5 ; P8LE-NEXT: blr - +; ; P8BE-LABEL: test_liwzx2: ; P8BE: # %bb.0: ; P8BE-NEXT: lfiwzx f0, 0, r3 @@ -107,9 +107,12 @@ ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P8BE-NEXT: xvsubsp vs0, vs0, vs1 -; P8BE-NEXT: xscvspdpn f0, vs0 -; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8BE-NEXT: stfiwx f0, 0, r5 ; P8BE-NEXT: blr + + + %a = load <1 x float>, <1 x float>* %A %b = load <1 x float>, <1 x float>* %B %X = fsub <1 x float> %a, %b