Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -1129,6 +1129,14 @@
     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
     bool hasBitPreservingFPLogic(EVT VT) const override;
     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+    /// If the input vector will require a direct-move to extract the element
+    /// but the store can be combined into PPC::STIWX, we want to combine it.
+    /// Returns false unless the subtarget has direct moves, Altivec and VSX,
+    /// \p Idx is a ConstantInt, and the element width has a combined store.
+    /// On success \p Cost is 0 when \p Idx is the natural index for that
+    /// element width on this endianness, and 3 otherwise.
+    bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+                                   unsigned &Cost) const override;
   }; // end class PPCTargetLowering
 
   namespace PPC {
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14288,6 +14288,40 @@
   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
 }
 
+bool PPCTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+                                                  unsigned &Cost) const {
+  if (!Subtarget.hasDirectMove() || !Subtarget.hasAltivec() ||
+      !Subtarget.hasVSX())
+    return false;
+
+  // If the index is unknown at compile time, this is very expensive to lower
+  // and it is not possible to combine the store with the extract.
+  ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
+  if (!CI)
+    return false;
+
+  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+  unsigned BitWidth = VectorTy->getScalarSizeInBits();
+
+  // Only have combined stores for sub-word types on Power9.
+  if (BitWidth > 32 || (!Subtarget.hasP9Vector() && BitWidth != 32))
+    return false;
+
+  // NaturalIdx is the one index per element width that is reported as free;
+  // for 32-bit elements it is the index the bare STIWX patterns match.
+  uint64_t CIdx = CI->getZExtValue();
+  uint64_t NaturalIdx = ~0ULL;
+  switch (BitWidth) {
+    default: return false;
+    case 8: NaturalIdx = Subtarget.isLittleEndian() ? 8 : 7; break;
+    case 16: NaturalIdx = Subtarget.isLittleEndian() ? 4 : 3; break;
+    case 32: NaturalIdx = Subtarget.isLittleEndian() ? 2 : 1; break;
+  }
+
+  Cost = CIdx == NaturalIdx ? 0 : 3;
+  return true;
+}
+
 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
   if (!Subtarget.hasVSX())
     return false;
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -1430,7 +1430,7 @@
                           "xscvspdpn $XT, $XB", IIC_VecFP, []>;
   } // UseVSXReg = 1
 
-  let Predicates = [IsLittleEndian] in {
+  let Predicates = [HasP8Vector, IsLittleEndian] in {
   def : Pat<(f32 (PPCfcfids
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
             (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1445,9 +1445,32 @@
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
             (f32 (XSCVUXDSP (COPY_TO_REGCLASS
                               (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
-  }
+  def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+    def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                     sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                     sub_64), xoaddr:$src)>;
+  }
+  } // HasP8Vector, IsLittleEndian
+
+  let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
+  def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xoaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xoaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  } // HasP8Vector, IsLittleEndian, NoP9Vector
 
-  let Predicates = [IsBigEndian] in {
+  let Predicates = [HasP8Vector, IsBigEndian] in {
   def : Pat<(f32 (PPCfcfids
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
             (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
@@ -1460,6 +1483,29 @@
   def : Pat<(f32 (PPCfcfidus
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
             (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+  def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+    def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                     sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                     sub_64), xoaddr:$src)>;
+  }
+  } // HasP8Vector, IsBigEndian
+
+  let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
+  def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xoaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xoaddr:$src)>;
   }
 
   // Instructions for converting float to i64 feeding a store.
@@ -3270,6 +3316,27 @@
   def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
             (v2f64 (XXPERMDIs
             (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                     sub_64), xaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                     sub_64), xaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src),
+            (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                         sub_64), ixaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src),
+            (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                        ixaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src),
+            (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src),
+            (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+
   }
 
   let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3282,6 +3349,28 @@
             (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
   def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
             (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
+  // Big endian: element 0 is stored straight from sub_64 and element 1 needs
+  // the XXPERMDI swap, matching the IsBigEndian, NoP9Vector patterns above.
+  def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                     sub_64), xaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                     sub_64), xaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src),
+            (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                         sub_64), ixaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src),
+            (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                         sub_64), ixaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src),
+            (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src),
+            (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
   }
 }
 
@@ -3467,7 +3556,7 @@
             (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8),
-            (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>;
+            (DFSTOREf64 (XSCVDPSXDS f64:$src), ixaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
             (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
@@ -3498,7 +3587,7 @@
             (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8),
-            (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>;
+            (DFSTOREf64 (XSCVDPUXDS f64:$src), ixaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
             (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
Index: test/CodeGen/PowerPC/combine-extract-store.ll
===================================================================
--- test/CodeGen/PowerPC/combine-extract-store.ll
+++ test/CodeGen/PowerPC/combine-extract-store.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs -O2 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown \
+; RUN:   -verify-machineinstrs -O2 < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; Function Attrs: norecurse nounwind
+define void @test(<4 x i32>* noalias nocapture readonly %VP, <4 x i32>*
noalias nocapture %VP2, i32* noalias nocapture %IP) local_unnamed_addr #0 { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vspltisw 2, 4 +; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: vadduwm 2, 3, 2 +; CHECK-NEXT: stxsiwx 34, 0, 5 +; CHECK-NEXT: stvx 3, 0, 4 +; CHECK-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* %VP, align 16 + %vecext = extractelement <4 x i32> %0, i32 2 + %add = add nsw i32 %vecext, 4 + store i32 %add, i32* %IP, align 4 + store <4 x i32> %0, <4 x i32>* %VP2, align 16 + ret void +} + +; Function Attrs: norecurse nounwind +define void @testf(<4 x float>* noalias nocapture readonly %VP, <4 x float>* noalias nocapture %VP2, float* noalias nocapture %IP) local_unnamed_addr #0 { +; CHECK-LABEL: testf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 6, 2, .LCPI1_0@toc@ha +; CHECK-NEXT: lvx 2, 0, 3 +; CHECK-NEXT: addi 6, 6, .LCPI1_0@toc@l +; CHECK-NEXT: lvx 3, 0, 6 +; CHECK-NEXT: stvx 2, 0, 4 +; CHECK-NEXT: xvaddsp 0, 34, 35 +; CHECK-NEXT: stfiwx 0, 0, 5 +; CHECK-NEXT: blr +entry: + %0 = load <4 x float>, <4 x float>* %VP, align 16 + %vecext = extractelement <4 x float> %0, i32 2 + %add = fadd float %vecext, 4.000000e+00 + store float %add, float* %IP, align 4 + store <4 x float> %0, <4 x float>* %VP2, align 16 + ret void +} + +; Function Attrs: norecurse nounwind +define void @testBE(<4 x i32>* noalias nocapture readonly %VP, <4 x i32>* noalias nocapture %VP2, i32* noalias nocapture %IP) local_unnamed_addr #0 { +; CHECK-BE-LABEL: testBE: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vspltisw 2, 4 +; CHECK-BE-NEXT: lxvw4x 35, 0, 3 +; CHECK-BE-NEXT: vadduwm 2, 3, 2 +; CHECK-BE-NEXT: stxsiwx 34, 0, 5 +; CHECK-BE-NEXT: stxvw4x 35, 0, 4 +; CHECK-BE-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* %VP, align 16 + %vecext = extractelement <4 x i32> %0, i32 1 + %add = add nsw i32 %vecext, 4 + store i32 %add, i32* %IP, align 4 + store <4 x i32> %0, <4 x i32>* %VP2, align 16 + ret void +} + +; Function Attrs: norecurse nounwind +define 
void @testBEf(<4 x float>* noalias nocapture readonly %VP, <4 x float>* noalias nocapture %VP2, float* noalias nocapture %IP) local_unnamed_addr #0 { +; CHECK-BE-LABEL: testBEf: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 6, 2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: lxvw4x 0, 0, 3 +; CHECK-BE-NEXT: addi 6, 6, .LCPI3_0@toc@l +; CHECK-BE-NEXT: lxvw4x 1, 0, 6 +; CHECK-BE-NEXT: stxvw4x 0, 0, 4 +; CHECK-BE-NEXT: xvaddsp 1, 0, 1 +; CHECK-BE-NEXT: stfiwx 1, 0, 5 +; CHECK-BE-NEXT: blr +entry: + %0 = load <4 x float>, <4 x float>* %VP, align 16 + %vecext = extractelement <4 x float> %0, i32 1 + %add = fadd float %vecext, 4.000000e+00 + store float %add, float* %IP, align 4 + store <4 x float> %0, <4 x float>* %VP2, align 16 + ret void +} Index: test/CodeGen/PowerPC/extract-and-store.ll =================================================================== --- test/CodeGen/PowerPC/extract-and-store.ll +++ test/CodeGen/PowerPC/extract-and-store.ll @@ -12,20 +12,20 @@ ; CHECK-LABEL: testll0: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: mfvsrd r3, f0 -; CHECK-NEXT: std r3, 24(r7) +; CHECK-NEXT: addi r3, r7, 24 +; CHECK-NEXT: stxsdx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testll0: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mfvsrd r3, vs34 -; CHECK-BE-NEXT: std r3, 24(r7) +; CHECK-BE-NEXT: addi r3, r7, 24 +; CHECK-BE-NEXT: stxsdx vs34, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testll0: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mfvsrld r3, vs34 -; CHECK-P9-NEXT: std r3, 24(r7) +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 24(r7) ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <2 x i64> %a, i32 0 @@ -38,21 +38,20 @@ define <2 x i64> @testll1(<2 x i64> returned %a, i64 %b, i64* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testll1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mfvsrd r3, vs34 -; CHECK-NEXT: std r3, 24(r6) +; CHECK-NEXT: addi r3, r6, 24 +; CHECK-NEXT: stxsdx vs34, 0, r3 ; CHECK-NEXT: 
blr ; ; CHECK-BE-LABEL: testll1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: mfvsrd r3, f0 -; CHECK-BE-NEXT: std r3, 24(r6) +; CHECK-BE-NEXT: addi r3, r6, 24 +; CHECK-BE-NEXT: stxsdx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testll1: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mfvsrd r3, vs34 -; CHECK-P9-NEXT: std r3, 24(r6) +; CHECK-P9-NEXT: stxsd v2, 24(r6) ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <2 x i64> %a, i32 1 @@ -66,7 +65,8 @@ ; CHECK-LABEL: testd0: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: stfd f0, 24(r7) +; CHECK-NEXT: addi r3, r7, 24 +; CHECK-NEXT: stxsdx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testd0: @@ -98,7 +98,8 @@ ; CHECK-BE-LABEL: testd1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: stfd f0, 24(r7) +; CHECK-BE-NEXT: addi r3, r7, 24 +; CHECK-BE-NEXT: stxsdx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testd1: @@ -116,22 +117,23 @@ define <4 x float> @testf0(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf0: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvspdpn f0, vs34 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf0: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = 
extractelement <4 x float> %a, i32 0 @@ -144,23 +146,22 @@ define <4 x float> @testf1(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stxsiwx vs34, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf1: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxswapd vs0, vs34 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 1 @@ -173,23 +174,21 @@ define <4 x float> @testf2(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stxsiwx vs34, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stxsiwx vs34, 0, r3 ; CHECK-P9-NEXT: 
blr entry: %vecext = extractelement <4 x float> %a, i32 2 @@ -202,21 +201,23 @@ define <4 x float> @testf3(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testf3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscvspdpn f0, vs34 -; CHECK-NEXT: stfs f0, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testf3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: stfs f0, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testf3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xscvspdpn f0, vs34 -; CHECK-P9-NEXT: stfs f0, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 3 @@ -229,23 +230,23 @@ define <4 x i32> @testi0(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testi0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: mfvsrwz r3, f0 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi0: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi0: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: 
blr entry: %vecext = extractelement <4 x i32> %a, i32 0 @@ -259,21 +260,21 @@ ; CHECK-LABEL: testi1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-NEXT: mfvsrwz r3, f0 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mfvsrwz r3, vs34 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stxsiwx vs34, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi1: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 1 @@ -286,21 +287,21 @@ define <4 x i32> @testi2(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 { ; CHECK-LABEL: testi2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mfvsrwz r3, vs34 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stxsiwx vs34, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testi2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: mfvsrwz r3, f0 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mfvsrwz r3, vs34 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stxsiwx vs34, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 2 @@ -314,22 +315,22 @@ ; CHECK-LABEL: testi3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: mfvsrwz r3, f0 -; CHECK-NEXT: stw r3, 12(r7) +; CHECK-NEXT: addi r3, r7, 12 +; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: 
testi3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs0, vs34 -; CHECK-BE-NEXT: mfvsrwz r3, f0 -; CHECK-BE-NEXT: stw r3, 12(r7) +; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-BE-NEXT: addi r3, r7, 12 +; CHECK-BE-NEXT: stfiwx f0, 0, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testi3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r3, 12 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-P9-NEXT: stw r3, 12(r7) +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-P9-NEXT: addi r3, r7, 12 +; CHECK-P9-NEXT: stfiwx f0, 0, r3 ; CHECK-P9-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 3 Index: test/CodeGen/PowerPC/scalar_vector_test_2.ll =================================================================== --- test/CodeGen/PowerPC/scalar_vector_test_2.ll +++ test/CodeGen/PowerPC/scalar_vector_test_2.ll @@ -15,9 +15,8 @@ ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P9LE-NEXT: xvaddsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P9LE-NEXT: xscvspdpn f0, vs0 -; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P9LE-NEXT: stfiwx f0, 0, r5 ; P9LE-NEXT: blr ; P9BE-LABEL: test_liwzx1: @@ -27,8 +26,8 @@ ; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P9BE-NEXT: xvaddsp vs0, vs0, vs1 -; P9BE-NEXT: xscvspdpn f0, vs0 -; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9BE-NEXT: stfiwx f0, 0, r5 ; P9BE-NEXT: blr ; P8LE-LABEL: test_liwzx1: @@ -38,9 +37,8 @@ ; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P8LE-NEXT: xvaddsp vs0, vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8LE-NEXT: xscvspdpn f0, vs0 -; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P8LE-NEXT: stfiwx f0, 0, r5 ; P8LE-NEXT: blr ; P8BE-LABEL: test_liwzx1: @@ -50,8 +48,8 @@ ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P8BE-NEXT: xvaddsp vs0, vs0, vs1 -; P8BE-NEXT: xscvspdpn f0, vs0 -; P8BE-NEXT: stfsx 
f0, 0, r5 +; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8BE-NEXT: stfiwx f0, 0, r5 ; P8BE-NEXT: blr %a = load <1 x float>, <1 x float>* %A %b = load <1 x float>, <1 x float>* %B @@ -69,9 +67,8 @@ ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P9LE-NEXT: xvsubsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P9LE-NEXT: xscvspdpn f0, vs0 -; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P9LE-NEXT: stfiwx f0, 0, r5 ; P9LE-NEXT: blr ; P9BE-LABEL: test_liwzx2: @@ -82,8 +79,8 @@ ; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P9BE-NEXT: xvsubsp vs0, vs0, vs1 -; P9BE-NEXT: xscvspdpn f0, vs0 -; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9BE-NEXT: stfiwx f0, 0, r5 ; P9BE-NEXT: blr ; P8LE-LABEL: test_liwzx2: @@ -94,9 +91,8 @@ ; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 ; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 ; P8LE-NEXT: xvsubsp vs0, vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8LE-NEXT: xscvspdpn f0, vs0 -; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P8LE-NEXT: stfiwx f0, 0, r5 ; P8LE-NEXT: blr ; P8BE-LABEL: test_liwzx2: @@ -107,8 +103,8 @@ ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 ; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 ; P8BE-NEXT: xvsubsp vs0, vs0, vs1 -; P8BE-NEXT: xscvspdpn f0, vs0 -; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8BE-NEXT: stfiwx f0, 0, r5 ; P8BE-NEXT: blr %a = load <1 x float>, <1 x float>* %A %b = load <1 x float>, <1 x float>* %B Index: test/CodeGen/PowerPC/store_fptoi.ll =================================================================== --- test/CodeGen/PowerPC/store_fptoi.ll +++ test/CodeGen/PowerPC/store_fptoi.ll @@ -18,7 +18,7 @@ ; CHECK-LABEL: dpConv2sdw ; CHECK: lfd [[LD:[0-9]+]], 0(3) ; CHECK: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: stfd [[CONV]], 0(4) ; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: dpConv2sdw @@ -104,7 +104,7 @@ ; CHECK-LABEL: spConv2sdw ; CHECK: 
lfs [[LD:[0-9]+]], 0(3) ; CHECK-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: stfd [[CONV]], 0(4) ; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: spConv2sdw @@ -402,7 +402,7 @@ ; CHECK-LABEL: dpConv2udw ; CHECK: lfd [[LD:[0-9]+]], 0(3) ; CHECK: xscvdpuxds [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: stfd [[CONV]], 0(4) ; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: dpConv2udw @@ -488,7 +488,7 @@ ; CHECK-LABEL: spConv2udw ; CHECK: lfs [[LD:[0-9]+]], 0(3) ; CHECK-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: stfd [[CONV]], 0(4) ; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: spConv2udw