diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -664,6 +664,19 @@
     return false;
   }
 
+  /// Return true if the target has an efficient pattern for storing two
+  /// values extracted from a vector into consecutive memory locations.
+  ///
+  /// \p Ty0 and \p Ty1 are the value types of the first and second stored
+  /// values. \p Idx0 and \p Idx1 are the constant element indices of the
+  /// corresponding EXTRACT_VECTOR_ELT nodes, or -1 when the stored value is
+  /// not an extract with a constant index. The default returns false.
+  virtual bool
+  canMergeConsecutiveStoresOfVectorExtracts(const SelectionDAG &DAG, EVT Ty0,
+                                            EVT Ty1, int Idx0, int Idx1) const {
+    return false;
+  }
+
   /// Return if the target supports combining a
   /// chain like:
   /// \code
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18444,6 +18444,37 @@
                                    *FirstInChain->getMemOperand(), &IsFast) &&
             IsFast)
           NumStoresToMerge = i + 1;
+      else if (Level == BeforeLegalizeTypes && (i + 1) == 2) {
+        // A target with an efficient pattern for extracting two vector
+        // elements and storing them to consecutive memory locations may
+        // merge the two stores using the vector type with twice as many
+        // elements as Ty, before types are legalized.
+        //
+        // Constant element index of a stored EXTRACT_VECTOR_ELT (looking
+        // through bitcasts), or -1 for any other stored value.
+        auto GetExtractIdx = [](SDValue StoredVal) -> int {
+          SDValue Elt = peekThroughBitcasts(StoredVal);
+          if (Elt.getOpcode() == ISD::EXTRACT_VECTOR_ELT)
+            if (auto *C = dyn_cast<ConstantSDNode>(Elt.getOperand(1)))
+              return C->getZExtValue();
+          return -1;
+        };
+
+        SDValue Val0 = StoreNodes[0].MemNode->getOperand(1);
+        SDValue Val1 = StoreNodes[1].MemNode->getOperand(1);
+        EVT DoubleTy = Ty.getDoubleNumVectorElementsVT(Context);
+        if (TLI.canMergeConsecutiveStoresOfVectorExtracts(
+                DAG, Val0.getValueType(), Val1.getValueType(),
+                GetExtractIdx(Val0), GetExtractIdx(Val1)) &&
+            TLI.isTypeLegal(DoubleTy) &&
+            TLI.getTypeToTransformTo(Context, Ty) == DoubleTy &&
+            TLI.canMergeStoresTo(FirstStoreAS, DoubleTy,
+                                 DAG.getMachineFunction()) &&
+            TLI.allowsMemoryAccess(Context, DL, DoubleTy,
+                                   *FirstInChain->getMemOperand(), &IsFast) &&
+            IsFast)
+          NumStoresToMerge = i + 1;
+      }
     }
 
     // Check if we found a legal integer type creating a meaningful
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1456,6 +1456,9 @@
     // duplicate return instructions to help enable tail call optimizations.
     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
     bool hasBitPreservingFPLogic(EVT VT) const override;
+    bool canMergeConsecutiveStoresOfVectorExtracts(const SelectionDAG &DAG,
+                                                   EVT Ty0, EVT Ty1, int Idx0,
+                                                   int Idx1) const override;
     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 
     /// getAddrModeForFlags - Based on the set of address flags, select the most
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17413,6 +17413,43 @@
          VT == MVT::v4f32 || VT == MVT::v2f64;
 }
 
+/// PowerPC accepts only pairs of f32 or pairs of i32 values extracted at
+/// known constant indices; which index pairs qualify depends on endianness
+/// and on the DisablePerfectShuffle option.
+bool PPCTargetLowering::canMergeConsecutiveStoresOfVectorExtracts(
+    const SelectionDAG &DAG, EVT Ty0, EVT Ty1, int Idx0, int Idx1) const {
+  // Both stored values must be f32, or both must be i32.
+  const bool BothF32 = Ty0 == MVT::f32 && Ty1 == MVT::f32;
+  const bool BothI32 = Ty0 == MVT::i32 && Ty1 == MVT::i32;
+  if (!BothF32 && !BothI32)
+    return false;
+
+  // An index of -1 is rejected outright.
+  if (Idx0 == -1 || Idx1 == -1)
+    return false;
+
+  const bool IsLE = DAG.getDataLayout().isLittleEndian();
+  // Several index pairs are accepted only on big-endian targets, and only
+  // while the DisablePerfectShuffle option is off.
+  const bool BEPerfectShuffle = !IsLE && !DisablePerfectShuffle;
+
+  // TODO: some missing combinations can be improved.
+  switch (Idx0) {
+  case 0:
+    return Idx1 == 1 || (Idx1 == 2 && (IsLE || BEPerfectShuffle)) ||
+           (Idx1 == 3 && BEPerfectShuffle);
+  case 1:
+    return Idx1 == 2 || (Idx1 == 3 && !IsLE) ||
+           (Idx1 == 0 && BEPerfectShuffle);
+  case 2:
+    return Idx1 == 3 || ((Idx1 == 0 || Idx1 == 1) && BEPerfectShuffle);
+  case 3:
+    return Idx1 == 0 || ((Idx1 == 1 || Idx1 == 2) && BEPerfectShuffle);
+  default:
+    return false;
+  }
+}
+
 bool PPCTargetLowering::
 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
   const Value *Mask = AndI.getOperand(1);
diff --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll --- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll +++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll @@ -858,70 +858,46 @@ define dso_local void @test_consecutive_i32_01(<4 x i32> %a, i32* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_i32_01: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stfiwx f0, 0, r5 -; CHECK-NEXT: stfiwx f1, r5, r3 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: stfdx f0, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_i32_01: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-BE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_i32_01: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 0(r5) ; CHECK-P9-NEXT: blr ;
; CHECK-P9-BE-LABEL: test_consecutive_i32_01: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-BE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-BE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_i32_01: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_01: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_01: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_01: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: 
%vecext = extractelement <4 x i32> %a, i32 0 @@ -935,10 +911,9 @@ define dso_local void @test_consecutive_i32_02(<4 x i32> %a, i32* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_i32_02: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stxsiwx vs34, r5, r3 -; CHECK-NEXT: stfiwx f0, 0, r5 +; CHECK-NEXT: vpkudum v2, v2, v2 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: stfdx f0, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_i32_02: @@ -952,10 +927,9 @@ ; ; CHECK-P9-LABEL: test_consecutive_i32_02: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-NEXT: vpkudum v2, v2, v2 +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_i32_02: @@ -969,36 +943,30 @@ ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_i32_02: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-PERFSHUFFLE-NEXT: vpkudum v2, v2, v2 +; CHECK-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_02: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs0 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_02: ; 
CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-PERFSHUFFLE-NEXT: vpkudum v2, v2, v2 +; CHECK-P9-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_02: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs0 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 0 @@ -1058,10 +1026,8 @@ ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_03: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry ; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs0 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_03: @@ -1076,10 +1042,8 @@ ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_03: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry ; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: 
xxmrghw vs0, vs34, vs0 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 0 @@ -1093,66 +1057,54 @@ define dso_local void @test_consecutive_i32_12(<4 x i32> %a, i32* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_i32_12: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stxsiwx vs34, r5, r3 -; CHECK-NEXT: stfiwx f0, 0, r5 +; CHECK-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: stfdx f0, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_i32_12: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-BE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_i32_12: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_i32_12: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-P9-BE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_i32_12: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; 
CHECK-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_12: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_12: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-P9-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_12: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 1 @@ -1175,10 +1127,8 @@ ; ; CHECK-BE-LABEL: test_consecutive_i32_13: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-NEXT: vpkudum v2, v2, v2 +; CHECK-BE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_i32_13: @@ -1192,10 +1142,8 @@ ; ; CHECK-P9-BE-LABEL: test_consecutive_i32_13: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, 
vs34, 2 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-NEXT: vpkudum v2, v2, v2 +; CHECK-P9-BE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_i32_13: @@ -1209,10 +1157,8 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_13: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: vpkudum v2, v2, v2 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_13: @@ -1226,10 +1172,8 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_13: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: vpkudum v2, v2, v2 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 1 @@ -1243,70 +1187,46 @@ define dso_local void @test_consecutive_i32_23(<4 x i32> %a, i32* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_i32_23: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stxsiwx vs34, 0, r5 -; CHECK-NEXT: stfiwx f0, r5, r3 +; CHECK-NEXT: stxsdx vs34, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_i32_23: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-NEXT: 
stfdx f0, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_i32_23: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-NEXT: stxsd v2, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_i32_23: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_i32_23: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-PERFSHUFFLE-NEXT: stxsdx vs34, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_23: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_23: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_23: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry 
-; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 2 @@ -1363,10 +1283,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_10: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrghw vs35, vs34, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs35, vs34, 3 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_10: @@ -1380,10 +1299,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_10: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrghw vs35, vs34, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs35, vs34, 3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 1 @@ -1439,11 +1357,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_20: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; 
CHECK-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs34, vs0 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_20: @@ -1456,11 +1372,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_20: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs34, vs0 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 2 @@ -1474,74 +1388,54 @@ define dso_local void @test_consecutive_i32_30(<4 x i32> %a, i32* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_i32_30: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stfiwx f0, 0, r5 -; CHECK-NEXT: stfiwx f1, r5, r3 +; CHECK-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: stfdx f0, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_i32_30: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-NEXT: xxsldwi vs1, vs34, vs34, 3 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-BE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_i32_30: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-NEXT: 
vsldoi v2, v2, v2, 4 +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_i32_30: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-P9-BE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_i32_30: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_30: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_30: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-P9-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; 
CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_30: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 3 @@ -1595,10 +1489,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_21: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs34, vs0 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_21: @@ -1611,10 +1504,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_21: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs34, vs0 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 2 @@ -1671,10 +1563,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_31: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-BE-PERFSHUFFLE-NEXT: 
stfiwx f0, 0, r5 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 3 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_31: @@ -1688,10 +1579,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_31: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 3 @@ -1747,11 +1637,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_32: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 3 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_i32_32: @@ -1764,11 +1652,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_i32_32: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: 
stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 3 @@ -1782,70 +1668,46 @@ define dso_local void @test_consecutive_float_01(<4 x float> %a, float* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_float_01: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stfiwx f0, 0, r5 -; CHECK-NEXT: stfiwx f1, r5, r3 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: stfdx f0, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_float_01: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-BE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_float_01: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_float_01: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-BE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-BE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_float_01: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: 
test_consecutive_float_01: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_01: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_01: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 0 @@ -1859,10 +1721,9 @@ define dso_local void @test_consecutive_float_02(<4 x float> %a, float* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_float_02: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stxsiwx vs34, r5, r3 -; CHECK-NEXT: stfiwx f0, 0, r5 +; CHECK-NEXT: vpkudum v2, v2, v2 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: stfdx f0, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_float_02: @@ -1876,10 +1737,9 @@ ; ; CHECK-P9-LABEL: test_consecutive_float_02: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-NEXT: 
stfiwx f0, 0, r5 +; CHECK-P9-NEXT: vpkudum v2, v2, v2 +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_float_02: @@ -1893,36 +1753,30 @@ ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_float_02: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-PERFSHUFFLE-NEXT: vpkudum v2, v2, v2 +; CHECK-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_02: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs0 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_02: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-PERFSHUFFLE-NEXT: vpkudum v2, v2, v2 +; CHECK-P9-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_02: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; 
CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs0 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 0 @@ -1982,10 +1836,8 @@ ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_03: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry ; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs0 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_03: @@ -2000,10 +1852,8 @@ ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_03: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry ; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs0 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 0 @@ -2017,66 +1867,54 @@ define dso_local void @test_consecutive_float_12(<4 x float> %a, float* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_float_12: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stxsiwx vs34, r5, r3 -; CHECK-NEXT: stfiwx f0, 0, r5 +; CHECK-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: stfdx f0, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_float_12: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi 
vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-BE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_float_12: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_float_12: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-P9-BE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_float_12: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_12: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_12: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; 
CHECK-P9-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-P9-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_12: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 1 @@ -2099,10 +1937,8 @@ ; ; CHECK-BE-LABEL: test_consecutive_float_13: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-NEXT: vpkudum v2, v2, v2 +; CHECK-BE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_float_13: @@ -2116,10 +1952,8 @@ ; ; CHECK-P9-BE-LABEL: test_consecutive_float_13: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-NEXT: vpkudum v2, v2, v2 +; CHECK-P9-BE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_float_13: @@ -2133,10 +1967,8 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_13: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: vpkudum v2, v2, v2 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsdx vs34, 
0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_13: @@ -2150,10 +1982,8 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_13: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: vpkudum v2, v2, v2 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 1 @@ -2167,70 +1997,46 @@ define dso_local void @test_consecutive_float_23(<4 x float> %a, float* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_float_23: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stxsiwx vs34, 0, r5 -; CHECK-NEXT: stfiwx f0, r5, r3 +; CHECK-NEXT: stxsdx vs34, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_float_23: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_float_23: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-NEXT: stxsd v2, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_float_23: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-NEXT: stfd 
f0, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_float_23: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-PERFSHUFFLE-NEXT: stxsdx vs34, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_23: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_23: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_23: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 2 @@ -2287,10 +2093,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_10: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; 
CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrghw vs35, vs34, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs35, vs34, 3 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_10: @@ -2304,10 +2109,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_10: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrghw vs35, vs34, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs35, vs34, 3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 1 @@ -2363,11 +2167,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_20: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs34, vs0 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_20: @@ -2380,11 +2182,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_20: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; 
CHECK-P9-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs34, vs0 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 2 @@ -2398,74 +2198,54 @@ define dso_local void @test_consecutive_float_30(<4 x float> %a, float* nocapture %b) local_unnamed_addr #0 { ; CHECK-LABEL: test_consecutive_float_30: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-NEXT: li r3, 4 -; CHECK-NEXT: stfiwx f0, 0, r5 -; CHECK-NEXT: stfiwx f1, r5, r3 +; CHECK-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: stfdx f0, 0, r5 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_consecutive_float_30: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-NEXT: xxsldwi vs1, vs34, vs34, 3 -; CHECK-BE-NEXT: li r3, 4 -; CHECK-BE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-BE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_consecutive_float_30: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-NEXT: li r3, 4 -; CHECK-P9-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-P9-NEXT: xxswapd vs0, vs34 +; CHECK-P9-NEXT: stfd f0, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_consecutive_float_30: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-P9-BE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_consecutive_float_30: ; CHECK-PERFSHUFFLE: # %bb.0: # %entry -; 
CHECK-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 2 -; CHECK-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_30: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsdx vs34, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_30: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 4 +; CHECK-P9-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_30: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsd v2, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 3 @@ -2519,10 +2299,9 @@ ; ; 
CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_21: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs34, vs0 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_21: @@ -2535,10 +2314,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_21: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrghw vs0, vs34, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs34, vs0 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 2 @@ -2595,10 +2373,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_31: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 3 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_31: @@ -2612,10 +2389,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_31: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; 
CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 3 @@ -2671,11 +2447,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_consecutive_float_32: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 3 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r5 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_consecutive_float_32: @@ -2688,11 +2462,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_consecutive_float_32: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r5 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 3 @@ -2707,113 +2479,95 @@ ; CHECK-LABEL: test_stores_exceed_vec_size: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r3, r2, .LCPI38_0@toc@ha -; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-NEXT: li r4, 20 ; CHECK-NEXT: addi r3, r3, .LCPI38_0@toc@l ; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: li r3, 16 ; CHECK-NEXT: xxswapd vs35, vs0 ; CHECK-NEXT: vperm v3, v2, v2, v3 +; CHECK-NEXT: vsldoi v2, v2, v2, 12 ; CHECK-NEXT: xxswapd vs0, 
vs35 +; CHECK-NEXT: xxswapd vs1, vs34 ; CHECK-NEXT: stxvd2x vs0, 0, r5 -; CHECK-NEXT: stfiwx f1, r5, r3 -; CHECK-NEXT: stxsiwx vs34, r5, r4 +; CHECK-NEXT: stfd f1, 16(r5) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_stores_exceed_vec_size: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r3, r2, .LCPI38_0@toc@ha -; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-BE-NEXT: li r4, 20 ; CHECK-BE-NEXT: addi r3, r3, .LCPI38_0@toc@l ; CHECK-BE-NEXT: lxvw4x vs35, 0, r3 ; CHECK-BE-NEXT: li r3, 16 -; CHECK-BE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-BE-NEXT: stfiwx f0, r5, r4 ; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 4 ; CHECK-BE-NEXT: stxvw4x vs35, 0, r5 +; CHECK-BE-NEXT: stxsdx vs34, r5, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: test_stores_exceed_vec_size: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: addis r3, r2, .LCPI38_0@toc@ha -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-NEXT: addi r3, r3, .LCPI38_0@toc@l ; CHECK-P9-NEXT: lxv vs35, 0(r3) -; CHECK-P9-NEXT: li r3, 16 -; CHECK-P9-NEXT: stfiwx f0, r5, r3 -; CHECK-P9-NEXT: li r3, 20 -; CHECK-P9-NEXT: stxsiwx vs34, r5, r3 ; CHECK-P9-NEXT: vperm v3, v2, v2, v3 +; CHECK-P9-NEXT: vsldoi v2, v2, v2, 12 +; CHECK-P9-NEXT: xxswapd vs0, vs34 ; CHECK-P9-NEXT: stxv vs35, 0(r5) +; CHECK-P9-NEXT: stfd f0, 16(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_stores_exceed_vec_size: ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI38_0@toc@ha -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI38_0@toc@l ; CHECK-P9-BE-NEXT: lxv vs35, 0(r3) -; CHECK-P9-BE-NEXT: li r3, 16 -; CHECK-P9-BE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-BE-NEXT: li r3, 20 -; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 ; CHECK-P9-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-P9-BE-NEXT: vsldoi v2, v2, v2, 4 ; CHECK-P9-BE-NEXT: stxv vs35, 0(r5) +; CHECK-P9-BE-NEXT: stxsd v2, 16(r5) ; CHECK-P9-BE-NEXT: blr ; ; CHECK-PERFSHUFFLE-LABEL: test_stores_exceed_vec_size: ; 
CHECK-PERFSHUFFLE: # %bb.0: # %entry ; CHECK-PERFSHUFFLE-NEXT: addis r3, r2, .LCPI38_0@toc@ha -; CHECK-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-PERFSHUFFLE-NEXT: li r4, 20 ; CHECK-PERFSHUFFLE-NEXT: addi r3, r3, .LCPI38_0@toc@l ; CHECK-PERFSHUFFLE-NEXT: lxvd2x vs0, 0, r3 -; CHECK-PERFSHUFFLE-NEXT: li r3, 16 ; CHECK-PERFSHUFFLE-NEXT: xxswapd vs35, vs0 ; CHECK-PERFSHUFFLE-NEXT: vperm v3, v2, v2, v3 +; CHECK-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 ; CHECK-PERFSHUFFLE-NEXT: xxswapd vs0, vs35 +; CHECK-PERFSHUFFLE-NEXT: xxswapd vs1, vs34 ; CHECK-PERFSHUFFLE-NEXT: stxvd2x vs0, 0, r5 -; CHECK-PERFSHUFFLE-NEXT: stfiwx f1, r5, r3 -; CHECK-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r4 +; CHECK-PERFSHUFFLE-NEXT: stfd f1, 16(r5) ; CHECK-PERFSHUFFLE-NEXT: blr ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_stores_exceed_vec_size: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry ; CHECK-BE-PERFSHUFFLE-NEXT: xxspltw vs0, vs34, 0 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs34, vs34, 1 +; CHECK-BE-PERFSHUFFLE-NEXT: vsldoi v3, v2, v2, 4 ; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 16 -; CHECK-BE-PERFSHUFFLE-NEXT: li r4, 20 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 ; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r5, r4 ; CHECK-BE-PERFSHUFFLE-NEXT: stxvw4x vs0, 0, r5 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsdx vs35, r5, r3 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_stores_exceed_vec_size: ; CHECK-P9-PERFSHUFFLE: # %bb.0: # %entry ; CHECK-P9-PERFSHUFFLE-NEXT: addis r3, r2, .LCPI38_0@toc@ha -; CHECK-P9-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-PERFSHUFFLE-NEXT: addi r3, r3, .LCPI38_0@toc@l ; CHECK-P9-PERFSHUFFLE-NEXT: lxv vs35, 0(r3) -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 16 -; CHECK-P9-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 -; CHECK-P9-PERFSHUFFLE-NEXT: li r3, 20 -; CHECK-P9-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 ; CHECK-P9-PERFSHUFFLE-NEXT: vperm v3, v2, v2, v3 +; CHECK-P9-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 12 +; 
CHECK-P9-PERFSHUFFLE-NEXT: xxswapd vs0, vs34 ; CHECK-P9-PERFSHUFFLE-NEXT: stxv vs35, 0(r5) +; CHECK-P9-PERFSHUFFLE-NEXT: stfd f0, 16(r5) ; CHECK-P9-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_stores_exceed_vec_size: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry ; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxspltw vs0, vs34, 0 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 16 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 20 ; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs0, 2 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: vsldoi v2, v2, v2, 4 ; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxv vs0, 0(r5) -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r5, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsd v2, 16(r5) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 2 @@ -3376,10 +3130,9 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_elements_from_two_vec: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs35, 0, r7 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, r7, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrghw vs35, vs35, vs35 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs35, vs34, 3 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r7 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_elements_from_two_vec: @@ -3393,10 +3146,9 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_elements_from_two_vec: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs35, 0, r7 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r7, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrghw vs35, vs35, vs35 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs35, vs34, 3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r7) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: 
%vecext = extractelement <4 x i32> %a, i32 0 @@ -3465,13 +3217,11 @@ ; ; CHECK-BE-PERFSHUFFLE-LABEL: test_elements_from_three_vec: ; CHECK-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs1, vs35, vs35, 1 -; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-BE-PERFSHUFFLE-NEXT: li r4, 8 -; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs36, r9, r4 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f1, r9, r3 -; CHECK-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r9 +; CHECK-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-BE-PERFSHUFFLE-NEXT: li r3, 8 +; CHECK-BE-PERFSHUFFLE-NEXT: stxsiwx vs36, r9, r3 +; CHECK-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs0, vs35 +; CHECK-BE-PERFSHUFFLE-NEXT: stfdx f0, 0, r9 ; CHECK-BE-PERFSHUFFLE-NEXT: blr ; ; CHECK-P9-PERFSHUFFLE-LABEL: test_elements_from_three_vec: @@ -3487,13 +3237,11 @@ ; ; CHECK-P9-BE-PERFSHUFFLE-LABEL: test_elements_from_three_vec: ; CHECK-P9-BE-PERFSHUFFLE: # %bb.0: # %entry -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 2 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 4 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, 0, r9 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs35, vs35, 1 -; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfiwx f0, r9, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-BE-PERFSHUFFLE-NEXT: li r3, 8 ; CHECK-P9-BE-PERFSHUFFLE-NEXT: stxsiwx vs36, r9, r3 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: xxmrglw vs0, vs0, vs35 +; CHECK-P9-BE-PERFSHUFFLE-NEXT: stfd f0, 0(r9) ; CHECK-P9-BE-PERFSHUFFLE-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 3