diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -325,7 +325,8 @@ private: void DoInstructionSelection(); SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, - ArrayRef Ops, unsigned EmitNodeInfo); + ArrayRef Ops, unsigned EmitNodeInfo, + bool NodeHasChain); /// Prepares the landing pad to take incoming values or do other EH /// personality specific tasks. Returns true if the block should be diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2378,9 +2378,9 @@ } /// MorphNode - Handle morphing a node in place for the selector. -SDNode *SelectionDAGISel:: -MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, - ArrayRef Ops, unsigned EmitNodeInfo) { +SDNode *SelectionDAGISel::MorphNode(SDNode *Node, unsigned TargetOpc, + SDVTList VTList, ArrayRef Ops, + unsigned EmitNodeInfo, bool NodeHasChain) { // It is possible we're using MorphNodeTo to replace a node with no // normal results with one that has a normal result (or we could be // adding a chain) and the input could have glue and chains as well. @@ -2422,8 +2422,8 @@ --ResNumResults; // Move the chain reference if needed. - if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && - (unsigned)OldChainResultNo != ResNumResults-1) + if (NodeHasChain && OldChainResultNo != -1 && + (unsigned)OldChainResultNo != ResNumResults - 1) ReplaceUses(SDValue(Node, OldChainResultNo), SDValue(Res, ResNumResults - 1)); @@ -2828,6 +2828,11 @@ // update the chain results when the pattern is complete. SmallVector ChainNodesMatched; + // Collect chains for machine nodes that may raise exceptions. We allow + // multiple MachineSDNodes to be emitted with parallel chains. 
We'll join + // them with a TokenFactor. This helps CSE identical nodes within a match. + SmallVector StrictFPChains; + LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n"); // Determine where to start the interpreter. Normally we start at opcode #0, @@ -3378,6 +3383,19 @@ uint16_t TargetOpc = MatcherTable[MatcherIndex++]; TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; + + // We need to perform this check before potentially modifying one of the + // nodes via MorphNode. + bool MayRaiseFPException = + llvm::any_of(ChainNodesMatched, [this](SDNode *N) { + return mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept(); + }); + + bool NodeMayRaiseFPException = + MayRaiseFPException && TII->get(TargetOpc).mayRaiseFPException(); + bool NodeHasChain = + (EmitNodeInfo & OPFL_Chain) || NodeMayRaiseFPException; + // Get the result VT list. unsigned NumVTs; // If this is one of the compressed forms, get the number of VTs based @@ -3397,7 +3415,7 @@ VTs.push_back(VT); } - if (EmitNodeInfo & OPFL_Chain) + if (NodeHasChain) VTs.push_back(MVT::Other); if (EmitNodeInfo & OPFL_GlueOutput) VTs.push_back(MVT::Glue); @@ -3428,7 +3446,7 @@ if (EmitNodeInfo & OPFL_VariadicInfo) { // Determine the start index to copy from. unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo); - FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; + FirstOpToCopy += NodeHasChain ? 1 : 0; assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && "Invalid variadic node"); // Copy all of the variadic operands, not including a potential glue @@ -3441,21 +3459,25 @@ } } + if (EmitNodeInfo & OPFL_Chain) { + // Update InputChain if there are any strict fp nodes. 
+ if (!StrictFPChains.empty()) { + if (StrictFPChains.size() == 1) + InputChain = StrictFPChains[0]; + else + InputChain = + CurDAG->getNode(ISD::TokenFactor, SDLoc(StrictFPChains[0]), + MVT::Other, StrictFPChains); + StrictFPChains.clear(); + } + } + // If this has chain/glue inputs, add them. - if (EmitNodeInfo & OPFL_Chain) + if (NodeHasChain) Ops.push_back(InputChain); if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); - // Check whether any matched node could raise an FP exception. Since all - // such nodes must have a chain, it suffices to check ChainNodesMatched. - // We need to perform this check before potentially modifying one of the - // nodes via MorphNode. - bool MayRaiseFPException = - llvm::any_of(ChainNodesMatched, [this](SDNode *N) { - return mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept(); - }); - // Create the node. MachineSDNode *Res = nullptr; bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || @@ -3483,8 +3505,8 @@ "Chain node replaced during MorphNode"); llvm::erase_value(Chain, N); }); - Res = cast(MorphNode(NodeToMatch, TargetOpc, VTList, - Ops, EmitNodeInfo)); + Res = cast(MorphNode(NodeToMatch, TargetOpc, VTList, Ops, + EmitNodeInfo, NodeHasChain)); } // Set the NoFPExcept flag when no original matched node could @@ -3499,10 +3521,16 @@ // chain and glue. 
if (EmitNodeInfo & OPFL_GlueOutput) { InputGlue = SDValue(Res, VTs.size()-1); - if (EmitNodeInfo & OPFL_Chain) + if (EmitNodeInfo & OPFL_Chain) { + assert(StrictFPChains.empty() && "Chain node and strict FP node?"); InputChain = SDValue(Res, VTs.size()-2); - } else if (EmitNodeInfo & OPFL_Chain) + } else if (NodeMayRaiseFPException) + StrictFPChains.push_back(SDValue(Res, VTs.size() - 2)); + } else if (EmitNodeInfo & OPFL_Chain) { + assert(StrictFPChains.empty() && "Chain node and strict FP node?"); InputChain = SDValue(Res, VTs.size()-1); + } else if (NodeMayRaiseFPException) + StrictFPChains.push_back(SDValue(Res, VTs.size() - 1)); // If the OPFL_MemRefs glue is set on this node, slap all of the // accumulated memrefs onto it. @@ -3543,6 +3571,17 @@ // If this was a MorphNodeTo then we're completely done! if (IsMorphNodeTo) { + // Update InputChain if there are any strict fp nodes. + if (!StrictFPChains.empty()) { + if (StrictFPChains.size() == 1) + InputChain = StrictFPChains[0]; + else + InputChain = + CurDAG->getNode(ISD::TokenFactor, SDLoc(StrictFPChains[0]), + MVT::Other, StrictFPChains); + StrictFPChains.clear(); + } + // Update chain uses. UpdateChains(Res, InputChain, ChainNodesMatched, true); return; @@ -3577,6 +3616,17 @@ ReplaceUses(SDValue(NodeToMatch, i), Res); } + // Update InputChain if there are any strict fp nodes. + if (!StrictFPChains.empty()) { + if (StrictFPChains.size() == 1) + InputChain = StrictFPChains[0]; + else + InputChain = + CurDAG->getNode(ISD::TokenFactor, SDLoc(StrictFPChains[0]), + MVT::Other, StrictFPChains); + StrictFPChains.clear(); + } + // Update chain uses. 
UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false); diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -621,9 +621,11 @@ ; P8-NEXT: fcmpo cr0, f2, f3 ; P8-NEXT: xxlxor f3, f3, f3 ; P8-NEXT: fcmpo cr1, f1, f0 -; P8-NEXT: crand 4*cr5+lt, 4*cr1+eq, lt -; P8-NEXT: crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq -; P8-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; P8-NEXT: fcmpo cr5, f1, f0 +; P8-NEXT: fcmpo cr6, f1, f0 +; P8-NEXT: crand 4*cr7+lt, 4*cr1+eq, lt +; P8-NEXT: crandc 4*cr5+lt, 4*cr6+lt, 4*cr5+eq +; P8-NEXT: cror 4*cr5+lt, 4*cr5+lt, 4*cr7+lt ; P8-NEXT: isel r30, 0, r3, 4*cr5+lt ; P8-NEXT: bc 12, 4*cr5+lt, .LBB13_2 ; P8-NEXT: # %bb.1: # %entry @@ -664,7 +666,9 @@ ; P9-NEXT: fcmpo cr0, f1, f0 ; P9-NEXT: xxlxor f3, f3, f3 ; P9-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt -; P9-NEXT: crandc 4*cr5+gt, lt, eq +; P9-NEXT: fcmpo cr0, f1, f0 +; P9-NEXT: fcmpo cr1, f1, f0 +; P9-NEXT: crandc 4*cr5+gt, 4*cr1+lt, eq ; P9-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt ; P9-NEXT: isel r30, 0, r3, 4*cr5+lt ; P9-NEXT: bc 12, 4*cr5+lt, .LBB13_2 @@ -705,10 +709,12 @@ ; NOVSX-NEXT: lfs f4, .LCPI13_1@toc@l(r4) ; NOVSX-NEXT: fcmpo cr0, f1, f0 ; NOVSX-NEXT: fcmpo cr1, f2, f4 +; NOVSX-NEXT: fcmpo cr5, f1, f0 +; NOVSX-NEXT: fcmpo cr6, f1, f0 ; NOVSX-NEXT: fmr f3, f4 -; NOVSX-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt -; NOVSX-NEXT: crandc 4*cr5+gt, lt, eq -; NOVSX-NEXT: cror 4*cr2+lt, 4*cr5+gt, 4*cr5+lt +; NOVSX-NEXT: crand 4*cr7+lt, eq, 4*cr1+lt +; NOVSX-NEXT: crandc 4*cr5+lt, 4*cr6+lt, 4*cr5+eq +; NOVSX-NEXT: cror 4*cr2+lt, 4*cr5+lt, 4*cr7+lt ; NOVSX-NEXT: bc 12, 4*cr2+lt, .LBB13_2 ; NOVSX-NEXT: # %bb.1: # %entry ; NOVSX-NEXT: fmr f3, f0 diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll +++ 
b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll @@ -7,13 +7,17 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: fcmpu cr0, f1, f3 ; CHECK-NEXT: crmove 4*cr5+lt, eq -; CHECK-NEXT: fcmpu cr1, f2, f4 -; CHECK-NEXT: crmove 4*cr5+gt, 4*cr1+eq +; CHECK-NEXT: fcmpu cr0, f2, f4 +; CHECK-NEXT: crmove 4*cr5+gt, eq ; CHECK-NEXT: crnot 4*cr5+gt, 4*cr5+gt ; CHECK-NEXT: crand 4*cr5+gt, 4*cr5+lt, 4*cr5+gt +; CHECK-NEXT: fcmpu cr0, f1, f3 ; CHECK-NEXT: crmove 4*cr5+lt, eq ; CHECK-NEXT: crnot 4*cr5+lt, 4*cr5+lt -; CHECK-NEXT: crand 4*cr5+lt, 4*cr5+lt, 4*cr5+lt +; CHECK-NEXT: fcmpu cr0, f1, f3 +; CHECK-NEXT: crmove 4*cr5+eq, eq +; CHECK-NEXT: crnot 4*cr5+eq, 4*cr5+eq +; CHECK-NEXT: crand 4*cr5+lt, 4*cr5+lt, 4*cr5+eq ; CHECK-NEXT: cror 4*cr5+lt, 4*cr5+lt, 4*cr5+gt ; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: li r3, 1 @@ -32,12 +36,14 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: fcmpu cr0, f1, f3 ; CHECK-NEXT: crmove 4*cr5+lt, eq -; CHECK-NEXT: fcmpu cr1, f2, f4 -; CHECK-NEXT: crmove 4*cr5+gt, 4*cr1+gt +; CHECK-NEXT: fcmpu cr0, f2, f4 +; CHECK-NEXT: crmove 4*cr5+gt, gt ; CHECK-NEXT: crand 4*cr5+gt, 4*cr5+lt, 4*cr5+gt +; CHECK-NEXT: fcmpu cr0, f1, f3 +; CHECK-NEXT: fcmpu cr1, f1, f3 +; CHECK-NEXT: crmove 4*cr5+eq, 4*cr1+gt ; CHECK-NEXT: crmove 4*cr5+lt, eq ; CHECK-NEXT: crnot 4*cr5+lt, 4*cr5+lt -; CHECK-NEXT: crmove 4*cr5+eq, gt ; CHECK-NEXT: crand 4*cr5+lt, 4*cr5+lt, 4*cr5+eq ; CHECK-NEXT: cror 4*cr5+lt, 4*cr5+lt, 4*cr5+gt ; CHECK-NEXT: li r4, 0 diff --git a/llvm/test/CodeGen/PowerPC/nofpexcept.ll b/llvm/test/CodeGen/PowerPC/nofpexcept.ll --- a/llvm/test/CodeGen/PowerPC/nofpexcept.ll +++ b/llvm/test/CodeGen/PowerPC/nofpexcept.ll @@ -113,8 +113,10 @@ ; CHECK-NEXT: [[FCMPOD1:%[0-9]+]]:crrc = FCMPOD [[COPY3]], [[XXLXORdpz]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:crbitrc = COPY [[FCMPOD1]].sub_lt ; CHECK-NEXT: [[CRAND:%[0-9]+]]:crbitrc = CRAND killed [[COPY10]], killed [[COPY11]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:crbitrc = COPY [[FCMPOD]].sub_eq - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:crbitrc = COPY 
[[FCMPOD]].sub_lt + ; CHECK-NEXT: [[FCMPOD2:%[0-9]+]]:crrc = FCMPOD [[COPY4]], [[COPY9]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:crbitrc = COPY [[FCMPOD2]].sub_eq + ; CHECK-NEXT: [[FCMPOD3:%[0-9]+]]:crrc = FCMPOD [[COPY4]], [[COPY9]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:crbitrc = COPY [[FCMPOD3]].sub_lt ; CHECK-NEXT: [[CRANDC:%[0-9]+]]:crbitrc = CRANDC killed [[COPY13]], killed [[COPY12]] ; CHECK-NEXT: [[CROR:%[0-9]+]]:crbitrc = CROR killed [[CRANDC]], killed [[CRAND]] ; CHECK-NEXT: [[LIS:%[0-9]+]]:gprc_and_gprc_nor0 = LIS 32768 @@ -140,10 +142,10 @@ ; CHECK-NEXT: [[MFFS1:%[0-9]+]]:f8rc = MFFS implicit $rm ; CHECK-NEXT: MTFSB1 31, implicit-def $rm, implicit-def $rm ; CHECK-NEXT: MTFSB0 30, implicit-def $rm, implicit-def $rm - ; CHECK-NEXT: %37:f8rc = nofpexcept FADD [[COPY15]], [[COPY14]], implicit $rm + ; CHECK-NEXT: %39:f8rc = nofpexcept FADD [[COPY15]], [[COPY14]], implicit $rm ; CHECK-NEXT: MTFSFb 1, [[MFFS1]], implicit-def $rm - ; CHECK-NEXT: %38:vsfrc = nofpexcept XSCVDPSXWS killed %37, implicit $rm - ; CHECK-NEXT: [[MFVSRWZ3:%[0-9]+]]:gprc = MFVSRWZ killed %38 + ; CHECK-NEXT: %40:vsfrc = nofpexcept XSCVDPSXWS killed %39, implicit $rm + ; CHECK-NEXT: [[MFVSRWZ3:%[0-9]+]]:gprc = MFVSRWZ killed %40 ; CHECK-NEXT: [[XOR:%[0-9]+]]:gprc = XOR killed [[MFVSRWZ3]], killed [[ISEL]] ; CHECK-NEXT: STW killed [[XOR]], 0, [[COPY1]] :: (volatile store (s32) into %ir.addr1) ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1297,9 +1297,11 @@ ; PC64LE-NEXT: fcmpo 0, 2, 3 ; PC64LE-NEXT: xxlxor 3, 3, 3 ; PC64LE-NEXT: fcmpo 1, 1, 0 -; PC64LE-NEXT: crand 20, 6, 0 -; PC64LE-NEXT: crandc 21, 4, 6 -; PC64LE-NEXT: cror 20, 21, 20 +; PC64LE-NEXT: fcmpo 5, 1, 0 +; PC64LE-NEXT: fcmpo 6, 1, 0 +; PC64LE-NEXT: crand 28, 
6, 0 +; PC64LE-NEXT: crandc 20, 24, 22 +; PC64LE-NEXT: cror 20, 20, 28 ; PC64LE-NEXT: isel 30, 0, 3, 20 ; PC64LE-NEXT: bc 12, 20, .LBB31_2 ; PC64LE-NEXT: # %bb.1: # %entry @@ -1336,7 +1338,9 @@ ; PC64LE9-NEXT: fcmpo 0, 1, 0 ; PC64LE9-NEXT: xxlxor 3, 3, 3 ; PC64LE9-NEXT: crand 20, 2, 4 -; PC64LE9-NEXT: crandc 21, 0, 2 +; PC64LE9-NEXT: fcmpo 0, 1, 0 +; PC64LE9-NEXT: fcmpo 1, 1, 0 +; PC64LE9-NEXT: crandc 21, 4, 2 ; PC64LE9-NEXT: cror 20, 21, 20 ; PC64LE9-NEXT: isel 30, 0, 3, 20 ; PC64LE9-NEXT: bc 12, 20, .LBB31_2 @@ -1372,9 +1376,11 @@ ; PC64-NEXT: addis 3, 2, .LCPI31_1@toc@ha ; PC64-NEXT: lfs 4, .LCPI31_1@toc@l(3) ; PC64-NEXT: fcmpo 0, 1, 0 -; PC64-NEXT: crandc 21, 0, 2 ; PC64-NEXT: fcmpo 1, 2, 4 ; PC64-NEXT: crand 20, 2, 4 +; PC64-NEXT: fcmpo 0, 1, 0 +; PC64-NEXT: fcmpo 1, 1, 0 +; PC64-NEXT: crandc 21, 4, 2 ; PC64-NEXT: cror 8, 21, 20 ; PC64-NEXT: fmr 3, 4 ; PC64-NEXT: bc 12, 8, .LBB31_2 diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -5222,14 +5222,14 @@ define <4 x double> @constrained_vector_maxnum_v4f64(<4 x double> %x, <4 x double> %y) #0 { ; PC64LE-LABEL: constrained_vector_maxnum_v4f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xvmaxdp 34, 34, 36 ; PC64LE-NEXT: xvmaxdp 35, 35, 37 +; PC64LE-NEXT: xvmaxdp 34, 34, 36 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_maxnum_v4f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: xvmaxdp 34, 34, 36 ; PC64LE9-NEXT: xvmaxdp 35, 35, 37 +; PC64LE9-NEXT: xvmaxdp 34, 34, 36 ; PC64LE9-NEXT: blr entry: %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64( @@ -5463,14 +5463,14 @@ define <4 x double> @constrained_vector_minnum_v4f64(<4 x double> %x, <4 x double> %y) #0 { ; PC64LE-LABEL: constrained_vector_minnum_v4f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xvmindp 34, 
34, 36 ; PC64LE-NEXT: xvmindp 35, 35, 37 +; PC64LE-NEXT: xvmindp 34, 34, 36 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_minnum_v4f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: xvmindp 34, 34, 36 ; PC64LE9-NEXT: xvmindp 35, 35, 37 +; PC64LE9-NEXT: xvmindp 34, 34, 36 ; PC64LE9-NEXT: blr entry: %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64( @@ -7377,30 +7377,30 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 { ; PC64LE-LABEL: constrained_vector_sitofp_v3f64_v3i32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xxswapd 0, 34 -; PC64LE-NEXT: xxsldwi 1, 34, 34, 1 +; PC64LE-NEXT: xxsldwi 0, 34, 34, 1 +; PC64LE-NEXT: xxswapd 1, 34 ; PC64LE-NEXT: mfvsrwz 3, 34 -; PC64LE-NEXT: mtfprwa 3, 3 -; PC64LE-NEXT: mffprwz 3, 0 -; PC64LE-NEXT: mffprwz 4, 1 +; PC64LE-NEXT: mffprwz 4, 0 ; PC64LE-NEXT: mtfprwa 0, 3 +; PC64LE-NEXT: mffprwz 3, 1 ; PC64LE-NEXT: mtfprwa 2, 4 -; PC64LE-NEXT: xscvsxddp 1, 0 +; PC64LE-NEXT: xscvsxddp 3, 0 +; PC64LE-NEXT: mtfprwa 0, 3 ; PC64LE-NEXT: xscvsxddp 2, 2 -; PC64LE-NEXT: xscvsxddp 3, 3 +; PC64LE-NEXT: xscvsxddp 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_sitofp_v3f64_v3i32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: li 3, 0 +; PC64LE9-NEXT: li 3, 4 ; PC64LE9-NEXT: vextuwrx 3, 3, 2 ; PC64LE9-NEXT: mtfprwa 0, 3 -; PC64LE9-NEXT: li 3, 4 +; PC64LE9-NEXT: li 3, 0 ; PC64LE9-NEXT: vextuwrx 3, 3, 2 -; PC64LE9-NEXT: xscvsxddp 1, 0 +; PC64LE9-NEXT: xscvsxddp 2, 0 ; PC64LE9-NEXT: mtfprwa 0, 3 ; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: xscvsxddp 2, 0 +; PC64LE9-NEXT: xscvsxddp 1, 0 ; PC64LE9-NEXT: mtfprwa 0, 3 ; PC64LE9-NEXT: xscvsxddp 3, 0 ; PC64LE9-NEXT: blr @@ -7469,22 +7469,22 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; PC64LE-LABEL: constrained_vector_sitofp_v3f64_v3i64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mtfprd 0, 3 -; PC64LE-NEXT: mtfprd 2, 4 -; PC64LE-NEXT: mtfprd 3, 5 -; PC64LE-NEXT: xscvsxddp 1, 0 -; PC64LE-NEXT: 
xscvsxddp 2, 2 -; PC64LE-NEXT: xscvsxddp 3, 3 +; PC64LE-NEXT: mtfprd 0, 5 +; PC64LE-NEXT: mtfprd 1, 4 +; PC64LE-NEXT: mtfprd 4, 3 +; PC64LE-NEXT: xscvsxddp 3, 0 +; PC64LE-NEXT: xscvsxddp 2, 1 +; PC64LE-NEXT: xscvsxddp 1, 4 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_sitofp_v3f64_v3i64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mtfprd 0, 3 -; PC64LE9-NEXT: xscvsxddp 1, 0 -; PC64LE9-NEXT: mtfprd 0, 4 -; PC64LE9-NEXT: xscvsxddp 2, 0 ; PC64LE9-NEXT: mtfprd 0, 5 ; PC64LE9-NEXT: xscvsxddp 3, 0 +; PC64LE9-NEXT: mtfprd 0, 4 +; PC64LE9-NEXT: xscvsxddp 2, 0 +; PC64LE9-NEXT: mtfprd 0, 3 +; PC64LE9-NEXT: xscvsxddp 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x double> @@ -7940,30 +7940,30 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 { ; PC64LE-LABEL: constrained_vector_uitofp_v3f64_v3i32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: xxswapd 0, 34 -; PC64LE-NEXT: xxsldwi 1, 34, 34, 1 +; PC64LE-NEXT: xxsldwi 0, 34, 34, 1 +; PC64LE-NEXT: xxswapd 1, 34 ; PC64LE-NEXT: mfvsrwz 3, 34 -; PC64LE-NEXT: mtfprwz 3, 3 -; PC64LE-NEXT: mffprwz 3, 0 -; PC64LE-NEXT: mffprwz 4, 1 +; PC64LE-NEXT: mffprwz 4, 0 ; PC64LE-NEXT: mtfprwz 0, 3 +; PC64LE-NEXT: mffprwz 3, 1 ; PC64LE-NEXT: mtfprwz 2, 4 -; PC64LE-NEXT: xscvuxddp 1, 0 +; PC64LE-NEXT: xscvuxddp 3, 0 +; PC64LE-NEXT: mtfprwz 0, 3 ; PC64LE-NEXT: xscvuxddp 2, 2 -; PC64LE-NEXT: xscvuxddp 3, 3 +; PC64LE-NEXT: xscvuxddp 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_uitofp_v3f64_v3i32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: li 3, 0 +; PC64LE9-NEXT: li 3, 4 ; PC64LE9-NEXT: vextuwrx 3, 3, 2 ; PC64LE9-NEXT: mtfprwz 0, 3 -; PC64LE9-NEXT: li 3, 4 +; PC64LE9-NEXT: li 3, 0 ; PC64LE9-NEXT: vextuwrx 3, 3, 2 -; PC64LE9-NEXT: xscvuxddp 1, 0 +; PC64LE9-NEXT: xscvuxddp 2, 0 ; PC64LE9-NEXT: mtfprwz 0, 3 ; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: xscvuxddp 2, 0 +; PC64LE9-NEXT: xscvuxddp 1, 0 ; PC64LE9-NEXT: mtfprwz 0, 3 ; PC64LE9-NEXT: xscvuxddp 3, 0 ; PC64LE9-NEXT: blr @@ -8032,22 
+8032,22 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; PC64LE-LABEL: constrained_vector_uitofp_v3f64_v3i64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mtfprd 0, 3 -; PC64LE-NEXT: mtfprd 2, 4 -; PC64LE-NEXT: mtfprd 3, 5 -; PC64LE-NEXT: xscvuxddp 1, 0 -; PC64LE-NEXT: xscvuxddp 2, 2 -; PC64LE-NEXT: xscvuxddp 3, 3 +; PC64LE-NEXT: mtfprd 0, 5 +; PC64LE-NEXT: mtfprd 1, 4 +; PC64LE-NEXT: mtfprd 4, 3 +; PC64LE-NEXT: xscvuxddp 3, 0 +; PC64LE-NEXT: xscvuxddp 2, 1 +; PC64LE-NEXT: xscvuxddp 1, 4 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_uitofp_v3f64_v3i64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mtfprd 0, 3 -; PC64LE9-NEXT: xscvuxddp 1, 0 -; PC64LE9-NEXT: mtfprd 0, 4 -; PC64LE9-NEXT: xscvuxddp 2, 0 ; PC64LE9-NEXT: mtfprd 0, 5 ; PC64LE9-NEXT: xscvuxddp 3, 0 +; PC64LE9-NEXT: mtfprd 0, 4 +; PC64LE9-NEXT: xscvuxddp 2, 0 +; PC64LE9-NEXT: mtfprd 0, 3 +; PC64LE9-NEXT: xscvuxddp 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x double> diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -4296,9 +4296,9 @@ ; SZ13-NEXT: larl %r1, .LCPI79_0 ; SZ13-NEXT: vl %v0, 0(%r1), 3 ; SZ13-NEXT: larl %r1, .LCPI79_1 -; SZ13-NEXT: vfidb %v24, %v0, 0, 0 -; SZ13-NEXT: vl %v0, 0(%r1), 3 ; SZ13-NEXT: vfidb %v26, %v0, 0, 0 +; SZ13-NEXT: vl %v0, 0(%r1), 3 +; SZ13-NEXT: vfidb %v24, %v0, 0, 0 ; SZ13-NEXT: br %r14 entry: %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64( @@ -4530,9 +4530,9 @@ ; SZ13-NEXT: larl %r1, .LCPI84_0 ; SZ13-NEXT: vl %v0, 0(%r1), 3 ; SZ13-NEXT: larl %r1, .LCPI84_1 -; SZ13-NEXT: vfidb %v24, %v0, 4, 0 -; SZ13-NEXT: vl %v0, 0(%r1), 3 ; SZ13-NEXT: vfidb %v26, %v0, 4, 0 +; SZ13-NEXT: vl %v0, 0(%r1), 3 +; SZ13-NEXT: vfidb %v24, %v0, 4, 0 ; SZ13-NEXT: br %r14 entry: %nearby = call <4 x double> 
@llvm.experimental.constrained.nearbyint.v4f64(