diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4354,6 +4354,13 @@ bool expandROT(SDNode *N, bool AllowVectorOps, SDValue &Result, SelectionDAG &DAG) const; + /// Expand shift-by-parts. + /// \param N Node to expand + /// \param Lo lower part of the expanded result + /// \param Hi upper part of the expanded result + void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, + SelectionDAG &DAG) const; + /// Expand float(f32) to SINT(i64) conversion /// \param N Node to expand /// \param Result output after conversion diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6587,6 +6587,58 @@ return true; } +void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi, + SelectionDAG &DAG) const { + assert(Node->getNumOperands() == 3 && "Not a double-shift!"); + EVT VT = Node->getValueType(0); + unsigned VTBits = VT.getScalarSizeInBits(); + assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected"); + + bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS; + bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS; + SDValue ShOpLo = Node->getOperand(0); + SDValue ShOpHi = Node->getOperand(1); + SDValue ShAmt = Node->getOperand(2); + EVT ShAmtVT = ShAmt.getValueType(); + EVT ShAmtCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT); + SDLoc dl(Node); + + // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and + // ISD::SRA/L nodes haven't. Insert an AND to be safe; it's usually optimized + // away during isel. + SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt, + DAG.getConstant(VTBits - 1, dl, ShAmtVT)); + SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, dl, ShAmtVT)) + : DAG.getConstant(0, dl, VT); + + SDValue Tmp2, Tmp3; + if (IsSHL) { + Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt); + } else { + Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt); + Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt); + } + + // If the shift amount is larger than or equal to the width of a part, we + // don't use the result from the FSHL/FSHR. Insert a test and select the + // appropriate values for large shift amounts. + SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt, + DAG.getConstant(VTBits, dl, ShAmtVT)); + SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode, + DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE); + + if (IsSHL) { + Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); + Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); + } else { + Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); + Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); + } +} + bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { unsigned OpNo = Node->isStrictFPOpcode() ?
1 : 0; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -946,8 +946,7 @@ SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4501,10 +4501,9 @@ case ISD::SHL: return LowerVectorSRA_SRL_SHL(Op, DAG); case ISD::SHL_PARTS: - return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: - return LowerShiftRightParts(Op, DAG); + return LowerShiftParts(Op, DAG); case ISD::CTPOP: return LowerCTPOP(Op, DAG); case ISD::FCOPYSIGN: @@ -7500,112 +7499,13 @@ return SDValue(St, 0); } -/// LowerShiftRightParts - Lower SRA_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; - - assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); - - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); - SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - - // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which - // is "undef". We wanted 0, so CSEL it directly. - SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64), - ISD::SETEQ, dl, DAG); - SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32); - HiBitsForLo = - DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64), - HiBitsForLo, CCVal, Cmp); - - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, dl, MVT::i64)); - - SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); - SDValue LoForNormalShift = - DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo); - - Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, - dl, DAG); - CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); - SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift, - LoForNormalShift, CCVal, Cmp); - - // AArch64 shifts larger than the register width are wrapped rather than - // clamped, so we can't just emit "hi >> x". - SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue HiForBigShift = - Opc == ISD::SRA - ? 
DAG.getNode(Opc, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, dl, MVT::i64)) - : DAG.getConstant(0, dl, VT); - SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift, - HiForNormalShift, CCVal, Cmp); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); -} - -/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - - assert(Op.getOpcode() == ISD::SHL_PARTS); - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); - SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); - - // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which - // is "undef". We wanted 0, so CSEL it directly. - SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64), - ISD::SETEQ, dl, DAG); - SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32); - LoBitsForHi = - DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64), - LoBitsForHi, CCVal, Cmp); - - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, dl, MVT::i64)); - SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue HiForNormalShift = - DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi); - - SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - - Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, - dl, DAG); - CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); - SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift, - HiForNormalShift, CCVal, Cmp); - - // AArch64 shifts of larger than register sizes are wrapped rather than - // clamped, so we can't just emit "lo << a" if a is too big. - SDValue LoForBigShift = DAG.getConstant(0, dl, VT); - SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift, - LoForNormalShift, CCVal, Cmp); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); +/// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which return two +/// i64 values and take a 2 x i64 value to shift plus a shift amount.
+SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op, + SelectionDAG &DAG) const { + SDValue Lo, Hi; + expandShiftParts(Op.getNode(), Lo, Hi, DAG); + return DAG.getMergeValues({Lo, Hi}, SDLoc(Op)); } bool AArch64TargetLowering::isOffsetFoldingLegal( diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h @@ -85,8 +85,7 @@ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, unsigned mainop, unsigned ovf) const; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -451,9 +451,9 @@ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); + case ISD::SHL_PARTS: case ISD::SRA_PARTS: - case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); + case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY); case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW); case ISD::FCOS: @@ -765,78 +765,11 @@ DAG.getConstantFP(numbers::pif, DL, MVT::f32)); } -SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - - SDValue Lo = Op.getOperand(0); - SDValue Hi = Op.getOperand(1); - SDValue Shift = Op.getOperand(2); - SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue One = DAG.getConstant(1, DL, VT); - - SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT); - SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); - SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); - SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); - - // The dance around Width1 is necessary for 0 special case. - // Without it the CompShift might be 32, producing incorrect results in - // Overflow. So we do the shift in two steps, the alternative is to - // add a conditional to filter the special case. 
- - SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift); - Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One); - - SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift); - HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow); - SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift); - - SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift); - SDValue LoBig = Zero; - - Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); - Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); - - return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); -} - -SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - - SDValue Lo = Op.getOperand(0); - SDValue Hi = Op.getOperand(1); - SDValue Shift = Op.getOperand(2); - SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue One = DAG.getConstant(1, DL, VT); - - const bool SRA = Op.getOpcode() == ISD::SRA_PARTS; - - SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT); - SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); - SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); - SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); - - // The dance around Width1 is necessary for 0 special case. - // Without it the CompShift might be 32, producing incorrect results in - // Overflow. So we do the shift in two steps, the alternative is to - // add a conditional to filter the special case. - - SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift); - Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One); - - SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift); - SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift); - LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow); - - SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift); - SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero; - - Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); - Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); - - return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); +SDValue R600TargetLowering::LowerShiftParts(SDValue Op, + SelectionDAG &DAG) const { + SDValue Lo, Hi; + expandShiftParts(Op.getNode(), Lo, Hi, DAG); + return DAG.getMergeValues({Lo, Hi}, SDLoc(Op)); } SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19573,50 +19573,9 @@ /// and take a 2 x i32 value to shift plus a shift amount. /// TODO: Can this be moved to general expansion code? static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { - assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - MVT VT = Op.getSimpleValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and - // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's optimized away - // during isel. - SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, - DAG.getConstant(VTBits - 1, dl, MVT::i8)); - SDValue Tmp1 = isSRA ? 
DAG.getNode(ISD::SRA, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, dl, MVT::i8)) - : DAG.getConstant(0, dl, VT); - - SDValue Tmp2, Tmp3; - if (Op.getOpcode() == ISD::SHL_PARTS) { - Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt); - } else { - Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt); - Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt); - } - - // If the shift amount is larger or equal than the width of a part we can't - // rely on the results of shld/shrd. Insert a test and select the appropriate - // values for large shift amounts. - SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, - DAG.getConstant(VTBits, dl, MVT::i8)); - SDValue Cond = DAG.getSetCC(dl, MVT::i8, AndNode, - DAG.getConstant(0, dl, MVT::i8), ISD::SETNE); - - SDValue Hi, Lo; - if (Op.getOpcode() == ISD::SHL_PARTS) { - Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); - Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); - } else { - Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); - Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); - } - - return DAG.getMergeValues({ Lo, Hi }, dl); + SDValue Lo, Hi; + DAG.getTargetLoweringInfo().expandShiftParts(Op.getNode(), Lo, Hi, DAG); + return DAG.getMergeValues({Lo, Hi}, SDLoc(Op)); } static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, diff --git a/llvm/test/CodeGen/AArch64/arm64-long-shift.ll b/llvm/test/CodeGen/AArch64/arm64-long-shift.ll --- a/llvm/test/CodeGen/AArch64/arm64-long-shift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-long-shift.ll @@ -4,17 +4,15 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: shl: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: lsr x8, x0, x8 -; CHECK-NEXT: cmp x2, #0 // =0 -; CHECK-NEXT: csel x8, xzr, x8, eq -; CHECK-NEXT: lsl x9, x1, x2 +; CHECK-NEXT: lsl x8, x1, x2 +; CHECK-NEXT: mvn w9, w2 +; CHECK-NEXT: lsr x10, x0, #1 +; CHECK-NEXT: lsr x9, x10, x9 ; CHECK-NEXT: orr x8, x8, x9 ; CHECK-NEXT: lsl x9, x0, x2 -; CHECK-NEXT: sub x10, x2, #64 // =64 -; CHECK-NEXT: cmp x10, #0 // =0 -; CHECK-NEXT: csel x1, x9, x8, ge -; CHECK-NEXT: csel x0, xzr, x9, ge +; CHECK-NEXT: tst x2, #0x40 +; CHECK-NEXT: csel x1, x9, x8, ne +; CHECK-NEXT: csel x0, xzr, x9, ne ; CHECK-NEXT: ret %shl = shl i128 %r, %s ret i128 %shl @@ -39,18 +37,16 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: ashr: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: lsl x8, x1, x8 -; CHECK-NEXT: cmp x2, #0 // =0 -; CHECK-NEXT: csel x8, xzr, x8, eq -; CHECK-NEXT: lsr x9, x0, x2 +; CHECK-NEXT: lsr x8, x0, x2 +; CHECK-NEXT: mvn w9, w2 +; CHECK-NEXT: lsl x10, x1, #1 +; CHECK-NEXT: lsl x9, x10, x9 ; CHECK-NEXT: orr x8, x9, x8 ; CHECK-NEXT: asr x9, x1, x2 -; CHECK-NEXT: sub x10, x2, #64 // =64 -; CHECK-NEXT: cmp x10, #0 // =0 -; CHECK-NEXT: csel x0, x9, x8, ge +; CHECK-NEXT: tst x2, #0x40 +; CHECK-NEXT: csel x0, x9, x8, ne ; CHECK-NEXT: asr x8, x1, #63 -; CHECK-NEXT: csel x1, x8, x9, ge +; CHECK-NEXT: csel x1, x8, x9, ne ; CHECK-NEXT: ret %shr = ashr i128 %r, %s ret i128 %shr @@ -75,17 +71,15 @@ define i128 @lshr(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: lshr: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: lsl x8, x1, x8 -; CHECK-NEXT: cmp x2, #0 // =0 -; CHECK-NEXT: csel x8, xzr, x8, eq -; CHECK-NEXT: lsr x9, x0, x2 +; CHECK-NEXT: lsr x8, x0, x2 +; CHECK-NEXT: mvn w9, w2 +; CHECK-NEXT: lsl x10, x1, #1 +; CHECK-NEXT: lsl x9, x10, x9 ; 
CHECK-NEXT: orr x8, x9, x8 ; CHECK-NEXT: lsr x9, x1, x2 -; CHECK-NEXT: sub x10, x2, #64 // =64 -; CHECK-NEXT: cmp x10, #0 // =0 -; CHECK-NEXT: csel x0, x9, x8, ge -; CHECK-NEXT: csel x1, xzr, x9, ge +; CHECK-NEXT: tst x2, #0x40 +; CHECK-NEXT: csel x0, x9, x8, ne +; CHECK-NEXT: csel x1, xzr, x9, ne ; CHECK-NEXT: ret %shr = lshr i128 %r, %s ret i128 %shr diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll --- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll @@ -263,7 +263,7 @@ ; ; EG-LABEL: fp_to_sint_i64: ; EG: ; %bb.0: ; %entry -; EG-NEXT: ALU 42, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -273,30 +273,29 @@ ; EG-NEXT: BFE_UINT T0.W, KC0[2].Z, literal.x, PV.W, ; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.y, ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) -; EG-NEXT: SUB_INT T2.W, literal.x, PV.W, -; EG-NEXT: OR_INT * T1.W, PS, literal.y, -; EG-NEXT: 181(2.536350e-43), 8388608(1.175494e-38) -; EG-NEXT: LSHR * T2.W, PS, PV.W, +; EG-NEXT: OR_INT T1.W, PS, literal.x, +; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y, +; EG-NEXT: 8388608(1.175494e-38), -150(nan) ; EG-NEXT: ADD_INT T0.X, T0.W, literal.x, -; EG-NEXT: LSHR T0.Y, PV.W, 1, -; EG-NEXT: ADD_INT T0.Z, T0.W, literal.y, -; EG-NEXT: SUB_INT T2.W, literal.z, T0.W, -; EG-NEXT: ADD_INT * T0.W, T0.W, literal.w, -; EG-NEXT: -127(nan), -150(nan) -; EG-NEXT: 150(2.101948e-43), -182(nan) -; EG-NEXT: LSHL T1.X, T1.W, PS, -; EG-NEXT: SETGT_UINT T1.Y, PV.W, literal.x, -; EG-NEXT: LSHR T1.Z, T1.W, PV.W, -; EG-NEXT: SETGT_UINT T0.W, PV.Z, literal.x, -; EG-NEXT: LSHL * T1.W, T1.W, PV.Z, +; EG-NEXT: SUB_INT T0.Y, literal.y, T0.W, +; EG-NEXT: AND_INT T0.Z, PS, literal.z, +; EG-NEXT: NOT_INT T0.W, PS, +; EG-NEXT: LSHR * T3.W, PV.W, 1, +; EG-NEXT: -127(nan), 150(2.101948e-43) ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T2.Y, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0, -; EG-NEXT: CNDE_INT T0.W, PV.W, T0.Y, PV.X, +; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W, +; EG-NEXT: LSHL T1.Y, T1.W, PV.Z, +; EG-NEXT: AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122 +; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0, +; EG-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, 0.0, +; EG-NEXT: CNDE_INT T0.W, PV.Z, PV.X, PV.Y, ; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x, ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T1.Z, PS, 0.0, PV.W, -; EG-NEXT: CNDE_INT T0.W, PS, PV.Z, PV.Y, +; EG-NEXT: CNDE_INT T0.Z, PS, 0.0, PV.W, +; EG-NEXT: CNDE_INT T0.W, PS, PV.Y, PV.Z, ; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) ; EG-NEXT: XOR_INT T0.W, PV.W, PS, @@ -452,89 +451,87 @@ ; ; EG-LABEL: fp_to_sint_v2i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 79, @4, KC0[CB0:0-32], KC1[] -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1 +; EG-NEXT: ALU 77, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: ; EG-NEXT: MOV * T0.W, literal.x, ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) -; EG-NEXT: BFE_UINT T1.W, KC0[2].W, literal.x, PV.W, -; EG-NEXT: AND_INT * T2.W, KC0[2].W, literal.y, -; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) -; EG-NEXT: AND_INT T0.Y, KC0[3].X, literal.x, -; 
EG-NEXT: ADD_INT T0.Z, PV.W, literal.y, -; EG-NEXT: OR_INT T2.W, PS, literal.z, -; EG-NEXT: SUB_INT * T3.W, literal.w, PV.W, -; EG-NEXT: 8388607(1.175494e-38), -150(nan) -; EG-NEXT: 8388608(1.175494e-38), 150(2.101948e-43) -; EG-NEXT: BFE_UINT T0.X, KC0[3].X, literal.x, T0.W, -; EG-NEXT: SETGT_UINT T1.Y, PS, literal.y, -; EG-NEXT: LSHR T1.Z, PV.W, PS, -; EG-NEXT: SETGT_UINT T0.W, PV.Z, literal.y, -; EG-NEXT: LSHL * T3.W, PV.W, PV.Z, -; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T1.X, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0, -; EG-NEXT: ADD_INT T0.Z, PV.X, literal.x, -; EG-NEXT: OR_INT T3.W, T0.Y, literal.y, -; EG-NEXT: SUB_INT * T4.W, literal.z, PV.X, -; EG-NEXT: -150(nan), 8388608(1.175494e-38) -; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.X, literal.x, T0.X, -; EG-NEXT: SETGT_UINT T0.Y, PS, literal.y, -; EG-NEXT: LSHR T1.Z, PV.W, PS, -; EG-NEXT: SETGT_UINT T4.W, PV.Z, literal.y, -; EG-NEXT: LSHL * T5.W, PV.W, PV.Z, -; EG-NEXT: 181(2.536350e-43), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T3.X, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T0.Y, PV.Y, PV.Z, 0.0, -; EG-NEXT: ADD_INT T0.Z, T0.X, literal.x, -; EG-NEXT: LSHR T5.W, T3.W, PV.X, -; EG-NEXT: SUB_INT * T6.W, literal.y, T1.W, -; EG-NEXT: -182(nan), 181(2.536350e-43) -; EG-NEXT: ADD_INT T2.X, T1.W, literal.x, -; EG-NEXT: LSHR T2.Y, T2.W, PS, BS:VEC_120/SCL_212 -; EG-NEXT: ADD_INT T1.Z, T0.X, literal.y, -; EG-NEXT: LSHR T5.W, PV.W, 1, -; EG-NEXT: LSHL * T3.W, T3.W, PV.Z, -; EG-NEXT: -182(nan), -127(nan) -; EG-NEXT: CNDE_INT T0.X, T4.W, PV.W, PS, -; EG-NEXT: SETGT_INT T3.Y, PV.Z, literal.x, -; EG-NEXT: ADD_INT T0.Z, T1.W, literal.y, BS:VEC_120/SCL_212 -; EG-NEXT: LSHR T1.W, PV.Y, 1, -; EG-NEXT: LSHL * T2.W, T2.W, PV.X, +; EG-NEXT: BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W, +; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, +; EG-NEXT: BFE_UINT T0.W, KC0[3].X, literal.y, T0.W, +; EG-NEXT: ADD_INT * T2.W, PV.W, literal.z, +; EG-NEXT: 8388607(1.175494e-38), 23(3.222986e-44) +; EG-NEXT: -150(nan), 0(0.000000e+00) +; EG-NEXT: SUB_INT T0.X, literal.x, PV.W, +; EG-NEXT: SUB_INT T0.Y, literal.x, T1.W, +; EG-NEXT: AND_INT T1.Z, PS, literal.y, +; EG-NEXT: OR_INT T3.W, PV.Z, literal.z, +; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w, +; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44) +; EG-NEXT: 8388608(1.175494e-38), 8388607(1.175494e-38) +; EG-NEXT: OR_INT T1.X, PS, literal.x, +; EG-NEXT: LSHL T1.Y, PV.W, PV.Z, +; EG-NEXT: AND_INT T0.Z, T2.W, literal.y, +; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y, +; EG-NEXT: AND_INT * T5.W, PV.Y, literal.y, +; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44) +; EG-NEXT: CNDE_INT T2.X, PS, PV.W, 0.0, +; EG-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, 0.0, +; EG-NEXT: ADD_INT T1.Z, T0.W, literal.x, +; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X, +; EG-NEXT: AND_INT * T5.W, T0.X, literal.y, +; EG-NEXT: -150(nan), 32(4.484155e-44) +; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0, +; EG-NEXT: NOT_INT T2.Y, T2.W, +; EG-NEXT: AND_INT T2.Z, PV.Z, literal.x, +; EG-NEXT: NOT_INT T2.W, PV.Z, +; EG-NEXT: LSHR * T4.W, T1.X, 1, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T3.X, T3.W, 1, +; EG-NEXT: ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W, +; EG-NEXT: LSHL T0.W, T1.X, PV.Z, +; EG-NEXT: AND_INT * T2.W, T1.Z, literal.y, +; EG-NEXT: -127(nan), 32(4.484155e-44) +; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0, +; EG-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W, +; EG-NEXT: SETGT_INT T1.Z, PV.Y, 
literal.x, +; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y, +; EG-NEXT: ADD_INT * T1.W, T1.W, literal.y, ; EG-NEXT: 23(3.222986e-44), -127(nan) -; EG-NEXT: CNDE_INT T2.X, T0.W, PV.W, PS, -; EG-NEXT: SETGT_INT T2.Y, PV.Z, literal.x, -; EG-NEXT: CNDE_INT T2.Z, PV.Y, 0.0, PV.X, -; EG-NEXT: CNDE_INT T0.W, PV.Y, T0.Y, T3.X, -; EG-NEXT: ASHR * T1.W, KC0[3].X, literal.y, +; EG-NEXT: CNDE_INT T3.X, T0.Z, PV.W, T1.Y, +; EG-NEXT: SETGT_INT T1.Y, PS, literal.x, +; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y, +; EG-NEXT: CNDE_INT T0.W, PV.Z, T0.X, PV.X, +; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y, ; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44) ; EG-NEXT: XOR_INT T0.X, PV.W, PS, -; EG-NEXT: XOR_INT T0.Y, PV.Z, PS, -; EG-NEXT: CNDE_INT T2.Z, PV.Y, 0.0, PV.X, -; EG-NEXT: CNDE_INT T0.W, PV.Y, T1.Y, T1.X, -; EG-NEXT: ASHR * T2.W, KC0[2].W, literal.x, +; EG-NEXT: XOR_INT T2.Y, PV.Z, PS, +; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X, +; EG-NEXT: CNDE_INT T0.W, PV.Y, T2.X, T0.Y, +; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: XOR_INT T1.Y, PV.W, PS, -; EG-NEXT: XOR_INT T2.Z, PV.Z, PS, -; EG-NEXT: SUB_INT T0.W, PV.Y, T1.W, -; EG-NEXT: SUBB_UINT * T3.W, PV.X, T1.W, -; EG-NEXT: SUB_INT T0.Y, PV.W, PS, -; EG-NEXT: SETGT_INT T1.Z, T1.Z, literal.x, -; EG-NEXT: SUB_INT T0.W, PV.Z, T2.W, -; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W, +; EG-NEXT: XOR_INT T0.Y, PV.W, PS, +; EG-NEXT: XOR_INT T0.Z, PV.Z, PS, +; EG-NEXT: SUB_INT T0.W, PV.Y, T2.W, +; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W, +; EG-NEXT: SUB_INT T1.Y, PV.W, PS, +; EG-NEXT: SETGT_INT T1.Z, T3.Y, literal.x, +; EG-NEXT: SUB_INT T0.W, PV.Z, T3.W, +; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T3.W, ; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.Z, PV.W, PS, -; EG-NEXT: SETGT_INT T0.W, T0.Z, literal.x, -; EG-NEXT: CNDE_INT * T3.W, PV.Z, 0.0, PV.Y, BS:VEC_021/SCL_122 +; EG-NEXT: SUB_INT T0.Z, PV.W, PS, +; EG-NEXT: SETGT_INT T0.W, T1.W, literal.x, +; EG-NEXT: CNDE_INT * T1.W, PV.Z, 0.0, PV.Y, BS:VEC_021/SCL_122 ; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T3.Y, PV.W, 0.0, PV.Z, -; EG-NEXT: SUB_INT * T1.W, T0.X, T1.W, -; EG-NEXT: CNDE_INT T3.Z, T1.Z, 0.0, PV.W, -; EG-NEXT: SUB_INT * T1.W, T1.Y, T2.W, -; EG-NEXT: CNDE_INT T3.X, T0.W, 0.0, PV.W, +; EG-NEXT: CNDE_INT T1.Y, PV.W, 0.0, PV.Z, +; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W, +; EG-NEXT: CNDE_INT T1.Z, T1.Z, 0.0, PV.W, +; EG-NEXT: SUB_INT * T2.W, T0.Y, T3.W, +; EG-NEXT: CNDE_INT T1.X, T0.W, 0.0, PV.W, ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptosi <2 x float> %x to <2 x i64> @@ -779,175 +776,171 @@ ; ; EG-LABEL: fp_to_sint_v4i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[] -; EG-NEXT: ALU 64, @106, KC0[CB0:0-32], KC1[] -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 0 -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T2.X, 1 +; EG-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 58, @108, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 6: ; EG-NEXT: MOV * T0.W, literal.x, ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) -; EG-NEXT: BFE_UINT T1.W, KC0[3].Z, literal.x, PV.W, -; EG-NEXT: AND_INT * T2.W, KC0[3].Z, literal.y, -; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) -; EG-NEXT: ADD_INT T0.Z, PV.W, literal.x, -; EG-NEXT: SUB_INT T3.W, literal.y, PV.W, -; EG-NEXT: OR_INT * T2.W, PS, literal.z, -; EG-NEXT: -127(nan), 
181(2.536350e-43) -; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) -; EG-NEXT: BFE_UINT T0.X, KC0[4].X, literal.x, T0.W, -; EG-NEXT: AND_INT T0.Y, KC0[4].X, literal.y, -; EG-NEXT: ADD_INT T1.Z, T1.W, literal.z, -; EG-NEXT: ADD_INT T4.W, T1.W, literal.w, -; EG-NEXT: LSHR * T3.W, PS, PV.W, +; EG-NEXT: BFE_UINT T1.W, KC0[4].X, literal.x, PV.W, +; EG-NEXT: AND_INT * T2.W, KC0[4].X, literal.y, ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) -; EG-NEXT: -150(nan), -182(nan) -; EG-NEXT: LSHR T1.X, PS, 1, -; EG-NEXT: LSHL T1.Y, T2.W, PV.W, -; EG-NEXT: SETGT_UINT T2.Z, PV.Z, literal.x, -; EG-NEXT: OR_INT T3.W, PV.Y, literal.y, -; EG-NEXT: ADD_INT * T4.W, PV.X, literal.z, -; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38) +; EG-NEXT: OR_INT T0.Z, PS, literal.x, +; EG-NEXT: BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W, +; EG-NEXT: ADD_INT * T3.W, PV.W, literal.z, +; EG-NEXT: 8388608(1.175494e-38), 23(3.222986e-44) ; EG-NEXT: -150(nan), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.X, literal.x, T1.W, -; EG-NEXT: SETGT_UINT T0.Y, PS, literal.y, -; EG-NEXT: LSHL T3.Z, PV.W, PS, -; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, PV.Y, -; EG-NEXT: SETGT_INT * T4.W, T0.Z, literal.z, -; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44) -; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T1.Y, PS, 0.0, PV.W, -; EG-NEXT: CNDE_INT T3.Z, PV.Y, PV.Z, 0.0, -; EG-NEXT: SETGT_UINT T1.W, PV.X, literal.x, -; EG-NEXT: SUB_INT * T5.W, literal.y, T0.X, -; EG-NEXT: 31(4.344025e-44), 181(2.536350e-43) -; EG-NEXT: LSHR T1.X, T2.W, T2.X, -; EG-NEXT: LSHL T2.Y, T2.W, T1.Z, -; EG-NEXT: SUB_INT T1.Z, literal.x, T0.X, BS:VEC_021/SCL_122 -; EG-NEXT: ADD_INT T2.W, T0.X, literal.y, -; EG-NEXT: LSHR * T5.W, T3.W, PS, -; EG-NEXT: 150(2.101948e-43), -182(nan) -; EG-NEXT: ADD_INT T0.X, T0.X, literal.x, -; EG-NEXT: LSHR T3.Y, PS, 1, -; EG-NEXT: LSHL T4.Z, T3.W, PV.W, -; EG-NEXT: SETGT_UINT T2.W, PV.Z, literal.y, -; EG-NEXT: LSHR * T3.W, T3.W, PV.Z, +; EG-NEXT: ADD_INT T0.Y, PV.W, literal.x, +; EG-NEXT: AND_INT T1.Z, PS, literal.y, +; EG-NEXT: NOT_INT T4.W, PS, +; EG-NEXT: LSHR * T5.W, PV.Z, 1, ; EG-NEXT: -127(nan), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T2.X, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T0.Y, T0.Y, PV.Y, PV.Z, -; EG-NEXT: SETGT_INT T1.Z, PV.X, literal.x, -; EG-NEXT: CNDE_INT T2.W, T2.Z, T2.Y, 0.0, -; EG-NEXT: CNDE_INT * T1.W, T1.W, T1.X, 0.0, +; EG-NEXT: ADD_INT T0.X, T1.W, literal.x, +; EG-NEXT: BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W, +; EG-NEXT: AND_INT T2.Z, T3.W, literal.y, BS:VEC_201 +; EG-NEXT: LSHL T3.W, T0.Z, PV.Z, +; EG-NEXT: SUB_INT * T1.W, literal.z, T1.W, +; EG-NEXT: -127(nan), 32(4.484155e-44) +; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.X, PS, literal.x, +; EG-NEXT: BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS, +; EG-NEXT: AND_INT T0.Z, KC0[3].Z, literal.y, +; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.Y, PV.W, +; EG-NEXT: SETGT_INT * T4.W, PV.X, literal.z, +; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38) +; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T2.X, PS, 0.0, PV.W, +; EG-NEXT: OR_INT T1.Y, PV.Z, literal.x, +; EG-NEXT: ADD_INT T0.Z, T2.W, literal.y, +; EG-NEXT: CNDE_INT T1.W, PV.X, PV.Y, 0.0, +; EG-NEXT: CNDE_INT * T3.W, T2.Z, T3.W, 0.0, +; EG-NEXT: 8388608(1.175494e-38), -150(nan) +; EG-NEXT: CNDE_INT T1.X, T4.W, PV.W, PS, +; EG-NEXT: ASHR T2.Y, KC0[4].X, literal.x, +; EG-NEXT: AND_INT T1.Z, PV.Z, literal.x, +; EG-NEXT: NOT_INT T1.W, PV.Z, +; EG-NEXT: LSHR * T3.W, PV.Y, 1, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W, +; EG-NEXT: LSHL T3.Y, T1.Y, 
PV.Z, +; EG-NEXT: XOR_INT T1.Z, PV.X, PV.Y, +; EG-NEXT: XOR_INT T1.W, T2.X, PV.Y, +; EG-NEXT: SUB_INT * T2.W, literal.x, T2.W, +; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.X, T0.Z, literal.x, +; EG-NEXT: AND_INT T4.Y, PS, literal.x, +; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122 +; EG-NEXT: SUB_INT T1.W, PV.W, T2.Y, +; EG-NEXT: SUBB_UINT * T2.W, PV.Z, T2.Y, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: SUB_INT T2.X, PV.W, PS, +; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0, +; EG-NEXT: CNDE_INT T0.Z, PV.X, T3.Y, 0.0, +; EG-NEXT: CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122 +; EG-NEXT: SETGT_INT * T2.W, T0.Y, literal.x, ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T1.X, T4.W, PS, PV.W, -; EG-NEXT: ASHR T2.Y, KC0[3].Z, literal.x, -; EG-NEXT: CNDE_INT T2.Z, PV.Z, 0.0, PV.Y, -; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T3.Z, -; EG-NEXT: ASHR * T2.W, KC0[4].X, literal.x, +; EG-NEXT: BFE_UINT T1.X, KC0[3].W, literal.x, T0.W, +; EG-NEXT: AND_INT T3.Y, KC0[3].W, literal.y, +; EG-NEXT: CNDE_INT T2.Z, PS, 0.0, PV.W, +; EG-NEXT: CNDE_INT T1.W, PS, PV.Y, PV.Z, +; EG-NEXT: ASHR * T2.W, KC0[3].Z, literal.z, +; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: BFE_UINT T2.X, KC0[3].Y, literal.x, T0.W, -; EG-NEXT: XOR_INT T0.Y, PV.W, PS, -; EG-NEXT: XOR_INT T1.Z, PV.Z, PS, -; EG-NEXT: XOR_INT T1.W, PV.X, PV.Y, -; EG-NEXT: XOR_INT * T3.W, T1.Y, PV.Y, +; EG-NEXT: BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W, +; EG-NEXT: XOR_INT T1.Y, PV.W, PS, +; EG-NEXT: XOR_INT T0.Z, PV.Z, PS, +; EG-NEXT: OR_INT T0.W, PV.Y, literal.y, +; EG-NEXT: SUB_INT * T1.W, literal.z, PV.X, +; EG-NEXT: 23(3.222986e-44), 8388608(1.175494e-38) +; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) +; EG-NEXT: AND_INT T4.X, KC0[3].Y, literal.x, +; EG-NEXT: AND_INT T3.Y, PS, literal.y, +; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS, +; EG-NEXT: SUB_INT T1.W, PV.Z, T2.W, +; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W, +; EG-NEXT: 8388607(1.175494e-38), 32(4.484155e-44) +; EG-NEXT: SUB_INT T5.X, PV.W, PS, +; EG-NEXT: SETGT_INT T0.Y, T0.Y, literal.x, +; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0, +; EG-NEXT: OR_INT T1.W, PV.X, literal.y, +; EG-NEXT: ADD_INT * T3.W, T3.X, literal.z, +; EG-NEXT: -1(nan), 8388608(1.175494e-38) +; EG-NEXT: -150(nan), 0(0.000000e+00) +; EG-NEXT: ADD_INT T4.X, T3.X, literal.x, +; EG-NEXT: SUB_INT T3.Y, literal.y, T3.X, +; EG-NEXT: AND_INT T2.Z, PS, literal.z, +; EG-NEXT: NOT_INT T4.W, PS, +; EG-NEXT: LSHR * T5.W, PV.W, 1, +; EG-NEXT: -127(nan), 150(2.101948e-43) +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W, +; EG-NEXT: LSHL T4.Y, T1.W, PV.Z, +; EG-NEXT: AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122 +; EG-NEXT: AND_INT * T3.W, PV.Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: ADD_INT T6.X, T1.X, literal.x, +; EG-NEXT: CNDE_INT * T3.Y, PS, PV.W, 0.0, +; EG-NEXT: -150(nan), 0(0.000000e+00) +; EG-NEXT: ALU clause starting at 108: +; EG-NEXT: CNDE_INT T3.Z, T2.Z, T4.Y, 0.0, +; EG-NEXT: CNDE_INT T1.W, T2.Z, T3.X, T4.Y, +; EG-NEXT: SETGT_INT * T3.W, T4.X, literal.x, ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: AND_INT T1.X, KC0[3].Y, literal.x, -; EG-NEXT: SUB_INT T1.Y, PS, T2.Y, -; EG-NEXT: SUBB_UINT T2.Z, PV.W, T2.Y, -; EG-NEXT: SUB_INT T3.W, PV.Z, T2.W, -; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T2.W, -; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00) -; EG-NEXT: 
SUB_INT T3.Y, PV.W, PS, -; EG-NEXT: SUB_INT T1.Z, PV.Y, PV.Z, -; EG-NEXT: OR_INT T3.W, PV.X, literal.x, -; EG-NEXT: SUB_INT * T4.W, literal.y, T2.X, -; EG-NEXT: 8388608(1.175494e-38), 150(2.101948e-43) -; EG-NEXT: SETGT_INT T1.X, T0.Z, literal.x, -; EG-NEXT: SETGT_UINT T1.Y, PS, literal.y, -; EG-NEXT: LSHR T0.Z, PV.W, PS, -; EG-NEXT: SUB_INT T4.W, literal.z, T2.X, -; EG-NEXT: AND_INT * T5.W, KC0[3].W, literal.w, -; EG-NEXT: -1(nan), 31(4.344025e-44) -; EG-NEXT: 181(2.536350e-43), 8388607(1.175494e-38) -; EG-NEXT: OR_INT T3.X, PS, literal.x, -; EG-NEXT: ADD_INT T4.Y, T2.X, literal.y, -; EG-NEXT: ADD_INT T2.Z, T2.X, literal.z, -; EG-NEXT: BFE_UINT T0.W, KC0[3].W, literal.w, T0.W, BS:VEC_021/SCL_122 -; EG-NEXT: LSHR * T4.W, T3.W, PV.W, -; EG-NEXT: 8388608(1.175494e-38), -150(nan) -; EG-NEXT: -182(nan), 23(3.222986e-44) -; EG-NEXT: ADD_INT T4.X, PV.W, literal.x, -; EG-NEXT: ADD_INT T5.Y, T2.X, literal.y, -; EG-NEXT: LSHR T3.Z, PS, 1, -; EG-NEXT: LSHL T4.W, T3.W, PV.Z, -; EG-NEXT: SETGT_UINT * T5.W, PV.Y, literal.z, -; EG-NEXT: -150(nan), -127(nan) +; EG-NEXT: CNDE_INT T3.X, PS, 0.0, PV.W, +; EG-NEXT: CNDE_INT T3.Y, PS, T3.Y, PV.Z, +; EG-NEXT: AND_INT T2.Z, T6.X, literal.x, +; EG-NEXT: NOT_INT T1.W, T6.X, +; EG-NEXT: LSHR * T3.W, T0.W, 1, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHL T2.X, T3.W, T4.Y, -; EG-NEXT: CNDE_INT * T4.Y, PS, PV.Z, PV.W, -; EG-NEXT: ALU clause starting at 106: -; EG-NEXT: SETGT_INT T2.Z, T5.Y, literal.x, -; EG-NEXT: SETGT_UINT T3.W, T4.X, literal.y, -; EG-NEXT: LSHL * T4.W, T3.X, T4.X, -; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T4.X, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T4.Y, PV.Z, 0.0, T4.Y, BS:VEC_021/SCL_122 -; EG-NEXT: SUB_INT T3.Z, literal.x, T0.W, -; EG-NEXT: CNDE_INT T4.W, T5.W, T2.X, 0.0, -; EG-NEXT: CNDE_INT * T5.W, T1.Y, T0.Z, 0.0, -; EG-NEXT: 181(2.536350e-43), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T2.X, T2.Z, PS, PV.W, -; EG-NEXT: ASHR T1.Y, KC0[3].Y, literal.x, -; EG-NEXT: SUB_INT T0.Z, literal.y, T0.W, -; EG-NEXT: ADD_INT T4.W, T0.W, literal.z, -; EG-NEXT: LSHR * T5.W, T3.X, PV.Z, -; EG-NEXT: 31(4.344025e-44), 150(2.101948e-43) -; EG-NEXT: -182(nan), 0(0.000000e+00) -; EG-NEXT: ADD_INT T5.X, T0.W, literal.x, -; EG-NEXT: LSHR T6.Y, PS, 1, -; EG-NEXT: LSHL T2.Z, T3.X, PV.W, -; EG-NEXT: SETGT_UINT T0.W, PV.Z, literal.y, -; EG-NEXT: LSHR * T4.W, T3.X, PV.Z, -; EG-NEXT: -127(nan), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T3.X, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T6.Y, T3.W, PV.Y, PV.Z, -; EG-NEXT: SETGT_INT T0.Z, PV.X, literal.x, -; EG-NEXT: XOR_INT T0.W, T2.X, T1.Y, -; EG-NEXT: XOR_INT * T3.W, T4.Y, T1.Y, +; EG-NEXT: ASHR T7.X, KC0[3].Y, literal.x, +; EG-NEXT: ADD_INT T4.Y, T1.X, literal.y, +; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W, +; EG-NEXT: LSHL T0.W, T0.W, PV.Z, +; EG-NEXT: AND_INT * T1.W, T6.X, literal.z, +; EG-NEXT: 31(4.344025e-44), -127(nan) +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0, +; EG-NEXT: CNDE_INT T5.Y, PS, PV.Z, PV.W, +; EG-NEXT: SETGT_INT T2.Z, PV.Y, literal.x, +; EG-NEXT: XOR_INT T0.W, T3.Y, PV.X, +; EG-NEXT: XOR_INT * T1.W, T3.X, PV.X, ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.X, PS, T1.Y, -; EG-NEXT: SUBB_UINT T4.Y, PV.W, T1.Y, -; EG-NEXT: CNDE_INT T2.Z, PV.Z, 0.0, PV.Y, -; EG-NEXT: CNDE_INT T3.W, PV.Z, PV.X, T4.X, -; EG-NEXT: ASHR * T4.W, KC0[3].W, literal.x, +; EG-NEXT: SUB_INT T3.X, PS, T7.X, +; EG-NEXT: SUBB_UINT T3.Y, PV.W, T7.X, +; EG-NEXT: CNDE_INT T3.Z, PV.Z, 0.0, PV.Y, +; EG-NEXT: CNDE_INT T1.W, PV.Z, T0.Z, PV.X, +; 
EG-NEXT: ASHR * T3.W, KC0[3].W, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: XOR_INT T3.X, PV.W, PS, -; EG-NEXT: XOR_INT T6.Y, PV.Z, PS, +; EG-NEXT: XOR_INT T1.X, PV.W, PS, +; EG-NEXT: XOR_INT T5.Y, PV.Z, PS, ; EG-NEXT: SUB_INT T0.Z, PV.X, PV.Y, -; EG-NEXT: SETGT_INT T3.W, T5.Y, literal.x, -; EG-NEXT: CNDE_INT * T5.W, T1.X, 0.0, T1.Z, BS:VEC_021/SCL_122 +; EG-NEXT: SETGT_INT T1.W, T4.X, literal.x, +; EG-NEXT: CNDE_INT * T6.W, T0.Y, 0.0, T5.X, BS:VEC_021/SCL_122 ; EG-NEXT: -1(nan), 0(0.000000e+00) ; EG-NEXT: SETGT_INT T0.X, T0.X, literal.x, -; EG-NEXT: CNDE_INT T5.Y, PV.W, 0.0, PV.Z, -; EG-NEXT: SUB_INT T0.Z, T1.W, T2.Y, -; EG-NEXT: SUB_INT T1.W, PV.Y, T4.W, -; EG-NEXT: SUBB_UINT * T6.W, PV.X, T4.W, +; EG-NEXT: CNDE_INT T6.Y, PV.W, 0.0, PV.Z, +; EG-NEXT: SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122 +; EG-NEXT: SUB_INT T2.W, PV.Y, T3.W, +; EG-NEXT: SUBB_UINT * T4.W, PV.X, T3.W, ; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.X, PV.W, PS, -; EG-NEXT: SETGT_INT T2.Y, T5.X, literal.x, -; EG-NEXT: CNDE_INT T5.Z, T1.X, 0.0, PV.Z, BS:VEC_120/SCL_212 -; EG-NEXT: SUB_INT T0.W, T0.W, T1.Y, -; EG-NEXT: CNDE_INT * T1.W, PV.X, 0.0, T3.Y, BS:VEC_021/SCL_122 +; EG-NEXT: SUB_INT T3.X, PV.W, PS, +; EG-NEXT: SETGT_INT T1.Y, T4.Y, literal.x, +; EG-NEXT: CNDE_INT T6.Z, T0.Y, 0.0, PV.Z, BS:VEC_120/SCL_212 +; EG-NEXT: SUB_INT T0.W, T0.W, T7.X, +; EG-NEXT: CNDE_INT * T4.W, PV.X, 0.0, T2.X, BS:VEC_021/SCL_122 ; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T5.X, T3.W, 0.0, PV.W, -; EG-NEXT: CNDE_INT T1.Y, PV.Y, 0.0, PV.X, -; EG-NEXT: SUB_INT T0.W, T0.Y, T2.W, +; EG-NEXT: CNDE_INT T6.X, T1.W, 0.0, PV.W, +; EG-NEXT: CNDE_INT T4.Y, PV.Y, 0.0, PV.X, +; EG-NEXT: SUB_INT T0.W, T1.Z, T2.Y, ; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T1.Z, T0.X, 0.0, PV.W, -; EG-NEXT: SUB_INT * T0.W, T3.X, T4.W, BS:VEC_120/SCL_212 -; EG-NEXT: CNDE_INT T1.X, T2.Y, 0.0, PV.W, +; EG-NEXT: CNDE_INT T4.Z, T0.X, 0.0, PV.W, +; EG-NEXT: SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212 +; EG-NEXT: CNDE_INT T4.X, T1.Y, 0.0, PV.W, ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) ; EG-NEXT: LSHR * T0.X, PV.W, literal.x, diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll --- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll @@ -286,7 +286,7 @@ ; ; EG-LABEL: fp_to_uint_f32_to_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 42, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -296,30 +296,29 @@ ; EG-NEXT: BFE_UINT T0.W, KC0[2].Z, literal.x, PV.W, ; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.y, ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) -; EG-NEXT: SUB_INT T2.W, literal.x, PV.W, -; EG-NEXT: OR_INT * T1.W, PS, literal.y, -; EG-NEXT: 181(2.536350e-43), 8388608(1.175494e-38) -; EG-NEXT: LSHR * T2.W, PS, PV.W, +; EG-NEXT: OR_INT T1.W, PS, literal.x, +; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y, +; EG-NEXT: 8388608(1.175494e-38), -150(nan) ; EG-NEXT: ADD_INT T0.X, T0.W, literal.x, -; EG-NEXT: LSHR T0.Y, PV.W, 1, -; EG-NEXT: ADD_INT T0.Z, T0.W, literal.y, -; EG-NEXT: SUB_INT T2.W, literal.z, T0.W, -; EG-NEXT: ADD_INT * T0.W, T0.W, literal.w, -; EG-NEXT: -127(nan), -150(nan) -; EG-NEXT: 150(2.101948e-43), -182(nan) -; EG-NEXT: LSHL T1.X, T1.W, PS, -; EG-NEXT: SETGT_UINT T1.Y, PV.W, literal.x, -; EG-NEXT: LSHR T1.Z, T1.W, PV.W, -; 
EG-NEXT: SETGT_UINT T0.W, PV.Z, literal.x, -; EG-NEXT: LSHL * T1.W, T1.W, PV.Z, +; EG-NEXT: SUB_INT T0.Y, literal.y, T0.W, +; EG-NEXT: AND_INT T0.Z, PS, literal.z, +; EG-NEXT: NOT_INT T0.W, PS, +; EG-NEXT: LSHR * T3.W, PV.W, 1, +; EG-NEXT: -127(nan), 150(2.101948e-43) ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T2.Y, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0, -; EG-NEXT: CNDE_INT T0.W, PV.W, T0.Y, PV.X, +; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W, +; EG-NEXT: LSHL T1.Y, T1.W, PV.Z, +; EG-NEXT: AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122 +; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0, +; EG-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, 0.0, +; EG-NEXT: CNDE_INT T0.W, PV.Z, PV.X, PV.Y, ; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x, ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T1.Z, PS, 0.0, PV.W, -; EG-NEXT: CNDE_INT T0.W, PS, PV.Z, PV.Y, +; EG-NEXT: CNDE_INT T0.Z, PS, 0.0, PV.W, +; EG-NEXT: CNDE_INT T0.W, PS, PV.Y, PV.Z, ; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) ; EG-NEXT: XOR_INT T0.W, PV.W, PS, @@ -580,89 +579,87 @@ ; ; EG-LABEL: fp_to_uint_v2f32_to_v2i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 79, @4, KC0[CB0:0-32], KC1[] -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1 +; EG-NEXT: ALU 77, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: ; EG-NEXT: MOV * T0.W, literal.x, ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) -; EG-NEXT: BFE_UINT T1.W, KC0[2].W, literal.x, PV.W, -; EG-NEXT: AND_INT * T2.W, KC0[2].W, literal.y, -; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) -; EG-NEXT: AND_INT T0.Y, KC0[3].X, literal.x, -; EG-NEXT: ADD_INT T0.Z, PV.W, literal.y, -; EG-NEXT: OR_INT T2.W, PS, literal.z, -; EG-NEXT: SUB_INT * T3.W, literal.w, PV.W, -; EG-NEXT: 8388607(1.175494e-38), -150(nan) -; EG-NEXT: 8388608(1.175494e-38), 150(2.101948e-43) -; EG-NEXT: BFE_UINT T0.X, KC0[3].X, literal.x, T0.W, -; EG-NEXT: SETGT_UINT T1.Y, PS, literal.y, -; EG-NEXT: LSHR T1.Z, PV.W, PS, -; EG-NEXT: SETGT_UINT T0.W, PV.Z, literal.y, -; EG-NEXT: LSHL * T3.W, PV.W, PV.Z, -; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T1.X, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0, -; EG-NEXT: ADD_INT T0.Z, PV.X, literal.x, -; EG-NEXT: OR_INT T3.W, T0.Y, literal.y, -; EG-NEXT: SUB_INT * T4.W, literal.z, PV.X, -; EG-NEXT: -150(nan), 8388608(1.175494e-38) -; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.X, literal.x, T0.X, -; EG-NEXT: SETGT_UINT T0.Y, PS, literal.y, -; EG-NEXT: LSHR T1.Z, PV.W, PS, -; EG-NEXT: SETGT_UINT T4.W, PV.Z, literal.y, -; EG-NEXT: LSHL * T5.W, PV.W, PV.Z, -; EG-NEXT: 181(2.536350e-43), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T3.X, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T0.Y, PV.Y, PV.Z, 0.0, -; EG-NEXT: ADD_INT T0.Z, T0.X, literal.x, -; EG-NEXT: LSHR T5.W, T3.W, PV.X, -; EG-NEXT: SUB_INT * T6.W, literal.y, T1.W, -; EG-NEXT: -182(nan), 181(2.536350e-43) -; EG-NEXT: ADD_INT T2.X, T1.W, literal.x, -; EG-NEXT: LSHR T2.Y, T2.W, PS, BS:VEC_120/SCL_212 -; EG-NEXT: ADD_INT T1.Z, T0.X, literal.y, -; EG-NEXT: LSHR T5.W, PV.W, 1, -; EG-NEXT: LSHL * T3.W, T3.W, PV.Z, -; EG-NEXT: -182(nan), -127(nan) -; EG-NEXT: CNDE_INT T0.X, T4.W, PV.W, PS, -; EG-NEXT: SETGT_INT T3.Y, PV.Z, literal.x, -; EG-NEXT: ADD_INT T0.Z, T1.W, 
literal.y, BS:VEC_120/SCL_212 -; EG-NEXT: LSHR T1.W, PV.Y, 1, -; EG-NEXT: LSHL * T2.W, T2.W, PV.X, +; EG-NEXT: BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W, +; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, +; EG-NEXT: BFE_UINT T0.W, KC0[3].X, literal.y, T0.W, +; EG-NEXT: ADD_INT * T2.W, PV.W, literal.z, +; EG-NEXT: 8388607(1.175494e-38), 23(3.222986e-44) +; EG-NEXT: -150(nan), 0(0.000000e+00) +; EG-NEXT: SUB_INT T0.X, literal.x, PV.W, +; EG-NEXT: SUB_INT T0.Y, literal.x, T1.W, +; EG-NEXT: AND_INT T1.Z, PS, literal.y, +; EG-NEXT: OR_INT T3.W, PV.Z, literal.z, +; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w, +; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44) +; EG-NEXT: 8388608(1.175494e-38), 8388607(1.175494e-38) +; EG-NEXT: OR_INT T1.X, PS, literal.x, +; EG-NEXT: LSHL T1.Y, PV.W, PV.Z, +; EG-NEXT: AND_INT T0.Z, T2.W, literal.y, +; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y, +; EG-NEXT: AND_INT * T5.W, PV.Y, literal.y, +; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44) +; EG-NEXT: CNDE_INT T2.X, PS, PV.W, 0.0, +; EG-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, 0.0, +; EG-NEXT: ADD_INT T1.Z, T0.W, literal.x, +; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X, +; EG-NEXT: AND_INT * T5.W, T0.X, literal.y, +; EG-NEXT: -150(nan), 32(4.484155e-44) +; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0, +; EG-NEXT: NOT_INT T2.Y, T2.W, +; EG-NEXT: AND_INT T2.Z, PV.Z, literal.x, +; EG-NEXT: NOT_INT T2.W, PV.Z, +; EG-NEXT: LSHR * T4.W, T1.X, 1, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T3.X, T3.W, 1, +; EG-NEXT: ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W, +; EG-NEXT: LSHL T0.W, T1.X, PV.Z, +; EG-NEXT: AND_INT * T2.W, T1.Z, literal.y, +; EG-NEXT: -127(nan), 32(4.484155e-44) +; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0, +; EG-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W, +; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x, +; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y, +; EG-NEXT: ADD_INT * T1.W, T1.W, literal.y, ; EG-NEXT: 23(3.222986e-44), -127(nan) -; EG-NEXT: CNDE_INT T2.X, T0.W, PV.W, PS, -; EG-NEXT: SETGT_INT T2.Y, PV.Z, literal.x, -; EG-NEXT: CNDE_INT T2.Z, PV.Y, 0.0, PV.X, -; EG-NEXT: CNDE_INT T0.W, PV.Y, T0.Y, T3.X, -; EG-NEXT: ASHR * T1.W, KC0[3].X, literal.y, +; EG-NEXT: CNDE_INT T3.X, T0.Z, PV.W, T1.Y, +; EG-NEXT: SETGT_INT T1.Y, PS, literal.x, +; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y, +; EG-NEXT: CNDE_INT T0.W, PV.Z, T0.X, PV.X, +; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y, ; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44) ; EG-NEXT: XOR_INT T0.X, PV.W, PS, -; EG-NEXT: XOR_INT T0.Y, PV.Z, PS, -; EG-NEXT: CNDE_INT T2.Z, PV.Y, 0.0, PV.X, -; EG-NEXT: CNDE_INT T0.W, PV.Y, T1.Y, T1.X, -; EG-NEXT: ASHR * T2.W, KC0[2].W, literal.x, +; EG-NEXT: XOR_INT T2.Y, PV.Z, PS, +; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X, +; EG-NEXT: CNDE_INT T0.W, PV.Y, T2.X, T0.Y, +; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: XOR_INT T1.Y, PV.W, PS, -; EG-NEXT: XOR_INT T2.Z, PV.Z, PS, -; EG-NEXT: SUB_INT T0.W, PV.Y, T1.W, -; EG-NEXT: SUBB_UINT * T3.W, PV.X, T1.W, -; EG-NEXT: SUB_INT T0.Y, PV.W, PS, -; EG-NEXT: SETGT_INT T1.Z, T1.Z, literal.x, -; EG-NEXT: SUB_INT T0.W, PV.Z, T2.W, -; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W, +; EG-NEXT: XOR_INT T0.Y, PV.W, PS, +; EG-NEXT: XOR_INT T0.Z, PV.Z, PS, +; EG-NEXT: SUB_INT T0.W, PV.Y, T2.W, +; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W, +; EG-NEXT: SUB_INT T1.Y, PV.W, PS, +; EG-NEXT: SETGT_INT T1.Z, T3.Y, literal.x, +; EG-NEXT: SUB_INT T0.W, PV.Z, T3.W, +; EG-NEXT: 
SUBB_UINT * T4.W, PV.Y, T3.W, ; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.Z, PV.W, PS, -; EG-NEXT: SETGT_INT T0.W, T0.Z, literal.x, -; EG-NEXT: CNDE_INT * T3.W, PV.Z, 0.0, PV.Y, BS:VEC_021/SCL_122 +; EG-NEXT: SUB_INT T0.Z, PV.W, PS, +; EG-NEXT: SETGT_INT T0.W, T1.W, literal.x, +; EG-NEXT: CNDE_INT * T1.W, PV.Z, 0.0, PV.Y, BS:VEC_021/SCL_122 ; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T3.Y, PV.W, 0.0, PV.Z, -; EG-NEXT: SUB_INT * T1.W, T0.X, T1.W, -; EG-NEXT: CNDE_INT T3.Z, T1.Z, 0.0, PV.W, -; EG-NEXT: SUB_INT * T1.W, T1.Y, T2.W, -; EG-NEXT: CNDE_INT T3.X, T0.W, 0.0, PV.W, +; EG-NEXT: CNDE_INT T1.Y, PV.W, 0.0, PV.Z, +; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W, +; EG-NEXT: CNDE_INT T1.Z, T1.Z, 0.0, PV.W, +; EG-NEXT: SUB_INT * T2.W, T0.Y, T3.W, +; EG-NEXT: CNDE_INT T1.X, T0.W, 0.0, PV.W, ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %conv = fptoui <2 x float> %x to <2 x i64> @@ -1114,175 +1111,171 @@ ; ; EG-LABEL: fp_to_uint_v4f32_to_v4i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[] -; EG-NEXT: ALU 64, @106, KC0[CB0:0-32], KC1[] -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 0 -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T2.X, 1 +; EG-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 58, @108, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 6: ; EG-NEXT: MOV * T0.W, literal.x, ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) -; EG-NEXT: BFE_UINT T1.W, KC0[3].Z, literal.x, PV.W, -; EG-NEXT: AND_INT * T2.W, KC0[3].Z, literal.y, -; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) -; EG-NEXT: ADD_INT T0.Z, PV.W, literal.x, -; EG-NEXT: SUB_INT T3.W, literal.y, PV.W, -; EG-NEXT: OR_INT * T2.W, PS, literal.z, -; EG-NEXT: -127(nan), 181(2.536350e-43) -; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) -; EG-NEXT: BFE_UINT T0.X, KC0[4].X, literal.x, T0.W, -; EG-NEXT: AND_INT T0.Y, KC0[4].X, literal.y, -; EG-NEXT: ADD_INT T1.Z, T1.W, literal.z, -; EG-NEXT: ADD_INT T4.W, T1.W, literal.w, -; EG-NEXT: LSHR * T3.W, PS, PV.W, +; EG-NEXT: BFE_UINT T1.W, KC0[4].X, literal.x, PV.W, +; EG-NEXT: AND_INT * T2.W, KC0[4].X, literal.y, ; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) -; EG-NEXT: -150(nan), -182(nan) -; EG-NEXT: LSHR T1.X, PS, 1, -; EG-NEXT: LSHL T1.Y, T2.W, PV.W, -; EG-NEXT: SETGT_UINT T2.Z, PV.Z, literal.x, -; EG-NEXT: OR_INT T3.W, PV.Y, literal.y, -; EG-NEXT: ADD_INT * T4.W, PV.X, literal.z, -; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38) +; EG-NEXT: OR_INT T0.Z, PS, literal.x, +; EG-NEXT: BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W, +; EG-NEXT: ADD_INT * T3.W, PV.W, literal.z, +; EG-NEXT: 8388608(1.175494e-38), 23(3.222986e-44) ; EG-NEXT: -150(nan), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.X, literal.x, T1.W, -; EG-NEXT: SETGT_UINT T0.Y, PS, literal.y, -; EG-NEXT: LSHL T3.Z, PV.W, PS, -; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, PV.Y, -; EG-NEXT: SETGT_INT * T4.W, T0.Z, literal.z, -; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44) -; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T1.Y, PS, 0.0, PV.W, -; EG-NEXT: CNDE_INT T3.Z, PV.Y, PV.Z, 0.0, -; EG-NEXT: SETGT_UINT T1.W, PV.X, literal.x, -; EG-NEXT: SUB_INT * T5.W, literal.y, T0.X, -; EG-NEXT: 31(4.344025e-44), 181(2.536350e-43) -; EG-NEXT: LSHR T1.X, T2.W, T2.X, -; EG-NEXT: LSHL T2.Y, T2.W, T1.Z, -; EG-NEXT: SUB_INT T1.Z, literal.x, T0.X, BS:VEC_021/SCL_122 -; EG-NEXT: ADD_INT T2.W, T0.X, 
literal.y, -; EG-NEXT: LSHR * T5.W, T3.W, PS, -; EG-NEXT: 150(2.101948e-43), -182(nan) -; EG-NEXT: ADD_INT T0.X, T0.X, literal.x, -; EG-NEXT: LSHR T3.Y, PS, 1, -; EG-NEXT: LSHL T4.Z, T3.W, PV.W, -; EG-NEXT: SETGT_UINT T2.W, PV.Z, literal.y, -; EG-NEXT: LSHR * T3.W, T3.W, PV.Z, +; EG-NEXT: ADD_INT T0.Y, PV.W, literal.x, +; EG-NEXT: AND_INT T1.Z, PS, literal.y, +; EG-NEXT: NOT_INT T4.W, PS, +; EG-NEXT: LSHR * T5.W, PV.Z, 1, ; EG-NEXT: -127(nan), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T2.X, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T0.Y, T0.Y, PV.Y, PV.Z, -; EG-NEXT: SETGT_INT T1.Z, PV.X, literal.x, -; EG-NEXT: CNDE_INT T2.W, T2.Z, T2.Y, 0.0, -; EG-NEXT: CNDE_INT * T1.W, T1.W, T1.X, 0.0, +; EG-NEXT: ADD_INT T0.X, T1.W, literal.x, +; EG-NEXT: BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W, +; EG-NEXT: AND_INT T2.Z, T3.W, literal.y, BS:VEC_201 +; EG-NEXT: LSHL T3.W, T0.Z, PV.Z, +; EG-NEXT: SUB_INT * T1.W, literal.z, T1.W, +; EG-NEXT: -127(nan), 32(4.484155e-44) +; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.X, PS, literal.x, +; EG-NEXT: BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS, +; EG-NEXT: AND_INT T0.Z, KC0[3].Z, literal.y, +; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.Y, PV.W, +; EG-NEXT: SETGT_INT * T4.W, PV.X, literal.z, +; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38) +; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T2.X, PS, 0.0, PV.W, +; EG-NEXT: OR_INT T1.Y, PV.Z, literal.x, +; EG-NEXT: ADD_INT T0.Z, T2.W, literal.y, +; EG-NEXT: CNDE_INT T1.W, PV.X, PV.Y, 0.0, +; EG-NEXT: CNDE_INT * T3.W, T2.Z, T3.W, 0.0, +; EG-NEXT: 8388608(1.175494e-38), -150(nan) +; EG-NEXT: CNDE_INT T1.X, T4.W, PV.W, PS, +; EG-NEXT: ASHR T2.Y, KC0[4].X, literal.x, +; EG-NEXT: AND_INT T1.Z, PV.Z, literal.x, +; EG-NEXT: NOT_INT T1.W, PV.Z, +; EG-NEXT: LSHR * T3.W, PV.Y, 1, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W, +; EG-NEXT: LSHL T3.Y, T1.Y, PV.Z, +; EG-NEXT: XOR_INT T1.Z, PV.X, PV.Y, +; EG-NEXT: XOR_INT T1.W, T2.X, PV.Y, +; EG-NEXT: SUB_INT * T2.W, literal.x, T2.W, +; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.X, T0.Z, literal.x, +; EG-NEXT: AND_INT T4.Y, PS, literal.x, +; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122 +; EG-NEXT: SUB_INT T1.W, PV.W, T2.Y, +; EG-NEXT: SUBB_UINT * T2.W, PV.Z, T2.Y, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: SUB_INT T2.X, PV.W, PS, +; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0, +; EG-NEXT: CNDE_INT T0.Z, PV.X, T3.Y, 0.0, +; EG-NEXT: CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122 +; EG-NEXT: SETGT_INT * T2.W, T0.Y, literal.x, ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T1.X, T4.W, PS, PV.W, -; EG-NEXT: ASHR T2.Y, KC0[3].Z, literal.x, -; EG-NEXT: CNDE_INT T2.Z, PV.Z, 0.0, PV.Y, -; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T3.Z, -; EG-NEXT: ASHR * T2.W, KC0[4].X, literal.x, +; EG-NEXT: BFE_UINT T1.X, KC0[3].W, literal.x, T0.W, +; EG-NEXT: AND_INT T3.Y, KC0[3].W, literal.y, +; EG-NEXT: CNDE_INT T2.Z, PS, 0.0, PV.W, +; EG-NEXT: CNDE_INT T1.W, PS, PV.Y, PV.Z, +; EG-NEXT: ASHR * T2.W, KC0[3].Z, literal.z, +; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38) ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: BFE_UINT T2.X, KC0[3].Y, literal.x, T0.W, -; EG-NEXT: XOR_INT T0.Y, PV.W, PS, -; EG-NEXT: XOR_INT T1.Z, PV.Z, PS, -; EG-NEXT: XOR_INT T1.W, PV.X, PV.Y, -; EG-NEXT: XOR_INT * T3.W, T1.Y, PV.Y, +; EG-NEXT: BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W, +; EG-NEXT: XOR_INT T1.Y, PV.W, PS, +; EG-NEXT: XOR_INT T0.Z, PV.Z, PS, +; EG-NEXT: OR_INT T0.W, PV.Y, 
literal.y, +; EG-NEXT: SUB_INT * T1.W, literal.z, PV.X, +; EG-NEXT: 23(3.222986e-44), 8388608(1.175494e-38) +; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00) +; EG-NEXT: AND_INT T4.X, KC0[3].Y, literal.x, +; EG-NEXT: AND_INT T3.Y, PS, literal.y, +; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS, +; EG-NEXT: SUB_INT T1.W, PV.Z, T2.W, +; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W, +; EG-NEXT: 8388607(1.175494e-38), 32(4.484155e-44) +; EG-NEXT: SUB_INT T5.X, PV.W, PS, +; EG-NEXT: SETGT_INT T0.Y, T0.Y, literal.x, +; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0, +; EG-NEXT: OR_INT T1.W, PV.X, literal.y, +; EG-NEXT: ADD_INT * T3.W, T3.X, literal.z, +; EG-NEXT: -1(nan), 8388608(1.175494e-38) +; EG-NEXT: -150(nan), 0(0.000000e+00) +; EG-NEXT: ADD_INT T4.X, T3.X, literal.x, +; EG-NEXT: SUB_INT T3.Y, literal.y, T3.X, +; EG-NEXT: AND_INT T2.Z, PS, literal.z, +; EG-NEXT: NOT_INT T4.W, PS, +; EG-NEXT: LSHR * T5.W, PV.W, 1, +; EG-NEXT: -127(nan), 150(2.101948e-43) +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W, +; EG-NEXT: LSHL T4.Y, T1.W, PV.Z, +; EG-NEXT: AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122 +; EG-NEXT: AND_INT * T3.W, PV.Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: ADD_INT T6.X, T1.X, literal.x, +; EG-NEXT: CNDE_INT * T3.Y, PS, PV.W, 0.0, +; EG-NEXT: -150(nan), 0(0.000000e+00) +; EG-NEXT: ALU clause starting at 108: +; EG-NEXT: CNDE_INT T3.Z, T2.Z, T4.Y, 0.0, +; EG-NEXT: CNDE_INT T1.W, T2.Z, T3.X, T4.Y, +; EG-NEXT: SETGT_INT * T3.W, T4.X, literal.x, ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: AND_INT T1.X, KC0[3].Y, literal.x, -; EG-NEXT: SUB_INT T1.Y, PS, T2.Y, -; EG-NEXT: SUBB_UINT T2.Z, PV.W, T2.Y, -; EG-NEXT: SUB_INT T3.W, PV.Z, T2.W, -; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T2.W, -; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00) -; EG-NEXT: SUB_INT T3.Y, PV.W, PS, -; EG-NEXT: SUB_INT T1.Z, PV.Y, PV.Z, -; EG-NEXT: OR_INT T3.W, PV.X, literal.x, -; EG-NEXT: SUB_INT * T4.W, literal.y, T2.X, -; EG-NEXT: 8388608(1.175494e-38), 150(2.101948e-43) -; EG-NEXT: SETGT_INT T1.X, T0.Z, literal.x, -; EG-NEXT: SETGT_UINT T1.Y, PS, literal.y, -; EG-NEXT: LSHR T0.Z, PV.W, PS, -; EG-NEXT: SUB_INT T4.W, literal.z, T2.X, -; EG-NEXT: AND_INT * T5.W, KC0[3].W, literal.w, -; EG-NEXT: -1(nan), 31(4.344025e-44) -; EG-NEXT: 181(2.536350e-43), 8388607(1.175494e-38) -; EG-NEXT: OR_INT T3.X, PS, literal.x, -; EG-NEXT: ADD_INT T4.Y, T2.X, literal.y, -; EG-NEXT: ADD_INT T2.Z, T2.X, literal.z, -; EG-NEXT: BFE_UINT T0.W, KC0[3].W, literal.w, T0.W, BS:VEC_021/SCL_122 -; EG-NEXT: LSHR * T4.W, T3.W, PV.W, -; EG-NEXT: 8388608(1.175494e-38), -150(nan) -; EG-NEXT: -182(nan), 23(3.222986e-44) -; EG-NEXT: ADD_INT T4.X, PV.W, literal.x, -; EG-NEXT: ADD_INT T5.Y, T2.X, literal.y, -; EG-NEXT: LSHR T3.Z, PS, 1, -; EG-NEXT: LSHL T4.W, T3.W, PV.Z, -; EG-NEXT: SETGT_UINT * T5.W, PV.Y, literal.z, -; EG-NEXT: -150(nan), -127(nan) +; EG-NEXT: CNDE_INT T3.X, PS, 0.0, PV.W, +; EG-NEXT: CNDE_INT T3.Y, PS, T3.Y, PV.Z, +; EG-NEXT: AND_INT T2.Z, T6.X, literal.x, +; EG-NEXT: NOT_INT T1.W, T6.X, +; EG-NEXT: LSHR * T3.W, T0.W, 1, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHL T2.X, T3.W, T4.Y, -; EG-NEXT: CNDE_INT * T4.Y, PS, PV.Z, PV.W, -; EG-NEXT: ALU clause starting at 106: -; EG-NEXT: SETGT_INT T2.Z, T5.Y, literal.x, -; EG-NEXT: SETGT_UINT T3.W, T4.X, literal.y, -; EG-NEXT: LSHL * T4.W, T3.X, T4.X, -; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T4.X, PV.W, PS, 0.0, -; 
EG-NEXT: CNDE_INT T4.Y, PV.Z, 0.0, T4.Y, BS:VEC_021/SCL_122 -; EG-NEXT: SUB_INT T3.Z, literal.x, T0.W, -; EG-NEXT: CNDE_INT T4.W, T5.W, T2.X, 0.0, -; EG-NEXT: CNDE_INT * T5.W, T1.Y, T0.Z, 0.0, -; EG-NEXT: 181(2.536350e-43), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T2.X, T2.Z, PS, PV.W, -; EG-NEXT: ASHR T1.Y, KC0[3].Y, literal.x, -; EG-NEXT: SUB_INT T0.Z, literal.y, T0.W, -; EG-NEXT: ADD_INT T4.W, T0.W, literal.z, -; EG-NEXT: LSHR * T5.W, T3.X, PV.Z, -; EG-NEXT: 31(4.344025e-44), 150(2.101948e-43) -; EG-NEXT: -182(nan), 0(0.000000e+00) -; EG-NEXT: ADD_INT T5.X, T0.W, literal.x, -; EG-NEXT: LSHR T6.Y, PS, 1, -; EG-NEXT: LSHL T2.Z, T3.X, PV.W, -; EG-NEXT: SETGT_UINT T0.W, PV.Z, literal.y, -; EG-NEXT: LSHR * T4.W, T3.X, PV.Z, -; EG-NEXT: -127(nan), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T3.X, PV.W, PS, 0.0, -; EG-NEXT: CNDE_INT T6.Y, T3.W, PV.Y, PV.Z, -; EG-NEXT: SETGT_INT T0.Z, PV.X, literal.x, -; EG-NEXT: XOR_INT T0.W, T2.X, T1.Y, -; EG-NEXT: XOR_INT * T3.W, T4.Y, T1.Y, +; EG-NEXT: ASHR T7.X, KC0[3].Y, literal.x, +; EG-NEXT: ADD_INT T4.Y, T1.X, literal.y, +; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W, +; EG-NEXT: LSHL T0.W, T0.W, PV.Z, +; EG-NEXT: AND_INT * T1.W, T6.X, literal.z, +; EG-NEXT: 31(4.344025e-44), -127(nan) +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0, +; EG-NEXT: CNDE_INT T5.Y, PS, PV.Z, PV.W, +; EG-NEXT: SETGT_INT T2.Z, PV.Y, literal.x, +; EG-NEXT: XOR_INT T0.W, T3.Y, PV.X, +; EG-NEXT: XOR_INT * T1.W, T3.X, PV.X, ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.X, PS, T1.Y, -; EG-NEXT: SUBB_UINT T4.Y, PV.W, T1.Y, -; EG-NEXT: CNDE_INT T2.Z, PV.Z, 0.0, PV.Y, -; EG-NEXT: CNDE_INT T3.W, PV.Z, PV.X, T4.X, -; EG-NEXT: ASHR * T4.W, KC0[3].W, literal.x, +; EG-NEXT: SUB_INT T3.X, PS, T7.X, +; EG-NEXT: SUBB_UINT T3.Y, PV.W, T7.X, +; EG-NEXT: CNDE_INT T3.Z, PV.Z, 0.0, PV.Y, +; EG-NEXT: CNDE_INT T1.W, PV.Z, T0.Z, PV.X, +; EG-NEXT: ASHR * T3.W, KC0[3].W, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: XOR_INT T3.X, PV.W, PS, -; EG-NEXT: XOR_INT T6.Y, PV.Z, PS, +; EG-NEXT: XOR_INT T1.X, PV.W, PS, +; EG-NEXT: XOR_INT T5.Y, PV.Z, PS, ; EG-NEXT: SUB_INT T0.Z, PV.X, PV.Y, -; EG-NEXT: SETGT_INT T3.W, T5.Y, literal.x, -; EG-NEXT: CNDE_INT * T5.W, T1.X, 0.0, T1.Z, BS:VEC_021/SCL_122 +; EG-NEXT: SETGT_INT T1.W, T4.X, literal.x, +; EG-NEXT: CNDE_INT * T6.W, T0.Y, 0.0, T5.X, BS:VEC_021/SCL_122 ; EG-NEXT: -1(nan), 0(0.000000e+00) ; EG-NEXT: SETGT_INT T0.X, T0.X, literal.x, -; EG-NEXT: CNDE_INT T5.Y, PV.W, 0.0, PV.Z, -; EG-NEXT: SUB_INT T0.Z, T1.W, T2.Y, -; EG-NEXT: SUB_INT T1.W, PV.Y, T4.W, -; EG-NEXT: SUBB_UINT * T6.W, PV.X, T4.W, +; EG-NEXT: CNDE_INT T6.Y, PV.W, 0.0, PV.Z, +; EG-NEXT: SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122 +; EG-NEXT: SUB_INT T2.W, PV.Y, T3.W, +; EG-NEXT: SUBB_UINT * T4.W, PV.X, T3.W, ; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: SUB_INT T2.X, PV.W, PS, -; EG-NEXT: SETGT_INT T2.Y, T5.X, literal.x, -; EG-NEXT: CNDE_INT T5.Z, T1.X, 0.0, PV.Z, BS:VEC_120/SCL_212 -; EG-NEXT: SUB_INT T0.W, T0.W, T1.Y, -; EG-NEXT: CNDE_INT * T1.W, PV.X, 0.0, T3.Y, BS:VEC_021/SCL_122 +; EG-NEXT: SUB_INT T3.X, PV.W, PS, +; EG-NEXT: SETGT_INT T1.Y, T4.Y, literal.x, +; EG-NEXT: CNDE_INT T6.Z, T0.Y, 0.0, PV.Z, BS:VEC_120/SCL_212 +; EG-NEXT: SUB_INT T0.W, T0.W, T7.X, +; EG-NEXT: CNDE_INT * T4.W, PV.X, 0.0, T2.X, BS:VEC_021/SCL_122 ; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T5.X, T3.W, 0.0, PV.W, -; EG-NEXT: CNDE_INT T1.Y, PV.Y, 0.0, PV.X, -; EG-NEXT: SUB_INT T0.W, T0.Y, T2.W, +; EG-NEXT: CNDE_INT T6.X, T1.W, 0.0, 
PV.W, +; EG-NEXT: CNDE_INT T4.Y, PV.Y, 0.0, PV.X, +; EG-NEXT: SUB_INT T0.W, T1.Z, T2.Y, ; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T1.Z, T0.X, 0.0, PV.W, -; EG-NEXT: SUB_INT * T0.W, T3.X, T4.W, BS:VEC_120/SCL_212 -; EG-NEXT: CNDE_INT T1.X, T2.Y, 0.0, PV.W, +; EG-NEXT: CNDE_INT T4.Z, T0.X, 0.0, PV.W, +; EG-NEXT: SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212 +; EG-NEXT: CNDE_INT T4.X, T1.Y, 0.0, PV.W, ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) ; EG-NEXT: LSHR * T0.X, PV.W, literal.x, diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll --- a/llvm/test/CodeGen/AMDGPU/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.ll @@ -826,7 +826,7 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @6 -; EG-NEXT: ALU 15, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -835,20 +835,17 @@ ; EG-NEXT: ALU clause starting at 8: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 9: -; EG-NEXT: SUB_INT * T0.W, literal.x, T0.Z, +; EG-NEXT: AND_INT T1.Y, T0.Z, literal.x, +; EG-NEXT: LSHR T1.Z, T0.Y, 1, +; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1, +; EG-NEXT: NOT_INT * T1.W, T0.Z, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR * T0.W, T0.X, PV.W, -; EG-NEXT: ADD_INT T1.Z, T0.Z, literal.x, -; EG-NEXT: LSHR T0.W, PV.W, 1, -; EG-NEXT: LSHL * T1.W, T0.Y, T0.Z, -; EG-NEXT: -32(nan), 0(0.000000e+00) -; EG-NEXT: OR_INT T2.Z, PS, PV.W, -; EG-NEXT: LSHL T0.W, T0.X, PV.Z, -; EG-NEXT: SETGT_UINT * T1.W, T0.Z, literal.x, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.Y, PS, PV.Z, PV.W, -; EG-NEXT: LSHL * T0.W, T0.X, T0.Z, -; EG-NEXT: CNDE_INT T0.X, T1.W, PV.W, 0.0, +; EG-NEXT: BIT_ALIGN_INT T1.Z, PV.Z, PV.W, PS, +; EG-NEXT: LSHL T0.W, T0.X, PV.Y, +; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, +; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 @@ -904,8 +901,8 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 1 @6 -; EG-NEXT: ALU 28, @11, KC0[CB0:0-32], KC1[] -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1 +; EG-NEXT: ALU 22, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: Fetch clause starting at 6: @@ -914,33 +911,27 @@ ; EG-NEXT: ALU clause starting at 10: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 11: -; EG-NEXT: SUB_INT * T1.W, literal.x, T1.Z, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR * T1.W, T0.Z, PV.W, -; EG-NEXT: SUB_INT T2.Z, literal.x, T1.X, -; EG-NEXT: LSHR T1.W, PV.W, 1, -; EG-NEXT: LSHL * T0.W, T0.W, T1.Z, +; EG-NEXT: AND_INT T1.Y, T1.Z, literal.x, +; EG-NEXT: LSHR T2.Z, T0.W, 1, +; EG-NEXT: BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1, +; EG-NEXT: NOT_INT * T1.W, T1.Z, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: OR_INT T3.Z, PS, PV.W, -; EG-NEXT: LSHR T0.W, T0.X, PV.Z, -; EG-NEXT: ADD_INT * T1.W, T1.Z, literal.x, -; EG-NEXT: -32(nan), 0(0.000000e+00) -; EG-NEXT: LSHL T2.X, T0.Z, PS, -; EG-NEXT: SETGT_UINT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212 -; EG-NEXT: ADD_INT T2.Z, T1.X, literal.y, -; EG-NEXT: LSHR 
T0.W, PV.W, 1, -; EG-NEXT: LSHL * T1.W, T0.Y, T1.X, -; EG-NEXT: 31(4.344025e-44), -32(nan) -; EG-NEXT: OR_INT T0.Y, PS, PV.W, -; EG-NEXT: LSHL T2.Z, T0.X, PV.Z, -; EG-NEXT: SETGT_UINT T0.W, T1.X, literal.x, BS:VEC_120/SCL_212 -; EG-NEXT: CNDE_INT * T2.W, PV.Y, T3.Z, PV.X, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T2.Y, PV.W, PV.Y, PV.Z, -; EG-NEXT: LSHL * T1.W, T0.Z, T1.Z, -; EG-NEXT: CNDE_INT T2.Z, T1.Y, PV.W, 0.0, -; EG-NEXT: LSHL * T1.W, T0.X, T1.X, -; EG-NEXT: CNDE_INT T2.X, T0.W, PV.W, 0.0, +; EG-NEXT: BIT_ALIGN_INT T0.W, PV.Z, PV.W, PS, +; EG-NEXT: LSHL * T1.W, T0.Z, PV.Y, +; EG-NEXT: AND_INT T2.X, T1.Z, literal.x, +; EG-NEXT: AND_INT T1.Y, T1.X, literal.y, +; EG-NEXT: LSHR T0.Z, T0.Y, 1, +; EG-NEXT: BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1, +; EG-NEXT: NOT_INT * T3.W, T1.X, +; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44) +; EG-NEXT: BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS, +; EG-NEXT: LSHL T0.Z, T0.X, PV.Y, +; EG-NEXT: AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: CNDE_INT * T3.W, PV.X, T0.W, T1.W, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z, +; EG-NEXT: CNDE_INT * T3.Z, T2.X, T1.W, 0.0, +; EG-NEXT: CNDE_INT T3.X, T2.W, T0.Z, 0.0, ; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 @@ -1010,76 +1001,65 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 3 @6 -; EG-NEXT: ALU 58, @15, KC0[CB0:0-32], KC1[] -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 0 -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T4.X, 1 +; EG-NEXT: ALU 47, @15, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: Fetch clause starting at 6: ; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 48, #1 ; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 0, #1 -; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 16, #1 -; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 32, #1 +; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 32, #1 +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1 ; EG-NEXT: ALU clause starting at 14: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 15: -; EG-NEXT: SUB_INT * T0.W, literal.x, T1.Z, +; EG-NEXT: AND_INT T4.Z, T1.Z, literal.x, +; EG-NEXT: LSHR T1.W, T0.W, 1, +; EG-NEXT: NOT_INT * T3.W, T1.Z, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: SUB_INT T4.Z, literal.x, T0.Z, -; EG-NEXT: SUB_INT T1.W, literal.x, T0.X, -; EG-NEXT: LSHR * T0.W, T3.Z, PV.W, +; EG-NEXT: BIT_ALIGN_INT T4.X, T0.W, T0.Z, 1, +; EG-NEXT: AND_INT T1.Y, T3.Z, literal.x, BS:VEC_201 +; EG-NEXT: LSHR T5.Z, T2.W, 1, BS:VEC_120/SCL_212 +; EG-NEXT: BIT_ALIGN_INT T0.W, T2.W, T2.Z, 1, BS:VEC_102/SCL_221 +; EG-NEXT: NOT_INT * T2.W, T3.Z, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: SUB_INT T0.Y, literal.x, T1.X, -; EG-NEXT: LSHR T5.Z, PS, 1, -; EG-NEXT: LSHR T0.W, T2.X, PV.W, -; EG-NEXT: LSHR * T1.W, T2.Z, PV.Z, +; EG-NEXT: BIT_ALIGN_INT T3.Y, PV.Z, PV.W, PS, +; EG-NEXT: LSHL T2.Z, T2.Z, PV.Y, +; EG-NEXT: BIT_ALIGN_INT T0.W, T1.W, PV.X, T3.W, +; EG-NEXT: LSHL * T1.W, T0.Z, T4.Z, +; EG-NEXT: AND_INT T4.X, T1.Z, literal.x, +; EG-NEXT: AND_INT T1.Y, T1.X, literal.y, +; EG-NEXT: LSHR T0.Z, T0.Y, 1, +; EG-NEXT: BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1, +; EG-NEXT: NOT_INT * T3.W, T1.X, +; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44) +; EG-NEXT: AND_INT T5.X, T3.Z, literal.x, +; EG-NEXT: BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS, +; EG-NEXT: LSHL T0.Z, 
T0.X, PV.Y, +; EG-NEXT: AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: CNDE_INT * T4.W, PV.X, T0.W, T1.W, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.X, T3.X, literal.x, +; EG-NEXT: CNDE_INT T4.Y, PV.W, PV.Y, PV.Z, +; EG-NEXT: LSHR T1.Z, T2.Y, 1, +; EG-NEXT: BIT_ALIGN_INT T0.W, T2.Y, T2.X, 1, +; EG-NEXT: NOT_INT * T3.W, T3.X, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHL T4.X, T3.W, T1.Z, -; EG-NEXT: LSHR T1.Y, PS, 1, -; EG-NEXT: LSHL T4.Z, T2.W, T0.Z, BS:VEC_120/SCL_212 -; EG-NEXT: LSHR T0.W, PV.W, 1, -; EG-NEXT: LSHL * T1.W, T2.Y, T0.X, -; EG-NEXT: OR_INT T5.X, PS, PV.W, -; EG-NEXT: OR_INT T1.Y, PV.Z, PV.Y, -; EG-NEXT: OR_INT T4.Z, PV.X, T5.Z, -; EG-NEXT: LSHR T0.W, T3.X, T0.Y, -; EG-NEXT: ADD_INT * T1.W, T1.Z, literal.x, -; EG-NEXT: -32(nan), 0(0.000000e+00) -; EG-NEXT: LSHL T4.X, T3.Z, PS, -; EG-NEXT: SETGT_UINT T0.Y, T1.Z, literal.x, BS:VEC_120/SCL_212 -; EG-NEXT: ADD_INT T5.Z, T1.X, literal.y, -; EG-NEXT: LSHR T0.W, PV.W, 1, -; EG-NEXT: LSHL * T1.W, T3.Y, T1.X, -; EG-NEXT: 31(4.344025e-44), -32(nan) -; EG-NEXT: OR_INT T6.X, PS, PV.W, -; EG-NEXT: LSHL T2.Y, T3.X, PV.Z, -; EG-NEXT: SETGT_UINT T5.Z, T1.X, literal.x, BS:VEC_120/SCL_212 -; EG-NEXT: ADD_INT T0.W, T0.Z, literal.y, -; EG-NEXT: CNDE_INT * T3.W, PV.Y, T4.Z, PV.X, -; EG-NEXT: 31(4.344025e-44), -32(nan) -; EG-NEXT: LSHL T4.X, T2.Z, PV.W, -; EG-NEXT: CNDE_INT T3.Y, PV.Z, PV.X, PV.Y, -; EG-NEXT: SETGT_UINT * T4.Z, T0.Z, literal.x, BS:VEC_120/SCL_212 -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHL T0.W, T3.Z, T1.Z, -; EG-NEXT: ADD_INT * T1.W, T0.X, literal.x, -; EG-NEXT: -32(nan), 0(0.000000e+00) -; EG-NEXT: LSHL T6.X, T2.X, PS, -; EG-NEXT: SETGT_UINT T2.Y, T0.X, literal.x, BS:VEC_120/SCL_212 -; EG-NEXT: CNDE_INT * T3.Z, T0.Y, PV.W, 0.0, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHL T0.W, T3.X, T1.X, BS:VEC_120/SCL_212 -; EG-NEXT: CNDE_INT * T1.W, T4.Z, T1.Y, T4.X, -; EG-NEXT: CNDE_INT T3.X, T5.Z, PV.W, 0.0, -; EG-NEXT: CNDE_INT T1.Y, T2.Y, T5.X, T6.X, -; EG-NEXT: LSHL T0.W, T2.Z, T0.Z, BS:VEC_120/SCL_212 +; EG-NEXT: BIT_ALIGN_INT T1.X, PV.Z, PV.W, PS, +; EG-NEXT: LSHL T0.Y, T2.X, PV.X, +; EG-NEXT: CNDE_INT T4.Z, T4.X, T1.W, 0.0, BS:VEC_120/SCL_212 +; EG-NEXT: AND_INT * T0.W, T3.X, literal.x, BS:VEC_201 +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT * T1.W, T5.X, T3.Y, T2.Z, +; EG-NEXT: CNDE_INT T4.X, T2.W, T0.Z, 0.0, +; EG-NEXT: CNDE_INT T1.Y, T0.W, T1.X, T0.Y, BS:VEC_120/SCL_212 ; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x, ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) -; EG-NEXT: LSHR T4.X, PS, literal.x, -; EG-NEXT: CNDE_INT T1.Z, T4.Z, PV.W, 0.0, -; EG-NEXT: LSHL * T0.W, T2.X, T0.X, +; EG-NEXT: LSHR T0.X, PV.W, literal.x, +; EG-NEXT: CNDE_INT T1.Z, T5.X, T2.Z, 0.0, +; EG-NEXT: CNDE_INT * T1.X, T0.W, T0.Y, 0.0, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T1.X, T2.Y, PV.W, 0.0, -; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 %a = load <4 x i64>, <4 x i64> addrspace(1)* %in @@ -1233,27 +1213,22 @@ ; ; EG-LABEL: s_shl_constant_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 17, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SUB_INT * T0.W, literal.x, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 
-; EG-NEXT: LSHR * T0.W, literal.x, PV.W, -; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: ADD_INT T0.Z, KC0[2].W, literal.x, -; EG-NEXT: LSHR T0.W, PV.W, 1, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: -32(nan), 65535(9.183409e-41) -; EG-NEXT: OR_INT T1.Z, PS, PV.W, -; EG-NEXT: LSHL T0.W, literal.x, PV.Z, -; EG-NEXT: SETGT_UINT * T1.W, KC0[2].W, literal.y, -; EG-NEXT: -1(nan), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T0.Y, PS, PV.Z, PV.W, -; EG-NEXT: LSHL * T0.W, literal.x, KC0[2].W, -; EG-NEXT: -1(nan), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, T1.W, PV.W, 0.0, +; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, +; EG-NEXT: MOV T0.W, literal.y, +; EG-NEXT: NOT_INT * T1.W, KC0[2].W, +; EG-NEXT: 31(4.344025e-44), -1(nan) +; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS, +; EG-NEXT: LSHL T0.W, literal.y, PV.Z, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z, +; EG-NEXT: 32767(4.591635e-41), -1(nan) +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, +; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %shl = shl i64 281474976710655, %a @@ -1304,7 +1279,7 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @6 -; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -1313,22 +1288,17 @@ ; EG-NEXT: ALU clause starting at 8: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 9: -; EG-NEXT: SUB_INT * T0.W, literal.x, T0.X, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR * T0.W, literal.x, PV.W, -; EG-NEXT: -1424379385(-5.460358e-13), 0(0.000000e+00) -; EG-NEXT: ADD_INT T0.Z, T0.X, literal.x, -; EG-NEXT: LSHR T0.W, PV.W, 1, -; EG-NEXT: LSHL * T1.W, literal.y, T0.X, -; EG-NEXT: -32(nan), 286(4.007714e-43) -; EG-NEXT: OR_INT T1.Z, PS, PV.W, -; EG-NEXT: SETGT_UINT T0.W, T0.X, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, PV.Z, -; EG-NEXT: 31(4.344025e-44), -1424379385(-5.460358e-13) -; EG-NEXT: CNDE_INT T0.Y, PV.W, PV.Z, PS, -; EG-NEXT: LSHL * T1.W, literal.x, T0.X, -; EG-NEXT: -1424379385(-5.460358e-13), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, T0.W, PV.W, 0.0, +; EG-NEXT: NOT_INT T0.Z, T0.X, +; EG-NEXT: MOV T0.W, literal.x, +; EG-NEXT: AND_INT * T1.W, T0.X, literal.y, +; EG-NEXT: 1435293955(1.935796e+13), 31(4.344025e-44) +; EG-NEXT: LSHL T1.Z, literal.x, PS, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.y, PV.W, PV.Z, +; EG-NEXT: AND_INT * T1.W, T0.X, literal.z, +; EG-NEXT: -1424379385(-5.460358e-13), 143(2.003857e-43) +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z, +; EG-NEXT: CNDE_INT T0.X, T1.W, T1.Z, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %a = load i64, i64 addrspace(1)* %aptr, align 8 @@ -1380,7 +1350,7 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @6 -; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -1389,19 +1359,16 @@ ; EG-NEXT: ALU clause starting at 8: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 9: -; EG-NEXT: SUB_INT T0.W, literal.x, T0.X, -; EG-NEXT: ADD_INT * T1.W, T0.X, literal.y, -; EG-NEXT: 31(4.344025e-44), -32(nan) -; EG-NEXT: LSHR * T0.W, literal.x, PV.W, -; EG-NEXT: 1234567(1.729997e-39), 
0(0.000000e+00) -; EG-NEXT: LSHR T0.Z, PV.W, 1, -; EG-NEXT: LSHL T0.W, literal.x, T1.W, -; EG-NEXT: SETGT_UINT * T1.W, T0.X, literal.y, -; EG-NEXT: 1234567(1.729997e-39), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T0.Y, PS, PV.Z, PV.W, -; EG-NEXT: LSHL * T0.W, literal.x, T0.X, -; EG-NEXT: 1234567(1.729997e-39), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, T1.W, PV.W, 0.0, +; EG-NEXT: AND_INT T0.W, T0.X, literal.x, +; EG-NEXT: NOT_INT * T1.W, T0.X, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS, +; EG-NEXT: LSHL T0.W, literal.y, PV.W, +; EG-NEXT: AND_INT * T1.W, T0.X, literal.z, +; EG-NEXT: 617283(8.649977e-40), 1234567(1.729997e-39) +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, +; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %a = load i64, i64 addrspace(1)* %aptr, align 8 @@ -1449,7 +1416,7 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @6 -; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -1458,19 +1425,15 @@ ; EG-NEXT: ALU clause starting at 8: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 9: -; EG-NEXT: SUB_INT T0.W, literal.x, T0.X, -; EG-NEXT: ADD_INT * T1.W, T0.X, literal.y, -; EG-NEXT: 31(4.344025e-44), -32(nan) -; EG-NEXT: LSHR * T0.W, literal.x, PV.W, -; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) -; EG-NEXT: LSHR T0.Z, PV.W, 1, -; EG-NEXT: LSHL T0.W, literal.x, T1.W, -; EG-NEXT: SETGT_UINT * T1.W, T0.X, literal.y, -; EG-NEXT: 64(8.968310e-44), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T0.Y, PS, PV.Z, PV.W, -; EG-NEXT: LSHL * T0.W, literal.x, T0.X, -; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, T1.W, PV.W, 0.0, +; EG-NEXT: AND_INT T0.W, T0.X, literal.x, +; EG-NEXT: NOT_INT * T1.W, T0.X, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS, +; EG-NEXT: LSHL T0.W, literal.y, PV.W, +; EG-NEXT: AND_INT * T1.W, T0.X, literal.x, +; EG-NEXT: 32(4.484155e-44), 64(8.968310e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, +; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %a = load i64, i64 addrspace(1)* %aptr, align 8 @@ -1508,24 +1471,20 @@ ; ; EG-LABEL: s_shl_inline_imm_64_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 14, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SUB_INT * T0.W, literal.x, KC0[2].W, +; EG-NEXT: NOT_INT T0.W, KC0[2].W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR T0.W, literal.x, PV.W, -; EG-NEXT: ADD_INT * T1.W, KC0[2].W, literal.y, -; EG-NEXT: 64(8.968310e-44), -32(nan) ; EG-NEXT: LSHL T0.Z, literal.x, PS, -; EG-NEXT: LSHR T0.W, PV.W, 1, -; EG-NEXT: SETGT_UINT * T1.W, KC0[2].W, literal.y, -; EG-NEXT: 64(8.968310e-44), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, PV.Z, -; EG-NEXT: LSHL * T0.W, literal.x, KC0[2].W, -; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, T1.W, PV.W, 0.0, +; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 64(8.968310e-44), 32(4.484155e-44) +; 
EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z, +; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %shl = shl i64 64, %a @@ -1562,20 +1521,23 @@ ; ; EG-LABEL: s_shl_inline_imm_1_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: ADD_INT T0.Z, KC0[2].W, literal.x, -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.y, -; EG-NEXT: LSHL * T1.W, 1, KC0[2].W, -; EG-NEXT: -32(nan), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T0.X, PV.W, PS, 0.0, -; EG-NEXT: LSHL T1.W, 1, PV.Z, +; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x, +; EG-NEXT: LSHL * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 31(4.344025e-44), 26(3.643376e-44) +; EG-NEXT: ASHR T1.W, PS, literal.x, +; EG-NEXT: LSHL * T0.W, 1, PV.W, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.Y, PV.W, PS, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T0.X, PV.W, T0.W, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; EG-NEXT: CNDE_INT * T0.Y, T0.W, 0.0, PV.W, %shl = shl i64 1, %a store i64 %shl, i64 addrspace(1)* %out, align 8 ret void @@ -1610,15 +1572,16 @@ ; ; EG-LABEL: s_shl_inline_imm_1_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), 1072693248(1.875000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, +; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 536346624(1.050321e-19), 32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -1656,15 +1619,16 @@ ; ; EG-LABEL: s_shl_inline_imm_neg_1_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), -1074790400(-1.875000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, +; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 1610088448(3.574057e+19), 32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -1702,15 +1666,16 @@ ; ; EG-LABEL: s_shl_inline_imm_0_5_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), 1071644672(1.750000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, 
+; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 535822336(1.016440e-19), 32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -1748,15 +1713,16 @@ ; ; EG-LABEL: s_shl_inline_imm_neg_0_5_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), -1075838976(-1.750000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, +; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 1609564160(3.458765e+19), 32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -1794,15 +1760,16 @@ ; ; EG-LABEL: s_shl_inline_imm_2_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), 1073741824(2.000000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, +; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 536870912(1.084202e-19), 32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -1840,15 +1807,16 @@ ; ; EG-LABEL: s_shl_inline_imm_neg_2_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), -1073741824(-2.000000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, +; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 1610612736(3.689349e+19), 32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -1886,15 +1854,16 @@ ; ; EG-LABEL: s_shl_inline_imm_4_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), 1074790400(2.250000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, +; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 537395200(1.151965e-19), 
32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -1932,15 +1901,16 @@ ; ; EG-LABEL: s_shl_inline_imm_neg_4_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), -1072693248(-2.250000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, +; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 1611137024(3.919933e+19), 32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -1985,24 +1955,21 @@ ; ; EG-LABEL: s_shl_inline_imm_f32_4_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 14, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SUB_INT * T0.W, literal.x, KC0[2].W, +; EG-NEXT: NOT_INT T0.W, KC0[2].W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR T0.W, literal.x, PV.W, -; EG-NEXT: ADD_INT * T1.W, KC0[2].W, literal.y, -; EG-NEXT: 1082130432(4.000000e+00), -32(nan) ; EG-NEXT: LSHL T0.Z, literal.x, PS, -; EG-NEXT: LSHR T0.W, PV.W, 1, -; EG-NEXT: SETGT_UINT * T1.W, KC0[2].W, literal.y, -; EG-NEXT: 1082130432(4.000000e+00), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, PV.Z, -; EG-NEXT: LSHL * T0.W, literal.x, KC0[2].W, -; EG-NEXT: 1082130432(4.000000e+00), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, T1.W, PV.W, 0.0, +; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z, +; EG-NEXT: 1082130432(4.000000e+00), 541065216(1.626303e-19) +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z, +; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %shl = shl i64 1082130432, %a @@ -2044,27 +2011,22 @@ ; ; EG-LABEL: s_shl_inline_imm_f32_neg_4_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 17, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SUB_INT * T0.W, literal.x, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR * T0.W, literal.x, PV.W, -; EG-NEXT: -1065353216(-4.000000e+00), 0(0.000000e+00) -; EG-NEXT: ADD_INT T0.Z, KC0[2].W, literal.x, -; EG-NEXT: LSHR T0.W, PV.W, 1, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: -32(nan), -1(nan) -; EG-NEXT: OR_INT T1.Z, PS, PV.W, -; EG-NEXT: LSHL T0.W, literal.x, PV.Z, -; EG-NEXT: SETGT_UINT * T1.W, KC0[2].W, literal.y, -; EG-NEXT: -1065353216(-4.000000e+00), 31(4.344025e-44) -; EG-NEXT: CNDE_INT T0.Y, PS, PV.Z, PV.W, -; EG-NEXT: LSHL * T0.W, literal.x, KC0[2].W, -; EG-NEXT: -1065353216(-4.000000e+00), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, T1.W, PV.W, 0.0, +; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, +; EG-NEXT: MOV T0.W, literal.y, +; EG-NEXT: NOT_INT * T1.W, 
KC0[2].W, +; EG-NEXT: 31(4.344025e-44), -532676608(-5.534023e+19) +; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS, +; EG-NEXT: LSHL T0.W, literal.y, PV.Z, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z, +; EG-NEXT: 2147483647(nan), -1065353216(-4.000000e+00) +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, +; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %shl = shl i64 -1065353216, %a @@ -2105,15 +2067,16 @@ ; ; EG-LABEL: s_shl_inline_high_imm_f32_4_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), 1082130432(4.000000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, +; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 541065216(1.626303e-19), 32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -2155,15 +2118,16 @@ ; ; EG-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64: ; EG: ; %bb.0: -; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.x, -; EG-NEXT: LSHL * T1.W, literal.y, KC0[2].W, -; EG-NEXT: 31(4.344025e-44), -1065353216(-4.000000e+00) -; EG-NEXT: CNDE_INT * T0.Y, PV.W, PS, 0.0, +; EG-NEXT: NOT_INT * T0.W, KC0[2].W, +; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, +; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, +; EG-NEXT: 1614807040(5.534023e+19), 32(4.484155e-44) +; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, ; EG-NEXT: MOV T0.X, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll --- a/llvm/test/CodeGen/AMDGPU/sra.ll +++ b/llvm/test/CodeGen/AMDGPU/sra.ll @@ -467,7 +467,7 @@ ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @6 -; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -476,24 +476,17 @@ ; EG-NEXT: ALU clause starting at 8: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 9: -; EG-NEXT: SUB_INT * T0.W, literal.x, T0.Z, +; EG-NEXT: AND_INT * T0.W, T0.Z, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHL * T0.W, T0.Y, PV.W, -; EG-NEXT: ADD_INT T1.Z, T0.Z, literal.x, -; EG-NEXT: LSHL T0.W, PV.W, 1, -; EG-NEXT: LSHR * T1.W, T0.X, T0.Z, -; EG-NEXT: -32(nan), 0(0.000000e+00) -; EG-NEXT: OR_INT T2.Z, PS, PV.W, -; EG-NEXT: ASHR T0.W, T0.Y, PV.Z, -; EG-NEXT: SETGT_UINT * T1.W, T0.Z, literal.x, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, PS, PV.Z, PV.W, +; EG-NEXT: ASHR T1.Z, T0.Y, PV.W, +; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, T0.Z, +; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T0.X, PS, PV.W, PV.Z, ; EG-NEXT: 
ASHR T0.W, T0.Y, literal.x, -; EG-NEXT: ASHR * T2.W, T0.Y, T0.Z, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, -; EG-NEXT: CNDE_INT * T0.Y, T1.W, PS, PV.W, -; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 2(2.802597e-45) +; EG-NEXT: CNDE_INT * T0.Y, T1.W, T1.Z, PV.W, entry: %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 %a = load i64, i64 addrspace(1)* %in @@ -548,8 +541,8 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 1 @6 -; EG-NEXT: ALU 32, @11, KC0[CB0:0-32], KC1[] -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1 +; EG-NEXT: ALU 19, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: Fetch clause starting at 6: @@ -558,38 +551,25 @@ ; EG-NEXT: ALU clause starting at 10: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 11: -; EG-NEXT: SUB_INT * T1.W, literal.x, T1.Z, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHL T1.W, T0.W, PV.W, -; EG-NEXT: SUB_INT * T2.W, literal.x, T1.X, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: ADD_INT T2.X, T1.X, literal.x, -; EG-NEXT: LSHL T1.Y, T0.Y, PS, -; EG-NEXT: ADD_INT T2.Z, T1.Z, literal.x, -; EG-NEXT: LSHL T1.W, PV.W, 1, -; EG-NEXT: LSHR * T2.W, T0.Z, T1.Z, -; EG-NEXT: -32(nan), 0(0.000000e+00) -; EG-NEXT: OR_INT T3.X, PS, PV.W, -; EG-NEXT: ASHR T2.Y, T0.W, PV.Z, -; EG-NEXT: SETGT_UINT T0.Z, T1.Z, literal.x, -; EG-NEXT: LSHL T1.W, PV.Y, 1, -; EG-NEXT: LSHR * T2.W, T0.X, T1.X, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: OR_INT T1.Y, PS, PV.W, -; EG-NEXT: CNDE_INT T2.Z, PV.Z, PV.X, PV.Y, -; EG-NEXT: ASHR T1.W, T0.Y, T2.X, -; EG-NEXT: SETGT_UINT * T3.W, T1.X, literal.x, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T2.X, PS, PV.Y, PV.W, -; EG-NEXT: ASHR T1.W, T0.W, literal.x, -; EG-NEXT: ASHR * T0.W, T0.W, T1.Z, +; EG-NEXT: AND_INT * T1.W, T1.Z, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: ASHR T1.Z, T0.Y, literal.x, -; EG-NEXT: ASHR T4.W, T0.Y, T1.X, -; EG-NEXT: CNDE_INT * T2.W, T0.Z, PS, PV.W, +; EG-NEXT: ASHR T1.Y, T0.W, PV.W, +; EG-NEXT: AND_INT T2.Z, T1.Z, literal.x, +; EG-NEXT: BIT_ALIGN_INT T1.W, T0.W, T0.Z, T1.Z, +; EG-NEXT: AND_INT * T2.W, T1.X, literal.y, +; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44) +; EG-NEXT: ASHR T2.Y, T0.Y, PS, +; EG-NEXT: CNDE_INT T0.Z, PV.Z, PV.W, PV.Y, +; EG-NEXT: BIT_ALIGN_INT T1.W, T0.Y, T0.X, T1.X, +; EG-NEXT: AND_INT * T2.W, T1.X, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T0.X, PS, PV.W, PV.Y, +; EG-NEXT: ASHR T0.W, T0.W, literal.x, +; EG-NEXT: ASHR * T1.W, T0.Y, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, -; EG-NEXT: CNDE_INT * T2.Y, T3.W, PV.W, PV.Z, +; EG-NEXT: CNDE_INT * T0.W, T2.Z, T1.Y, PV.W, +; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, +; EG-NEXT: CNDE_INT * T0.Y, T2.W, T2.Y, T1.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 %a = load <2 x i64>, <2 x i64> addrspace(1)* %in @@ -657,80 +637,57 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 3 @6 -; EG-NEXT: ALU 62, @15, KC0[CB0:0-32], KC1[] -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T1.X, 0 -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1 +; EG-NEXT: ALU 39, @15, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW 
T2.XYZW, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: Fetch clause starting at 6: ; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 32, #1 ; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 48, #1 -; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 16, #1 -; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 +; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 0, #1 +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1 ; EG-NEXT: ALU clause starting at 14: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 15: -; EG-NEXT: SUB_INT * T1.W, literal.x, T1.Z, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: ASHR T4.Z, T0.W, literal.x, -; EG-NEXT: ASHR T2.W, T0.W, T1.Z, -; EG-NEXT: LSHL * T1.W, T0.W, PV.W, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: ASHR T4.X, T3.W, literal.x, -; EG-NEXT: ASHR T1.Y, T3.W, T2.Z, BS:VEC_102/SCL_221 -; EG-NEXT: SUB_INT T5.Z, literal.x, T1.X, -; EG-NEXT: LSHL T1.W, PS, 1, -; EG-NEXT: LSHR * T4.W, T0.Z, T1.Z, +; EG-NEXT: AND_INT * T1.W, T1.Z, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: ADD_INT T5.X, T1.X, literal.x, -; EG-NEXT: OR_INT T2.Y, PS, PV.W, -; EG-NEXT: SUB_INT T0.Z, literal.y, T2.Z, -; EG-NEXT: LSHL T1.W, T0.Y, PV.Z, -; EG-NEXT: ADD_INT * T4.W, T1.Z, literal.x, -; EG-NEXT: -32(nan), 31(4.344025e-44) -; EG-NEXT: ASHR T6.X, T0.W, PS, -; EG-NEXT: SETGT_UINT T4.Y, T1.Z, literal.x, -; EG-NEXT: LSHL T1.Z, PV.W, 1, -; EG-NEXT: LSHL T0.W, T3.W, PV.Z, BS:VEC_120/SCL_212 -; EG-NEXT: SUB_INT * T1.W, literal.x, T2.X, +; EG-NEXT: ASHR T1.Y, T0.W, literal.x, +; EG-NEXT: ASHR T4.Z, T3.W, PV.W, BS:VEC_120/SCL_212 +; EG-NEXT: AND_INT T1.W, T1.Z, literal.y, +; EG-NEXT: AND_INT * T2.W, T2.Z, literal.x, +; EG-NEXT: 31(4.344025e-44), 32(4.484155e-44) +; EG-NEXT: BIT_ALIGN_INT T4.X, T3.W, T3.Z, T1.Z, +; EG-NEXT: ASHR T2.Y, T0.W, PS, BS:VEC_120/SCL_212 +; EG-NEXT: AND_INT * T1.Z, T2.Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: BIT_ALIGN_INT T0.W, T0.W, T0.Z, T2.Z, +; EG-NEXT: AND_INT * T2.W, T2.X, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: ADD_INT T7.X, T2.X, literal.x, -; EG-NEXT: LSHL T5.Y, T3.Y, PS, -; EG-NEXT: ADD_INT T0.Z, T2.Z, literal.x, -; EG-NEXT: LSHL T0.W, PV.W, 1, -; EG-NEXT: LSHR * T1.W, T3.Z, T2.Z, -; EG-NEXT: -32(nan), 0(0.000000e+00) -; EG-NEXT: OR_INT T8.X, PS, PV.W, -; EG-NEXT: ASHR T6.Y, T3.W, PV.Z, -; EG-NEXT: SETGT_UINT T0.Z, T2.Z, literal.x, -; EG-NEXT: LSHL T0.W, PV.Y, 1, -; EG-NEXT: LSHR * T1.W, T3.X, T2.X, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR T0.X, T0.X, T1.X, -; EG-NEXT: OR_INT T5.Y, PS, PV.W, -; EG-NEXT: CNDE_INT T3.Z, PV.Z, PV.X, PV.Y, -; EG-NEXT: ASHR * T0.W, T3.Y, T7.X, BS:VEC_021/SCL_122 -; EG-NEXT: SETGT_UINT * T1.W, T2.X, literal.x, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T3.X, PV.W, T5.Y, T0.W, -; EG-NEXT: OR_INT T5.Y, T0.X, T1.Z, -; EG-NEXT: CNDE_INT * T5.Z, T4.Y, T2.Y, T6.X, BS:VEC_021/SCL_122 -; EG-NEXT: ASHR T0.W, T0.Y, T5.X, -; EG-NEXT: SETGT_UINT * T4.W, T1.X, literal.x, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T5.X, PS, T5.Y, PV.W, BS:VEC_021/SCL_122 -; EG-NEXT: ASHR T2.Y, T3.Y, literal.x, -; EG-NEXT: ASHR T1.Z, T3.Y, T2.X, -; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, -; EG-NEXT: CNDE_INT * T3.W, T0.Z, T1.Y, T4.X, +; EG-NEXT: AND_INT T5.X, T1.X, literal.x, +; EG-NEXT: ASHR T4.Y, T0.Y, PS, +; EG-NEXT: CNDE_INT T0.Z, T1.Z, PV.W, T2.Y, +; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, T2.X, +; EG-NEXT: AND_INT * T2.W, T2.X, literal.y, +; EG-NEXT: 31(4.344025e-44), 
32(4.484155e-44) +; EG-NEXT: CNDE_INT T0.X, PS, PV.W, PV.Y, +; EG-NEXT: ASHR T5.Y, T3.Y, PV.X, +; EG-NEXT: CNDE_INT T2.Z, T1.W, T4.X, T4.Z, +; EG-NEXT: BIT_ALIGN_INT T0.W, T3.Y, T3.X, T1.X, BS:VEC_102/SCL_221 +; EG-NEXT: AND_INT * T4.W, T1.X, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T2.X, PS, PV.W, PV.Y, +; EG-NEXT: ASHR T6.Y, T3.W, literal.x, +; EG-NEXT: ASHR T3.Z, T0.Y, literal.x, BS:VEC_201 +; EG-NEXT: ADD_INT T3.W, KC0[2].Y, literal.y, +; EG-NEXT: CNDE_INT * T0.W, T1.Z, T2.Y, T1.Y, ; EG-NEXT: 31(4.344025e-44), 16(2.242078e-44) -; EG-NEXT: LSHR T0.X, PV.W, literal.x, -; EG-NEXT: CNDE_INT T3.Y, T1.W, PV.Z, PV.Y, -; EG-NEXT: ASHR T0.Z, T0.Y, literal.y, -; EG-NEXT: ASHR T0.W, T0.Y, T1.X, -; EG-NEXT: CNDE_INT * T5.W, T4.Y, T2.W, T4.Z, +; EG-NEXT: LSHR T1.X, PV.W, literal.x, +; EG-NEXT: CNDE_INT T0.Y, T2.W, T4.Y, PV.Z, +; EG-NEXT: ASHR T3.W, T3.Y, literal.y, +; EG-NEXT: CNDE_INT * T2.W, T1.W, T4.Z, PV.Y, ; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) -; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, -; EG-NEXT: CNDE_INT * T5.Y, T4.W, PV.W, PV.Z, +; EG-NEXT: LSHR T3.X, KC0[2].Y, literal.x, +; EG-NEXT: CNDE_INT * T2.Y, T4.W, T5.Y, PV.W, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 %a = load <4 x i64>, <4 x i64> addrspace(1)* %in diff --git a/llvm/test/CodeGen/AMDGPU/srl.ll b/llvm/test/CodeGen/AMDGPU/srl.ll --- a/llvm/test/CodeGen/AMDGPU/srl.ll +++ b/llvm/test/CodeGen/AMDGPU/srl.ll @@ -239,7 +239,7 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @6 -; EG-NEXT: ALU 15, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -248,22 +248,16 @@ ; EG-NEXT: ALU clause starting at 8: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 9: -; EG-NEXT: SUB_INT * T0.W, literal.x, T0.Z, +; EG-NEXT: AND_INT * T0.W, T0.Z, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHL * T0.W, T0.Y, PV.W, -; EG-NEXT: ADD_INT T1.Z, T0.Z, literal.x, -; EG-NEXT: LSHL T0.W, PV.W, 1, -; EG-NEXT: LSHR * T1.W, T0.X, T0.Z, -; EG-NEXT: -32(nan), 0(0.000000e+00) -; EG-NEXT: OR_INT T2.Z, PS, PV.W, -; EG-NEXT: LSHR T0.W, T0.Y, PV.Z, -; EG-NEXT: SETGT_UINT * T1.W, T0.Z, literal.x, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, PS, PV.Z, PV.W, -; EG-NEXT: LSHR T0.W, T0.Y, T0.Z, +; EG-NEXT: LSHR T1.Z, T0.Y, PV.W, +; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, T0.Z, +; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T0.X, PS, PV.W, PV.Z, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; EG-NEXT: CNDE_INT * T0.Y, T1.W, PV.W, 0.0, +; EG-NEXT: CNDE_INT * T0.Y, T1.W, T1.Z, 0.0, %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 %a = load i64, i64 addrspace(1)* %in %b = load i64, i64 addrspace(1)* %b_ptr @@ -331,76 +325,53 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 3 @6 -; EG-NEXT: ALU 57, @15, KC0[CB0:0-32], KC1[] -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T1.X, 0 -; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1 +; EG-NEXT: ALU 34, @15, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: Fetch clause starting at 6: ; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 32, #1 -; EG-NEXT: VTX_READ_128 
T2.XYZW, T0.X, 48, #1 -; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 16, #1 +; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 16, #1 +; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 48, #1 ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 ; EG-NEXT: ALU clause starting at 14: ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 15: -; EG-NEXT: SUB_INT * T1.W, literal.x, T1.Z, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHL * T1.W, T0.W, PV.W, -; EG-NEXT: LSHR T1.Y, T3.W, T2.Z, BS:VEC_102/SCL_221 -; EG-NEXT: SUB_INT T4.Z, literal.x, T1.X, -; EG-NEXT: LSHL T1.W, PV.W, 1, -; EG-NEXT: LSHR * T2.W, T0.Z, T1.Z, +; EG-NEXT: AND_INT * T1.W, T1.Z, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: ADD_INT T4.X, T1.X, literal.x, -; EG-NEXT: OR_INT T2.Y, PS, PV.W, -; EG-NEXT: SUB_INT T0.Z, literal.y, T2.Z, -; EG-NEXT: LSHL T1.W, T0.Y, PV.Z, -; EG-NEXT: ADD_INT * T2.W, T1.Z, literal.x, -; EG-NEXT: -32(nan), 31(4.344025e-44) -; EG-NEXT: LSHR T5.X, T0.W, PS, -; EG-NEXT: SETGT_UINT T4.Y, T1.Z, literal.x, -; EG-NEXT: LSHL T4.Z, PV.W, 1, -; EG-NEXT: LSHL T1.W, T3.W, PV.Z, BS:VEC_120/SCL_212 -; EG-NEXT: SUB_INT * T2.W, literal.x, T2.X, +; EG-NEXT: LSHR T4.Z, T0.W, PV.W, +; EG-NEXT: AND_INT T1.W, T1.Z, literal.x, +; EG-NEXT: AND_INT * T3.W, T3.Z, literal.y, +; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44) +; EG-NEXT: BIT_ALIGN_INT T4.X, T0.W, T0.Z, T1.Z, +; EG-NEXT: LSHR T1.Y, T2.W, PS, BS:VEC_120/SCL_212 +; EG-NEXT: AND_INT * T0.Z, T3.Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: BIT_ALIGN_INT T0.W, T2.W, T2.Z, T3.Z, +; EG-NEXT: AND_INT * T2.W, T3.X, literal.x, ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: ADD_INT T6.X, T2.X, literal.x, -; EG-NEXT: LSHL T5.Y, T3.Y, PS, -; EG-NEXT: ADD_INT T0.Z, T2.Z, literal.x, -; EG-NEXT: LSHL T1.W, PV.W, 1, -; EG-NEXT: LSHR * T2.W, T3.Z, T2.Z, -; EG-NEXT: -32(nan), 0(0.000000e+00) -; EG-NEXT: OR_INT T7.X, PS, PV.W, -; EG-NEXT: LSHR T6.Y, T3.W, PV.Z, -; EG-NEXT: SETGT_UINT T0.Z, T2.Z, literal.x, -; EG-NEXT: LSHL T1.W, PV.Y, 1, -; EG-NEXT: LSHR * T2.W, T3.X, T2.X, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: LSHR T0.X, T0.X, T1.X, -; EG-NEXT: OR_INT T5.Y, PS, PV.W, -; EG-NEXT: CNDE_INT T3.Z, PV.Z, PV.X, PV.Y, -; EG-NEXT: LSHR * T1.W, T3.Y, T6.X, BS:VEC_021/SCL_122 -; EG-NEXT: SETGT_UINT * T2.W, T2.X, literal.x, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T3.X, PV.W, T5.Y, T1.W, -; EG-NEXT: OR_INT T5.Y, T0.X, T4.Z, -; EG-NEXT: CNDE_INT * T4.Z, T4.Y, T2.Y, T5.X, BS:VEC_021/SCL_122 -; EG-NEXT: LSHR T1.W, T0.Y, T4.X, -; EG-NEXT: SETGT_UINT * T5.W, T1.X, literal.x, -; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T4.X, PS, T5.Y, PV.W, BS:VEC_021/SCL_122 -; EG-NEXT: LSHR T2.Y, T0.W, T1.Z, -; EG-NEXT: LSHR T1.Z, T3.Y, T2.X, +; EG-NEXT: AND_INT T5.X, T1.X, literal.x, +; EG-NEXT: LSHR T3.Y, T2.Y, PS, +; EG-NEXT: CNDE_INT T2.Z, T0.Z, PV.W, T1.Y, +; EG-NEXT: BIT_ALIGN_INT T0.W, T2.Y, T2.X, T3.X, +; EG-NEXT: AND_INT * T3.W, T3.X, literal.y, +; EG-NEXT: 31(4.344025e-44), 32(4.484155e-44) +; EG-NEXT: CNDE_INT T2.X, PS, PV.W, PV.Y, +; EG-NEXT: LSHR T4.Y, T0.Y, PV.X, +; EG-NEXT: CNDE_INT T1.Z, T1.W, T4.X, T4.Z, +; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, T1.X, BS:VEC_102/SCL_221 +; EG-NEXT: AND_INT * T4.W, T1.X, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT T1.X, PS, PV.W, PV.Y, ; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, -; EG-NEXT: CNDE_INT * T3.W, T0.Z, T1.Y, 0.0, +; EG-NEXT: CNDE_INT * T2.W, T0.Z, T1.Y, 0.0, ; EG-NEXT: 16(2.242078e-44), 
0(0.000000e+00) ; EG-NEXT: LSHR T0.X, PV.W, literal.x, -; EG-NEXT: CNDE_INT T3.Y, T2.W, PV.Z, 0.0, -; EG-NEXT: LSHR T0.W, T0.Y, T1.X, -; EG-NEXT: CNDE_INT * T4.W, T4.Y, PV.Y, 0.0, -; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, -; EG-NEXT: CNDE_INT * T4.Y, T5.W, PV.W, 0.0, +; EG-NEXT: CNDE_INT T2.Y, T3.W, T3.Y, 0.0, +; EG-NEXT: CNDE_INT T1.W, T1.W, T4.Z, 0.0, BS:VEC_120/SCL_212 +; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: CNDE_INT * T1.Y, T4.W, T4.Y, 0.0, %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 %a = load <4 x i64>, <4 x i64> addrspace(1)* %in %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr