Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -615,17 +615,7 @@ } else Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); - // Assert that the converted value fits in the original type. If it doesn't - // (eg: because the value being converted is too big), then the result of the - // original operation was undefined anyway, so the assert is still correct. - // - // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: - // before legalization: fp-to-uint16, 65534. -> 0xfffe - // after legalization: fp-to-sint32, 65534. -> 0x0000fffe - return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || - N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? - ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, - DAG.getValueType(N->getValueType(0).getScalarType())); + return Res; } SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) { Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -702,17 +702,6 @@ } else Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0)); - // Assert that the converted value fits in the original type. If it doesn't - // (eg: because the value being converted is too big), then the result of the - // original operation was undefined anyway, so the assert is still correct. 
- if (Node->getOpcode() == ISD::FP_TO_UINT || - Node->getOpcode() == ISD::STRICT_FP_TO_UINT) - NewOpc = ISD::AssertZext; - else - NewOpc = ISD::AssertSext; - - Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted, - DAG.getValueType(VT.getScalarType())); Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); Results.push_back(Promoted); if (IsStrict) Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30749,20 +30749,6 @@ } else Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src); - // Preserve what we know about the size of the original result. If the - // result is v2i32, we have to manually widen the assert. - if (PromoteVT == MVT::v2i32) - Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res, - DAG.getUNDEF(MVT::v2i32)); - - Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, dl, - Res.getValueType(), Res, - DAG.getValueType(VT.getVectorElementType())); - - if (PromoteVT == MVT::v2i32) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, - DAG.getIntPtrConstant(0, dl)); - // Truncate back to the original width. 
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); Index: llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll +++ llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -23,9 +23,6 @@ ; CHECK-DAG: xtn v[[XTN1:[0-9]+]].2s, v[[CONV1]].2d ; CHECK-DAG: xtn v[[XTN2:[0-9]+]].2s, v[[CONV2]].2d ; CHECK-DAG: xtn v[[XTN3:[0-9]+]].2s, v[[CONV3]].2d -; CHECK-DAG: uzp1 v[[UZP0:[0-9]+]].4h, v[[XTN1]].4h, v[[XTN0]].4h -; CHECK-DAG: uzp1 v[[UZP1:[0-9]+]].4h, v[[XTN3]].4h, v[[XTN2]].4h -; CHECK: uzp1 v0.8b, v[[UZP1:[0-9]+]].8b, v[[UZP0:[0-9]+]].8b %tmp1 = load <8 x double>, <8 x double>* %ptr %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8> ret <8 x i8> %tmp2 Index: llvm/test/CodeGen/AArch64/fptouint-i8-zext.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptouint-i8-zext.ll +++ llvm/test/CodeGen/AArch64/fptouint-i8-zext.ll @@ -8,6 +8,7 @@ ; CHECK-LABEL: float_char_int_func: ; CHECK: fcvtzs [[A:w[0-9]+]], s0 +; CHECK-NEXT: and w0, [[A:w[0-9]+]], #0xff ; CHECK-NEXT: ret define i32 @float_char_int_func(float %infloatVal) { entry: Index: llvm/test/CodeGen/AMDGPU/fp_to_uint.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fp_to_uint.ll +++ llvm/test/CodeGen/AMDGPU/fp_to_uint.ll @@ -752,7 +752,7 @@ ; ; EG-LABEL: fp_to_uint_f32_to_i16: ; EG: ; %bb.0: -; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -760,11 +760,12 @@ ; EG-NEXT: TRUNC T0.W, KC0[2].Z, ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x, ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) -; EG-NEXT: LSHL T1.W, PS, literal.x, ; EG-NEXT: FLT_TO_UINT * T0.X, PV.W, -; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) -; EG-NEXT: LSHL T0.X, PS, PV.W, -; EG-NEXT: LSHL * T0.W, literal.x, PV.W, +; EG-NEXT: AND_INT T0.W, PS, 
literal.x, +; EG-NEXT: LSHL * T1.W, T1.W, literal.y, +; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) ; EG-NEXT: MOV T0.Y, 0.0, ; EG-NEXT: MOV * T0.Z, 0.0, Index: llvm/test/CodeGen/AMDGPU/fptoui.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fptoui.f16.ll +++ llvm/test/CodeGen/AMDGPU/fptoui.f16.ll @@ -60,8 +60,9 @@ ; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] ; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] ; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; SI: v_cvt_u32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] -; SI: v_cvt_u32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] +; SI-DAG: v_cvt_u32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] +; SI-DAG: v_cvt_u32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] +; SI: v_and_b32_e32 ; SI: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]] ; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_HI]] Index: llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll =================================================================== --- llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll +++ llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll @@ -771,7 +771,7 @@ ; CHECK-NEXT: lxv v2, 0(r3) ; CHECK-NEXT: xscvqpswz v2, v2 ; CHECK-NEXT: mfvsrwz r3, v2 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr ; ; CHECK-P8-LABEL: qpConv2shw: @@ -784,7 +784,7 @@ ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -848,7 +848,7 @@ ; CHECK-NEXT: xsaddqp v2, v2, v3 ; CHECK-NEXT: xscvqpswz v2, v2 ; CHECK-NEXT: mfvsrwz r3, v2 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr ; ; CHECK-P8-LABEL: qpConv2shw_03: @@ 
-867,7 +867,7 @@ ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -932,7 +932,7 @@ ; CHECK-NEXT: lxv v2, 0(r3) ; CHECK-NEXT: xscvqpswz v2, v2 ; CHECK-NEXT: mfvsrwz r3, v2 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: blr ; ; CHECK-P8-LABEL: qpConv2uhw: @@ -945,6 +945,7 @@ ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1008,7 +1009,7 @@ ; CHECK-NEXT: xsaddqp v2, v2, v3 ; CHECK-NEXT: xscvqpswz v2, v2 ; CHECK-NEXT: mfvsrwz r3, v2 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: blr ; ; CHECK-P8-LABEL: qpConv2uhw_03: @@ -1027,6 +1028,7 @@ ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1091,7 +1093,7 @@ ; CHECK-NEXT: lxv v2, 0(r3) ; CHECK-NEXT: xscvqpswz v2, v2 ; CHECK-NEXT: mfvsrwz r3, v2 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsb r3, r3 ; CHECK-NEXT: blr ; ; CHECK-P8-LABEL: qpConv2sb: @@ -1104,7 +1106,7 @@ ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1168,7 +1170,7 @@ ; CHECK-NEXT: xsaddqp v2, v2, v3 ; CHECK-NEXT: xscvqpswz v2, v2 ; CHECK-NEXT: mfvsrwz r3, v2 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsb r3, r3 ; CHECK-NEXT: blr ; ; CHECK-P8-LABEL: qpConv2sb_03: @@ -1187,7 +1189,7 @@ ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; 
CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1252,7 +1254,7 @@ ; CHECK-NEXT: lxv v2, 0(r3) ; CHECK-NEXT: xscvqpswz v2, v2 ; CHECK-NEXT: mfvsrwz r3, v2 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr ; ; CHECK-P8-LABEL: qpConv2ub: @@ -1265,6 +1267,7 @@ ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1328,7 +1331,7 @@ ; CHECK-NEXT: xsaddqp v2, v2, v3 ; CHECK-NEXT: xscvqpswz v2, v2 ; CHECK-NEXT: mfvsrwz r3, v2 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr ; ; CHECK-P8-LABEL: qpConv2ub_03: @@ -1347,6 +1350,7 @@ ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 Index: llvm/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll =================================================================== --- llvm/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll +++ llvm/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll @@ -11,7 +11,7 @@ ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfs f1, -4(r1) ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %arg.addr = alloca float, align 4 @@ -44,7 +44,7 @@ ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfd f1, -8(r1) ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %arg.addr = alloca double, align 8 @@ -77,7 +77,7 @@ ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfs f1, -4(r1) ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %arg.addr = alloca float, align 4 @@ -110,7 +110,7 @@ ; CHECK-NEXT: 
xscvdpsxws f0, f1 ; CHECK-NEXT: stfd f1, -8(r1) ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %arg.addr = alloca double, align 8 @@ -143,7 +143,7 @@ ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfs f1, -4(r1) ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr entry: %arg.addr = alloca float, align 4 @@ -176,7 +176,7 @@ ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfd f1, -8(r1) ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr entry: %arg.addr = alloca double, align 8 @@ -209,7 +209,7 @@ ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfs f1, -4(r1) ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: blr entry: %arg.addr = alloca float, align 4 @@ -242,7 +242,7 @@ ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfd f1, -8(r1) ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: blr entry: %arg.addr = alloca double, align 8 Index: llvm/test/CodeGen/PowerPC/fp64-to-int16.ll =================================================================== --- llvm/test/CodeGen/PowerPC/fp64-to-int16.ll +++ llvm/test/CodeGen/PowerPC/fp64-to-int16.ll @@ -7,6 +7,7 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpsxws 0, 1 ; CHECK-NEXT: mffprwz 3, 0 +; CHECK-NEXT: clrlwi 3, 3, 16 ; CHECK-NEXT: xori 3, 3, 65534 ; CHECK-NEXT: cntlzw 3, 3 ; CHECK-NEXT: srwi 4, 3, 5 Index: llvm/test/CodeGen/PowerPC/pr47660.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pr47660.ll +++ llvm/test/CodeGen/PowerPC/pr47660.ll @@ -16,7 +16,7 @@ ; CHECK-LE-NEXT: xsmuldp f0, f0, f1 ; CHECK-LE-NEXT: xscvdpsxws f0, f0 ; CHECK-LE-NEXT: mffprwz r3, f0 -; CHECK-LE-NEXT: clrldi r3, r3, 32 +; CHECK-LE-NEXT: clrldi r3, r3, 56 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: _Z1f1c: @@ 
-30,8 +30,7 @@ ; CHECK-BE-NEXT: fmul f0, f0, f1 ; CHECK-BE-NEXT: fctiwz f0, f0 ; CHECK-BE-NEXT: stfd f0, -8(r1) -; CHECK-BE-NEXT: lwz r3, -4(r1) -; CHECK-BE-NEXT: clrldi r3, r3, 32 +; CHECK-BE-NEXT: lbz r3, -1(r1) ; CHECK-BE-NEXT: blr entry: %0 = and i24 %g.coerce, 255 Index: llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll =================================================================== --- llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll +++ llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll @@ -1680,7 +1680,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_0_int16_t_float: @@ -1688,7 +1688,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_0_int16_t_float: @@ -1696,7 +1696,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = inttoptr i64 %ptr to float* @@ -1712,7 +1712,7 @@ ; CHECK-NEXT: lfs f0, 8(r3) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8 @@ -1730,7 +1730,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align32_int16_t_float: @@ -1740,7 +1740,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsh r3, r3 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr 
inbounds i8, i8* %ptr, i64 99999000 @@ -1759,7 +1759,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align64_int16_t_float: @@ -1770,7 +1770,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsh r3, r3 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000000 @@ -1787,7 +1787,7 @@ ; CHECK-NEXT: lfsx f0, r3, r4 ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %off @@ -1805,7 +1805,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_or_int16_t_float: @@ -1814,7 +1814,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_or_int16_t_float: @@ -1823,7 +1823,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %conv = zext i8 %off to i64 @@ -1842,7 +1842,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint16_int16_t_float: @@ -1851,7 +1851,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; 
CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint16_int16_t_float: @@ -1860,7 +1860,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 6 @@ -1878,7 +1878,7 @@ ; CHECK-P10-NEXT: lfs f0, 24(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align16_int16_t_float: @@ -1887,7 +1887,7 @@ ; CHECK-P9-NEXT: lfs f0, 24(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align16_int16_t_float: @@ -1897,7 +1897,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -4096 @@ -1917,7 +1917,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint32_int16_t_float: @@ -1927,7 +1927,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint32_int16_t_float: @@ -1937,7 +1937,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 99999 @@ -1957,7 +1957,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw 
r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_float: @@ -1969,7 +1969,7 @@ ; CHECK-P9-NEXT: lfsx f0, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_float: @@ -1981,7 +1981,7 @@ ; CHECK-P8-NEXT: lfsx f0, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -1000341504 @@ -2003,7 +2003,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint64_int16_t_float: @@ -2016,7 +2016,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint64_int16_t_float: @@ -2029,7 +2029,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 1000000000001 @@ -2050,7 +2050,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_align64_int16_t_float: @@ -2062,7 +2062,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsh r3, r3 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -2080,7 +2080,7 @@ ; CHECK-NEXT: lfs f0, 4080(0) ; CHECK-NEXT: 
xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr entry: %0 = load float, float* inttoptr (i64 4080 to float*), align 16 @@ -2096,7 +2096,7 @@ ; CHECK-P10-NEXT: lfs f0, -27108(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align32_int16_t_float: @@ -2105,7 +2105,7 @@ ; CHECK-P9-NEXT: lfs f0, -27108(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align32_int16_t_float: @@ -2115,7 +2115,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = load float, float* inttoptr (i64 9999900 to float*), align 4 @@ -2132,7 +2132,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align64_int16_t_float: @@ -2143,7 +2143,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align64_int16_t_float: @@ -2154,7 +2154,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = load float, float* inttoptr (i64 1000000000000 to float*), align 4096 @@ -2169,7 +2169,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; 
; CHECK-P9-LABEL: ld_0_int16_t_double: @@ -2177,7 +2177,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_0_int16_t_double: @@ -2185,7 +2185,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = inttoptr i64 %ptr to double* @@ -2201,7 +2201,7 @@ ; CHECK-NEXT: lfd f0, 8(r3) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8 @@ -2219,7 +2219,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align32_int16_t_double: @@ -2229,7 +2229,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsh r3, r3 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 99999000 @@ -2248,7 +2248,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align64_int16_t_double: @@ -2259,7 +2259,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsh r3, r3 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000000 @@ -2276,7 +2276,7 @@ ; CHECK-NEXT: lfdx f0, r3, r4 ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: 
mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %off @@ -2294,7 +2294,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_or_int16_t_double: @@ -2303,7 +2303,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_or_int16_t_double: @@ -2312,7 +2312,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %conv = zext i8 %off to i64 @@ -2331,7 +2331,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint16_int16_t_double: @@ -2340,7 +2340,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint16_int16_t_double: @@ -2349,7 +2349,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 6 @@ -2367,7 +2367,7 @@ ; CHECK-P10-NEXT: lfd f0, 24(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align16_int16_t_double: @@ -2376,7 +2376,7 @@ ; CHECK-P9-NEXT: lfd f0, 24(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; 
CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align16_int16_t_double: @@ -2386,7 +2386,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -4096 @@ -2406,7 +2406,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint32_int16_t_double: @@ -2416,7 +2416,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint32_int16_t_double: @@ -2426,7 +2426,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 99999 @@ -2446,7 +2446,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_double: @@ -2458,7 +2458,7 @@ ; CHECK-P9-NEXT: lfdx f0, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_double: @@ -2470,7 +2470,7 @@ ; CHECK-P8-NEXT: lfdx f0, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -1000341504 @@ -2492,7 +2492,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; 
CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint64_int16_t_double: @@ -2505,7 +2505,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint64_int16_t_double: @@ -2518,7 +2518,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 1000000000001 @@ -2538,7 +2538,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_align64_int16_t_double: @@ -2550,7 +2550,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsh r3, r3 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -2568,7 +2568,7 @@ ; CHECK-NEXT: lfd f0, 4080(0) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsh r3, r3 ; CHECK-NEXT: blr entry: %0 = load double, double* inttoptr (i64 4080 to double*), align 16 @@ -2584,7 +2584,7 @@ ; CHECK-P10-NEXT: lfd f0, -27108(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align32_int16_t_double: @@ -2593,7 +2593,7 @@ ; CHECK-P9-NEXT: lfd f0, -27108(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; 
CHECK-P8-LABEL: ld_cst_align32_int16_t_double: @@ -2603,7 +2603,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = load double, double* inttoptr (i64 9999900 to double*), align 8 @@ -2620,7 +2620,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsh r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align64_int16_t_double: @@ -2631,7 +2631,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsh r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align64_int16_t_double: @@ -2642,7 +2642,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsh r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = load double, double* inttoptr (i64 1000000000000 to double*), align 4096 @@ -4618,7 +4618,7 @@ ; CHECK-NEXT: lfs f0, 8(r3) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8 @@ -4636,7 +4636,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align32_uint16_t_float: @@ -4646,7 +4646,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 48 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 99999000 @@ -4665,7 +4665,7 @@ ; 
CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align64_uint16_t_float: @@ -4676,7 +4676,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 48 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000000 @@ -4693,7 +4693,7 @@ ; CHECK-NEXT: lfsx f0, r3, r4 ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %off @@ -4711,7 +4711,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_or_uint16_t_float: @@ -4720,7 +4720,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_or_uint16_t_float: @@ -4729,7 +4729,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %conv = zext i8 %off to i64 @@ -4748,7 +4748,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint16_uint16_t_float: @@ -4757,7 +4757,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: 
clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint16_uint16_t_float: @@ -4766,7 +4766,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 6 @@ -4784,7 +4784,7 @@ ; CHECK-P10-NEXT: lfs f0, 24(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align16_uint16_t_float: @@ -4793,7 +4793,7 @@ ; CHECK-P9-NEXT: lfs f0, 24(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align16_uint16_t_float: @@ -4803,7 +4803,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -4096 @@ -4823,7 +4823,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint32_uint16_t_float: @@ -4833,7 +4833,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint32_uint16_t_float: @@ -4843,7 +4843,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 99999 @@ -4863,7 +4863,7 @@ ; CHECK-P10-NEXT: lfsx f0, 
r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_float: @@ -4875,7 +4875,7 @@ ; CHECK-P9-NEXT: lfsx f0, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_float: @@ -4887,7 +4887,7 @@ ; CHECK-P8-NEXT: lfsx f0, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -1000341504 @@ -4909,7 +4909,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint64_uint16_t_float: @@ -4922,7 +4922,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint64_uint16_t_float: @@ -4935,7 +4935,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 1000000000001 @@ -4955,7 +4955,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_align64_uint16_t_float: @@ -4967,7 +4967,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 
32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 48 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -4985,7 +4985,7 @@ ; CHECK-NEXT: lfs f0, 4080(0) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: blr entry: %0 = load float, float* inttoptr (i64 4080 to float*), align 16 @@ -5001,7 +5001,7 @@ ; CHECK-P10-NEXT: lfs f0, -27108(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align32_uint16_t_float: @@ -5010,7 +5010,7 @@ ; CHECK-P9-NEXT: lfs f0, -27108(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align32_uint16_t_float: @@ -5020,7 +5020,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %0 = load float, float* inttoptr (i64 9999900 to float*), align 4 @@ -5037,7 +5037,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align64_uint16_t_float: @@ -5048,7 +5048,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align64_uint16_t_float: @@ -5059,7 +5059,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %0 = load float, float* inttoptr 
(i64 1000000000000 to float*), align 4096 @@ -5074,7 +5074,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_0_uint16_t_double: @@ -5082,7 +5082,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_0_uint16_t_double: @@ -5090,7 +5090,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %0 = inttoptr i64 %ptr to double* @@ -5106,7 +5106,7 @@ ; CHECK-NEXT: lfd f0, 8(r3) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8 @@ -5124,7 +5124,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align32_uint16_t_double: @@ -5134,7 +5134,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 48 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 99999000 @@ -5153,7 +5153,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align64_uint16_t_double: @@ -5164,7 +5164,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; 
CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 48 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000000 @@ -5181,7 +5181,7 @@ ; CHECK-NEXT: lfdx f0, r3, r4 ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %off @@ -5199,7 +5199,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_or_uint16_t_double: @@ -5208,7 +5208,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_or_uint16_t_double: @@ -5217,7 +5217,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %conv = zext i8 %off to i64 @@ -5236,7 +5236,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint16_uint16_t_double: @@ -5245,7 +5245,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint16_uint16_t_double: @@ -5254,7 +5254,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: 
clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 6 @@ -5272,7 +5272,7 @@ ; CHECK-P10-NEXT: lfd f0, 24(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align16_uint16_t_double: @@ -5281,7 +5281,7 @@ ; CHECK-P9-NEXT: lfd f0, 24(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align16_uint16_t_double: @@ -5291,7 +5291,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -4096 @@ -5311,7 +5311,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint32_uint16_t_double: @@ -5321,7 +5321,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint32_uint16_t_double: @@ -5331,7 +5331,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 99999 @@ -5351,7 +5351,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_double: @@ -5363,7 +5363,7 @@ ; CHECK-P9-NEXT: lfdx f0, r3, r4 ; 
CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_double: @@ -5375,7 +5375,7 @@ ; CHECK-P8-NEXT: lfdx f0, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -1000341504 @@ -5397,7 +5397,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint64_uint16_t_double: @@ -5410,7 +5410,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint64_uint16_t_double: @@ -5423,7 +5423,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 1000000000001 @@ -5443,7 +5443,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_align64_uint16_t_double: @@ -5455,7 +5455,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 48 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -5473,7 +5473,7 @@ ; CHECK-NEXT: lfd f0, 4080(0) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi 
r3, r3, 48 ; CHECK-NEXT: blr entry: %0 = load double, double* inttoptr (i64 4080 to double*), align 16 @@ -5489,7 +5489,7 @@ ; CHECK-P10-NEXT: lfd f0, -27108(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align32_uint16_t_double: @@ -5498,7 +5498,7 @@ ; CHECK-P9-NEXT: lfd f0, -27108(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align32_uint16_t_double: @@ -5508,7 +5508,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %0 = load double, double* inttoptr (i64 9999900 to double*), align 8 @@ -5525,7 +5525,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 48 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align64_uint16_t_double: @@ -5536,7 +5536,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 48 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align64_uint16_t_double: @@ -5547,7 +5547,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: blr entry: %0 = load double, double* inttoptr (i64 1000000000000 to double*), align 4096 Index: llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll =================================================================== --- llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll +++ llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll @@ -2092,7 
+2092,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_0_int8_t_float: @@ -2100,7 +2100,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_0_int8_t_float: @@ -2108,7 +2108,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = inttoptr i64 %ptr to float* @@ -2124,7 +2124,7 @@ ; CHECK-NEXT: lfs f0, 8(r3) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsb r3, r3 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8 @@ -2142,7 +2142,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align32_int8_t_float: @@ -2152,7 +2152,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsb r3, r3 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 99999000 @@ -2171,7 +2171,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align64_int8_t_float: @@ -2182,7 +2182,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsb r3, r3 ; 
CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000000 @@ -2199,7 +2199,7 @@ ; CHECK-NEXT: lfsx f0, r3, r4 ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsb r3, r3 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %off @@ -2217,7 +2217,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_or_int8_t_float: @@ -2226,7 +2226,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_or_int8_t_float: @@ -2235,7 +2235,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %conv = zext i8 %off to i64 @@ -2254,7 +2254,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint16_int8_t_float: @@ -2263,7 +2263,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint16_int8_t_float: @@ -2272,7 +2272,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 6 @@ -2290,7 +2290,7 @@ ; CHECK-P10-NEXT: lfs f0, 24(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; 
CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align16_int8_t_float: @@ -2299,7 +2299,7 @@ ; CHECK-P9-NEXT: lfs f0, 24(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align16_int8_t_float: @@ -2309,7 +2309,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -4096 @@ -2329,7 +2329,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint32_int8_t_float: @@ -2339,7 +2339,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint32_int8_t_float: @@ -2349,7 +2349,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 99999 @@ -2369,7 +2369,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align32_int8_t_float: @@ -2381,7 +2381,7 @@ ; CHECK-P9-NEXT: lfsx f0, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align32_int8_t_float: @@ -2393,7 +2393,7 @@ ; CHECK-P8-NEXT: lfsx f0, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 
-; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -1000341504 @@ -2415,7 +2415,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint64_int8_t_float: @@ -2428,7 +2428,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint64_int8_t_float: @@ -2441,7 +2441,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 1000000000001 @@ -2461,7 +2461,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_align64_int8_t_float: @@ -2473,7 +2473,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsb r3, r3 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -2491,7 +2491,7 @@ ; CHECK-NEXT: lfs f0, 4080(0) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsb r3, r3 ; CHECK-NEXT: blr entry: %0 = load float, float* inttoptr (i64 4080 to float*), align 16 @@ -2507,7 +2507,7 @@ ; CHECK-P10-NEXT: lfs f0, -27108(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align32_int8_t_float: @@ -2516,7 +2516,7 @@ ; CHECK-P9-NEXT: lfs f0, 
-27108(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align32_int8_t_float: @@ -2526,7 +2526,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = load float, float* inttoptr (i64 9999900 to float*), align 4 @@ -2543,7 +2543,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align64_int8_t_float: @@ -2554,7 +2554,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align64_int8_t_float: @@ -2565,7 +2565,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = load float, float* inttoptr (i64 1000000000000 to float*), align 4096 @@ -2580,7 +2580,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_0_int8_t_double: @@ -2588,7 +2588,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_0_int8_t_double: @@ -2596,7 +2596,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = inttoptr 
i64 %ptr to double* @@ -2612,7 +2612,7 @@ ; CHECK-NEXT: lfd f0, 8(r3) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsb r3, r3 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8 @@ -2630,7 +2630,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align32_int8_t_double: @@ -2640,7 +2640,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsb r3, r3 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 99999000 @@ -2659,7 +2659,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align64_int8_t_double: @@ -2670,7 +2670,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsb r3, r3 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000000 @@ -2687,7 +2687,7 @@ ; CHECK-NEXT: lfdx f0, r3, r4 ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsb r3, r3 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %off @@ -2705,7 +2705,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_or_int8_t_double: @@ -2714,7 +2714,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: 
mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_or_int8_t_double: @@ -2723,7 +2723,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %conv = zext i8 %off to i64 @@ -2742,7 +2742,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint16_int8_t_double: @@ -2751,7 +2751,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint16_int8_t_double: @@ -2760,7 +2760,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 6 @@ -2778,7 +2778,7 @@ ; CHECK-P10-NEXT: lfd f0, 24(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align16_int8_t_double: @@ -2787,7 +2787,7 @@ ; CHECK-P9-NEXT: lfd f0, 24(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align16_int8_t_double: @@ -2797,7 +2797,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -4096 @@ -2817,7 +2817,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: 
mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint32_int8_t_double: @@ -2827,7 +2827,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint32_int8_t_double: @@ -2837,7 +2837,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 99999 @@ -2857,7 +2857,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align32_int8_t_double: @@ -2869,7 +2869,7 @@ ; CHECK-P9-NEXT: lfdx f0, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align32_int8_t_double: @@ -2881,7 +2881,7 @@ ; CHECK-P8-NEXT: lfdx f0, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -1000341504 @@ -2903,7 +2903,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint64_int8_t_double: @@ -2916,7 +2916,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint64_int8_t_double: @@ -2929,7 +2929,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; 
CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 1000000000001 @@ -2949,7 +2949,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_align64_int8_t_double: @@ -2961,7 +2961,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: extsw r3, r3 +; CHECK-PREP10-NEXT: extsb r3, r3 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -2979,7 +2979,7 @@ ; CHECK-NEXT: lfd f0, 4080(0) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: extsb r3, r3 ; CHECK-NEXT: blr entry: %0 = load double, double* inttoptr (i64 4080 to double*), align 16 @@ -2995,7 +2995,7 @@ ; CHECK-P10-NEXT: lfd f0, -27108(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align32_int8_t_double: @@ -3004,7 +3004,7 @@ ; CHECK-P9-NEXT: lfd f0, -27108(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align32_int8_t_double: @@ -3014,7 +3014,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = load double, double* inttoptr (i64 9999900 to double*), align 8 @@ -3031,7 +3031,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: extsw r3, r3 +; CHECK-P10-NEXT: extsb r3, r3 ; CHECK-P10-NEXT: 
blr ; ; CHECK-P9-LABEL: ld_cst_align64_int8_t_double: @@ -3042,7 +3042,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsb r3, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align64_int8_t_double: @@ -3053,7 +3053,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: extsw r3, r3 +; CHECK-P8-NEXT: extsb r3, r3 ; CHECK-P8-NEXT: blr entry: %0 = load double, double* inttoptr (i64 1000000000000 to double*), align 4096 @@ -5590,7 +5590,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_0_uint8_t_float: @@ -5598,7 +5598,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_0_uint8_t_float: @@ -5606,7 +5606,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %0 = inttoptr i64 %ptr to float* @@ -5622,7 +5622,7 @@ ; CHECK-NEXT: lfs f0, 8(r3) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8 @@ -5640,7 +5640,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align32_uint8_t_float: @@ -5650,7 +5650,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; 
CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 99999000 @@ -5670,7 +5670,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r5 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_unalign64_uint8_t_float: @@ -5682,7 +5682,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000001 @@ -5701,7 +5701,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align64_uint8_t_float: @@ -5712,7 +5712,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000000 @@ -5729,7 +5729,7 @@ ; CHECK-NEXT: lfsx f0, r3, r4 ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %off @@ -5747,7 +5747,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_or_uint8_t_float: @@ -5756,7 +5756,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; 
CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_or_uint8_t_float: @@ -5765,7 +5765,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %conv = zext i8 %off to i64 @@ -5784,7 +5784,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint16_uint8_t_float: @@ -5793,7 +5793,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint16_uint8_t_float: @@ -5802,7 +5802,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 6 @@ -5820,7 +5820,7 @@ ; CHECK-P10-NEXT: lfs f0, 24(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align16_uint8_t_float: @@ -5829,7 +5829,7 @@ ; CHECK-P9-NEXT: lfs f0, 24(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align16_uint8_t_float: @@ -5839,7 +5839,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -4096 @@ -5859,7 
+5859,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint32_uint8_t_float: @@ -5869,7 +5869,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint32_uint8_t_float: @@ -5879,7 +5879,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 99999 @@ -5899,7 +5899,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align32_uint8_t_float: @@ -5911,7 +5911,7 @@ ; CHECK-P9-NEXT: lfsx f0, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align32_uint8_t_float: @@ -5923,7 +5923,7 @@ ; CHECK-P8-NEXT: lfsx f0, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -1000341504 @@ -5945,7 +5945,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint64_uint8_t_float: @@ -5958,7 +5958,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 
32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint64_uint8_t_float: @@ -5971,7 +5971,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 1000000000001 @@ -5992,7 +5992,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r5 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_unalign64_uint8_t_float: @@ -6005,7 +6005,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -6026,7 +6026,7 @@ ; CHECK-P10-NEXT: lfsx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_align64_uint8_t_float: @@ -6038,7 +6038,7 @@ ; CHECK-PREP10-NEXT: lfsx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -6056,7 +6056,7 @@ ; CHECK-NEXT: lfs f0, 4080(0) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %0 = load float, float* inttoptr (i64 4080 to float*), align 16 @@ -6072,7 +6072,7 @@ ; CHECK-P10-NEXT: lfs f0, -27108(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; 
CHECK-P9-LABEL: ld_cst_align32_uint8_t_float: @@ -6081,7 +6081,7 @@ ; CHECK-P9-NEXT: lfs f0, -27108(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align32_uint8_t_float: @@ -6091,7 +6091,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %0 = load float, float* inttoptr (i64 9999900 to float*), align 4 @@ -6109,7 +6109,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r4) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_unalign64_uint8_t_float: @@ -6121,7 +6121,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_unalign64_uint8_t_float: @@ -6133,7 +6133,7 @@ ; CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %0 = load float, float* inttoptr (i64 1000000000001 to float*), align 4 @@ -6150,7 +6150,7 @@ ; CHECK-P10-NEXT: lfs f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align64_uint8_t_float: @@ -6161,7 +6161,7 @@ ; CHECK-P9-NEXT: lfs f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align64_uint8_t_float: @@ -6172,7 +6172,7 @@ ; 
CHECK-P8-NEXT: lfsx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %0 = load float, float* inttoptr (i64 1000000000000 to float*), align 4096 @@ -6187,7 +6187,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_0_uint8_t_double: @@ -6195,7 +6195,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_0_uint8_t_double: @@ -6203,7 +6203,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %0 = inttoptr i64 %ptr to double* @@ -6219,7 +6219,7 @@ ; CHECK-NEXT: lfd f0, 8(r3) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8 @@ -6237,7 +6237,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align32_uint8_t_double: @@ -6247,7 +6247,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 99999000 @@ -6267,7 +6267,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r5 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, 
f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_unalign64_uint8_t_double: @@ -6279,7 +6279,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000001 @@ -6298,7 +6298,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_align64_uint8_t_double: @@ -6309,7 +6309,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 1000000000000 @@ -6326,7 +6326,7 @@ ; CHECK-NEXT: lfdx f0, r3, r4 ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %off @@ -6344,7 +6344,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_or_uint8_t_double: @@ -6353,7 +6353,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_or_uint8_t_double: @@ -6362,7 +6362,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: 
clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %conv = zext i8 %off to i64 @@ -6381,7 +6381,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint16_uint8_t_double: @@ -6390,7 +6390,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint16_uint8_t_double: @@ -6399,7 +6399,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 6 @@ -6417,7 +6417,7 @@ ; CHECK-P10-NEXT: lfd f0, 24(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align16_uint8_t_double: @@ -6426,7 +6426,7 @@ ; CHECK-P9-NEXT: lfd f0, 24(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align16_uint8_t_double: @@ -6436,7 +6436,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -4096 @@ -6456,7 +6456,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint32_uint8_t_double: @@ -6466,7 +6466,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: 
xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint32_uint8_t_double: @@ -6476,7 +6476,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %or = or i64 %ptr, 99999 @@ -6496,7 +6496,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_disjoint_align32_uint8_t_double: @@ -6508,7 +6508,7 @@ ; CHECK-P9-NEXT: lfdx f0, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_disjoint_align32_uint8_t_double: @@ -6520,7 +6520,7 @@ ; CHECK-P8-NEXT: lfdx f0, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %and = and i64 %ptr, -1000341504 @@ -6542,7 +6542,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_not_disjoint64_uint8_t_double: @@ -6555,7 +6555,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_not_disjoint64_uint8_t_double: @@ -6568,7 +6568,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr 
entry: %or = or i64 %ptr, 1000000000001 @@ -6589,7 +6589,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r5 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_unalign64_uint8_t_double: @@ -6602,7 +6602,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -6623,7 +6623,7 @@ ; CHECK-P10-NEXT: lfdx f0, r3, r4 ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-PREP10-LABEL: ld_disjoint_align64_uint8_t_double: @@ -6635,7 +6635,7 @@ ; CHECK-PREP10-NEXT: lfdx f0, r3, r4 ; CHECK-PREP10-NEXT: xscvdpsxws f0, f0 ; CHECK-PREP10-NEXT: mffprwz r3, f0 -; CHECK-PREP10-NEXT: clrldi r3, r3, 32 +; CHECK-PREP10-NEXT: clrldi r3, r3, 56 ; CHECK-PREP10-NEXT: blr entry: %and = and i64 %ptr, -1099511627776 @@ -6653,7 +6653,7 @@ ; CHECK-NEXT: lfd f0, 4080(0) ; CHECK-NEXT: xscvdpsxws f0, f0 ; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr entry: %0 = load double, double* inttoptr (i64 4080 to double*), align 16 @@ -6669,7 +6669,7 @@ ; CHECK-P10-NEXT: lfd f0, -27108(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align32_uint8_t_double: @@ -6678,7 +6678,7 @@ ; CHECK-P9-NEXT: lfd f0, -27108(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align32_uint8_t_double: @@ -6688,7 
+6688,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %0 = load double, double* inttoptr (i64 9999900 to double*), align 8 @@ -6706,7 +6706,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r4) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_unalign64_uint8_t_double: @@ -6718,7 +6718,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_unalign64_uint8_t_double: @@ -6730,7 +6730,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %0 = load double, double* inttoptr (i64 1000000000001 to double*), align 8 @@ -6747,7 +6747,7 @@ ; CHECK-P10-NEXT: lfd f0, 0(r3) ; CHECK-P10-NEXT: xscvdpsxws f0, f0 ; CHECK-P10-NEXT: mffprwz r3, f0 -; CHECK-P10-NEXT: clrldi r3, r3, 32 +; CHECK-P10-NEXT: clrldi r3, r3, 56 ; CHECK-P10-NEXT: blr ; ; CHECK-P9-LABEL: ld_cst_align64_uint8_t_double: @@ -6758,7 +6758,7 @@ ; CHECK-P9-NEXT: lfd f0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: clrldi r3, r3, 32 +; CHECK-P9-NEXT: clrldi r3, r3, 56 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: ld_cst_align64_uint8_t_double: @@ -6769,7 +6769,7 @@ ; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: clrldi r3, r3, 56 ; CHECK-P8-NEXT: blr entry: %0 = load double, double* inttoptr (i64 1000000000000 to double*), align 4096 Index: 
llvm/test/CodeGen/RISCV/rv64d-double-convert.ll =================================================================== --- llvm/test/CodeGen/RISCV/rv64d-double-convert.ll +++ llvm/test/CodeGen/RISCV/rv64d-double-convert.ll @@ -22,6 +22,7 @@ ; RV64ID: # %bb.0: ; RV64ID-NEXT: fmv.d.x ft0, a0 ; RV64ID-NEXT: fcvt.l.d a0, ft0, rtz +; RV64ID-NEXT: sext.w a0, a0 ; RV64ID-NEXT: ret %1 = fptosi double %a to i32 ret i32 %1 @@ -53,7 +54,8 @@ ; RV64ID-LABEL: sext_fptoui: ; RV64ID: # %bb.0: ; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64ID-NEXT: sext.w a0, a0 ; RV64ID-NEXT: ret %1 = fptoui double %a to i32 ret i32 %1 @@ -64,6 +66,8 @@ ; RV64ID: # %bb.0: ; RV64ID-NEXT: fmv.d.x ft0, a0 ; RV64ID-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64ID-NEXT: slli a0, a0, 32 +; RV64ID-NEXT: srli a0, a0, 32 ; RV64ID-NEXT: ret %1 = fptoui double %a to i32 ret i32 %1 Index: llvm/test/CodeGen/RISCV/rv64f-float-convert.ll =================================================================== --- llvm/test/CodeGen/RISCV/rv64f-float-convert.ll +++ llvm/test/CodeGen/RISCV/rv64f-float-convert.ll @@ -22,6 +22,7 @@ ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a0 ; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: sext.w a0, a0 ; RV64IF-NEXT: ret %1 = fptosi float %a to i32 ret i32 %1 @@ -53,7 +54,8 @@ ; RV64IF-LABEL: sext_fptoui: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: sext.w a0, a0 ; RV64IF-NEXT: ret %1 = fptoui float %a to i32 ret i32 %1 @@ -64,6 +66,8 @@ ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a0 ; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: slli a0, a0, 32 +; RV64IF-NEXT: srli a0, a0, 32 ; RV64IF-NEXT: ret %1 = fptoui float %a to i32 ret i32 %1 Index: llvm/test/CodeGen/RISCV/rv64f-half-convert.ll =================================================================== --- llvm/test/CodeGen/RISCV/rv64f-half-convert.ll +++ 
llvm/test/CodeGen/RISCV/rv64f-half-convert.ll @@ -20,6 +20,7 @@ ; RV64IZFH-LABEL: sext_fptosi: ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: sext.w a0, a0 ; RV64IZFH-NEXT: ret %1 = fptosi half %a to i32 ret i32 %1 @@ -48,7 +49,8 @@ define signext i32 @sext_fptoui(half %a) nounwind { ; RV64IZFH-LABEL: sext_fptoui: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: sext.w a0, a0 ; RV64IZFH-NEXT: ret %1 = fptoui half %a to i32 ret i32 %1 @@ -58,6 +60,8 @@ ; RV64IZFH-LABEL: zext_fptoui: ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: slli a0, a0, 32 +; RV64IZFH-NEXT: srli a0, a0, 32 ; RV64IZFH-NEXT: ret %1 = fptoui half %a to i32 ret i32 %1 Index: llvm/test/CodeGen/X86/avx-cvt-2.ll =================================================================== --- llvm/test/CodeGen/X86/avx-cvt-2.ll +++ llvm/test/CodeGen/X86/avx-cvt-2.ll @@ -12,7 +12,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u> +; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vmovdqa %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -26,7 +29,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u> +; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vmovdqa %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -40,8 +46,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, 
%ymm0, %xmm1 -; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> +; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovq %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -55,8 +63,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> +; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovq %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/avx-fp2int.ll =================================================================== --- llvm/test/CodeGen/X86/avx-fp2int.ll +++ llvm/test/CodeGen/X86/avx-fp2int.ll @@ -7,8 +7,7 @@ ; CHECK-LABEL: test1: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 -; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retl %c = fptoui <4 x double> %d to <4 x i8> @@ -18,8 +17,7 @@ ; CHECK-LABEL: test2: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 -; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retl %c = fptosi <4 x double> %d to <4 x i8> Index: llvm/test/CodeGen/X86/avx512-cvt.ll =================================================================== --- llvm/test/CodeGen/X86/avx512-cvt.ll +++ 
llvm/test/CodeGen/X86/avx512-cvt.ll @@ -2356,6 +2356,7 @@ ; NOVLDQ: # %bb.0: ; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NOVLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 ; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -2364,6 +2365,7 @@ ; VLDQ-LABEL: test_4f64tosb: ; VLDQ: # %bb.0: ; VLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 ; VLDQ-NEXT: vpmovd2m %xmm0, %k1 ; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} ; VLDQ-NEXT: retq @@ -2371,6 +2373,7 @@ ; VLNODQ-LABEL: test_4f64tosb: ; VLNODQ: # %bb.0: ; VLNODQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 ; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 ; VLNODQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} ; VLNODQ-NEXT: retq @@ -2379,6 +2382,7 @@ ; DQNOVL: # %bb.0: ; DQNOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; DQNOVL-NEXT: vcvttpd2dq %ymm0, %xmm0 +; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 ; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 ; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -2392,6 +2396,7 @@ ; NOVLDQ-LABEL: test_8f64tosb: ; NOVLDQ: # %bb.0: ; NOVLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 +; NOVLDQ-NEXT: vpslld $31, %ymm0, %ymm0 ; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; NOVLDQ-NEXT: retq @@ -2399,6 +2404,7 @@ ; VLDQ-LABEL: test_8f64tosb: ; VLDQ: # %bb.0: ; VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 +; VLDQ-NEXT: vpslld $31, %ymm0, %ymm0 ; VLDQ-NEXT: vpmovd2m %ymm0, %k1 ; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; VLDQ-NEXT: retq @@ -2406,6 +2412,7 @@ ; VLNODQ-LABEL: test_8f64tosb: ; VLNODQ: # %bb.0: ; VLNODQ-NEXT: vcvttpd2dq %zmm0, %ymm0 +; VLNODQ-NEXT: vpslld $31, %ymm0, %ymm0 ; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 ; VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; VLNODQ-NEXT: retq @@ -2413,6 +2420,7 @@ ; DQNOVL-LABEL: 
test_8f64tosb: ; DQNOVL: # %bb.0: ; DQNOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; DQNOVL-NEXT: vpslld $31, %ymm0, %ymm0 ; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 ; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; DQNOVL-NEXT: retq @@ -2426,6 +2434,7 @@ ; NOVLDQ: # %bb.0: ; NOVLDQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 ; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -2435,6 +2444,7 @@ ; VLDQ-LABEL: test_2f32tosb: ; VLDQ: # %bb.0: ; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 ; VLDQ-NEXT: vpmovd2m %xmm0, %k1 ; VLDQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} ; VLDQ-NEXT: retq @@ -2442,6 +2452,7 @@ ; VLNODQ-LABEL: test_2f32tosb: ; VLNODQ: # %bb.0: ; VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 ; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 ; VLNODQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} ; VLNODQ-NEXT: retq @@ -2450,6 +2461,7 @@ ; DQNOVL: # %bb.0: ; DQNOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 +; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 ; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 ; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -2465,6 +2477,7 @@ ; NOVLDQ: # %bb.0: ; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 ; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -2473,6 +2486,7 @@ ; VLDQ-LABEL: test_4f32tosb: ; VLDQ: # %bb.0: ; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 ; VLDQ-NEXT: vpmovd2m %xmm0, %k1 ; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} ; VLDQ-NEXT: retq @@ -2480,6 +2494,7 @@ ; VLNODQ-LABEL: test_4f32tosb: ; VLNODQ: # 
%bb.0: ; VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 ; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 ; VLNODQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} ; VLNODQ-NEXT: retq @@ -2488,6 +2503,7 @@ ; DQNOVL: # %bb.0: ; DQNOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 +; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 ; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 ; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 @@ -2501,6 +2517,7 @@ ; NOVLDQ-LABEL: test_8f32tosb: ; NOVLDQ: # %bb.0: ; NOVLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; NOVLDQ-NEXT: vpslld $31, %ymm0, %ymm0 ; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; NOVLDQ-NEXT: retq @@ -2508,6 +2525,7 @@ ; VLDQ-LABEL: test_8f32tosb: ; VLDQ: # %bb.0: ; VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; VLDQ-NEXT: vpslld $31, %ymm0, %ymm0 ; VLDQ-NEXT: vpmovd2m %ymm0, %k1 ; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; VLDQ-NEXT: retq @@ -2515,6 +2533,7 @@ ; VLNODQ-LABEL: test_8f32tosb: ; VLNODQ: # %bb.0: ; VLNODQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; VLNODQ-NEXT: vpslld $31, %ymm0, %ymm0 ; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 ; VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; VLNODQ-NEXT: retq @@ -2522,6 +2541,7 @@ ; DQNOVL-LABEL: test_8f32tosb: ; DQNOVL: # %bb.0: ; DQNOVL-NEXT: vcvttps2dq %ymm0, %ymm0 +; DQNOVL-NEXT: vpslld $31, %ymm0, %ymm0 ; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 ; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ; DQNOVL-NEXT: retq @@ -2534,6 +2554,7 @@ ; NODQ-LABEL: test_16f32tosb: ; NODQ: # %bb.0: ; NODQ-NEXT: vcvttps2dq %zmm0, %zmm0 +; NODQ-NEXT: vpslld $31, %zmm0, %zmm0 ; NODQ-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NODQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ; NODQ-NEXT: retq @@ -2541,6 +2562,7 @@ ; VLDQ-LABEL: test_16f32tosb: ; VLDQ: # %bb.0: ; VLDQ-NEXT: vcvttps2dq %zmm0, %zmm0 +; VLDQ-NEXT: vpslld $31, %zmm0, %zmm0 ; VLDQ-NEXT: vpmovd2m %zmm0, %k1 ; VLDQ-NEXT: vmovdqa32 %zmm1, %zmm0 
{%k1} {z} ; VLDQ-NEXT: retq @@ -2548,6 +2570,7 @@ ; DQNOVL-LABEL: test_16f32tosb: ; DQNOVL: # %bb.0: ; DQNOVL-NEXT: vcvttps2dq %zmm0, %zmm0 +; DQNOVL-NEXT: vpslld $31, %zmm0, %zmm0 ; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 ; DQNOVL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ; DQNOVL-NEXT: retq Index: llvm/test/CodeGen/X86/fptoui-may-overflow.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/fptoui-may-overflow.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s + +; We cannot emit only "cvttps2dq %xmm0, %xmm0" for this function, because +; the fptoui may overflow. The old DAG optimization would turn +; "%f = fptoui <4 x float> %arg to <4 x i8>" into +; "%f = fptosi <4 x float> %arg to <4 x i16>" + "AssertZext %f, ValueType:ch:i8", +; which is not equivalent when the fptoui overflows. + +define <16 x i8> @src(<4 x float> %arg) { +; CHECK-LABEL: src: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %f = fptoui <4 x float> %arg to <4 x i8> + %s = shufflevector <4 x i8> %f, <4 x i8> undef, <16 x i32> + %ss = shufflevector <16 x i8> %s, <16 x i8> zeroinitializer, <16 x i32> + ret <16 x i8> %ss +} Index: llvm/test/CodeGen/X86/min-legal-vector-width.ll =================================================================== --- llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -627,8 +627,10 @@ ; CHECK-LABEL: test_16f32tosb_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %ymm1 +; CHECK-NEXT: vpslld $31, %ymm1, %ymm1 ; CHECK-NEXT: vpmovd2m %ymm1, %k0 ; CHECK-NEXT: vcvttps2dq 32(%rdi), %ymm1 +; CHECK-NEXT: vpslld $31, %ymm1, %ymm1 ; CHECK-NEXT: vpmovd2m %ymm1, %k1 ; CHECK-NEXT: 
kunpckbw %k0, %k1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} @@ -643,6 +645,7 @@ ; CHECK-LABEL: test_16f32tosb_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %zmm1 +; CHECK-NEXT: vpslld $31, %zmm1, %zmm1 ; CHECK-NEXT: vpmovd2m %zmm1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/pr48727.ll =================================================================== --- llvm/test/CodeGen/X86/pr48727.ll +++ llvm/test/CodeGen/X86/pr48727.ll @@ -5,15 +5,19 @@ ; CHECK-LABEL: PR48727: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vcvttpd2dqy 0, %xmm0 +; CHECK-NEXT: vpmovdw %xmm0, %xmm0 ; CHECK-NEXT: vcvttpd2dqy 128, %xmm1 -; CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: vpmovdw %xmm1, %xmm1 ; CHECK-NEXT: vcvttpd2dqy 160, %xmm2 +; CHECK-NEXT: vpmovdw %xmm2, %xmm2 +; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; CHECK-NEXT: vcvttpd2dqy (%rax), %xmm2 +; CHECK-NEXT: vpmovdw %xmm2, %xmm2 ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 -; CHECK-NEXT: vmovdqu %ymm0, 16(%rax) +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6] +; CHECK-NEXT: vpermi2q %ymm1, %ymm0, %ymm2 +; CHECK-NEXT: vmovdqu %ymm2, 16(%rax) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq entry: Index: llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll =================================================================== --- llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll +++ llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll @@ -1829,43 +1829,43 @@ ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; SSE-32: # %bb.0: ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-32-NEXT: packssdw %xmm0, %xmm0 +; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; SSE-64: # %bb.0: ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-64-NEXT: packssdw %xmm0, %xmm0 +; SSE-64-NEXT: pshuflw 
{{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-64-NEXT: retq ; ; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: ret{{[l|q]}} ; ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VLDQ-NEXT: ret{{[l|q]}} %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a, metadata !"fpexcept.strict") #0 @@ -1888,31 +1888,31 @@ ; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: 
strict_vector_fptoui_v2f64_to_v2i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: ret{{[l|q]}} ; ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VLDQ-NEXT: ret{{[l|q]}} %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a, metadata !"fpexcept.strict") #0 @@ -1924,49 +1924,49 @@ ; SSE-32: # %bb.0: ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-32-NEXT: packssdw %xmm0, %xmm0 +; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; SSE-64: # %bb.0: ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-64-NEXT: packssdw %xmm0, %xmm0 +; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-64-NEXT: retq ; ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; 
AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: ret{{[l|q]}} ; ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VLDQ-NEXT: ret{{[l|q]}} %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a, metadata !"fpexcept.strict") #0 @@ -1992,35 +1992,35 @@ ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: ret{{[l|q]}} ; ; AVX512DQ-LABEL: 
strict_vector_fptoui_v2f32_to_v2i16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VLDQ-NEXT: ret{{[l|q]}} %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a, metadata !"fpexcept.strict") #0 @@ -2031,29 +2031,29 @@ ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; SSE-32: # %bb.0: ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-32-NEXT: packssdw %xmm0, %xmm0 -; SSE-32-NEXT: packsswb %xmm0, %xmm0 +; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; SSE-32-NEXT: packuswb %xmm0, %xmm0 +; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; SSE-64: # %bb.0: ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-64-NEXT: packssdw %xmm0, %xmm0 -; SSE-64-NEXT: packsswb %xmm0, %xmm0 +; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-64-NEXT: packuswb %xmm0, %xmm0 +; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: retq ; ; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8: @@ -2065,8 +2065,7 @@ ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8: @@ -2083,6 +2082,7 @@ ; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; SSE-32: # %bb.0: ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: retl @@ -2090,6 +2090,7 @@ ; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; SSE-64: # %bb.0: ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: retq @@ -2097,15 +2098,13 @@ ; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8: @@ -2117,8 +2116,7 @@ ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackuswb %xmm0, 
%xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8: @@ -2136,32 +2134,32 @@ ; SSE-32: # %bb.0: ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-32-NEXT: packssdw %xmm0, %xmm0 -; SSE-32-NEXT: packsswb %xmm0, %xmm0 +; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; SSE-32-NEXT: packuswb %xmm0, %xmm0 +; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; SSE-64: # %bb.0: ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-64-NEXT: packssdw %xmm0, %xmm0 -; SSE-64-NEXT: packsswb %xmm0, %xmm0 +; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-64-NEXT: packuswb %xmm0, %xmm0 +; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: retq ; ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8: @@ -2175,8 +2173,7 @@ ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 
; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8: @@ -2195,6 +2192,7 @@ ; SSE-32: # %bb.0: ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: retl @@ -2203,6 +2201,7 @@ ; SSE-64: # %bb.0: ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: retq @@ -2211,16 +2210,14 @@ ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8: @@ -2234,8 +2231,7 @@ ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8: @@ -3110,29 +3106,29 @@ ; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i8: ; SSE-32: # %bb.0: ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-32-NEXT: packssdw %xmm0, %xmm0 -; SSE-32-NEXT: packsswb %xmm0, %xmm0 +; 
SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; SSE-32-NEXT: packuswb %xmm0, %xmm0 +; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i8: ; SSE-64: # %bb.0: ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-64-NEXT: packssdw %xmm0, %xmm0 -; SSE-64-NEXT: packsswb %xmm0, %xmm0 +; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-64-NEXT: packuswb %xmm0, %xmm0 +; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: retq ; ; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i8: ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i8: @@ -3144,8 +3140,7 @@ ; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8: @@ -3162,6 +3157,7 @@ ; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i8: ; SSE-32: # %bb.0: ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: retl @@ -3169,6 +3165,7 @@ ; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i8: ; SSE-64: # %bb.0: ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-64-NEXT: pand 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: retq @@ -3176,15 +3173,13 @@ ; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i8: ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i8: @@ -3196,8 +3191,7 @@ ; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8: @@ -3229,6 +3223,7 @@ ; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i1: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -3238,6 +3233,7 @@ ; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i1: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 ; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} @@ -3246,6 +3242,7 @@ ; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: 
vcvttps2dq %xmm0, %xmm0 +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -3255,6 +3252,7 @@ ; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0 ; AVX512VLDQ-NEXT: vpmovm2d %k0, %xmm0 ; AVX512VLDQ-NEXT: ret{{[l|q]}} Index: llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll =================================================================== --- llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll +++ llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll @@ -1164,7 +1164,7 @@ ; CHECK-LABEL: strict_vector_fptosi_v4f64_to_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 -; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: ret{{[l|q]}} %ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f64(<4 x double> %a, @@ -1176,7 +1176,7 @@ ; CHECK-LABEL: strict_vector_fptoui_v4f64_to_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 -; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: ret{{[l|q]}} %ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f64(<4 x double> %a, @@ -1188,16 +1188,14 @@ ; AVX-LABEL: strict_vector_fptosi_v4f64_to_v4i8: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 -; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: vzeroupper ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0 -; AVX512F-NEXT: 
vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: ret{{[l|q]}} ; @@ -1211,8 +1209,7 @@ ; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0 -; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: ret{{[l|q]}} ; @@ -1231,16 +1228,14 @@ ; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i8: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 -; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: vzeroupper ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: ret{{[l|q]}} ; @@ -1254,8 +1249,7 @@ ; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0 -; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: ret{{[l|q]}} ; @@ -1280,6 +1274,7 @@ ; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i1: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0 +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ 
-1289,6 +1284,7 @@ ; AVX512VL-LABEL: strict_vector_fptosi_v4f64_to_v4i1: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 +; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 ; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} @@ -1298,6 +1294,7 @@ ; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i1: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -1307,6 +1304,7 @@ ; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i1: ; AVX512DQVL: # %bb.0: ; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0 +; AVX512DQVL-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQVL-NEXT: vpmovd2m %xmm0, %k0 ; AVX512DQVL-NEXT: vpmovm2d %k0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper @@ -1423,7 +1421,10 @@ ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u> +; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: vzeroupper ; AVX-NEXT: ret{{[l|q]}} ; @@ -1466,7 +1467,10 @@ ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u> +; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: vzeroupper ; AVX-NEXT: ret{{[l|q]}} ; @@ -1509,8 +1513,10 @@ ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa 
{{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX-NEXT: vzeroupper ; AVX-NEXT: ret{{[l|q]}} ; @@ -1551,8 +1557,10 @@ ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX-NEXT: vzeroupper ; AVX-NEXT: ret{{[l|q]}} ; @@ -1593,13 +1601,17 @@ ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u> +; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: vzeroupper ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i1: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0 +; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 @@ -1610,6 +1622,7 @@ ; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i1: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 +; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1 ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} @@ -1620,6 +1633,7 @@ ; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i1: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; 
AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 @@ -1630,6 +1644,7 @@ ; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i1: ; AVX512DQVL: # %bb.0: ; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQVL-NEXT: vpmovd2m %ymm0, %k0 ; AVX512DQVL-NEXT: vpmovm2d %k0, %ymm0 ; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0 @@ -1645,7 +1660,10 @@ ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u> +; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: vzeroupper ; AVX-NEXT: ret{{[l|q]}} ; Index: llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll =================================================================== --- llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll +++ llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll @@ -716,6 +716,7 @@ ; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i1: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1 ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} @@ -726,6 +727,7 @@ ; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i1: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0 +; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 @@ -834,6 +836,7 @@ ; AVX512VL-LABEL: strict_vector_fptosi_v16f32_to_v16i1: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttps2dq %zmm0, %zmm0 +; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 @@ -843,6 +846,7 @@ ; 
AVX512DQ-LABEL: strict_vector_fptosi_v16f32_to_v16i1: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttps2dq %zmm0, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 Index: llvm/test/CodeGen/X86/vec_cast2.ll =================================================================== --- llvm/test/CodeGen/X86/vec_cast2.ll +++ llvm/test/CodeGen/X86/vec_cast2.ll @@ -98,8 +98,10 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> +; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retl %res = fptosi <8 x float> %src to <8 x i8> @@ -111,7 +113,10 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u> +; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retl %res = fptosi <8 x float> %src to <8 x i16> @@ -122,8 +127,7 @@ ; CHECK-LABEL: cvt_v4f32_v4i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; CHECK-NEXT: retl %res = fptosi <4 x float> %src to <4 x i8> ret <4 x i8> %res @@ -133,7 +137,7 @@ ; CHECK-LABEL: cvt_v4f32_v4i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb 
{{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] ; CHECK-NEXT: retl %res = fptosi <4 x float> %src to <4 x i16> ret <4 x i16> %res @@ -144,8 +148,10 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> +; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retl %res = fptoui <8 x float> %src to <8 x i8> @@ -157,7 +163,10 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u> +; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retl %res = fptoui <8 x float> %src to <8 x i16> @@ -168,8 +177,7 @@ ; CHECK-LABEL: cvt_v4f32_v4u8: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; CHECK-NEXT: retl %res = fptoui <4 x float> %src to <4 x i8> ret <4 x i8> %res @@ -179,7 +187,7 @@ ; CHECK-LABEL: cvt_v4f32_v4u16: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] ; CHECK-NEXT: retl %res = fptoui <4 x float> %src to <4 x i16> ret <4 x i16> %res Index: llvm/test/CodeGen/X86/vec_cast3.ll =================================================================== --- llvm/test/CodeGen/X86/vec_cast3.ll +++ 
llvm/test/CodeGen/X86/vec_cast3.ll @@ -67,8 +67,7 @@ ; CHECK-LABEL: cvt_v2f32_v2i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; CHECK-NEXT: retl %res = fptosi <2 x float> %src to <2 x i8> ret <2 x i8> %res @@ -78,7 +77,7 @@ ; CHECK-LABEL: cvt_v2f32_v2i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-NEXT: retl %res = fptosi <2 x float> %src to <2 x i16> ret <2 x i16> %res @@ -97,8 +96,7 @@ ; CHECK-LABEL: cvt_v2f32_v2u8: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; CHECK-NEXT: retl %res = fptoui <2 x float> %src to <2 x i8> ret <2 x i8> %res @@ -108,7 +106,7 @@ ; CHECK-LABEL: cvt_v2f32_v2u16: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-NEXT: retl %res = fptoui <2 x float> %src to <2 x i16> ret <2 x i16> %res Index: llvm/test/CodeGen/X86/vec_fp_to_int.ll =================================================================== --- llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -2297,22 +2297,21 @@ ; SSE-LABEL: fptosi_2f32_to_2i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-NEXT: packssdw %xmm0, %xmm0 -; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: packuswb %xmm0, %xmm0 +; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: retq ; ; VEX-LABEL: fptosi_2f32_to_2i8: ; VEX: # %bb.0: ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 -; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; VEX-NEXT: 
vpacksswb %xmm0, %xmm0, %xmm0 +; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; VEX-NEXT: retq ; ; AVX512F-LABEL: fptosi_2f32_to_2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptosi_2f32_to_2i8: @@ -2324,8 +2323,7 @@ ; AVX512DQ-LABEL: fptosi_2f32_to_2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8: @@ -2341,13 +2339,13 @@ ; SSE-LABEL: fptosi_2f32_to_2i16: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-NEXT: packssdw %xmm0, %xmm0 +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-NEXT: retq ; ; AVX-LABEL: fptosi_2f32_to_2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: retq %cvt = fptosi <2 x float> %a to <2 x i16> ret <2 x i16> %cvt @@ -2357,6 +2355,7 @@ ; SSE-LABEL: fptoui_2f32_to_2i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: retq @@ -2364,15 +2363,13 @@ ; VEX-LABEL: fptoui_2f32_to_2i8: ; VEX: # %bb.0: ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 -; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f32_to_2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; 
AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptoui_2f32_to_2i8: @@ -2384,8 +2381,7 @@ ; AVX512DQ-LABEL: fptoui_2f32_to_2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8: @@ -2407,7 +2403,7 @@ ; AVX-LABEL: fptoui_2f32_to_2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: retq %cvt = fptoui <2 x float> %a to <2 x i16> ret <2 x i16> %cvt @@ -2417,22 +2413,21 @@ ; SSE-LABEL: fptosi_2f64_to_2i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-NEXT: packssdw %xmm0, %xmm0 -; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: packuswb %xmm0, %xmm0 +; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: retq ; ; VEX-LABEL: fptosi_2f64_to_2i8: ; VEX: # %bb.0: ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; VEX-NEXT: retq ; ; AVX512F-LABEL: fptosi_2f64_to_2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptosi_2f64_to_2i8: @@ -2444,8 +2439,7 @@ ; AVX512DQ-LABEL: fptosi_2f64_to_2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb 
{{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8: @@ -2461,13 +2455,13 @@ ; SSE-LABEL: fptosi_2f64_to_2i16: ; SSE: # %bb.0: ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-NEXT: packssdw %xmm0, %xmm0 +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-NEXT: retq ; ; AVX-LABEL: fptosi_2f64_to_2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: retq %cvt = fptosi <2 x double> %a to <2 x i16> ret <2 x i16> %cvt @@ -2477,6 +2471,7 @@ ; SSE-LABEL: fptoui_2f64_to_2i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: retq @@ -2484,15 +2479,13 @@ ; VEX-LABEL: fptoui_2f64_to_2i8: ; VEX: # %bb.0: ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f64_to_2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptoui_2f64_to_2i8: @@ -2504,8 +2497,7 @@ ; AVX512DQ-LABEL: fptoui_2f64_to_2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8: @@ -2527,7 +2519,7 @@ ; AVX-LABEL: fptoui_2f64_to_2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; 
AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: retq %cvt = fptoui <2 x double> %a to <2 x i16> ret <2 x i16> %cvt @@ -2536,20 +2528,27 @@ define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) { ; SSE-LABEL: fptosi_8f64_to_8i16: ; SSE: # %bb.0: -; SSE-NEXT: cvttpd2dq %xmm3, %xmm3 -; SSE-NEXT: cvttpd2dq %xmm2, %xmm2 -; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE-NEXT: packssdw %xmm2, %xmm0 +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,1,3,4,5,6,7] +; SSE-NEXT: cvttpd2dq %xmm3, %xmm0 +; SSE-NEXT: cvttpd2dq %xmm2, %xmm2 +; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,2,0] +; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] +; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE-NEXT: retq ; ; VEX-LABEL: fptosi_8f64_to_8i16: ; VEX: # %bb.0: ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1 +; VEX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0 -; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; VEX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; VEX-NEXT: vzeroupper ; VEX-NEXT: retq ; @@ -2589,25 +2588,26 @@ define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) { ; SSE-LABEL: fptoui_8f64_to_8i16: ; SSE: # %bb.0: -; SSE-NEXT: cvttpd2dq %xmm3, %xmm3 -; SSE-NEXT: cvttpd2dq %xmm2, %xmm2 -; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7] -; SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7] -; SSE-NEXT: 
pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,1,3,4,5,6,7] +; SSE-NEXT: cvttpd2dq %xmm3, %xmm0 +; SSE-NEXT: cvttpd2dq %xmm2, %xmm2 +; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,2,0] +; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] +; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE-NEXT: retq ; ; VEX-LABEL: fptoui_8f64_to_8i16: ; VEX: # %bb.0: ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1 +; VEX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VEX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] ; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; VEX-NEXT: vzeroupper ; VEX-NEXT: retq @@ -2649,35 +2649,52 @@ ; SSE-LABEL: fptosi_16f32_to_16i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq %xmm3, %xmm3 +; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0] +; SSE-NEXT: pand %xmm4, %xmm3 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2 -; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: pand %xmm4, %xmm2 +; SSE-NEXT: packuswb %xmm3, %xmm2 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1 +; SSE-NEXT: pand %xmm4, %xmm1 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pand %xmm4, %xmm0 +; SSE-NEXT: packuswb %xmm1, %xmm0 +; SSE-NEXT: packuswb %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: 
fptosi_16f32_to_16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1],xmm1[2],xmm3[3],xmm1[4],xmm3[5],xmm1[6],xmm3[7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm3[1],xmm4[2],xmm3[3],xmm4[4],xmm3[5],xmm4[6],xmm3[7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3],xmm0[4],xmm3[5],xmm0[6],xmm3[7] +; AVX1-NEXT: vpackusdw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: fptosi_16f32_to_16i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] +; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm1, 
%xmm0, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -2695,11 +2712,16 @@ ; SSE-LABEL: fptoui_16f32_to_16i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq %xmm3, %xmm3 +; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0] +; SSE-NEXT: pand %xmm4, %xmm3 ; SSE-NEXT: cvttps2dq %xmm2, %xmm2 -; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: pand %xmm4, %xmm2 +; SSE-NEXT: packuswb %xmm3, %xmm2 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1 +; SSE-NEXT: pand %xmm4, %xmm1 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: pand %xmm4, %xmm0 +; SSE-NEXT: packuswb %xmm1, %xmm0 ; SSE-NEXT: packuswb %xmm2, %xmm0 ; SSE-NEXT: retq ; @@ -2707,10 +2729,18 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1],xmm1[2],xmm3[3],xmm1[4],xmm3[5],xmm1[6],xmm3[7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm3[1],xmm4[2],xmm3[3],xmm4[4],xmm3[5],xmm4[6],xmm3[7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3],xmm0[4],xmm3[5],xmm0[6],xmm3[7] +; AVX1-NEXT: vpackusdw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2718,11 +2748,15 @@ ; AVX2-LABEL: fptoui_16f32_to_16i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 
= [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] +; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq