diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1247,9 +1247,16 @@ } if (Subtarget.hasP9Altivec()) { - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); - + if (Subtarget.isISA3_1()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); + } else { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); + } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal); @@ -1258,9 +1265,6 @@ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); } - - if (Subtarget.isISA3_1()) - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); } if (Subtarget.pairedVectorMemops()) { @@ -10752,7 +10756,6 @@ SDLoc dl(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue V3 = Op.getOperand(2); if (VT == MVT::v2f64 && C) return Op; @@ -10761,18 +10764,10 @@ if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64()) return SDValue(); // On P10, we have legal lowering for constant and variable indices for - // integer vectors. + // all vectors. if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || - VT == MVT::v2i64) - return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3); - // For f32 and f64 vectors, we have legal lowering for variable indices. 
- // For f32 we also have legal lowering when the element is loaded from - // memory. - if (VT == MVT::v4f32 || VT == MVT::v2f64) { - if (!C || (VT == MVT::v4f32 && isa<LoadSDNode>(V2))) - return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3); + VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) return Op; - } } // Before P10, we have legal lowering for constant indices but not for diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -29,9 +29,6 @@ def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [ SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1> ]>; -def SDT_PPCVecInsertElt : SDTypeProfile<1, 3, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3> -]>; //===----------------------------------------------------------------------===// // ISA 3.1 specific PPCISD nodes. @@ -45,7 +42,6 @@ def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx, []>; def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>; -def PPCvecinsertelt : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsertElt, []>; //===----------------------------------------------------------------------===// @@ -2797,135 +2793,128 @@ } def InsertEltShift { - dag Sub32Left0 = (EXTRACT_SUBREG $rB, sub_32); + dag Sub32 = (i32 (EXTRACT_SUBREG $rB, sub_32)); dag Sub32Left1 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 1, 0, 30); dag Sub32Left2 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 2, 0, 29); + dag Left1 = (RLWINM $rB, 1, 0, 30); + dag Left2 = (RLWINM $rB, 2, 0, 29); dag Left3 = (RLWINM8 $rB, 3, 0, 28); } let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in { // Indexed vector insert element - def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)), - (VINSBRX $vDi, InsertEltShift.Sub32Left0, $rA)>; - def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)), + def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)), + (VINSBRX $vDi, InsertEltShift.Sub32, 
$rA)>; + def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)), (VINSHRX $vDi, InsertEltShift.Sub32Left1, $rA)>; - def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)), + def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)), (VINSWRX $vDi, InsertEltShift.Sub32Left2, $rA)>; - def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)), + def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, $rA)>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)), - (VINSWRX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), + (VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)), (VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>; - def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), (VINSDRX $vDi, 
InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; - - // Immediate vector insert element - foreach i = [0, 1, 2, 3] in { - def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))), - (VINSW $vDi, !mul(!sub(3, i), 4), $rA)>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))), - (VINSW $vDi, !mul(!sub(3, i), 4), (LWZ memri:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))), - (VINSW $vDi, !mul(!sub(3, i), 4), (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))), - (VINSW $vDi, !mul(!sub(3, i), 4), (LWZX memrr:$rA))>; + let AddedComplexity = 400 in { + // Immediate vector insert element + foreach Idx = [0, 1, 2, 3] in { + def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)), + (VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)), + (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)), + (VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)), + (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>; + } + foreach i = [0, 1] in + def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))), + (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>; } - foreach i = [0, 1] in - def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))), - (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>; } let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in { // Indexed vector insert element - def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)), + def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i32:$rB)), (VINSBLX $vDi, $rB, $rA)>; 
- def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)), - (VINSHLX $vDi, $rB, $rA)>; - def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)), - (VINSWLX $vDi, $rB, $rA)>; - - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)), - (VINSWLX $vDi, $rB, Bitcast.FltToInt)>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), - i32:$rB)), - (VINSWLX $vDi, $rB, (LWZ memri:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), - i32:$rB)), - (VINSWLX $vDi, $rB, (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), - i32:$rB)), - (VINSWLX $vDi, $rB, (LWZX memrr:$rA))>; + def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i32:$rB)), + (VINSHLX $vDi, InsertEltShift.Left1, $rA)>; + def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i32:$rB)), + (VINSWLX $vDi, InsertEltShift.Left2, $rA)>; - // Immediate vector insert element - foreach i = [0, 1, 2, 3] in { - def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))), - (VINSW $vDi, !mul(i, 4), $rA)>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), - (i32 i))), - (VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), - (i32 i))), - (VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), - (i32 i))), - (VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>; - } + def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)), + (VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)), + (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)), + (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i32:$rB)), + (VINSWLX v4f32:$vDi, 
InsertEltShift.Left2, (LWZX memrr:$rA))>; } let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in { // Indexed vector insert element - def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)), - (VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>; - def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)), + def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)), + (VINSBLX $vDi, InsertEltShift.Sub32, $rA)>; + def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)), (VINSHLX $vDi, InsertEltShift.Sub32Left1, $rA)>; - def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)), + def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)), (VINSWLX $vDi, InsertEltShift.Sub32Left2, $rA)>; - def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)), + def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, $rA)>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)), - (VINSWLX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), + (VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)), (VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>; - def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), + (VINSDLX $vDi, 
InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; +} +let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in { // Immediate vector insert element - foreach i = [0, 1, 2, 3] in { - def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))), - (VINSW $vDi, !mul(i, 4), $rA)>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))), - (VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))), - (VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))), - (VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>; + foreach Ty = [i32, i64] in { + foreach Idx = [0, 1, 2, 3] in { + def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))), + (VINSW $vDi, !mul(Idx, 4), $rA)>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), + (Ty Idx))), + (VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), + (Ty Idx))), + (VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>; + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), + (Ty Idx))), + (VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>; + } } - foreach i = [0, 1] in - def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 
i))), - (VINSD $vDi, !mul(i, 8), $rA)>; + + foreach Idx = [0, 1] in + def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, Idx)), + (VINSD $vDi, !mul(Idx, 8), $rA)>; } diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -69,7 +69,8 @@ ; ; CHECK-32-P10-LABEL: testHalf: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: vinshlx 2, 6, 4 +; CHECK-32-P10-NEXT: slwi 3, 6, 1 +; CHECK-32-P10-NEXT: vinshlx 2, 3, 4 ; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i16 @@ -106,7 +107,8 @@ ; ; CHECK-32-P10-LABEL: testWord: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: vinswlx 2, 6, 4 +; CHECK-32-P10-NEXT: slwi 3, 6, 2 +; CHECK-32-P10-NEXT: vinswlx 2, 3, 4 ; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i32 @@ -186,8 +188,10 @@ ; CHECK-32-P10-LABEL: testDoubleword: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: add 5, 6, 6 -; CHECK-32-P10-NEXT: vinswlx 2, 5, 3 +; CHECK-32-P10-NEXT: slwi 6, 5, 2 +; CHECK-32-P10-NEXT: vinswlx 2, 6, 3 ; CHECK-32-P10-NEXT: addi 3, 5, 1 +; CHECK-32-P10-NEXT: slwi 3, 3, 2 ; CHECK-32-P10-NEXT: vinswlx 2, 3, 4 ; CHECK-32-P10-NEXT: blr entry: @@ -280,18 +284,17 @@ ; ; CHECK-64-P10-LABEL: testFloat1: ; CHECK-64-P10: # %bb.0: # %entry -; CHECK-64-P10-NEXT: xscvdpspn 0, 1 -; CHECK-64-P10-NEXT: extsw 4, 4 -; CHECK-64-P10-NEXT: slwi 4, 4, 2 -; CHECK-64-P10-NEXT: mffprwz 3, 0 -; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 +; CHECK-64-P10-NEXT: xscvdpspn 35, 1 +; CHECK-64-P10-NEXT: extsw 3, 4 +; CHECK-64-P10-NEXT: slwi 3, 3, 2 +; CHECK-64-P10-NEXT: vinswvlx 2, 3, 3 ; CHECK-64-P10-NEXT: blr ; ; CHECK-32-P10-LABEL: testFloat1: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: xscvdpspn 0, 1 -; CHECK-32-P10-NEXT: mffprwz 3, 0 -; CHECK-32-P10-NEXT: vinswlx 2, 4, 3 +; CHECK-32-P10-NEXT: xscvdpspn 35, 1 +; CHECK-32-P10-NEXT: slwi 3, 4, 2 +; CHECK-32-P10-NEXT: vinswvlx 
2, 3, 3 ; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <4 x float> %a, float %b, i32 %idx1 @@ -347,8 +350,10 @@ ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: lwz 6, 0(3) ; CHECK-32-P10-NEXT: lwz 3, 1(3) +; CHECK-32-P10-NEXT: slwi 4, 4, 2 ; CHECK-32-P10-NEXT: vinswlx 2, 4, 6 -; CHECK-32-P10-NEXT: vinswlx 2, 5, 3 +; CHECK-32-P10-NEXT: slwi 4, 5, 2 +; CHECK-32-P10-NEXT: vinswlx 2, 4, 3 ; CHECK-32-P10-NEXT: blr entry: %0 = bitcast i8* %b to float* @@ -415,10 +420,12 @@ ; CHECK-32-P10-LABEL: testFloat3: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: lis 6, 1 +; CHECK-32-P10-NEXT: slwi 4, 4, 2 ; CHECK-32-P10-NEXT: lwzx 6, 3, 6 ; CHECK-32-P10-NEXT: lwz 3, 0(3) ; CHECK-32-P10-NEXT: vinswlx 2, 4, 6 -; CHECK-32-P10-NEXT: vinswlx 2, 5, 3 +; CHECK-32-P10-NEXT: slwi 4, 5, 2 +; CHECK-32-P10-NEXT: vinswlx 2, 4, 3 ; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -199,20 +199,18 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) { ; CHECK-LABEL: testFloat1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscvdpspn vs0, f1 -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: slwi r4, r4, 2 -; CHECK-NEXT: mffprwz r3, f0 -; CHECK-NEXT: vinswrx v2, r4, r3 +; CHECK-NEXT: xscvdpspn v3, f1 +; CHECK-NEXT: extsw r3, r6 +; CHECK-NEXT: slwi r3, r3, 2 +; CHECK-NEXT: vinswvrx v2, r3, v3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testFloat1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpspn vs0, f1 -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: slwi r4, r4, 2 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: vinswlx v2, r4, r3 +; CHECK-BE-NEXT: xscvdpspn v3, f1 +; CHECK-BE-NEXT: extsw r3, r6 +; CHECK-BE-NEXT: slwi r3, r3, 2 +; CHECK-BE-NEXT: vinswvlx v2, r3, v3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testFloat1: