diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1252,7 +1252,6 @@ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); } else { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); @@ -10754,6 +10753,24 @@ if (VT == MVT::v2f64 && C) return Op; + if (Subtarget.hasP9Vector()) { + // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way + // because on P10, it allows this specific insert_vector_elt load pattern to + // utilize the refactored load and store infrastructure in order to exploit + // prefixed loads. + // Additionally, on Power9, this enables utilizing direct moves (which are + // cheaper than the alternative sequence). 
+ if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) && + (isa<LoadSDNode>(V2))) { + SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1); + SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2); + SDValue InsVecElt = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector, + BitcastLoad, Op.getOperand(2)); + return DAG.getBitcast(MVT::v4f32, InsVecElt); + } + } + if (Subtarget.isISA3_1()) { if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64()) return SDValue(); diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -2814,31 +2814,27 @@ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), (VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), - (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), i64:$rB)), (VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load XForm:$rA)), i64:$rB)), (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>; def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 
(load xaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; let AddedComplexity = 400 in { // Immediate vector insert element foreach Idx = [0, 1, 2, 3] in { def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)), (VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)), - (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), Idx)), (VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load XForm:$rA)), Idx)), (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>; } foreach i = [0, 1] in @@ -2858,11 +2854,9 @@ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)), (VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)), - (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), i32:$rB)), (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>; - def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load xaddr : $rA)), i32 : $rB)), + def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load XForm : $rA)), i32 : $rB)), (VINSWLX v4f32 : $vDi, InsertEltShift.Left2, (LWZX memrr : $rA))>; } @@ -2879,20 +2873,18 @@ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), (VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), - (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load 
iaddrX34:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), i64:$rB)), (VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load XForm:$rA)), i64:$rB)), (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>; def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; } @@ -2902,13 +2894,10 @@ foreach Idx = [0, 1, 2, 3] in { def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))), (VINSW $vDi, !mul(Idx, 4), $rA)>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), - (Ty Idx))), - (VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load PDForm:$rA)), (Ty Idx))), (VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), + def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load XForm:$rA)), (Ty Idx))), (VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>; } diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ 
b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -306,32 +306,32 @@ ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lwz 6, 0(3) ; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-DAG: addi 7, 1, -16 +; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stwx 6, 7, 4 ; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-NEXT: addi 5, 1, -16 -; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: addi 5, 1, -32 +; CHECK-64-NEXT: lxv 0, -16(1) ; CHECK-64-NEXT: lwz 3, 1(3) -; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stxv 0, -32(1) ; CHECK-64-NEXT: stwx 3, 5, 4 -; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: lxv 34, -32(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloat2: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: lwz 6, 0(3) -; CHECK-32-NEXT: addi 7, 1, -32 +; CHECK-32-NEXT: addi 7, 1, -16 ; CHECK-32-NEXT: rlwinm 4, 4, 2, 28, 29 -; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: stxv 34, -16(1) ; CHECK-32-NEXT: rlwinm 5, 5, 2, 28, 29 ; CHECK-32-NEXT: stwx 6, 7, 4 -; CHECK-32-NEXT: addi 4, 1, -16 -; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: addi 4, 1, -48 +; CHECK-32-NEXT: lxv 0, -16(1) ; CHECK-32-NEXT: lwz 3, 1(3) -; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stxv 0, -48(1) ; CHECK-32-NEXT: stwx 3, 4, 5 -; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: lxv 34, -48(1) ; CHECK-32-NEXT: blr ; ; CHECK-64-P10-LABEL: testFloat2: @@ -371,36 +371,36 @@ ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lis 6, 1 ; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -32 +; CHECK-64-DAG: addi 7, 1, -16 ; CHECK-64-NEXT: lwzx 6, 3, 6 -; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stwx 6, 7, 4 ; CHECK-64-NEXT: li 4, 1 -; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: lxv 0, -16(1) ; CHECK-64-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-NEXT: lwzx 3, 3, 4 ; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-NEXT: addi 5, 1, -16 -; CHECK-64-NEXT: stxv 0, -16(1) +; 
CHECK-64-NEXT: addi 5, 1, -32 +; CHECK-64-NEXT: stxv 0, -32(1) ; CHECK-64-NEXT: stwx 3, 5, 4 -; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: lxv 34, -32(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloat3: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: lis 6, 1 -; CHECK-32-NEXT: addi 7, 1, -32 +; CHECK-32-NEXT: addi 7, 1, -16 ; CHECK-32-NEXT: rlwinm 4, 4, 2, 28, 29 ; CHECK-32-NEXT: rlwinm 5, 5, 2, 28, 29 ; CHECK-32-NEXT: lwzx 6, 3, 6 -; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: stxv 34, -16(1) ; CHECK-32-NEXT: stwx 6, 7, 4 -; CHECK-32-NEXT: addi 4, 1, -16 -; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: addi 4, 1, -48 +; CHECK-32-NEXT: lxv 0, -16(1) ; CHECK-32-NEXT: lwz 3, 0(3) -; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stxv 0, -48(1) ; CHECK-32-NEXT: stwx 3, 4, 5 -; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: lxv 34, -48(1) ; CHECK-32-NEXT: blr ; ; CHECK-64-P10-LABEL: testFloat3: @@ -419,10 +419,9 @@ ; ; CHECK-32-P10-LABEL: testFloat3: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: lis 6, 1 -; CHECK-32-P10-NEXT: slwi 4, 4, 2 -; CHECK-32-P10-NEXT: lwzx 6, 3, 6 +; CHECK-32-P10-NEXT: plwz 6, 65536(3), 0 ; CHECK-32-P10-NEXT: lwz 3, 0(3) +; CHECK-32-P10-NEXT: slwi 4, 4, 2 ; CHECK-32-P10-NEXT: vinswlx 2, 4, 6 ; CHECK-32-P10-NEXT: slwi 4, 5, 2 ; CHECK-32-P10-NEXT: vinswlx 2, 4, 3 @@ -478,21 +477,21 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) { ; CHECK-64-LABEL: testFloatImm2: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: lfs 0, 0(3) -; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: lwz 4, 0(3) +; CHECK-64-NEXT: lwz 3, 4(3) +; CHECK-64-NEXT: mtfprwz 0, 4 ; CHECK-64-NEXT: xxinsertw 34, 0, 0 -; CHECK-64-NEXT: lfs 0, 4(3) -; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: mtfprwz 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 8 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloatImm2: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lfs 0, 0(3) -; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: lwz 4, 0(3) +; CHECK-32-NEXT: lwz 3, 
4(3) +; CHECK-32-NEXT: mtfprwz 0, 4 ; CHECK-32-NEXT: xxinsertw 34, 0, 0 -; CHECK-32-NEXT: lfs 0, 4(3) -; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: mtfprwz 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr ; @@ -526,24 +525,24 @@ ; CHECK-64-LABEL: testFloatImm3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lis 4, 4 -; CHECK-64-NEXT: lfsx 0, 3, 4 +; CHECK-64-NEXT: lwzx 4, 3, 4 +; CHECK-64-NEXT: mtfprwz 0, 4 ; CHECK-64-NEXT: li 4, 1 ; CHECK-64-NEXT: rldic 4, 4, 38, 25 -; CHECK-64-NEXT: xscvdpspn 0, 0 ; CHECK-64-NEXT: xxinsertw 34, 0, 0 -; CHECK-64-NEXT: lfsx 0, 3, 4 -; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: lwzx 3, 3, 4 +; CHECK-64-NEXT: mtfprwz 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 8 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloatImm3: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: lis 4, 4 -; CHECK-32-NEXT: lfsx 0, 3, 4 -; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: lwzx 4, 3, 4 +; CHECK-32-NEXT: lwz 3, 0(3) +; CHECK-32-NEXT: mtfprwz 0, 4 ; CHECK-32-NEXT: xxinsertw 34, 0, 0 -; CHECK-32-NEXT: lfs 0, 0(3) -; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: mtfprwz 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr ; @@ -559,8 +558,7 @@ ; ; CHECK-32-P10-LABEL: testFloatImm3: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: lis 4, 4 -; CHECK-32-P10-NEXT: lwzx 4, 3, 4 +; CHECK-32-P10-NEXT: plwz 4, 262144(3), 0 ; CHECK-32-P10-NEXT: lwz 3, 0(3) ; CHECK-32-P10-NEXT: vinsw 2, 4, 0 ; CHECK-32-P10-NEXT: vinsw 2, 3, 8 diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -214,15 +214,15 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) { ; P9LE-LABEL: s2v_test_f1: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: lfs f0, 0(r3) -; P9LE-NEXT: xscvdpspn vs0, f0 +; P9LE-NEXT: lwz r3, 0(r3) +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: 
xxinsertw v2, vs0, 12 ; P9LE-NEXT: blr ; ; P9BE-LABEL: s2v_test_f1: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: lfs f0, 0(r3) -; P9BE-NEXT: xscvdpspn vs0, f0 +; P9BE-NEXT: lwz r3, 0(r3) +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: xxinsertw v2, vs0, 0 ; P9BE-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -255,16 +255,16 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lwz r3, 0(r5) ; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29 -; CHECK-P9-NEXT: addi r6, r1, -32 -; CHECK-P9-NEXT: stxv v2, -32(r1) +; CHECK-P9-NEXT: addi r6, r1, -16 +; CHECK-P9-NEXT: stxv v2, -16(r1) ; CHECK-P9-NEXT: stwx r3, r6, r4 ; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29 -; CHECK-P9-NEXT: lxv vs0, -32(r1) +; CHECK-P9-NEXT: lxv vs0, -16(r1) ; CHECK-P9-NEXT: lwz r3, 1(r5) -; CHECK-P9-NEXT: addi r5, r1, -16 -; CHECK-P9-NEXT: stxv vs0, -16(r1) +; CHECK-P9-NEXT: addi r5, r1, -32 +; CHECK-P9-NEXT: stxv vs0, -32(r1) ; CHECK-P9-NEXT: stwx r3, r5, r4 -; CHECK-P9-NEXT: lxv v2, -16(r1) +; CHECK-P9-NEXT: lxv v2, -32(r1) ; CHECK-P9-NEXT: blr entry: %0 = bitcast i8* %b to float* @@ -310,19 +310,19 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lis r3, 1 ; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29 -; CHECK-P9-NEXT: addi r6, r1, -32 +; CHECK-P9-NEXT: addi r6, r1, -16 ; CHECK-P9-NEXT: lwzx r3, r5, r3 -; CHECK-P9-NEXT: stxv v2, -32(r1) +; CHECK-P9-NEXT: stxv v2, -16(r1) ; CHECK-P9-NEXT: stwx r3, r6, r4 ; CHECK-P9-NEXT: li r3, 1 ; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29 -; CHECK-P9-NEXT: lxv vs0, -32(r1) +; CHECK-P9-NEXT: lxv vs0, -16(r1) ; CHECK-P9-NEXT: rldic r3, r3, 36, 27 ; CHECK-P9-NEXT: lwzx r3, r5, r3 -; CHECK-P9-NEXT: addi r5, r1, -16 -; CHECK-P9-NEXT: stxv vs0, -16(r1) +; CHECK-P9-NEXT: addi r5, r1, -32 +; CHECK-P9-NEXT: stxv vs0, -32(r1) ; CHECK-P9-NEXT: stwx r3, r5, r4 -; CHECK-P9-NEXT: lxv v2, -16(r1) +; CHECK-P9-NEXT: lxv v2, -32(r1) ; 
CHECK-P9-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 @@ -384,11 +384,11 @@ ; ; CHECK-P9-LABEL: testFloatImm2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lfs f0, 0(r5) -; CHECK-P9-NEXT: xscvdpspn vs0, f0 +; CHECK-P9-NEXT: lwz r3, 0(r5) +; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: lwz r3, 4(r5) ; CHECK-P9-NEXT: xxinsertw v2, vs0, 0 -; CHECK-P9-NEXT: lfs f0, 4(r5) -; CHECK-P9-NEXT: xscvdpspn vs0, f0 +; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 8 ; CHECK-P9-NEXT: blr entry: @@ -426,13 +426,13 @@ ; CHECK-P9-LABEL: testFloatImm3: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lis r3, 4 -; CHECK-P9-NEXT: lfsx f0, r5, r3 +; CHECK-P9-NEXT: lwzx r3, r5, r3 +; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: li r3, 1 ; CHECK-P9-NEXT: rldic r3, r3, 38, 25 -; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 0 -; CHECK-P9-NEXT: lfsx f0, r5, r3 -; CHECK-P9-NEXT: xscvdpspn vs0, f0 +; CHECK-P9-NEXT: lwzx r3, r5, r3 +; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 8 ; CHECK-P9-NEXT: blr entry: