Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1816,8 +1816,7 @@ // Output dag used to bitcast f32 to i32 and f64 to i64 def Bitcast { - dag FltToInt = (i32 (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI (XSCVDPSPN $A), - (XSCVDPSPN $A), 3), sub_64))); + dag FltToInt = (i32 (MFVSRWZ (EXTRACT_SUBREG (XSCVDPSPN $A), sub_64))); dag DblToLong = (i64 (MFVSRD $A)); } @@ -2212,7 +2211,7 @@ } def AlignValues { - dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3)); + dag F32_TO_BE_WORD1 = (v4f32 (XSCVDPSPN $B)); dag I32_TO_BE_WORD1 = (SUBREG_TO_REG (i64 1), (MTVSRWZ $B), sub_64); } @@ -2796,6 +2795,9 @@ v4i32, FltToUIntLoad.A, (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1), (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64)>; +def : Pat<(v4f32 (build_vector (f32 (fpround f64:$A)), (f32 (fpround f64:$A)), + (f32 (fpround f64:$A)), (f32 (fpround f64:$A)))), + (v4f32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$A), sub_64), 0))>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; def : Pat<(v2f64 (PPCldsplat ForceXForm:$A)), @@ -4095,14 +4097,30 @@ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; +def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 0)), + (v4f32 (XXINSERTW v4f32:$A, + (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 0))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; +def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 1)), + (v4f32 (XXINSERTW v4f32:$A, + (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 4))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; +def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 2)), + (v4f32 (XXINSERTW v4f32:$A, + (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 8))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; +def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 3)), + (v4f32 (XXINSERTW v4f32:$A, + (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 12))>; // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), ForceXForm:$dst), @@ -4274,14 +4292,30 @@ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; +def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 0)), + (v4f32 (XXINSERTW v4f32:$A, + (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 12))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; +def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 1)), + (v4f32 (XXINSERTW v4f32:$A, + (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 8))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; +def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 2)), + (v4f32 (XXINSERTW v4f32:$A, + (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 4))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; +def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 3)), + (v4f32 (XXINSERTW v4f32:$A, + (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 0))>; def : Pat<(v8i16 (PPCld_vec_be ForceXForm:$src)), (COPY_TO_REGCLASS (LXVH8X ForceXForm:$src), VRRC)>; Index: llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll +++ llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll @@ -743,14 +743,12 @@ ; CHECK-64-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: xscvdpspn 0, 1 -; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: xscvdpspn 0, 1 -; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 0 ; CHECK-32-NEXT: blr entry: @@ -762,14 +760,12 @@ ; CHECK-64-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: xscvdpspn 0, 1 -; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 4 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: xscvdpspn 0, 1 -; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 4 ; CHECK-32-NEXT: blr entry: @@ -781,14 +777,12 @@ ; CHECK-64-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: xscvdpspn 0, 1 -; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 8 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: xscvdpspn 0, 1 -; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr entry: @@ -800,14 +794,12 @@ ; CHECK-64-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: xscvdpspn 0, 1 -; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 12 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: xscvdpspn 0, 1 -; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 12 ; CHECK-32-NEXT: blr entry: Index: llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -203,18 +203,18 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testFloat2: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: lwz 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stwx 6, 7, 4 -; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: lwz 3, 1(3) -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stwx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: lwz 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: lwz 3, 1(3) +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloat2: @@ -246,21 +246,21 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testFloat3: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: lwzx 6, 3, 6 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stwx 6, 7, 4 -; CHECK-64-DAG: li 4, 1 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: rldic 4, 4, 36, 27 -; CHECK-64-DAG: lwzx 3, 3, 4 -; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stwx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: lwzx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: lwzx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloat3: @@ -297,7 +297,6 @@ ; CHECK-64-LABEL: testFloatImm1: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: xscvdpspn 0, 1 -; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 0 ; CHECK-64-NEXT: xxinsertw 34, 0, 8 ; CHECK-64-NEXT: blr @@ -305,7 +304,6 @@ ; CHECK-32-LABEL: testFloatImm1: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: xscvdpspn 0, 1 -; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 0 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr @@ -320,11 +318,9 @@ ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lfs 0, 0(3) ; CHECK-64-NEXT: xscvdpspn 0, 0 -; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 0 ; CHECK-64-NEXT: lfs 0, 4(3) ; CHECK-64-NEXT: xscvdpspn 0, 0 -; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 8 ; CHECK-64-NEXT: blr ; @@ -332,11 +328,9 @@ ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: lfs 0, 0(3) ; CHECK-32-NEXT: xscvdpspn 0, 0 -; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 0 ; CHECK-32-NEXT: lfs 0, 4(3) ; CHECK-32-NEXT: xscvdpspn 0, 0 -; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr entry: @@ -358,11 +352,9 @@ ; CHECK-64-NEXT: li 4, 1 ; CHECK-64-NEXT: rldic 4, 4, 38, 25 ; CHECK-64-NEXT: xscvdpspn 0, 0 -; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 0 ; CHECK-64-NEXT: lfsx 0, 3, 4 ; CHECK-64-NEXT: xscvdpspn 0, 0 -; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-64-NEXT: xxinsertw 34, 0, 8 ; CHECK-64-NEXT: blr ; @@ -371,11 +363,9 @@ ; CHECK-32-NEXT: lis 4, 4 ; CHECK-32-NEXT: lfsx 0, 3, 4 ; CHECK-32-NEXT: xscvdpspn 0, 0 -; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 0 ; CHECK-32-NEXT: lfs 0, 0(3) ; CHECK-32-NEXT: xscvdpspn 0, 0 -; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr entry: @@ -418,19 +408,19 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testDouble2: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: ld 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stdx 6, 7, 4 -; CHECK-64-DAG: li 4, 1 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: ldx 3, 3, 4 -; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stdx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: ld 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testDouble2: @@ -462,21 +452,21 @@ define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testDouble3: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: ldx 6, 3, 6 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stdx 6, 7, 4 -; CHECK-64-DAG: li 4, 1 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: rldic 4, 4, 36, 27 -; CHECK-64-DAG: ldx 3, 3, 4 -; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stdx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: ldx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testDouble3: Index: llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll =================================================================== --- llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll +++ llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll @@ -10,8 +10,8 @@ ; CHECK-P7: stfs 1, ; CHECK-P7: lwa 3, ; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1 -; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3 -; CHECK: mffprwz 3, [[SHIFTREG]] +; CHECK-NOT: xxsldwi +; CHECK: mffprwz 3, [[CONVREG]] } define i64 @f64toi64(double %a) { @@ -50,8 +50,8 @@ ; CHECK-P7: stfs 1, ; CHECK-P7: lwz 3, ; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1 -; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3 -; CHECK: mffprwz 3, [[SHIFTREG]] +; CHECK-NOT: xxsldwi +; CHECK: mffprwz 3, [[CONVREG]] } define i64 @f64toi64u(double %a) { Index: llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll =================================================================== --- llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll +++ llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll @@ -506,11 +506,9 @@ entry: ; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_ ; CHECK: xscvdpspn 0, 1 -; CHECK: xxsldwi 0, 0, 0, 3 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_ ; CHECK-BE: xscvdpspn 0, 1 -; CHECK-BE: xxsldwi 0, 0, 0, 3 ; CHECK-BE: xxinsertw 34, 0, 0 %vecins = insertelement <4 x float> %a, float %b, i32 0 ret <4 x float> %vecins @@ -520,11 +518,9 @@ entry: ; CHECK-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_ ; CHECK: xscvdpspn 0, 1 -; CHECK: xxsldwi 0, 0, 0, 3 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_ ; CHECK-BE: xscvdpspn 0, 1 -; CHECK-BE: xxsldwi 0, 0, 0, 3 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = insertelement <4 x float> %a, float %b, i32 1 ret <4 x float> %vecins @@ -534,11 +530,9 @@ entry: ; CHECK-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_ ; CHECK: xscvdpspn 0, 1 -; CHECK: xxsldwi 0, 0, 0, 3 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_ ; CHECK-BE: xscvdpspn 0, 1 -; CHECK-BE: xxsldwi 0, 0, 0, 3 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = insertelement <4 x float> %a, float %b, i32 2 ret <4 x float> %vecins @@ -548,11 +542,9 @@ entry: ; CHECK-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_ ; CHECK: xscvdpspn 0, 1 -; CHECK: xxsldwi 0, 0, 0, 3 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_ ; CHECK-BE: xscvdpspn 0, 1 -; CHECK-BE: xxsldwi 0, 0, 0, 3 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = insertelement <4 x float> %a, float %b, i32 3 ret <4 x float> %vecins Index: llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll =================================================================== --- llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -216,7 +216,6 @@ ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfs f0, 0(r3) ; P9LE-NEXT: xscvdpspn vs0, f0 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 ; P9LE-NEXT: xxinsertw v2, vs0, 12 ; P9LE-NEXT: blr ; @@ -224,7 +223,6 @@ ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: lfs f0, 0(r3) ; P9BE-NEXT: xscvdpspn vs0, f0 -; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 ; P9BE-NEXT: xxinsertw v2, vs0, 0 ; P9BE-NEXT: blr ; Index: llvm/test/CodeGen/PowerPC/vec_insert_elt.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -200,21 +200,19 @@ ; CHECK-LABEL: testFloat1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpspn vs0, f1 -; CHECK-NEXT: extsw r3, r6 -; CHECK-NEXT: slwi r3, r3, 2 -; CHECK-NEXT: xxsldwi vs0, vs0, vs0, 3 -; CHECK-NEXT: mffprwz r4, f0 -; CHECK-NEXT: vinswrx v2, r3, r4 +; CHECK-NEXT: extsw r4, r6 +; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testFloat1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpspn vs0, f1 -; CHECK-BE-NEXT: extsw r3, r6 -; CHECK-BE-NEXT: slwi r3, r3, 2 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: vinswlx v2, r3, r4 +; CHECK-BE-NEXT: extsw r4, r6 +; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testFloat1: @@ -346,7 +344,6 @@ ; CHECK-LABEL: testFloatImm1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpspn vs0, f1 -; CHECK-NEXT: xxsldwi vs0, vs0, vs0, 3 ; CHECK-NEXT: xxinsertw v2, vs0, 12 ; CHECK-NEXT: xxinsertw v2, vs0, 4 ; CHECK-NEXT: blr @@ -354,7 +351,6 @@ ; CHECK-BE-LABEL: testFloatImm1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpspn vs0, f1 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 ; CHECK-BE-NEXT: xxinsertw v2, vs0, 0 ; CHECK-BE-NEXT: xxinsertw v2, vs0, 8 ; CHECK-BE-NEXT: blr @@ -362,7 +358,6 @@ ; CHECK-P9-LABEL: testFloatImm1: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpspn vs0, f1 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 0 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 8 ; CHECK-P9-NEXT: blr @@ -393,11 +388,9 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lfs f0, 0(r5) ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 0 ; CHECK-P9-NEXT: lfs f0, 4(r5) ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 8 ; CHECK-P9-NEXT: blr entry: @@ -439,11 +432,9 @@ ; CHECK-P9-NEXT: li r3, 1 ; CHECK-P9-NEXT: rldic r3, r3, 38, 25 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 0 ; CHECK-P9-NEXT: lfsx f0, r5, r3 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 8 ; CHECK-P9-NEXT: blr entry: @@ -738,3 +729,26 @@ ret <2 x double> %vecins } +define dso_local <4 x float> @testInsertDoubleToFloat(<4 x float> %a, double %b) local_unnamed_addr #0 { +; CHECK-LABEL: testInsertDoubleToFloat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsp f0, f1 +; CHECK-NEXT: xxinsertw v2, vs0, 8 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: testInsertDoubleToFloat: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xscvdpsp f0, f1 +; CHECK-BE-NEXT: xxinsertw v2, vs0, 4 +; CHECK-BE-NEXT: blr +; +; CHECK-P9-LABEL: testInsertDoubleToFloat: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: xscvdpsp f0, f1 +; CHECK-P9-NEXT: xxinsertw v2, vs0, 4 +; CHECK-P9-NEXT: blr +entry: + %conv = fptrunc double %b to float + %vecins = insertelement <4 x float> %a, float %conv, i32 1 + ret <4 x float> %vecins +}