diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3367,6 +3367,15 @@ def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)), (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>; + +defm : ScalToVecWPermute< + v4i32, (i32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; +defm : ScalToVecWPermute< + v4f32, (f32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; } // HasVSX, HasP8Vector, IsBigEndian // Big endian Power8 64Bit VSX subtarget. @@ -3381,14 +3390,6 @@ (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>; def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))), (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>; -defm : ScalToVecWPermute< - v4i32, (i32 (load ForceXForm:$src)), - (XXSLDWIs (LIWZX ForceXForm:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; -defm : ScalToVecWPermute< - v4f32, (f32 (load ForceXForm:$src)), - (XXSLDWIs (LIWZX ForceXForm:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; def : Pat @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { @@ -422,9 +422,8 @@ ; ; P8-AIX-32-LABEL: s2v_test_f1: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 0(r3) ; P8-AIX-32-NEXT: lwz r4, L..C5(r2) # %const.0 -; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: lxsiwzx v3, 0, r3 ; P8-AIX-32-NEXT: lxvw4x v4, 0, r4 ; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX-32-NEXT: blr @@ -466,33 +465,12 @@ ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr ; -; P9-AIX-64-LABEL: s2v_test_f2: -; P9-AIX-64: # %bb.0: # %entry -; P9-AIX-64-NEXT: addi r3, r3, 4 -; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P9-AIX-64-NEXT: vmrgow v2, v3, v2 -; P9-AIX-64-NEXT: blr -; -; P9-AIX-32-LABEL: s2v_test_f2: -; P9-AIX-32: # %bb.0: # %entry -; P9-AIX-32-NEXT: lfs f0, 4(r3) -; P9-AIX-32-NEXT: xscvdpspn v3, f0 -; P9-AIX-32-NEXT: vmrgow v2, v3, v2 -; P9-AIX-32-NEXT: blr -; -; P8-AIX-64-LABEL: s2v_test_f2: -; P8-AIX-64: # %bb.0: # %entry -; P8-AIX-64-NEXT: addi r3, r3, 4 -; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P8-AIX-64-NEXT: vmrgow v2, v3, v2 -; P8-AIX-64-NEXT: blr -; -; P8-AIX-32-LABEL: s2v_test_f2: -; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 4(r3) -; P8-AIX-32-NEXT: xscvdpspn v3, f0 -; P8-AIX-32-NEXT: vmrgow v2, v3, v2 -; P8-AIX-32-NEXT: blr +; AIX-LABEL: s2v_test_f2: +; AIX: # %bb.0: # %entry +; AIX-NEXT: addi r3, r3, 4 +; AIX-NEXT: lxsiwzx v3, 0, r3 +; AIX-NEXT: vmrgow v2, v3, v2 +; AIX-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 %0 = load float, float* %arrayidx, align 8 @@ -542,8 +520,7 @@ ; P9-AIX-32-LABEL: s2v_test_f3: ; P9-AIX-32: # %bb.0: # %entry ; P9-AIX-32-NEXT: slwi r4, r4, 2 -; P9-AIX-32-NEXT: lfsx f0, r3, r4 -; P9-AIX-32-NEXT: xscvdpspn v3, f0 +; P9-AIX-32-NEXT: lxsiwzx v3, r3, r4 ; P9-AIX-32-NEXT: vmrgow v2, v3, v2 ; P9-AIX-32-NEXT: blr ; @@ -557,8 +534,7 @@ ; P8-AIX-32-LABEL: s2v_test_f3: ; P8-AIX-32: # %bb.0: # %entry ; P8-AIX-32-NEXT: slwi r4, r4, 2 -; P8-AIX-32-NEXT: lfsx f0, r3, r4 -; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: lxsiwzx v3, r3, r4 ; P8-AIX-32-NEXT: vmrgow v2, v3, v2 ; P8-AIX-32-NEXT: blr entry: @@ -601,33 +577,12 @@ ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr ; -; P9-AIX-64-LABEL: s2v_test_f4: -; P9-AIX-64: # %bb.0: # %entry -; P9-AIX-64-NEXT: addi r3, r3, 4 -; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P9-AIX-64-NEXT: vmrgow v2, v3, v2 -; P9-AIX-64-NEXT: blr -; -; P9-AIX-32-LABEL: s2v_test_f4: -; P9-AIX-32: # %bb.0: # %entry -; P9-AIX-32-NEXT: lfs f0, 4(r3) -; P9-AIX-32-NEXT: xscvdpspn v3, f0 -; P9-AIX-32-NEXT: vmrgow v2, v3, v2 -; P9-AIX-32-NEXT: blr -; -; P8-AIX-64-LABEL: s2v_test_f4: -; P8-AIX-64: # %bb.0: # %entry -; P8-AIX-64-NEXT: addi r3, r3, 4 -; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P8-AIX-64-NEXT: vmrgow v2, v3, v2 -; P8-AIX-64-NEXT: blr -; -; P8-AIX-32-LABEL: s2v_test_f4: -; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 4(r3) -; P8-AIX-32-NEXT: xscvdpspn v3, f0 -; P8-AIX-32-NEXT: vmrgow v2, v3, v2 -; P8-AIX-32-NEXT: blr +; AIX-LABEL: s2v_test_f4: +; AIX: # %bb.0: # %entry +; AIX-NEXT: addi r3, r3, 4 +; AIX-NEXT: lxsiwzx v3, 0, r3 +; AIX-NEXT: vmrgow v2, v3, v2 +; AIX-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 %0 = load float, float* %arrayidx, align 8 @@ -663,31 +618,11 @@ ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr ; -; P9-AIX-64-LABEL: s2v_test_f5: -; P9-AIX-64: # %bb.0: # %entry -; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P9-AIX-64-NEXT: vmrgow v2, v3, v2 -; P9-AIX-64-NEXT: blr -; -; P9-AIX-32-LABEL: s2v_test_f5: -; P9-AIX-32: # %bb.0: # %entry -; P9-AIX-32-NEXT: lfs f0, 0(r3) -; P9-AIX-32-NEXT: xscvdpspn v3, f0 -; P9-AIX-32-NEXT: vmrgow v2, v3, v2 -; P9-AIX-32-NEXT: blr -; -; P8-AIX-64-LABEL: s2v_test_f5: -; P8-AIX-64: # %bb.0: # %entry -; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P8-AIX-64-NEXT: vmrgow v2, v3, v2 -; P8-AIX-64-NEXT: blr -; -; P8-AIX-32-LABEL: s2v_test_f5: -; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 0(r3) -; P8-AIX-32-NEXT: xscvdpspn v3, f0 -; P8-AIX-32-NEXT: vmrgow v2, v3, v2 -; P8-AIX-32-NEXT: blr +; AIX-LABEL: s2v_test_f5: +; AIX: # %bb.0: # %entry +; AIX-NEXT: lxsiwzx v3, 0, r3 +; AIX-NEXT: vmrgow v2, v3, v2 +; AIX-NEXT: blr entry: %0 = load float, float* %ptr1, align 8 %vecins = insertelement <2 x float> %vec, float %0, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -659,15 +659,14 @@ ; ; AIX-P8-32-LABEL: testFloatImm2: ; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: lfs f0, 0(r3) ; AIX-P8-32-NEXT: lwz r4, L..C8(r2) # %const.0 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lxsiwzx v3, 0, r3 +; AIX-P8-32-NEXT: li r5, 4 ; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 -; AIX-P8-32-NEXT: lfs f0, 4(r3) -; AIX-P8-32-NEXT: lwz r3, L..C9(r2) # %const.1 +; AIX-P8-32-NEXT: lwz r4, L..C9(r2) # %const.1 ; AIX-P8-32-NEXT: vperm v2, v3, v2, v4 -; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lxsiwzx v3, r3, r5 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 ; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 ; AIX-P8-32-NEXT: blr entry: @@ -732,17 +731,15 @@ ; ; AIX-P8-32-LABEL: testFloatImm3: ; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: lis r4, 4 -; AIX-P8-32-NEXT: lfsx f0, r3, r4 ; AIX-P8-32-NEXT: lwz r4, L..C10(r2) # %const.0 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lis r5, 4 +; AIX-P8-32-NEXT: lxsiwzx v3, r3, r5 ; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 -; AIX-P8-32-NEXT: lfs f0, 0(r3) -; AIX-P8-32-NEXT: lwz r3, L..C11(r2) # %const.1 +; AIX-P8-32-NEXT: lwz r4, L..C11(r2) # %const.1 ; AIX-P8-32-NEXT: vperm v2, v3, v2, v4 -; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 -; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-32-NEXT: lxvw4x v3, 0, r4 +; AIX-P8-32-NEXT: lxsiwzx v4, 0, r3 +; AIX-P8-32-NEXT: vperm v2, v2, v4, v3 ; AIX-P8-32-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536