diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1247,7 +1247,7 @@ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); } - if (Subtarget.isISA3_1()) + if (Subtarget.isISA3_1() && Subtarget.isPPC64()) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); } @@ -10444,6 +10444,8 @@ return Op; if (Subtarget.isISA3_1()) { + if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64()) + return SDValue(); // On P10, we have legal lowering for constant and variable indices for // integer vectors. if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1,3 +1,6 @@ +//-------------------------- Predicate definitions ---------------------------// +def IsPPC32 : Predicate<"!Subtarget->isPPC64()">; + // Mask immediates for MMA instructions (2, 4 and 8 bits). 
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
 def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
@@ -2752,7 +2755,40 @@
             (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
 }
 
-let Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
+let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
+  // Indexed vector insert element. The VINS[BHW]LX index register holds a
+  // byte offset, so the element index is scaled by the element size here
+  // (x2 for halfwords, x4 for words); byte inserts need no scaling.
+  def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)),
+            (VINSBLX $vDi, $rB, $rA)>;
+  def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)),
+            (VINSHLX $vDi, (RLWINM $rB, 1, 0, 30), $rA)>;
+  def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)),
+            (VINSWLX $vDi, (RLWINM $rB, 2, 0, 29), $rA)>;
+
+  def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)),
+            (VINSWLX $vDi, (RLWINM $rB, 2, 0, 29), Bitcast.FltToInt)>;
+  def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)),
+            (VINSWLX $vDi, (RLWINM $rB, 2, 0, 29), (LWZ memri:$rA))>;
+  def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)),
+            (VINSWLX $vDi, (RLWINM $rB, 2, 0, 29), (PLWZ memri34:$rA))>;
+  def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i32:$rB)),
+            (VINSWLX $vDi, (RLWINM $rB, 2, 0, 29), (LWZX memrr:$rA))>;
+
+  // Immediate vector insert element
+  foreach i = [0, 1, 2, 3] in {
+    def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))),
+              (VINSW $vDi, !mul(i, 4), $rA)>;
+    def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i32 i))),
+              (VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
+    def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i32 i))),
+              (VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
+    def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i32 i))),
+              (VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
+  }
+}
+
+let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
   // Indexed vector insert element
   def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
             (VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
--- 
a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-64-P10 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-32-P10 ; Byte indexed @@ -22,6 +24,16 @@ ; CHECK-32-NEXT: stbx 4, 5, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testByte: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: vinsblx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testByte: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: vinsblx 2, 6, 4 +; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i8 %vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx @@ -48,6 +60,17 @@ ; CHECK-32-NEXT: sthx 4, 5, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testHalf: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: slwi 4, 4, 1 +; CHECK-64-P10-NEXT: vinshlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testHalf: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: vinshlx 2, 6, 4 +; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i16 %vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx @@ -74,6 +97,17 @@ ; CHECK-32-NEXT: stwx 4, 5, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testWord: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; 
CHECK-32-P10-LABEL: testWord: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: vinswlx 2, 6, 4 +; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i32 %vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx @@ -96,6 +130,18 @@ ; CHECK-32-NEXT: xxinsertw 34, 0, 4 ; CHECK-32-NEXT: xxinsertw 34, 0, 12 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testWordImm: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: vinsw 2, 3, 4 +; CHECK-64-P10-NEXT: vinsw 2, 3, 12 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testWordImm: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: vinsw 2, 4, 4 +; CHECK-32-P10-NEXT: vinsw 2, 4, 12 +; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i32 %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1 @@ -130,6 +176,20 @@ ; CHECK-32-NEXT: stwx 4, 5, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleword: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleword: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: add 5, 6, 6 +; CHECK-32-P10-NEXT: vinswlx 2, 5, 3 +; CHECK-32-P10-NEXT: addi 3, 5, 1 +; CHECK-32-P10-NEXT: vinswlx 2, 3, 4 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx ret <2 x i64> %vecins @@ -151,6 +211,17 @@ ; CHECK-32-NEXT: mtfprwz 0, 4 ; CHECK-32-NEXT: xxinsertw 34, 0, 12 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoublewordImm: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: vinsd 2, 3, 8 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoublewordImm: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: vinsw 2, 3, 8 +; CHECK-32-P10-NEXT: vinsw 2, 4, 12 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x i64> %a, i64 %b, i32 1 ret <2 x i64> %vecins @@ -170,6 +241,17 @@ ; CHECK-32-NEXT: mtfprwz 0, 4 ; CHECK-32-NEXT: xxinsertw 34, 0, 4 ; 
CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoublewordImm2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: vinsd 2, 3, 0 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoublewordImm2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: vinsw 2, 3, 0 +; CHECK-32-P10-NEXT: vinsw 2, 4, 4 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x i64> %a, i64 %b, i32 0 ret <2 x i64> %vecins @@ -195,6 +277,24 @@ ; CHECK-32-NEXT: stfsx 1, 4, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloat1: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: xscvdpspn 0, 1 +; CHECK-64-P10-NEXT: extsw 3, 4 +; CHECK-64-P10-NEXT: slwi 3, 3, 2 +; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-P10-NEXT: mffprwz 4, 0 +; CHECK-64-P10-NEXT: vinswlx 2, 3, 4 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloat1: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: xscvdpspn 0, 1 +; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-P10-NEXT: mffprwz 3, 0 +; CHECK-32-P10-NEXT: vinswlx 2, 4, 3 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <4 x float> %a, float %b, i32 %idx1 ret <4 x float> %vecins @@ -203,18 +303,18 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testFloat2: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: lwz 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stwx 6, 7, 4 -; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: lwz 3, 1(3) -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stwx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: lwz 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; 
CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: lwz 3, 1(3) +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloat2: @@ -232,6 +332,26 @@ ; CHECK-32-NEXT: stwx 3, 4, 5 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloat2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lwz 6, 0(3) +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: lwz 3, 1(3) +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 +; CHECK-64-P10-NEXT: extsw 4, 5 +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloat2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lwz 6, 0(3) +; CHECK-32-P10-NEXT: lwz 3, 1(3) +; CHECK-32-P10-NEXT: vinswlx 2, 4, 6 +; CHECK-32-P10-NEXT: vinswlx 2, 5, 3 +; CHECK-32-P10-NEXT: blr entry: %0 = bitcast i8* %b to float* %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 @@ -246,21 +366,21 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testFloat3: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: lwzx 6, 3, 6 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stwx 6, 7, 4 -; CHECK-64-DAG: li 4, 1 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: rldic 4, 4, 36, 27 -; CHECK-64-DAG: lwzx 3, 3, 4 -; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stwx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: lwzx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: lwzx 3, 
3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloat3: @@ -279,6 +399,29 @@ ; CHECK-32-NEXT: stwx 3, 4, 5 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloat3: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0 +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 +; CHECK-64-P10-NEXT: li 4, 1 +; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-P10-NEXT: lwzx 3, 3, 4 +; CHECK-64-P10-NEXT: extsw 4, 5 +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloat3: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lis 6, 1 +; CHECK-32-P10-NEXT: lwzx 6, 3, 6 +; CHECK-32-P10-NEXT: lwz 3, 0(3) +; CHECK-32-P10-NEXT: vinswlx 2, 4, 6 +; CHECK-32-P10-NEXT: vinswlx 2, 5, 3 +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 %0 = bitcast i8* %add.ptr to float* @@ -309,6 +452,22 @@ ; CHECK-32-NEXT: xxinsertw 34, 0, 0 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloatImm1: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: xscvdpspn 0, 1 +; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-P10-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-P10-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloatImm1: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: xscvdpspn 0, 1 +; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <4 x float> %a, float %b, i32 0 %vecins1 = insertelement <4 x float> %vecins, float %b, i32 2 @@ -339,6 +498,22 @@ ; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; 
CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloatImm2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lwz 4, 0(3) +; CHECK-64-P10-NEXT: lwz 3, 4(3) +; CHECK-64-P10-NEXT: vinsw 2, 4, 0 +; CHECK-64-P10-NEXT: vinsw 2, 3, 8 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloatImm2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lwz 4, 0(3) +; CHECK-32-P10-NEXT: lwz 3, 4(3) +; CHECK-32-P10-NEXT: vinsw 2, 4, 0 +; CHECK-32-P10-NEXT: vinsw 2, 3, 8 +; CHECK-32-P10-NEXT: blr entry: %0 = bitcast i32* %b to float* %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1 @@ -378,6 +553,25 @@ ; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloatImm3: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: plwz 4, 262144(3), 0 +; CHECK-64-P10-NEXT: vinsw 2, 4, 0 +; CHECK-64-P10-NEXT: li 4, 1 +; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25 +; CHECK-64-P10-NEXT: lwzx 3, 3, 4 +; CHECK-64-P10-NEXT: vinsw 2, 3, 8 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloatImm3: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lis 4, 4 +; CHECK-32-P10-NEXT: lwzx 4, 3, 4 +; CHECK-32-P10-NEXT: lwz 3, 0(3) +; CHECK-32-P10-NEXT: vinsw 2, 4, 0 +; CHECK-32-P10-NEXT: vinsw 2, 3, 8 +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 %0 = bitcast i32* %add.ptr to float* @@ -410,6 +604,23 @@ ; CHECK-32-NEXT: stfdx 1, 4, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDouble1: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: mffprd 3, 1 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDouble1: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: addi 4, 1, -16 +; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: stxv 34, -16(1) +; 
CHECK-32-P10-NEXT: stfdx 1, 4, 3 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x double> %a, double %b, i32 %idx1 ret <2 x double> %vecins @@ -418,19 +629,19 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testDouble2: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: ld 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stdx 6, 7, 4 -; CHECK-64-DAG: li 4, 1 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: ldx 3, 3, 4 -; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stdx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: ld 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testDouble2: @@ -448,6 +659,34 @@ ; CHECK-32-NEXT: stfdx 1, 3, 5 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDouble2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: ld 6, 0(3) +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: pld 3, 1(3), 0 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6 +; CHECK-64-P10-NEXT: extsw 4, 5 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDouble2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lfd 0, 0(3) +; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: stxv 34, -32(1) 
+; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: stfdx 0, 6, 4 +; CHECK-32-P10-NEXT: lxv 0, -32(1) +; CHECK-32-P10-NEXT: lfd 1, 1(3) +; CHECK-32-P10-NEXT: addi 3, 1, -16 +; CHECK-32-P10-NEXT: stxv 0, -16(1) +; CHECK-32-P10-NEXT: stfdx 1, 3, 5 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %0 = bitcast i8* %b to double* %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 @@ -462,21 +701,21 @@ define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testDouble3: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: ldx 6, 3, 6 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stdx 6, 7, 4 -; CHECK-64-DAG: li 4, 1 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: rldic 4, 4, 36, 27 -; CHECK-64-DAG: ldx 3, 3, 4 -; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stdx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: ldx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testDouble3: @@ -495,6 +734,37 @@ ; CHECK-32-NEXT: stfdx 1, 3, 5 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDouble3: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: pld 6, 65536(3), 0 +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6 +; CHECK-64-P10-NEXT: li 4, 1 +; 
CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-P10-NEXT: ldx 3, 3, 4 +; CHECK-64-P10-NEXT: extsw 4, 5 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDouble3: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lis 6, 1 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: lfdx 0, 3, 6 +; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: stxv 34, -32(1) +; CHECK-32-P10-NEXT: stfdx 0, 6, 4 +; CHECK-32-P10-NEXT: lxv 0, -32(1) +; CHECK-32-P10-NEXT: lfd 1, 0(3) +; CHECK-32-P10-NEXT: addi 3, 1, -16 +; CHECK-32-P10-NEXT: stxv 0, -16(1) +; CHECK-32-P10-NEXT: stfdx 1, 3, 5 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 %0 = bitcast i8* %add.ptr to double* @@ -521,6 +791,18 @@ ; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm1: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm1: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x double> %a, double %b, i32 0 ret <2 x double> %vecins @@ -538,6 +820,18 @@ ; CHECK-32-NEXT: lfd 0, 0(3) ; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lfd 0, 0(3) +; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lfd 0, 0(3) +; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1 +; 
CHECK-32-P10-NEXT: blr entry: %0 = bitcast i32* %b to double* %1 = load double, double* %0, align 8 @@ -557,6 +851,18 @@ ; CHECK-32-NEXT: lfd 0, 4(3) ; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm3: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lfd 0, 4(3) +; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm3: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lfd 0, 4(3) +; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 1 %0 = bitcast i32* %add.ptr to double* @@ -579,6 +885,20 @@ ; CHECK-32-NEXT: lfdx 0, 3, 4 ; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm4: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lis 4, 4 +; CHECK-64-P10-NEXT: lfdx 0, 3, 4 +; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm4: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lis 4, 4 +; CHECK-32-P10-NEXT: lfdx 0, 3, 4 +; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 %0 = bitcast i32* %add.ptr to double* @@ -601,6 +921,20 @@ ; CHECK-32-NEXT: lfd 0, 0(3) ; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm5: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: li 4, 1 +; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25 +; CHECK-64-P10-NEXT: lfdx 0, 3, 4 +; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm5: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lfd 0, 0(3) +; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736 %0 = bitcast i32* %add.ptr to double*