diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1235,7 +1235,7 @@ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); } - if (Subtarget.isISA3_1()) + if (Subtarget.isISA3_1() && Subtarget.isPPC64()) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); } @@ -10338,7 +10338,7 @@ if (VT == MVT::v2f64 && C) return Op; - if (Subtarget.isISA3_1()) { + if (Subtarget.isISA3_1() && Subtarget.isPPC64()) { // On P10, we have legal lowering for constant and variable indices for // integer vectors. if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-64-P10 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-32-P10 ; Byte indexed @@ -22,6 +24,20 @@ ; CHECK-32-NEXT: stbx 4, 5, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testByte: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: vinsblx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testByte: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: addi 5, 1, -16 +; CHECK-32-P10-NEXT: clrlwi 3, 6, 28 +; CHECK-32-P10-NEXT: stxv 34, -16(1) +; CHECK-32-P10-NEXT: stbx 4, 5, 3 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i8 %vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx @@ -48,6 +64,21 @@ ; CHECK-32-NEXT: sthx 4, 5, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testHalf: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: slwi 4, 4, 1 +; CHECK-64-P10-NEXT: vinshlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testHalf: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: addi 5, 1, -16 +; CHECK-32-P10-NEXT: rlwinm 3, 6, 1, 28, 30 +; CHECK-32-P10-NEXT: stxv 34, -16(1) +; CHECK-32-P10-NEXT: sthx 4, 5, 3 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i16 %vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx @@ -74,6 +105,21 @@ ; CHECK-32-NEXT: stwx 4, 5, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testWord: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testWord: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: addi 5, 1, -16 +; CHECK-32-P10-NEXT: rlwinm 3, 6, 2, 28, 29 +; CHECK-32-P10-NEXT: stxv 34, -16(1) +; CHECK-32-P10-NEXT: stwx 4, 5, 3 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i32 %vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx @@ -96,6 +142,19 @@ ; CHECK-32-NEXT: xxinsertw 34, 0, 4 ; CHECK-32-NEXT: xxinsertw 34, 0, 12 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testWordImm: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: vinsw 2, 3, 4 +; CHECK-64-P10-NEXT: vinsw 2, 3, 12 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testWordImm: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: mtfprwz 0, 4 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-P10-NEXT: blr entry: %conv = trunc i64 %b to i32 %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1 @@ -130,6 +189,28 @@ ; CHECK-32-NEXT: stwx 4, 5, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleword: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleword: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: add 5, 6, 6 +; CHECK-32-P10-NEXT: addi 7, 1, -32 +; CHECK-32-P10-NEXT: stxv 34, -32(1) +; CHECK-32-P10-NEXT: rlwinm 6, 5, 2, 28, 29 +; CHECK-32-P10-NEXT: stwx 3, 7, 6 +; CHECK-32-P10-NEXT: addi 3, 5, 1 +; CHECK-32-P10-NEXT: addi 5, 1, -16 +; CHECK-32-P10-NEXT: lxv 0, -32(1) +; CHECK-32-P10-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-P10-NEXT: stxv 0, -16(1) +; CHECK-32-P10-NEXT: stwx 4, 5, 3 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx ret <2 x i64> %vecins @@ -151,6 +232,19 @@ ; CHECK-32-NEXT: mtfprwz 0, 4 ; CHECK-32-NEXT: xxinsertw 34, 0, 12 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoublewordImm: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: vinsd 2, 3, 8 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoublewordImm: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: mtfprwz 0, 3 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-P10-NEXT: mtfprwz 0, 4 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x i64> %a, i64 %b, i32 1 ret <2 x i64> %vecins @@ -170,6 +264,19 @@ ; CHECK-32-NEXT: mtfprwz 0, 4 ; CHECK-32-NEXT: xxinsertw 34, 0, 4 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoublewordImm2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: vinsd 2, 3, 0 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoublewordImm2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: mtfprwz 0, 3 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-P10-NEXT: mtfprwz 0, 4 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x i64> %a, i64 %b, i32 0 ret <2 x i64> %vecins @@ -195,6 +302,25 @@ ; CHECK-32-NEXT: stfsx 1, 4, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloat1: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: xscvdpspn 0, 1 +; CHECK-64-P10-NEXT: extsw 3, 4 +; CHECK-64-P10-NEXT: slwi 3, 3, 2 +; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-P10-NEXT: mffprwz 4, 0 +; CHECK-64-P10-NEXT: vinswlx 2, 3, 4 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloat1: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: rlwinm 3, 4, 2, 28, 29 +; CHECK-32-P10-NEXT: addi 4, 1, -16 +; CHECK-32-P10-NEXT: stxv 34, -16(1) +; CHECK-32-P10-NEXT: stfsx 1, 4, 3 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <4 x float> %a, float %b, i32 %idx1 ret <4 x float> %vecins @@ -203,18 +329,18 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testFloat2: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: lwz 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stwx 6, 7, 4 -; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: lwz 3, 1(3) -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stwx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: lwz 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: lwz 3, 1(3) +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloat2: @@ -232,6 +358,34 @@ ; CHECK-32-NEXT: stwx 3, 4, 5 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloat2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lwz 6, 0(3) +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: lwz 3, 1(3) +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 +; CHECK-64-P10-NEXT: extsw 4, 5 +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloat2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lwz 6, 0(3) +; CHECK-32-P10-NEXT: addi 7, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-32-P10-NEXT: stxv 34, -32(1) +; CHECK-32-P10-NEXT: rlwinm 5, 5, 2, 28, 29 +; CHECK-32-P10-NEXT: stwx 6, 7, 4 +; CHECK-32-P10-NEXT: addi 4, 1, -16 +; CHECK-32-P10-NEXT: lxv 0, -32(1) +; CHECK-32-P10-NEXT: lwz 3, 1(3) +; CHECK-32-P10-NEXT: stxv 0, -16(1) +; CHECK-32-P10-NEXT: stwx 3, 4, 5 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %0 = bitcast i8* %b to float* %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 @@ -246,21 +400,21 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testFloat3: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: lwzx 6, 3, 6 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stwx 6, 7, 4 -; CHECK-64-DAG: li 4, 1 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: rldic 4, 4, 36, 27 -; CHECK-64-DAG: lwzx 3, 3, 4 -; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stwx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: lwzx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: lwzx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testFloat3: @@ -279,6 +433,37 @@ ; CHECK-32-NEXT: stwx 3, 4, 5 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloat3: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0 +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 +; CHECK-64-P10-NEXT: li 4, 1 +; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-P10-NEXT: lwzx 3, 3, 4 +; CHECK-64-P10-NEXT: extsw 4, 5 +; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloat3: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lis 6, 1 +; CHECK-32-P10-NEXT: addi 7, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-32-P10-NEXT: rlwinm 5, 5, 2, 28, 29 +; CHECK-32-P10-NEXT: lwzx 6, 3, 6 +; CHECK-32-P10-NEXT: stxv 34, -32(1) +; CHECK-32-P10-NEXT: stwx 6, 7, 4 +; CHECK-32-P10-NEXT: addi 4, 1, -16 +; CHECK-32-P10-NEXT: lxv 0, -32(1) +; CHECK-32-P10-NEXT: lwz 3, 0(3) +; CHECK-32-P10-NEXT: stxv 0, -16(1) +; CHECK-32-P10-NEXT: stwx 3, 4, 5 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 %0 = bitcast i8* %add.ptr to float* @@ -309,6 +494,22 @@ ; CHECK-32-NEXT: xxinsertw 34, 0, 0 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloatImm1: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: xscvdpspn 0, 1 +; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-P10-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-P10-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloatImm1: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: xscvdpspn 0, 1 +; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <4 x float> %a, float %b, i32 0 %vecins1 = insertelement <4 x float> %vecins, float %b, i32 2 @@ -339,6 +540,26 @@ ; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloatImm2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lwz 4, 0(3) +; CHECK-64-P10-NEXT: lwz 3, 4(3) +; CHECK-64-P10-NEXT: vinsw 2, 4, 0 +; CHECK-64-P10-NEXT: vinsw 2, 3, 8 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloatImm2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lfs 0, 0(3) +; CHECK-32-P10-NEXT: xscvdpspn 0, 0 +; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-P10-NEXT: lfs 0, 4(3) +; CHECK-32-P10-NEXT: xscvdpspn 0, 0 +; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-P10-NEXT: blr entry: %0 = bitcast i32* %b to float* %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1 @@ -378,6 +599,29 @@ ; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 ; CHECK-32-NEXT: xxinsertw 34, 0, 8 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testFloatImm3: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: plwz 4, 262144(3), 0 +; CHECK-64-P10-NEXT: vinsw 2, 4, 0 +; CHECK-64-P10-NEXT: li 4, 1 +; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25 +; CHECK-64-P10-NEXT: lwzx 3, 3, 4 +; CHECK-64-P10-NEXT: vinsw 2, 3, 8 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testFloatImm3: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lis 4, 4 +; CHECK-32-P10-NEXT: lfsx 0, 3, 4 +; CHECK-32-P10-NEXT: xscvdpspn 0, 0 +; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-P10-NEXT: lfs 0, 0(3) +; CHECK-32-P10-NEXT: xscvdpspn 0, 0 +; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-P10-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 %0 = bitcast i32* %add.ptr to float* @@ -410,6 +654,23 @@ ; CHECK-32-NEXT: stfdx 1, 4, 3 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDouble1: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: mffprd 3, 1 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDouble1: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: addi 4, 1, -16 +; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: stxv 34, -16(1) +; CHECK-32-P10-NEXT: stfdx 1, 4, 3 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x double> %a, double %b, i32 %idx1 ret <2 x double> %vecins @@ -418,19 +679,19 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testDouble2: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: ld 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stdx 6, 7, 4 -; CHECK-64-DAG: li 4, 1 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: ldx 3, 3, 4 -; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stdx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: ld 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testDouble2: @@ -448,6 +709,34 @@ ; CHECK-32-NEXT: stfdx 1, 3, 5 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDouble2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: ld 6, 0(3) +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: pld 3, 1(3), 0 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6 +; CHECK-64-P10-NEXT: extsw 4, 5 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDouble2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lfd 0, 0(3) +; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: stxv 34, -32(1) +; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: stfdx 0, 6, 4 +; CHECK-32-P10-NEXT: lxv 0, -32(1) +; CHECK-32-P10-NEXT: lfd 1, 1(3) +; CHECK-32-P10-NEXT: addi 3, 1, -16 +; CHECK-32-P10-NEXT: stxv 0, -16(1) +; CHECK-32-P10-NEXT: stfdx 1, 3, 5 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %0 = bitcast i8* %b to double* %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 @@ -462,21 +751,21 @@ define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { ; CHECK-64-LABEL: testDouble3: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 -; CHECK-64-DAG: ldx 6, 3, 6 -; CHECK-64-DAG: stxv 34, -32(1) -; CHECK-64-DAG: stdx 6, 7, 4 -; CHECK-64-DAG: li 4, 1 -; CHECK-64-DAG: lxv 0, -32(1) -; CHECK-64-DAG: rldic 4, 4, 36, 27 -; CHECK-64-DAG: ldx 3, 3, 4 -; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-DAG: addi 5, 1, -16 -; CHECK-64-DAG: stxv 0, -16(1) -; CHECK-64-DAG: stdx 3, 5, 4 -; CHECK-64-DAG: lxv 34, -16(1) +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: ldx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: testDouble3: @@ -495,6 +784,37 @@ ; CHECK-32-NEXT: stfdx 1, 3, 5 ; CHECK-32-NEXT: lxv 34, -16(1) ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDouble3: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: pld 6, 65536(3), 0 +; CHECK-64-P10-NEXT: extsw 4, 4 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6 +; CHECK-64-P10-NEXT: li 4, 1 +; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-P10-NEXT: ldx 3, 3, 4 +; CHECK-64-P10-NEXT: extsw 4, 5 +; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDouble3: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lis 6, 1 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: lfdx 0, 3, 6 +; CHECK-32-P10-NEXT: addi 6, 1, -32 +; CHECK-32-P10-NEXT: stxv 34, -32(1) +; CHECK-32-P10-NEXT: stfdx 0, 6, 4 +; CHECK-32-P10-NEXT: lxv 0, -32(1) +; CHECK-32-P10-NEXT: lfd 1, 0(3) +; CHECK-32-P10-NEXT: addi 3, 1, -16 +; CHECK-32-P10-NEXT: stxv 0, -16(1) +; CHECK-32-P10-NEXT: stfdx 1, 3, 5 +; CHECK-32-P10-NEXT: lxv 34, -16(1) +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 %0 = bitcast i8* %add.ptr to double* @@ -521,6 +841,18 @@ ; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm1: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm1: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1 +; CHECK-32-P10-NEXT: blr entry: %vecins = insertelement <2 x double> %a, double %b, i32 0 ret <2 x double> %vecins @@ -538,6 +870,18 @@ ; CHECK-32-NEXT: lfd 0, 0(3) ; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm2: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lfd 0, 0(3) +; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm2: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lfd 0, 0(3) +; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-P10-NEXT: blr entry: %0 = bitcast i32* %b to double* %1 = load double, double* %0, align 8 @@ -557,6 +901,18 @@ ; CHECK-32-NEXT: lfd 0, 4(3) ; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm3: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lfd 0, 4(3) +; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm3: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lfd 0, 4(3) +; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 1 %0 = bitcast i32* %add.ptr to double* @@ -579,6 +935,20 @@ ; CHECK-32-NEXT: lfdx 0, 3, 4 ; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm4: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: lis 4, 4 +; CHECK-64-P10-NEXT: lfdx 0, 3, 4 +; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm4: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lis 4, 4 +; CHECK-32-P10-NEXT: lfdx 0, 3, 4 +; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 %0 = bitcast i32* %add.ptr to double* @@ -601,6 +971,20 @@ ; CHECK-32-NEXT: lfd 0, 0(3) ; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 ; CHECK-32-NEXT: blr +; +; CHECK-64-P10-LABEL: testDoubleImm5: +; CHECK-64-P10: # %bb.0: # %entry +; CHECK-64-P10-NEXT: li 4, 1 +; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25 +; CHECK-64-P10-NEXT: lfdx 0, 3, 4 +; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-P10-NEXT: blr +; +; CHECK-32-P10-LABEL: testDoubleImm5: +; CHECK-32-P10: # %bb.0: # %entry +; CHECK-32-P10-NEXT: lfd 0, 0(3) +; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-P10-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736 %0 = bitcast i32* %add.ptr to double*