diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -825,9 +825,11 @@ UNALIASED_CUSTOM_BUILTIN(vsx_lxvp, "W256SLLiW256C*", false) UNALIASED_CUSTOM_BUILTIN(vsx_stxvp, "vW256SLLiW256C*", false) UNALIASED_CUSTOM_BUILTIN(vsx_assemble_pair, "vW256*VV", false) +UNALIASED_CUSTOM_BUILTIN(vsx_build_pair, "vW256*VV", false) UNALIASED_CUSTOM_BUILTIN(vsx_disassemble_pair, "vv*W256*", false) UNALIASED_CUSTOM_BUILTIN(mma_assemble_acc, "vW512*VVVV", false) +UNALIASED_CUSTOM_BUILTIN(mma_build_acc, "vW512*VVVV", false) UNALIASED_CUSTOM_BUILTIN(mma_disassemble_acc, "vv*W512*", false) UNALIASED_CUSTOM_BUILTIN(mma_xxmtacc, "vW512*", true) UNALIASED_CUSTOM_BUILTIN(mma_xxmfacc, "vW512*", true) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -15731,6 +15731,20 @@ } return Call; } + if (BuiltinID == PPC::BI__builtin_vsx_build_pair || + BuiltinID == PPC::BI__builtin_mma_build_acc) { + bool IsLE = getTarget().isLittleEndian(); + if (IsLE) { // LE only: reverse Ops[1..NumVecs] in place; Ops[0] stays put. + SmallVector<Value *, 4> RevOps; + unsigned NumVecs = 2; // __builtin_vsx_build_pair takes two vectors. + if (BuiltinID == PPC::BI__builtin_mma_build_acc) + NumVecs = 4; // __builtin_mma_build_acc takes four vectors. + for (unsigned i = NumVecs; i > 0; --i) + RevOps.push_back(Ops[i]); + for (unsigned i = 1; i <= NumVecs; ++i) + Ops[i] = RevOps[i - 1]; + } + } bool Accumulate; switch (BuiltinID) { #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \ diff --git a/clang/test/CodeGen/builtins-ppc-pair-mma.c b/clang/test/CodeGen/builtins-ppc-pair-mma.c --- a/clang/test/CodeGen/builtins-ppc-pair-mma.c +++ b/clang/test/CodeGen/builtins-ppc-pair-mma.c @@ -16,6 +16,21 @@ *((__vector_quad *)resp) = res; } +// CHECK-LABEL: @test1a( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.build.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +void test1a(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_quad res; + __builtin_mma_build_acc(&res, vc, vc, vc, vc); + *((__vector_quad *)resp) = res; +} + // CHECK-LABEL: @test2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* @@ -57,6 +72,21 @@ *((__vector_pair *)resp) = res; } +// CHECK-LABEL: @test3a( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.build.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>* +// CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: ret void +// +void test3a(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_pair res; + __builtin_vsx_build_pair(&res, vc, vc); + *((__vector_pair *)resp) = res; +} + // CHECK-LABEL: @test4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* diff --git a/clang/test/Sema/ppc-pair-mma-types.c b/clang/test/Sema/ppc-pair-mma-types.c --- a/clang/test/Sema/ppc-pair-mma-types.c +++ b/clang/test/Sema/ppc-pair-mma-types.c @@ -253,6 +253,18 @@ *vpp = vp3; } +// local +void testVPLocal1(int *ptr, vector unsigned char vc) { + __vector_pair *vpp = (__vector_pair *)ptr; + __vector_pair vp1 = *vpp; + __vector_pair vp2; + __builtin_vsx_build_pair(&vp2, vc, vc); + __vector_pair vp3; + __vector_quad vq; + __builtin_mma_xvf64ger(&vq, vp3, vc); + *vpp = vp3; +} + // struct field struct TestVPStruct { int a; diff --git 
a/clang/test/SemaCXX/ppc-pair-mma-types.cpp b/clang/test/SemaCXX/ppc-pair-mma-types.cpp --- a/clang/test/SemaCXX/ppc-pair-mma-types.cpp +++ b/clang/test/SemaCXX/ppc-pair-mma-types.cpp @@ -368,6 +368,7 @@ return *vpp; // expected-error {{invalid use of PPC MMA type}} }; auto f3 = [](vector unsigned char vc) { __vector_pair vp; __builtin_vsx_assemble_pair(&vp, vc, vc); return vp; }; // expected-error {{invalid use of PPC MMA type}} + auto f4 = [](vector unsigned char vc) { __vector_pair vp; __builtin_vsx_build_pair(&vp, vc, vc); return vp; }; // expected-error {{invalid use of PPC MMA type}} } // cast diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1439,6 +1439,10 @@ Intrinsic<[llvm_v256i1_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_vsx_build_pair : + Intrinsic<[llvm_v256i1_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_vsx_disassemble_pair : Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty], [llvm_v256i1_ty], [IntrNoMem]>; @@ -1448,6 +1452,11 @@ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_mma_build_acc : + Intrinsic<[llvm_v512i1_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_ppc_mma_disassemble_acc : Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [llvm_v512i1_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1610,6 +1610,9 @@ def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0, v16i8:$vs3, v16i8:$vs2)), (XXMTACC Concats.VecsToVecQuad)>; + def : Pat<(v512i1 (int_ppc_mma_build_acc v16i8:$vs1, v16i8:$vs0, + v16i8:$vs3, v16i8:$vs2)), + (XXMTACC Concats.VecsToVecQuad)>; def : Pat<(v512i1 
(PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>; def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)), Extracts.Vec0>; @@ -1626,6 +1629,8 @@ Concats.VecsToVecPair0>; def : Pat<(v256i1 (int_ppc_vsx_assemble_pair v16i8:$vs1, v16i8:$vs0)), Concats.VecsToVecPair0>; + def : Pat<(v256i1 (int_ppc_vsx_build_pair v16i8:$vs1, v16i8:$vs0)), + Concats.VecsToVecPair0>; def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, 0)), (v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>; def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, 1)), diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -40,6 +40,101 @@ ret void } +; build_acc +declare <512 x i1> @llvm.ppc.mma.build.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) +define void @build_acc1(<512 x i1>* %ptr, <16 x i8> %vc) { +; CHECK-LABEL: build_acc1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: xxlor vs0, v2, v2 +; CHECK-NEXT: xxlor vs1, v3, v3 +; CHECK-NEXT: xxlor vs2, v2, v2 +; CHECK-NEXT: xxlor vs3, v3, v3 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: build_acc1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: xxlor vs0, v2, v2 +; CHECK-BE-NEXT: xxlor vs1, v3, v3 +; CHECK-BE-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-NEXT: xxlor vs3, v3, v3 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: blr +entry: + %0 = tail call <512 x i1> @llvm.ppc.mma.build.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %0, <512 x i1>* %ptr, align 64 + ret void +} + +; build_acc +define void @build_acc2(<512 x i1>* %ptr, <16 x i8> %in1, <16 x i8> %in2, <16 x i8> %in3, <16 x i8> %in4) { +; CHECK-LABEL: build_acc2: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlor vs0, v5, v5 +; CHECK-NEXT: xxlor vs1, v4, v4 +; CHECK-NEXT: # kill: def $vsrp0 killed $vsrp0 def $uacc0 +; CHECK-NEXT: vmr v4, v3 +; CHECK-NEXT: vmr v5, v2 +; CHECK-NEXT: xxlor vs2, v4, v4 +; CHECK-NEXT: xxlor vs3, v5, v5 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: build_acc2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxlor vs0, v5, v5 +; CHECK-BE-NEXT: xxlor vs1, v4, v4 +; CHECK-BE-NEXT: # kill: def $vsrp0 killed $vsrp0 def $uacc0 +; CHECK-BE-NEXT: vmr v4, v3 +; CHECK-BE-NEXT: vmr v5, v2 +; CHECK-BE-NEXT: xxlor vs2, v4, v4 +; CHECK-BE-NEXT: xxlor vs3, v5, v5 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: blr +entry: +%0 = tail call <512 x i1> @llvm.ppc.mma.build.acc(<16 x i8> %in4, <16 x i8> %in3, <16 x i8> %in2, <16 x i8> %in1) +store <512 x i1> %0, <512 x i1>* %ptr, align 64 +ret void +} + +; build_pair +declare <256 x i1> @llvm.ppc.vsx.build.pair(<16 x i8>, <16 x i8>) +define void @build_pair(<256 x i1>* %ptr, <16 x i8> %in1, <16 x i8> %in2) { +; CHECK-LABEL: build_pair: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmr v4, v3 +; CHECK-NEXT: vmr v5, v2 +; CHECK-NEXT: stxv v4, 16(r3) +; CHECK-NEXT: stxv v5, 0(r3) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: build_pair: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmr v4, v3 +; CHECK-BE-NEXT: vmr v5, v2 +; CHECK-BE-NEXT: stxv v2, 16(r3) +; CHECK-BE-NEXT: stxv v4, 0(r3) +; CHECK-BE-NEXT: blr +entry: +%0 = tail call <256 x i1> @llvm.ppc.vsx.build.pair(<16 x i8> %in2, <16 x i8> %in1) +store <256 x i1> %0, <256 x i1>* %ptr, align 64 +ret void +} + ; xxmtacc declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>) define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) { @@ -185,18 +280,18 @@ ; CHECK-LABEL: testBranch: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: cmplwi r7, 0 -; CHECK-NEXT: beq cr0, .LBB5_2 +; CHECK-NEXT: beq cr0, .LBB8_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: xxsetaccz acc0 -; CHECK-NEXT: b .LBB5_3 -; CHECK-NEXT: .LBB5_2: # %if.else +; CHECK-NEXT: b .LBB8_3 +; CHECK-NEXT: .LBB8_2: # %if.else ; CHECK-NEXT: lxv vs1, 32(r3) ; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv vs3, 0(r3) ; CHECK-NEXT: lxv vs2, 16(r3) ; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: xvi4ger8pp acc0, v2, v2 -; CHECK-NEXT: .LBB5_3: # %if.end +; CHECK-NEXT: .LBB8_3: # %if.end ; CHECK-NEXT: xxmfacc acc0 ; CHECK-NEXT: stxv vs0, 48(r3) ; CHECK-NEXT: stxv vs1, 32(r3) @@ -207,18 +302,18 @@ ; CHECK-BE-LABEL: testBranch: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: cmplwi r7, 0 -; CHECK-BE-NEXT: beq cr0, .LBB5_2 +; CHECK-BE-NEXT: beq cr0, .LBB8_2 ; CHECK-BE-NEXT: # %bb.1: # %if.then ; CHECK-BE-NEXT: xxsetaccz acc0 -; CHECK-BE-NEXT: b .LBB5_3 -; CHECK-BE-NEXT: .LBB5_2: # %if.else +; CHECK-BE-NEXT: b .LBB8_3 +; CHECK-BE-NEXT: .LBB8_2: # %if.else ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2 -; CHECK-BE-NEXT: .LBB5_3: # %if.end +; CHECK-BE-NEXT: .LBB8_3: # %if.end ; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: stxv vs0, 0(r3) @@ -397,7 +492,7 @@ ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB9_2: # %for.body +; CHECK-NEXT: .LBB12_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: rldic r7, r6, 4, 28 ; CHECK-NEXT: xxsetaccz acc2 @@ -432,7 +527,7 @@ ; CHECK-NEXT: stxv vs1, 160(r7) ; CHECK-NEXT: stxv vs2, 144(r7) ; CHECK-NEXT: stxv vs3, 128(r7) -; CHECK-NEXT: bdnz .LBB9_2 +; CHECK-NEXT: bdnz .LBB12_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr ; @@ -446,7 +541,7 @@ ; CHECK-BE-NEXT: mtctr r4 ; CHECK-BE-NEXT: li r4, 0 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB9_2: # %for.body +; 
CHECK-BE-NEXT: .LBB12_2: # %for.body ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: rldic r7, r6, 4, 28 ; CHECK-BE-NEXT: xxsetaccz acc2 @@ -481,7 +576,7 @@ ; CHECK-BE-NEXT: stxv vs0, 128(r7) ; CHECK-BE-NEXT: stxv vs3, 176(r7) ; CHECK-BE-NEXT: stxv vs2, 160(r7) -; CHECK-BE-NEXT: bdnz .LBB9_2 +; CHECK-BE-NEXT: bdnz .LBB12_2 ; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-BE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll @@ -51,6 +51,42 @@ ret void } +; build_pair +declare <256 x i1> @llvm.ppc.vsx.build.pair(<16 x i8>, <16 x i8>) +define void @build_pair(<256 x i1>* %ptr, <16 x i8> %vc) { +; CHECK-LABEL: build_pair: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: blr +; +; CHECK-NOMMA-LABEL: build_pair: +; CHECK-NOMMA: # %bb.0: # %entry +; CHECK-NOMMA-NEXT: vmr v3, v2 +; CHECK-NOMMA-NEXT: stxv v2, 16(r3) +; CHECK-NOMMA-NEXT: stxv v3, 0(r3) +; CHECK-NOMMA-NEXT: blr +; +; CHECK-BE-LABEL: build_pair: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: stxv v2, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: blr +; +; CHECK-BE-NOMMA-LABEL: build_pair: +; CHECK-BE-NOMMA: # %bb.0: # %entry +; CHECK-BE-NOMMA-NEXT: vmr v3, v2 +; CHECK-BE-NOMMA-NEXT: stxv v2, 16(r3) +; CHECK-BE-NOMMA-NEXT: stxv v2, 0(r3) +; CHECK-BE-NOMMA-NEXT: blr +entry: + %0 = tail call <256 x i1> @llvm.ppc.vsx.build.pair(<16 x i8> %vc, <16 x i8> %vc) + store <256 x i1> %0, <256 x i1>* %ptr, align 32 + ret void +} + ; disassemble_pair declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>) define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {