diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -20,11 +20,15 @@
 // MMA builtins that are using their own format documented below.
 #if defined(BUILTIN) && !defined(CUSTOM_BUILTIN)
-# define CUSTOM_BUILTIN(ID, TYPES, ACCUMULATE) BUILTIN(__builtin_##ID, "i.", "t")
+# define CUSTOM_BUILTIN(ID, INTR, TYPES, ACCUMULATE) \
+    BUILTIN(__builtin_##ID, "i.", "t")
 #elif defined(CUSTOM_BUILTIN) && !defined(BUILTIN)
 # define BUILTIN(ID, TYPES, ATTRS)
 #endif

+#define UNALIASED_CUSTOM_BUILTIN(ID, TYPES, ACCUMULATE) \
+  CUSTOM_BUILTIN(ID, ID, TYPES, ACCUMULATE)
+
 BUILTIN(__builtin_ppc_get_timebase, "ULLi", "n")

 // This is just a placeholder, the types and attributes are wrong.
@@ -674,90 +678,103 @@
 // Because these built-ins rely on target-dependent types and to avoid pervasive
 // change, they are type checked manually in Sema using custom type descriptors.
 // The first argument of the CUSTOM_BUILTIN macro is the name of the built-in
-// with its prefix, the second argument specifies the type of the function
+// with its prefix, the second argument is the name of the intrinsic this
+// built-in generates, the third argument specifies the type of the function
 // (result value, then each argument) as follows:
 //  i -> Unsigned integer followed by the greatest possible value for that
 //       argument or 0 if no constraint on the value.
 //       (e.g. i15 for a 4-bits value)
-//  V -> Vector type used with MMA builtins (vector unsigned char)
+//  V -> Vector type used with MMA built-ins (vector unsigned char)
 //  W -> PPC Vector type followed by the size of the vector type.
 //       (e.g. W512 for __vector_quad)
 //  any other descriptor -> Fall back to generic type descriptor decoding.
 // The 'C' suffix can be used as a suffix to specify the const type.
 // The '*' suffix can be used as a suffix to specify a pointer to a type.
-// The third argument is set to true if the builtin accumulates its result into
+// The fourth argument is set to true if the built-in accumulates its result into
 // its given accumulator.
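(Editorial aside, not part of the patch: BuiltinsPPC.def is an X-macro table, so the new INTR parameter only matters to whichever consumer defines CUSTOM_BUILTIN before including the file. As the hunks below show, CodeGen expands INTR into `Intrinsic::ppc_##Intr`, while Sema only consumes the TYPES descriptor string. The following self-contained C sketch illustrates the pattern; the two-entry table, the `PPC_BUILTIN_TABLE` name, and the printf expansions are invented for brevity.)

```c
/* Hedged illustration of the X-macro pattern used by BuiltinsPPC.def.
 * The table and both expansions are invented; only the shape matches clang. */
#include <stdio.h>

/* Stand-in for a tiny BuiltinsPPC.def: one aliased and one unaliased entry. */
#define PPC_BUILTIN_TABLE                                        \
  CUSTOM_BUILTIN(mma_lxvp, vsx_lxvp, "W256SLLiW256C*", false)    \
  UNALIASED_CUSTOM_BUILTIN(vsx_lxvp, "W256SLLiW256C*", false)

/* Same helper as in the patch: ID and INTR are identical, so the name
 * does not have to be written twice. */
#define UNALIASED_CUSTOM_BUILTIN(ID, TYPES, ACC) \
  CUSTOM_BUILTIN(ID, ID, TYPES, ACC)

int main(void) {
  /* First expansion: the user-visible builtin names plus the TYPES string
   * (roughly what Sema's custom type checking looks at). */
#define CUSTOM_BUILTIN(ID, INTR, TYPES, ACC) \
  printf("__builtin_%s checks descriptor %s\n", #ID, TYPES);
  PPC_BUILTIN_TABLE
#undef CUSTOM_BUILTIN

  /* Second expansion: the intrinsic each builtin lowers to
   * (roughly what CodeGen does with Intrinsic::ppc_##Intr). */
#define CUSTOM_BUILTIN(ID, INTR, TYPES, ACC) \
  printf("__builtin_%s -> Intrinsic::ppc_%s\n", #ID, #INTR);
  PPC_BUILTIN_TABLE
#undef CUSTOM_BUILTIN
  return 0;
}
```

With this two-pass expansion, both `__builtin_mma_lxvp` and `__builtin_vsx_lxvp` report `Intrinsic::ppc_vsx_lxvp`, which is the whole point of adding the INTR column.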
-CUSTOM_BUILTIN(vsx_lxvp, "W256SLLiW256C*", false)
-CUSTOM_BUILTIN(vsx_stxvp, "vW256SLLiW256C*", false)
-CUSTOM_BUILTIN(vsx_assemble_pair, "vW256*VV", false)
-CUSTOM_BUILTIN(vsx_disassemble_pair, "vv*W256*", false)
-
-CUSTOM_BUILTIN(mma_assemble_acc, "vW512*VVVV", false)
-CUSTOM_BUILTIN(mma_disassemble_acc, "vv*W512*", false)
-CUSTOM_BUILTIN(mma_xxmtacc, "vW512*", true)
-CUSTOM_BUILTIN(mma_xxmfacc, "vW512*", true)
-CUSTOM_BUILTIN(mma_xxsetaccz, "vW512*", false)
-CUSTOM_BUILTIN(mma_xvi4ger8, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvi8ger4, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvi16ger2, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvi16ger2s, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvf16ger2, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvf32ger, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvf64ger, "vW512*W256V", false)
-CUSTOM_BUILTIN(mma_pmxvi4ger8, "vW512*VVi15i15i255", false)
-CUSTOM_BUILTIN(mma_pmxvi8ger4, "vW512*VVi15i15i15", false)
-CUSTOM_BUILTIN(mma_pmxvi16ger2, "vW512*VVi15i15i3", false)
-CUSTOM_BUILTIN(mma_pmxvi16ger2s, "vW512*VVi15i15i3", false)
-CUSTOM_BUILTIN(mma_pmxvf16ger2, "vW512*VVi15i15i3", false)
-CUSTOM_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15", false)
-CUSTOM_BUILTIN(mma_pmxvf64ger, "vW512*W256Vi15i3", false)
-CUSTOM_BUILTIN(mma_xvi4ger8pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvi8ger4pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvi8ger4spp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvi16ger2pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvi16ger2spp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_pmxvi4ger8pp, "vW512*VVi15i15i255", true)
-CUSTOM_BUILTIN(mma_pmxvi8ger4pp, "vW512*VVi15i15i15", true)
-CUSTOM_BUILTIN(mma_pmxvi8ger4spp, "vW512*VVi15i15i15", true)
-CUSTOM_BUILTIN(mma_pmxvi16ger2pp, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvi16ger2spp, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_xvf16ger2pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf16ger2pn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf16ger2np, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf16ger2nn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_pmxvf16ger2pp, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf16ger2pn, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf16ger2np, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf16ger2nn, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_xvf32gerpp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf32gerpn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf32gernp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf32gernn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_pmxvf32gerpp, "vW512*VVi15i15", true)
-CUSTOM_BUILTIN(mma_pmxvf32gerpn, "vW512*VVi15i15", true)
-CUSTOM_BUILTIN(mma_pmxvf32gernp, "vW512*VVi15i15", true)
-CUSTOM_BUILTIN(mma_pmxvf32gernn, "vW512*VVi15i15", true)
-CUSTOM_BUILTIN(mma_xvf64gerpp, "vW512*W256V", true)
-CUSTOM_BUILTIN(mma_xvf64gerpn, "vW512*W256V", true)
-CUSTOM_BUILTIN(mma_xvf64gernp, "vW512*W256V", true)
-CUSTOM_BUILTIN(mma_xvf64gernn, "vW512*W256V", true)
-CUSTOM_BUILTIN(mma_pmxvf64gerpp, "vW512*W256Vi15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf64gerpn, "vW512*W256Vi15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf64gernp, "vW512*W256Vi15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf64gernn, "vW512*W256Vi15i3", true)
-CUSTOM_BUILTIN(mma_xvbf16ger2, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3", false)
-CUSTOM_BUILTIN(mma_xvbf16ger2pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvbf16ger2pn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvbf16ger2np, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvbf16ger2nn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2pp, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2pn, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2np, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true)
+// Provide builtins with the _mma_ prefix for compatibility.
+CUSTOM_BUILTIN(mma_lxvp, vsx_lxvp, "W256SLLiW256C*", false)
+CUSTOM_BUILTIN(mma_stxvp, vsx_stxvp, "vW256SLLiW256C*", false)
+CUSTOM_BUILTIN(mma_assemble_pair, vsx_assemble_pair, "vW256*VV", false)
+CUSTOM_BUILTIN(mma_disassemble_pair, vsx_disassemble_pair, "vv*W256*", false)
+
+// The UNALIASED_CUSTOM_BUILTIN macro is used for built-ins whose name is the
+// same as that of the intrinsic they generate, i.e. the ID and INTR arguments
+// are the same, so the name does not have to be repeated in the macro
+// invocation.
+
+UNALIASED_CUSTOM_BUILTIN(vsx_lxvp, "W256SLLiW256C*", false)
+UNALIASED_CUSTOM_BUILTIN(vsx_stxvp, "vW256SLLiW256C*", false)
+UNALIASED_CUSTOM_BUILTIN(vsx_assemble_pair, "vW256*VV", false)
+UNALIASED_CUSTOM_BUILTIN(vsx_disassemble_pair, "vv*W256*", false)
+
+UNALIASED_CUSTOM_BUILTIN(mma_assemble_acc, "vW512*VVVV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_disassemble_acc, "vv*W512*", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xxmtacc, "vW512*", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xxmfacc, "vW512*", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xxsetaccz, "vW512*", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi4ger8, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi8ger4, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2s, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32ger, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64ger, "vW512*W256V", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi4ger8, "vW512*VVi15i15i255", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi8ger4, "vW512*VVi15i15i15", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2, "vW512*VVi15i15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2s, "vW512*VVi15i15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2, "vW512*VVi15i15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64ger, "vW512*W256Vi15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi4ger8pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi8ger4pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi8ger4spp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2spp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi4ger8pp, "vW512*VVi15i15i255", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi8ger4pp, "vW512*VVi15i15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi8ger4spp, "vW512*VVi15i15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2pp, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2spp, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2pn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2np, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2nn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2pp, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2pn, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2np, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2nn, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32gerpp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32gerpn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32gernp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32gernn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gerpp, "vW512*VVi15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gerpn, "vW512*VVi15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gernp, "vW512*VVi15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gernn, "vW512*VVi15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64gerpp, "vW512*W256V", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64gerpn, "vW512*W256V", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64gernp, "vW512*W256V", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64gernn, "vW512*W256V", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gerpp, "vW512*W256Vi15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gerpn, "vW512*W256Vi15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gernp, "vW512*W256Vi15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gernn, "vW512*W256Vi15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2pn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2np, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2nn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2pp, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2pn, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2np, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true)
 // FIXME: Obviously incomplete.
 #undef BUILTIN
 #undef CUSTOM_BUILTIN
+#undef UNALIASED_CUSTOM_BUILTIN
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15349,7 +15349,7 @@
   // use custom code generation to expand a builtin call with a pointer to a
   // load (if the corresponding instruction accumulates its result) followed by
   // the call to the intrinsic and a store of the result.
-#define CUSTOM_BUILTIN(Name, Types, Accumulate) \
+#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate) \
   case PPC::BI__builtin_##Name:
 #include "clang/Basic/BuiltinsPPC.def"
   {
@@ -15358,7 +15358,8 @@
     // return values. So, here we emit code extracting these values from the
    // intrinsic results and storing them using that pointer.
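(Editorial aside, not part of the patch: at the source level the effect of these CodeGen changes is that the new mma-prefixed spellings behave exactly like the existing vsx-prefixed ones. A hypothetical user-level sketch follows; function names are invented, and it assumes a Power10 target with MMA and AltiVec enabled, e.g. something like `clang -c -mcpu=pwr10 example.c`.)

```c
/* Hypothetical usage sketch: the new __builtin_mma_lxvp/__builtin_mma_stxvp
 * aliases take the same arguments as __builtin_vsx_lxvp/__builtin_vsx_stxvp
 * (a signed long long offset and a __vector_pair pointer) and lower to the
 * same @llvm.ppc.vsx.lxvp / @llvm.ppc.vsx.stxvp intrinsics. */
void copy_pair(const __vector_pair *src, __vector_pair *dst) {
  __vector_pair vp = __builtin_mma_lxvp(0LL, src); /* load a 256-bit pair */
  __builtin_mma_stxvp(vp, 0LL, dst);               /* store it back out   */
}

void ger_step(__vector_quad *acc, vector unsigned char a, vector unsigned char b) {
  /* xvf32gerpp is marked as accumulating in BuiltinsPPC.def, so CodeGen
   * loads *acc, calls @llvm.ppc.mma.xvf32gerpp, and stores the result back. */
  __builtin_mma_xvf32gerpp(acc, a, b);
}
```

Both spellings of the pair load resolve to the same intrinsic call, which is exactly what the INTR column added to CUSTOM_BUILTIN encodes; the accumulate flag drives the load/intrinsic/store expansion described in the comment above.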
     if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
-        BuiltinID == PPC::BI__builtin_vsx_disassemble_pair) {
+        BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
+        BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
       unsigned NumVecs = 2;
       auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
       if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
@@ -15381,16 +15382,19 @@
     }
     bool Accumulate;
     switch (BuiltinID) {
-  #define CUSTOM_BUILTIN(Name, Types, Acc) \
+  #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
     case PPC::BI__builtin_##Name: \
-      ID = Intrinsic::ppc_##Name; \
+      ID = Intrinsic::ppc_##Intr; \
       Accumulate = Acc; \
       break;
   #include "clang/Basic/BuiltinsPPC.def"
     }
     if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
-        BuiltinID == PPC::BI__builtin_vsx_stxvp) {
-      if (BuiltinID == PPC::BI__builtin_vsx_lxvp) {
+        BuiltinID == PPC::BI__builtin_vsx_stxvp ||
+        BuiltinID == PPC::BI__builtin_mma_lxvp ||
+        BuiltinID == PPC::BI__builtin_mma_stxvp) {
+      if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
+          BuiltinID == PPC::BI__builtin_mma_lxvp) {
       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
     } else {
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3331,7 +3331,7 @@
     return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7);
   case PPC::BI__builtin_vsx_xxpermx:
     return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
-#define CUSTOM_BUILTIN(Name, Types, Acc) \
+#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
   case PPC::BI__builtin_##Name: \
     return SemaBuiltinPPCMMACall(TheCall, Types);
 #include "clang/Basic/BuiltinsPPC.def"
diff --git a/clang/test/CodeGen/builtins-ppc-pair-mma.c b/clang/test/CodeGen/builtins-ppc-pair-mma.c --- a/clang/test/CodeGen/builtins-ppc-pair-mma.c +++ b/clang/test/CodeGen/builtins-ppc-pair-mma.c @@ -5,7 +5,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: ret void // void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -46,7 +46,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>* -// CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6 +// CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6:![0-9]+]] // CHECK-NEXT: ret void // void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -78,10 +78,10 @@ // CHECK-LABEL: @test5( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64,
!tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -94,10 +94,10 @@ // CHECK-LABEL: @test6( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -111,7 +111,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -125,7 +125,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -139,7 +139,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -153,7 +153,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -167,7 +167,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test11(unsigned char *vqp, unsigned 
char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -181,7 +181,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -195,7 +195,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -208,10 +208,10 @@ // CHECK-LABEL: @test14( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa !6 +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa [[TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -225,7 +225,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -239,7 +239,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -253,7 +253,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char 
vc, unsigned char *resp) { @@ -267,7 +267,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -281,7 +281,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -295,7 +295,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -308,10 +308,10 @@ // CHECK-LABEL: @test21( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa !6 +// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa [[TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -324,10 +324,10 @@ // CHECK-LABEL: @test22( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -340,10 +340,10 @@ // CHECK-LABEL: @test23( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, 
!tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -356,10 +356,10 @@ // CHECK-LABEL: @test24( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -372,10 +372,10 @@ // CHECK-LABEL: @test25( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -388,10 +388,10 @@ // CHECK-LABEL: @test26( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -404,10 +404,10 @@ // CHECK-LABEL: @test27( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> 
[[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -420,10 +420,10 @@ // CHECK-LABEL: @test28( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -436,10 +436,10 @@ // CHECK-LABEL: @test29( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -452,10 +452,10 @@ // CHECK-LABEL: @test30( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -468,10 +468,10 @@ // CHECK-LABEL: @test31( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// 
CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -484,10 +484,10 @@ // CHECK-LABEL: @test32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -500,10 +500,10 @@ // CHECK-LABEL: @test33( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -516,10 +516,10 @@ // CHECK-LABEL: @test34( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -532,10 +532,10 @@ // CHECK-LABEL: @test35( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test35(unsigned 
char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -548,10 +548,10 @@ // CHECK-LABEL: @test36( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -564,10 +564,10 @@ // CHECK-LABEL: @test37( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -580,10 +580,10 @@ // CHECK-LABEL: @test38( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -596,10 +596,10 @@ // CHECK-LABEL: @test39( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -612,10 +612,10 @@ // CHECK-LABEL: @test40( // 
CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -628,10 +628,10 @@ // CHECK-LABEL: @test41( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -644,10 +644,10 @@ // CHECK-LABEL: @test42( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -660,10 +660,10 @@ // CHECK-LABEL: @test43( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -676,10 +676,10 @@ // CHECK-LABEL: @test44( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, 
<512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -692,10 +692,10 @@ // CHECK-LABEL: @test45( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -708,10 +708,10 @@ // CHECK-LABEL: @test46( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -724,10 +724,10 @@ // CHECK-LABEL: @test47( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -740,12 +740,12 @@ // CHECK-LABEL: @test48( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x 
i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6 +// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -758,12 +758,12 @@ // CHECK-LABEL: @test49( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6 +// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -776,12 +776,12 @@ // CHECK-LABEL: @test50( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6 +// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* -// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2 +// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { @@ -794,12 +794,12 @@ // CHECK-LABEL: @test51( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* -// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6 +// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) // 
CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -812,12 +812,12 @@
// CHECK-LABEL: @test52(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -830,12 +830,12 @@
// CHECK-LABEL: @test53(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -848,12 +848,12 @@
// CHECK-LABEL: @test54(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -866,12 +866,12 @@
// CHECK-LABEL: @test55(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -885,7 +885,7 @@
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -899,7 +899,7 @@
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -912,10 +912,10 @@
// CHECK-LABEL: @test58(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -928,10 +928,10 @@
// CHECK-LABEL: @test59(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -944,10 +944,10 @@
// CHECK-LABEL: @test60(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -960,10 +960,10 @@
// CHECK-LABEL: @test61(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -976,10 +976,10 @@
// CHECK-LABEL: @test62(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -992,10 +992,10 @@
// CHECK-LABEL: @test63(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1008,10 +1008,10 @@
// CHECK-LABEL: @test64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1024,10 +1024,10 @@
// CHECK-LABEL: @test65(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1143,13 +1143,13 @@
// CHECK-LABEL: @test73(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8
// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
+// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1162,12 +1162,12 @@
// CHECK-LABEL: @test74(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, [[TBAA2]]
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1180,13 +1180,13 @@
// CHECK-LABEL: @test75(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
+// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test75(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1195,3 +1195,195 @@
  __builtin_mma_xvf64gernp(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}
+
+// CHECK-LABEL: @test76(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
+// CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __vector_pair res;
+  __builtin_mma_assemble_pair(&res, vc, vc);
+  *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: @test77(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
+// CHECK-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
+// CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
+// CHECK-NEXT: ret void
+//
+void test77(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp);
+}
+
+// CHECK-LABEL: @test78(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]])
+// CHECK-NEXT: ret void
+//
+void test78(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
+  __builtin_mma_stxvp(vp, 0LL, vp2);
+}
+
+// CHECK-LABEL: @test79(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]]
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test79(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(offset, vpp);
+  __builtin_mma_stxvp(vp, offset, vp2);
+}
+
+// CHECK-LABEL: @test80(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test80(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(18LL, vpp);
+  __builtin_mma_stxvp(vp, 18LL, vp2);
+}
+
+// CHECK-LABEL: @test81(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 1
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test81(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(1LL, vpp);
+  __builtin_mma_stxvp(vp, 1LL, vp2);
+}
+
+// CHECK-LABEL: @test82(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test82(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(42LL, vpp);
+  __builtin_mma_stxvp(vp, 42LL, vp2);
+}
+
+// CHECK-LABEL: @test83(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 128
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8*
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 128
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8*
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test83(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(32768LL, vpp);
+  __builtin_mma_stxvp(vp, 32768LL, vp2);
+}
+
+// CHECK-LABEL: @test84(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test84(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(32799LL, vpp);
+  __builtin_mma_stxvp(vp, 32799LL, vp2);
+}
+
+// CHECK-LABEL: @test85(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
+// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = __builtin_mma_lxvp(8LL, vpp);
+  __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
+  *((__vector_quad *)resp) = vq;
+}
+
+// CHECK-LABEL: @test86(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
+  __builtin_mma_xvf64gernp(&vq, vp, vc);
+  *((__vector_quad *)resp) = vq;
+}
+
+// CHECK-LABEL: @test87(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]]
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
+// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void test87(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = __builtin_mma_lxvp(offs, vpp);
+  __builtin_mma_xvf64gernp(&vq, vp, vc);
+  *((__vector_quad *)resp) = vq;
+}
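
For context, a minimal standalone usage sketch (illustrative only, not part of the patch) of how the pair and accumulator built-ins exercised by the tests above compose in user code. The function name sketch_f64ger is made up; __builtin_mma_xxsetaccz (used to zero the accumulator) is assumed from the same MMA built-in family even though it is not exercised in the hunks above, and the example assumes a POWER10 target with MMA and AltiVec enabled (e.g. -mcpu=pwr10).

// Illustrative sketch, not from the patch: pack two vectors into a
// __vector_pair, zero a __vector_quad accumulator, apply the f64 GER
// update used in test86, and store the accumulator, mirroring the
// calling patterns of test76/test86.
void sketch_f64ger(vector unsigned char hi, vector unsigned char lo,
                   vector unsigned char vc, unsigned char *resp) {
  __vector_pair vp;
  __vector_quad acc;
  __builtin_mma_assemble_pair(&vp, hi, lo);  // pack two 16-byte vectors into a 32-byte pair
  __builtin_mma_xxsetaccz(&acc);             // zero the 512-bit accumulator (assumed built-in)
  __builtin_mma_xvf64gernp(&acc, vp, vc);    // f64 GER update of the accumulator, as in test86
  *((__vector_quad *)resp) = acc;            // write the accumulator back to memory
}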