diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll --- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll @@ -122,10 +122,10 @@ ; BE-PWR8-NEXT: stxvd2x vs2, r3, r5 ; BE-PWR8-NEXT: blr entry: - %arrayidx = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 1 - %0 = load <512 x i1>, <512 x i1>* %arrayidx, align 64 - %arrayidx1 = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 2 - store <512 x i1> %0, <512 x i1>* %arrayidx1, align 64 + %arrayidx = getelementptr inbounds <512 x i1>, ptr @f, i64 1 + %0 = load <512 x i1>, ptr %arrayidx, align 64 + %arrayidx1 = getelementptr inbounds <512 x i1>, ptr @f, i64 2 + store <512 x i1> %0, ptr %arrayidx1, align 64 ret void } @@ -243,10 +243,10 @@ ; BE-PWR8-NEXT: stxvd2x vs3, r4, r9 ; BE-PWR8-NEXT: blr entry: - %arrayidx = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 %SrcIdx - %0 = load <512 x i1>, <512 x i1>* %arrayidx, align 64 - %arrayidx1 = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 %DstIdx - store <512 x i1> %0, <512 x i1>* %arrayidx1, align 64 + %arrayidx = getelementptr inbounds <512 x i1>, ptr @f, i64 %SrcIdx + %0 = load <512 x i1>, ptr %arrayidx, align 64 + %arrayidx1 = getelementptr inbounds <512 x i1>, ptr @f, i64 %DstIdx + store <512 x i1> %0, ptr %arrayidx1, align 64 ret void } @@ -365,13 +365,10 @@ ; BE-PWR8-NEXT: stxvd2x vs2, r3, r5 ; BE-PWR8-NEXT: blr entry: - %0 = bitcast <512 x i1>* @f to i8* - %add.ptr = getelementptr inbounds i8, i8* %0, i64 11 - %add.ptr1 = getelementptr inbounds i8, i8* %0, i64 19 - %1 = bitcast i8* %add.ptr to <512 x i1>* - %2 = bitcast i8* %add.ptr1 to <512 x i1>* - %3 = load <512 x i1>, <512 x i1>* %1, align 64 - store <512 x i1> %3, <512 x i1>* %2, align 64 + %add.ptr = getelementptr inbounds i8, ptr @f, i64 11 + %add.ptr1 = getelementptr inbounds i8, ptr @f, i64 19 + %0 = load <512 x i1>, ptr %add.ptr, align 64 + store <512 x i1> %0, ptr %add.ptr1, align 64 ret void } @@ -442,10 +439,10 @@ ; BE-PWR8-NEXT: stxvd2x vs0, r3, r5 ; BE-PWR8-NEXT: blr entry: - %arrayidx = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 1 - %0 = load <256 x i1>, <256 x i1>* %arrayidx, align 64 - %arrayidx1 = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 2 - store <256 x i1> %0, <256 x i1>* %arrayidx1, align 64 + %arrayidx = getelementptr inbounds <256 x i1>, ptr @g, i64 1 + %0 = load <256 x i1>, ptr %arrayidx, align 64 + %arrayidx1 = getelementptr inbounds <256 x i1>, ptr @g, i64 2 + store <256 x i1> %0, ptr %arrayidx1, align 64 ret void } @@ -535,10 +532,10 @@ ; BE-PWR8-NEXT: stxvd2x vs1, r4, r7 ; BE-PWR8-NEXT: blr entry: - %arrayidx = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 %SrcIdx - %0 = load <256 x i1>, <256 x i1>* %arrayidx, align 64 - %arrayidx1 = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 %DstIdx - store <256 x i1> %0, <256 x i1>* %arrayidx1, align 64 + %arrayidx = getelementptr inbounds <256 x i1>, ptr @g, i64 %SrcIdx + %0 = load <256 x i1>, ptr %arrayidx, align 64 + %arrayidx1 = getelementptr inbounds <256 x i1>, ptr @g, i64 %DstIdx + store <256 x i1> %0, ptr %arrayidx1, align 64 ret void } @@ -617,12 +614,9 @@ ; BE-PWR8-NEXT: stxvd2x vs0, r3, r5 ; BE-PWR8-NEXT: blr entry: - %0 = bitcast <256 x i1>* @g to i8* - %add.ptr = getelementptr inbounds i8, i8* %0, i64 11 - %add.ptr1 = getelementptr inbounds i8, i8* %0, i64 19 - %1 = bitcast i8* %add.ptr to <256 x i1>* - %2 = bitcast i8* %add.ptr1 to <256 x i1>* - %3 = load <256 x i1>, <256 x i1>* %1, align 64 - store <256 x i1> %3, <256 x i1>* %2, align 64 + %add.ptr = getelementptr inbounds i8, ptr @g, i64 11 + %add.ptr1 = getelementptr inbounds i8, ptr @g, i64 19 + %0 = load <256 x i1>, ptr %add.ptr, align 64 + store <256 x i1> %0, ptr %add.ptr1, align 64 ret void } diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll --- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll @@ -9,7 +9,7 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>) declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) declare void @foo() -define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, i8* %ptr) { +define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, ptr %ptr) { ; CHECK-LABEL: intrinsics1: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 @@ -116,7 +116,6 @@ %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc3) tail call void @foo() %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3) - %4 = bitcast i8* %ptr to <512 x i1>* - store <512 x i1> %3, <512 x i1>* %4, align 64 + store <512 x i1> %3, ptr %ptr, align 64 ret void } diff --git a/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll --- a/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll @@ -7,7 +7,7 @@ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE ; Function Attrs: nofree nounwind writeonly -define dso_local void @test1(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) { +define dso_local void @test1(ptr nocapture readnone %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvi16ger2 acc0, v2, v2 @@ -29,8 +29,7 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %vc, <16 x i8> %vc) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } @@ -38,7 +37,7 @@ declare <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8>, <16 x i8>) ; Function Attrs: nofree nounwind writeonly -define dso_local void @test2(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) { +define dso_local void @test2(ptr nocapture readnone %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pmxvi16ger2 acc0, v2, v2, 0, 0, 0 @@ -60,8 +59,7 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } @@ -69,7 +67,7 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8>, <16 x i8>, i32, i32, i32) ; Function Attrs: nofree nounwind -define dso_local void @test3(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) { +define dso_local void @test3(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -100,11 +98,9 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } @@ -112,7 +108,7 @@ declare <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1>, <16 x i8>, <16 x i8>) ; Function Attrs: nofree nounwind -define dso_local void @test4(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) { +define dso_local void @test4(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -143,11 +139,9 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } @@ -155,7 +149,7 @@ declare <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>) ; Function Attrs: nofree nounwind -define dso_local void @test5(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) { +define dso_local void @test5(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -186,11 +180,9 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } @@ -198,7 +190,7 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32) ; Function Attrs: nofree nounwind -define dso_local void @test6(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) { +define dso_local void @test6(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -229,11 +221,9 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -8,7 +8,7 @@ ; assemble_acc declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) -define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) { +define void @ass_acc(ptr %ptr, <16 x i8> %vc) { ; CHECK-LABEL: ass_acc: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmr v3, v2 @@ -36,13 +36,13 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) - store <512 x i1> %0, <512 x i1>* %ptr, align 64 + store <512 x i1> %0, ptr %ptr, align 64 ret void } ; xxmtacc declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>) -define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) { +define void @int_xxmtacc(ptr %ptr, <16 x i8> %vc) { ; CHECK-LABEL: int_xxmtacc: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmr v3, v2 @@ -75,13 +75,13 @@ ; generated from the call to xxmtacc then one xxmfacc is generated for the store %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) %1 = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %0) - store <512 x i1> %1, <512 x i1>* %ptr, align 64 + store <512 x i1> %1, ptr %ptr, align 64 ret void } ; xxmfacc declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>) -define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) { +define void @int_xxmfacc(ptr %ptr, <16 x i8> %vc) { ; CHECK-LABEL: int_xxmfacc: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmr v3, v2 @@ -112,13 +112,13 @@ ; generated from the call to xxmfacc then one xxmfacc is generated for the store %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) %1 = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %0) - store <512 x i1> %1, <512 x i1>* %ptr, align 64 + store <512 x i1> %1, ptr %ptr, align 64 ret void } ; xxsetaccz declare <512 x i1> @llvm.ppc.mma.xxsetaccz() -define void @int_xxsetaccz(<512 x i1>* %ptr) { +define void @int_xxsetaccz(ptr %ptr) { ; CHECK-LABEL: int_xxsetaccz: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsetaccz acc0 @@ -140,13 +140,13 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() - store <512 x i1> %0, <512 x i1>* %ptr, align 64 + store <512 x i1> %0, ptr %ptr, align 64 ret void } ; disassemble_acc declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) -define void @disass_acc(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4) { +define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) { ; CHECK-LABEL: disass_acc: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsetaccz acc0 @@ -173,15 +173,15 @@ %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1 %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2 %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3 - store <16 x i8> %2, <16 x i8>* %ptr1, align 16 - store <16 x i8> %3, <16 x i8>* %ptr2, align 16 - store <16 x i8> %4, <16 x i8>* %ptr3, align 16 - store <16 x i8> %5, <16 x i8>* %ptr4, align 16 + store <16 x i8> %2, ptr %ptr1, align 16 + store <16 x i8> %3, ptr %ptr2, align 16 + store <16 x i8> %4, ptr %ptr3, align 16 + store <16 x i8> %5, ptr %ptr4, align 16 ret void } declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>) -define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) { +define void @testBranch(ptr %ptr, <16 x i8> %vc, i32 %val) { ; CHECK-LABEL: testBranch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmplwi r7, 0 @@ -234,13 +234,13 @@ br label %if.end if.else: - %1 = load <512 x i1>, <512 x i1>* %ptr, align 64 + %1 = load <512 x i1>, ptr %ptr, align 64 %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) br label %if.end if.end: %vq1.0 = phi <512 x i1> [ %0, %if.then ], [ %2, %if.else ] - store <512 x i1> %vq1.0, <512 x i1>* %ptr, align 64 + store <512 x i1> %vq1.0, ptr %ptr, align 64 ret void } @@ -249,7 +249,7 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>) declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>) -define void @testcse(<512 x i1>* %res, <16 x i8> %vc) { +define void @testcse(ptr %res, <16 x i8> %vc) { ; CHECK-LABEL: testcse: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsetaccz acc0 @@ -284,14 +284,14 @@ %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0 - %5 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1 - store <512 x i1> %2, <512 x i1>* %4, align 64 - store <512 x i1> %3, <512 x i1>* %5, align 64 + %4 = getelementptr inbounds <512 x i1>, ptr %res, i64 0 + %5 = getelementptr inbounds <512 x i1>, ptr %res, i64 1 + store <512 x i1> %2, ptr %4, align 64 + store <512 x i1> %3, ptr %5, align 64 ret void } -define void @testcse2(<512 x i1>* %res, <16 x i8> %vc) { +define void @testcse2(ptr %res, <16 x i8> %vc) { ; CHECK-LABEL: testcse2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsetaccz acc0 @@ -332,14 +332,14 @@ %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0 - %5 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1 - store <512 x i1> %2, <512 x i1>* %4, align 64 - store <512 x i1> %3, <512 x i1>* %5, align 64 + %4 = getelementptr inbounds <512 x i1>, ptr %res, i64 0 + %5 = getelementptr inbounds <512 x i1>, ptr %res, i64 1 + store <512 x i1> %2, ptr %4, align 64 + store <512 x i1> %3, ptr %5, align 64 ret void } -define void @testcse3(<512 x i1>* %res, <16 x i8> %vc) { +define void @testcse3(ptr %res, <16 x i8> %vc) { ; CHECK-LABEL: testcse3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsetaccz acc0 @@ -379,14 +379,14 @@ %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) - %3 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0 - %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1 - store <512 x i1> %1, <512 x i1>* %3, align 64 - store <512 x i1> %2, <512 x i1>* %4, align 64 + %3 = getelementptr inbounds <512 x i1>, ptr %res, i64 0 + %4 = getelementptr inbounds <512 x i1>, ptr %res, i64 1 + store <512 x i1> %1, ptr %3, align 64 + store <512 x i1> %2, ptr %4, align 64 ret void } -define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) { +define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-LABEL: testcse4: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpwi r4, 1 @@ -503,46 +503,46 @@ %3 = trunc i64 %indvars.iv to i32 %mul = mul nsw i32 %3, 6 %idxprom = zext i32 %mul to i64 - %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom - %4 = load <16 x i8>, <16 x i8>* %arrayidx, align 16 + %arrayidx = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom + %4 = load <16 x i8>, ptr %arrayidx, align 16 %add2 = or i32 %mul, 1 %idxprom3 = zext i32 %add2 to i64 - %arrayidx4 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom3 - %5 = load <16 x i8>, <16 x i8>* %arrayidx4, align 16 + %arrayidx4 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom3 + %5 = load <16 x i8>, ptr %arrayidx4, align 16 %6 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %4, <16 x i8> %5) %add6 = add nuw nsw i32 %mul, 2 %idxprom7 = zext i32 %add6 to i64 - %arrayidx8 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom7 - %7 = load <16 x i8>, <16 x i8>* %arrayidx8, align 16 + %arrayidx8 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom7 + %7 = load <16 x i8>, ptr %arrayidx8, align 16 %add10 = add nuw nsw i32 %mul, 3 %idxprom11 = zext i32 %add10 to i64 - %arrayidx12 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom11 - %8 = load <16 x i8>, <16 x i8>* %arrayidx12, align 16 + %arrayidx12 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom11 + %8 = load <16 x i8>, ptr %arrayidx12, align 16 %9 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %7, <16 x i8> %8) %add14 = add nuw nsw i32 %mul, 4 %idxprom15 = zext i32 %add14 to i64 - %arrayidx16 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom15 - %10 = load <16 x i8>, <16 x i8>* %arrayidx16, align 16 + %arrayidx16 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom15 + %10 = load <16 x i8>, ptr %arrayidx16, align 16 %add18 = add nuw nsw i32 %mul, 5 %idxprom19 = zext i32 %add18 to i64 - %arrayidx20 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom19 - %11 = load <16 x i8>, <16 x i8>* %arrayidx20, align 16 + %arrayidx20 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom19 + %11 = load <16 x i8>, ptr %arrayidx20, align 16 %12 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %2, <16 x i8> %10, <16 x i8> %11) %mul21 = mul i64 %indvars.iv, 3 %idx.ext = and i64 %mul21, 4294967295 - %add.ptr = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 %idx.ext - store <512 x i1> %6, <512 x i1>* %add.ptr, align 64 - %add.ptr26 = getelementptr inbounds <512 x i1>, <512 x i1>* %add.ptr, i64 1 - store <512 x i1> %9, <512 x i1>* %add.ptr26, align 64 - %add.ptr30 = getelementptr inbounds <512 x i1>, <512 x i1>* %add.ptr, i64 2 - store <512 x i1> %12, <512 x i1>* %add.ptr30, align 64 + %add.ptr = getelementptr inbounds <512 x i1>, ptr %res, i64 %idx.ext + store <512 x i1> %6, ptr %add.ptr, align 64 + %add.ptr26 = getelementptr inbounds <512 x i1>, ptr %add.ptr, i64 1 + store <512 x i1> %9, ptr %add.ptr26, align 64 + %add.ptr30 = getelementptr inbounds <512 x i1>, ptr %add.ptr, i64 2 + store <512 x i1> %12, ptr %add.ptr30, align 64 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } declare i32 @testRedundantPrimeUnprimeF() -define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind { +define void @testRedundantPrimeUnprime(ptr %dst, <16 x i8> %vc) nounwind { ; CHECK-LABEL: testRedundantPrimeUnprime: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 @@ -611,19 +611,19 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() - store <512 x i1> %0, <512 x i1>* %dst, align 64 + store <512 x i1> %0, ptr %dst, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) - %call = tail call signext i32 bitcast (i32 ()* @testRedundantPrimeUnprimeF to i32 ()*)() - %add.ptr1 = getelementptr inbounds <512 x i1>, <512 x i1>* %dst, i64 1 - store <512 x i1> %1, <512 x i1>* %add.ptr1, align 64 + %call = tail call signext i32 bitcast (ptr @testRedundantPrimeUnprimeF to ptr)() + %add.ptr1 = getelementptr inbounds <512 x i1>, ptr %dst, i64 1 + store <512 x i1> %1, ptr %add.ptr1, align 64 ret void } -declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*) -declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*) +declare <256 x i1> @llvm.ppc.vsx.lxvp(ptr) +declare void @llvm.ppc.vsx.stxvp(<256 x i1>, ptr) ; Function Attrs: nofree nounwind -define void @test_ldst_1(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) { +define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test_ldst_1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -656,19 +656,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast <256 x i1>* %vpp to i8* - %3 = getelementptr i8, i8* %2, i64 8 - %4 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %3) - %5 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %4, <16 x i8> %vc, i32 0, i32 0) - %6 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %5, <512 x i1>* %6, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = getelementptr i8, ptr %vpp, i64 8 + %2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %1) + %3 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %0, <256 x i1> %2, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %3, ptr %resp, align 64 ret void } ; Function Attrs: nofree nounwind -define void @test_ldst_2(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) { +define void @test_ldst_2(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test_ldst_2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -701,18 +698,15 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast <256 x i1>* %vpp to i8* - %3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2) - %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp) + %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc) + store <512 x i1> %2, ptr %resp, align 64 ret void } ; Function Attrs: nofree nounwind -define void @test_ldst_3(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) { +define void @test_ldst_3(ptr nocapture readonly %vqp, i64 %offs, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test_ldst_3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -745,13 +739,10 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r9) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast <256 x i1>* %vpp to i8* - %3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2) - %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp) + %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc) + store <512 x i1> %2, ptr %resp, align 64 ret void } diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll --- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll @@ -8,7 +8,7 @@ declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>) -define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, i8* %ptr) { +define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, ptr %ptr) { ; CHECK-LABEL: intrinsics1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmr v1, v4 @@ -62,13 +62,12 @@ %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %3, <16 x i8> %vc2, <16 x i8> %vc4, i32 0, i32 0) %5 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1) %6 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %4, <256 x i1> %5, <16 x i8> %vc1, i32 0, i32 0) - %7 = bitcast i8* %ptr to <512 x i1>* - store <512 x i1> %6, <512 x i1>* %7, align 64 + store <512 x i1> %6, ptr %ptr, align 64 ret void } declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) -define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4, i8* %ptr) { +define void @intrinsics2(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr) { ; CHECK-LABEL: intrinsics2: ; CHECK: # %bb.0: ; CHECK-NEXT: lxv v2, 0(r3) @@ -116,10 +115,10 @@ ; CHECK-BE-NEXT: stxv vs2, 0(r5) ; CHECK-BE-NEXT: stxv vs3, 0(r6) ; CHECK-BE-NEXT: blr - %vc1 = load <16 x i8>, <16 x i8>* %ptr1, align 16 - %vc2 = load <16 x i8>, <16 x i8>* %ptr2, align 16 - %vc3 = load <16 x i8>, <16 x i8>* %ptr3, align 16 - %vc4 = load <16 x i8>, <16 x i8>* %ptr4, align 16 + %vc1 = load <16 x i8>, ptr %ptr1, align 16 + %vc2 = load <16 x i8>, ptr %ptr2, align 16 + %vc3 = load <16 x i8>, ptr %ptr3, align 16 + %vc4 = load <16 x i8>, ptr %ptr4, align 16 %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4) %2 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2) %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3) @@ -131,15 +130,14 @@ %9 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 1 %10 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 2 %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 3 - %12 = bitcast i8* %ptr to <512 x i1>* - store <16 x i8> %8, <16 x i8>* %ptr1, align 16 - store <16 x i8> %9, <16 x i8>* %ptr2, align 16 - store <16 x i8> %10, <16 x i8>* %ptr3, align 16 - store <16 x i8> %11, <16 x i8>* %ptr4, align 16 + store <16 x i8> %8, ptr %ptr1, align 16 + store <16 x i8> %9, ptr %ptr2, align 16 + store <16 x i8> %10, ptr %ptr3, align 16 + store <16 x i8> %11, ptr %ptr4, align 16 ret void } -define void @test1(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test1(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvi4ger8 acc0, v2, v2 @@ -161,15 +159,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %vc, <16 x i8> %vc) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8>, <16 x i8>) -define void @test2(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test2(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -200,18 +197,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test3(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test3(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0 @@ -233,15 +228,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8>, <16 x i8>, i32, i32, i32) -define void @test4(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test4(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -272,18 +266,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32) -define void @test5(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test5(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvi8ger4 acc0, v2, v2 @@ -305,15 +297,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %vc, <16 x i8> %vc) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8>, <16 x i8>) -define void @test6(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test6(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -344,18 +335,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test7(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test7(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0 @@ -377,15 +366,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8>, <16 x i8>, i32, i32, i32) -define void @test8(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test8(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -416,18 +404,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32) -define void @test9(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test9(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test9: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvi16ger2s acc0, v2, v2 @@ -449,15 +435,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %vc, <16 x i8> %vc) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8>, <16 x i8>) -define void @test10(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test10(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -488,18 +473,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test11(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test11(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test11: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0 @@ -521,15 +504,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8>, <16 x i8>, i32, i32, i32) -define void @test12(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test12(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test12: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -560,18 +542,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32) -define void @test13(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test13(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test13: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvf16ger2 acc0, v2, v2 @@ -593,15 +573,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> %vc, <16 x i8> %vc) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8>, <16 x i8>) -define void @test14(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test14(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -632,18 +611,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test15(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test15(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test15: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -674,18 +651,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test16(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test16(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -716,18 +691,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test17(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test17(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test17: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -758,18 +731,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test18(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test18(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test18: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0 @@ -791,15 +762,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8>, <16 x i8>, i32, i32, i32) -define void @test19(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test19(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test19: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -830,18 +800,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32) -define void @test20(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test20(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test20: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -872,18 +840,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32) -define void @test21(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test21(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test21: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -914,18 +880,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32) -define void @test22(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test22(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test22: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -956,18 +920,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32) -define void @test23(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test23(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test23: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvf32ger acc0, v2, v2 @@ -989,15 +951,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %vc, <16 x i8> %vc) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8>, <16 x i8>) -define void @test24(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test24(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test24: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1028,18 +989,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test25(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test25(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test25: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1070,18 +1029,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test26(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test26(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test26: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1112,18 +1069,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test27(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test27(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test27: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1154,18 +1109,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1>, <16 x i8>, <16 x i8>) -define void @test28(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test28(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test28: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pmxvf32ger acc0, v2, v2, 0, 0 @@ -1187,15 +1140,14 @@ ; CHECK-BE-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) - %1 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %0, <512 x i1>* %1, align 64 + store <512 x i1> %0, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8>, <16 x i8>, i32, i32) -define void @test29(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test29(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test29: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1226,18 +1178,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32) -define void @test30(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test30(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test30: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1268,18 +1218,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32) -define void @test31(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test31(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test31: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1310,18 +1258,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32) -define void @test32(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test32(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1352,18 +1298,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32) -define void @test33(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test33: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv v5, 0(r4) @@ -1388,18 +1332,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vpp to <256 x i1>* - %1 = load <256 x i1>, <256 x i1>* %0, align 32 - %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %1, <16 x i8> %vc) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <256 x i1>, ptr %vpp, align 32 + %1 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %0, <16 x i8> %vc) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1>, <16 x i8>) -define void @test34(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test34: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1434,20 +1376,17 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast i8* %vpp to <256 x i1>* - %3 = load <256 x i1>, <256 x i1>* %2, align 32 - %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc) + store <512 x i1> %2, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>) -define void @test35(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test35: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1482,20 +1421,17 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast i8* %vpp to <256 x i1>* - %3 = load <256 x i1>, <256 x i1>* %2, align 32 - %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc) + store <512 x i1> %2, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>) -define void @test36(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test36: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1530,20 +1466,17 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast i8* %vpp to <256 x i1>* - %3 = load <256 x i1>, <256 x i1>* %2, align 32 - %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc) + store <512 x i1> %2, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) -define void @test37(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test37: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1578,20 +1511,17 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast i8* %vpp to <256 x i1>* - %3 = load <256 x i1>, <256 x i1>* %2, align 32 - %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc) + store <512 x i1> %2, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>) -define void @test38(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test38: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv v5, 0(r4) @@ -1616,18 +1546,16 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vpp to <256 x i1>* - %1 = load <256 x i1>, <256 x i1>* %0, align 32 - %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %1, <16 x i8> %vc, i32 0, i32 0) - %3 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %2, <512 x i1>* %3, align 64 + %0 = load <256 x i1>, ptr %vpp, align 32 + %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %0, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %1, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1>, <16 x i8>, i32, i32) -define void @test39(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test39: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1662,20 +1590,17 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast i8* %vpp to <256 x i1>* - %3 = load <256 x i1>, <256 x i1>* %2, align 32 - %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc, i32 0, i32 0) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %2, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) -define void @test40(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test40: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1710,20 +1635,17 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast i8* %vpp to <256 x i1>* - %3 = load <256 x i1>, <256 x i1>* %2, align 32 - %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc, i32 0, i32 0) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %2, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) -define void @test41(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test41: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1758,20 +1680,17 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast i8* %vpp to <256 x i1>* - %3 = load <256 x i1>, <256 x i1>* %2, align 32 - %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc, i32 0, i32 0) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %2, ptr %resp, align 64 ret void } declare <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) -define void @test42(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) { +define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test42: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs1, 32(r3) @@ -1806,13 +1725,10 @@ ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr entry: - %0 = bitcast i8* %vqp to <512 x i1>* - %1 = load <512 x i1>, <512 x i1>* %0, align 64 - %2 = bitcast i8* %vpp to <256 x i1>* - %3 = load <256 x i1>, <256 x i1>* %2, align 32 - %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc, i32 0, i32 0) - %5 = bitcast i8* %resp to <512 x i1>* - store <512 x i1> %4, <512 x i1>* %5, align 64 + %0 = load <512 x i1>, ptr %vqp, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc, i32 0, i32 0) + store <512 x i1> %2, ptr %resp, align 64 ret void } diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll --- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll +++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll @@ -10,7 +10,7 @@ declare <512 x i1> @llvm.ppc.mma.xxsetaccz() declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>) declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) -define void @testPHI1(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) { +define void @testPHI1(ptr %Dst, ptr %Src, i32 signext %Len) { ; CHECK-LABEL: testPHI1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxsetaccz acc0 @@ -65,9 +65,9 @@ ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr entry: - %0 = load <16 x i8>, <16 x i8>* %Src, align 16 - %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 1 - %1 = load <16 x i8>, <16 x i8>* %arrayidx1, align 16 + %0 = load <16 x i8>, ptr %Src, align 16 + %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %Src, i64 1 + %1 = load <16 x i8>, ptr %arrayidx1, align 16 %2 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %0, <16 x i8> %1) %3 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() %cmp11 = icmp sgt i32 %Len, 2 @@ -81,23 +81,23 @@ %Acc.0.lcssa = phi <512 x i1> [ %3, %entry ], [ %13, %for.body ] %4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %Acc.0.lcssa) %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, 0 - store <16 x i8> %5, <16 x i8>* %Dst, align 16 + store <16 x i8> %5, ptr %Dst, align 16 %6 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, 1 - %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 1 - store <16 x i8> %6, <16 x i8>* %7, align 16 + %7 = getelementptr inbounds <16 x i8>, ptr %Dst, i64 1 + store <16 x i8> %6, ptr %7, align 16 %8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, 2 - %9 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 2 - store <16 x i8> %8, <16 x i8>* %9, align 16 + %9 = getelementptr inbounds <16 x i8>, ptr %Dst, i64 2 + store <16 x i8> %8, ptr %9, align 16 %10 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, 3 - %11 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 3 - store <16 x i8> %10, <16 x i8>* %11, align 16 + %11 = getelementptr inbounds <16 x i8>, ptr %Dst, i64 3 + store <16 x i8> %10, ptr %11, align 16 ret void for.body: %indvars.iv = phi i64 [ 2, %for.body.preheader ], [ %indvars.iv.next, %for.body ] %Acc.012 = phi <512 x i1> [ %3, %for.body.preheader ], [ %13, %for.body ] - %arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 %indvars.iv - %12 = load <16 x i8>, <16 x i8>* %arrayidx2, align 16 + %arrayidx2 = getelementptr inbounds <16 x i8>, ptr %Src, i64 %indvars.iv + %12 = load <16 x i8>, ptr %arrayidx2, align 16 %13 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %Acc.012, <256 x i1> %2, <16 x i8> %12) %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count @@ -105,7 +105,7 @@ } declare <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1>, <16 x i8>) -define dso_local void @testPHI2(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) { +define dso_local void @testPHI2(ptr %Dst, ptr %Src, i32 signext %Len) { ; CHECK-LABEL: testPHI2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv v2, 0(r4) @@ -162,12 +162,12 @@ ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr entry: - %0 = load <16 x i8>, <16 x i8>* %Src, align 16 - %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 1 - %1 = load <16 x i8>, <16 x i8>* %arrayidx1, align 16 + %0 = load <16 x i8>, ptr %Src, align 16 + %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %Src, i64 1 + %1 = load <16 x i8>, ptr %arrayidx1, align 16 %2 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %0, <16 x i8> %1) - %arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 2 - %3 = load <16 x i8>, <16 x i8>* %arrayidx2, align 16 + %arrayidx2 = getelementptr inbounds <16 x i8>, ptr %Src, i64 2 + %3 = load <16 x i8>, ptr %arrayidx2, align 16 %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %2, <16 x i8> %3) %cmp14 = icmp sgt i32 %Len, 3 br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup @@ -180,23 +180,23 @@ %Acc.0.lcssa = phi <512 x i1> [ %4, %entry ], [ %14, %for.body ] %5 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %Acc.0.lcssa) %6 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %5, 0 - store <16 x i8> %6, <16 x i8>* %Dst, align 16 + store <16 x i8> %6, ptr %Dst, align 16 %7 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %5, 1 - %8 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 1 - store <16 x i8> %7, <16 x i8>* %8, align 16 + %8 = getelementptr inbounds <16 x i8>, ptr %Dst, i64 1 + store <16 x i8> %7, ptr %8, align 16 %9 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %5, 2 - %10 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 2 - store <16 x i8> %9, <16 x i8>* %10, align 16 + %10 = getelementptr inbounds <16 x i8>, ptr %Dst, i64 2 + store <16 x i8> %9, ptr %10, align 16 %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %5, 3 - %12 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 3 - store <16 x i8> %11, <16 x i8>* %12, align 16 + %12 = getelementptr inbounds <16 x i8>, ptr %Dst, i64 3 + store <16 x i8> %11, ptr %12, align 16 ret void for.body: %indvars.iv = phi i64 [ 3, %for.body.preheader ], [ %indvars.iv.next, %for.body ] %Acc.015 = phi <512 x i1> [ %4, %for.body.preheader ], [ %14, %for.body ] - %arrayidx3 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 %indvars.iv - %13 = load <16 x i8>, <16 x i8>* %arrayidx3, align 16 + %arrayidx3 = getelementptr inbounds <16 x i8>, ptr %Src, i64 %indvars.iv + %13 = load <16 x i8>, ptr %arrayidx3, align 16 %14 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %Acc.015, <256 x i1> %2, <16 x i8> %13) %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count @@ -207,7 +207,7 @@ ; implicitely defined unprimed accumulator and the unprimed result of the call ; to xvf64gerpp. The compiler should replace this PHI node by a primed ; accumulator PHI node. -define void @testImplicitDef(<16 x i8>* %ptr) { +define void @testImplicitDef(ptr %ptr) { ; CHECK-LABEL: testImplicitDef: ; CHECK: # %bb.0: # %label1 ; CHECK-NEXT: # implicit-def: $acc0 @@ -240,7 +240,7 @@ %1 = phi <512 x i1> [ undef, %label1 ], [ %0, %label2 ] %2 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %1) %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %2, 3 - store <16 x i8> %3, <16 x i8>* %ptr, align 16 + store <16 x i8> %3, ptr %ptr, align 16 ret void } @@ -248,7 +248,7 @@ ; PHI node operand. The compiler should replace these PHI nodes by primed ; accumulator PHI nodes. declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>) -define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %count, <512 x i1>* nocapture %ptr, <16 x i8> %vc) { +define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %count, ptr nocapture %ptr, <16 x i8> %vc) { ; CHECK-LABEL: testNestedPHI: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmplwi r3, 0 @@ -327,7 +327,7 @@ for.cond.cleanup: %vq.1.lcssa = phi <512 x i1> [ %vq.0, %if.end ], [ %1, %for.body ] - store <512 x i1> %vq.1.lcssa, <512 x i1>* %ptr, align 64 + store <512 x i1> %vq.1.lcssa, ptr %ptr, align 64 ret i32 0 for.body: