Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -975,20 +975,22 @@
 static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
                                                    IntrinsicInst &II) {
-  IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
-  IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
+  IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
+  IntrinsicInst *Op = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
 
-  if (!Op1 || !Op2)
+  if (!Pg || !Op)
     return None;
 
+  Intrinsic::ID OpIID = Op->getIntrinsicID();
+
   IRBuilder<> Builder(II.getContext());
   Builder.SetInsertPoint(&II);
 
-  if (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
-      Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
-      Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) {
-    Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)};
-    Type *Tys[] = {Op1->getArgOperand(0)->getType()};
+  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
+      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
+      Pg->getArgOperand(0)->getType() == Op->getArgOperand(0)->getType()) {
+    Value *Ops[] = {Pg->getArgOperand(0), Op->getArgOperand(0)};
+    Type *Tys[] = {Pg->getArgOperand(0)->getType()};
 
     auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);
 
@@ -999,12 +1001,21 @@
   // Transform PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)).
   // Later optimizations may rewrite sequence to use the flag-setting variant
   // of instruction X to remove PTEST.
-  if ((Op1 == Op2) &&
-      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
-      ((Op1->getIntrinsicID() == Intrinsic::aarch64_sve_brkb_z) ||
-       (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_rdffr_z))) {
-    Value *Ops[] = {Op1->getArgOperand(0), Op1};
-    Type *Tys[] = {Op1->getType()};
+  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
+      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
+       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
+       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
+       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
+       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
+       (OpIID == Intrinsic::aarch64_sve_and_z) ||
+       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
+       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
+       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
+       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
+       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
+       (OpIID == Intrinsic::aarch64_sve_orr_z))) {
+    Value *Ops[] = {Pg->getArgOperand(0), Pg};
+    Type *Tys[] = {Pg->getType()};
 
     auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);
     PTest->takeName(&II);
Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
===================================================================
--- llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
@@ -32,29 +32,6 @@
   ret i1 %out
 }
 
-; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)).
-define i1 @ptest_any_brkb_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
-; CHECK-LABEL: @ptest_any_brkb_z(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]])
-; CHECK-NEXT:    [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
-; CHECK-NEXT:    ret i1 [[OUT]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
-  %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
-  ret i1 %out
-}
-
-define i1 @ptest_any_rdffr_z(<vscale x 16 x i1> %pg) {
-; CHECK-LABEL: @ptest_any_rdffr_z(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[PG:%.*]])
-; CHECK-NEXT:    [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
-; CHECK-NEXT:    ret i1 [[OUT]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
-  %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
-  ret i1 %out
-}
-
 define i1 @ptest_first(<vscale x 4 x i1> %a) #0 {
 ; CHECK-LABEL: @ptest_first(
 ; CHECK-NEXT:    [[MASK:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 0)
@@ -91,6 +68,140 @@
   ret i1 %out
 }
 
+; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)).
+
+define i1 @ptest_any_brka_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: @ptest_any_brka_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]])
+; CHECK-NEXT:    [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[OUT]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+  %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %out
+}
+
+define i1 @ptest_any_brkpa_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_brkpa_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[OUT]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %out
+}
+
+define i1 @ptest_any_brkb_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: @ptest_any_brkb_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]])
+; CHECK-NEXT:    [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[OUT]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+  %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %out
+}
+
+define i1 @ptest_any_brkpb_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_brkpb_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[OUT]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %out
+}
+
+define i1 @ptest_any_rdffr_z(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: @ptest_any_rdffr_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[PG:%.*]])
+; CHECK-NEXT:    [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[OUT]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
+  %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %out
+}
+
+define i1 @ptest_any_and_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_and_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+define i1 @ptest_any_bic_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_bic_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+define i1 @ptest_any_eor_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_eor_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+define i1 @ptest_any_nand_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_nand_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+define i1 @ptest_any_nor_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_nor_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+define i1 @ptest_any_orn_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_orn_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+define i1 @ptest_any_orr_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_orr_z(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
@@ -104,7 +215,18 @@
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
 
+declare <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+
 attributes #0 = { "target-features"="+sve" }