Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -966,14 +966,15 @@ IntrinsicInst *Op1 = dyn_cast(II.getArgOperand(0)); IntrinsicInst *Op2 = dyn_cast(II.getArgOperand(1)); - if (Op1 && Op2 && - Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && - Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && - Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) { + if (!Op1 || !Op2) + return None; - IRBuilder<> Builder(II.getContext()); - Builder.SetInsertPoint(&II); + IRBuilder<> Builder(II.getContext()); + Builder.SetInsertPoint(&II); + if (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && + Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && + Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) { Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)}; Type *Tys[] = {Op1->getArgOperand(0)->getType()}; @@ -983,6 +984,22 @@ return IC.replaceInstUsesWith(II, PTest); } + // Transform PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X), where OP is + // brkb_z or rdffr_z. Later optimizations may rewrite the sequence to use + // the flag-setting variant of instruction X to remove the PTEST. 
+ if ((Op1 == Op2) && + (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) && + ((Op1->getIntrinsicID() == Intrinsic::aarch64_sve_brkb_z) || + (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_rdffr_z))) { + Value *Ops[] = {Op1->getArgOperand(0), Op1}; + Type *Tys[] = {Op1->getType()}; + + auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops); + PTest->takeName(&II); + + return IC.replaceInstUsesWith(II, PTest); + } + return None; } Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll =================================================================== --- llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll +++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll @@ -32,6 +32,33 @@ ret i1 %out } +; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)). +define i32 @ptest_any_brkb_z( %pg, %a) { +; CHECK-LABEL: @ptest_any_brkb_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( [[PG:%.*]], [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %1 = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( %pg, %a) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @ptest_any_rdffr_z( %pg) { +; CHECK-LABEL: @ptest_any_rdffr_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.rdffr.z( [[PG:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %1 = tail call @llvm.aarch64.sve.rdffr.z( %pg) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + define i1 @ptest_first( %a) #0 { ; CHECK-LABEL: @ptest_first( ; CHECK-NEXT: [[MASK:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 0) @@ -81,4 
+108,7 @@ declare @llvm.aarch64.sve.convert.to.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.to.svbool.nxv2i1() +declare @llvm.aarch64.sve.brkb.z.nxv16i1(, ) +declare @llvm.aarch64.sve.rdffr.z() + attributes #0 = { "target-features"="+sve" }