diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -281,6 +281,86 @@ return BaseT::getIntrinsicInstrCost(ICA, CostKind); } +/// If II reinterprets a single-use phi whose incoming values are all +/// convert.to.svbool of values of II's type, use a phi of those values instead. +static Optional processPhiNode(InstCombiner &IC, + IntrinsicInst &II) { + Type *RequiredType = II.getType(); + + auto *PN = dyn_cast(II.getArgOperand(0)); + assert(PN && "Expected Phi Node!"); + + // Don't create a new Phi unless we can remove the old one. + if (!PN->hasOneUse()) + return None; + + for (Value *IncValPhi : PN->incoming_values()) { + auto *Reinterpret = dyn_cast(IncValPhi); + if (!Reinterpret || + Reinterpret->getIntrinsicID() != + Intrinsic::aarch64_sve_convert_to_svbool || + RequiredType != Reinterpret->getArgOperand(0)->getType()) + return None; + } + + // Create the new Phi + LLVMContext &Ctx = PN->getContext(); + IRBuilder<> Builder(Ctx); + Builder.SetInsertPoint(PN); + PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues()); + + for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) { + auto *Reinterpret = cast(PN->getIncomingValue(I)); + NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I)); + } + + // Replace II with the new phi; the old phi/reinterprets are not erased here. + return IC.replaceInstUsesWith(II, NPN); +} + +static Optional instCombineConvertFromSVBool(InstCombiner &IC, + IntrinsicInst &II) { + // Handle reinterprets of phi nodes. + if (isa(II.getArgOperand(0))) + return processPhiNode(IC, II); + + Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr; + + const auto *IVTy = cast(II.getType()); + + // Walk the chain of conversions. + while (Cursor) { + // If the type of the cursor has fewer lanes than the final result, zeroing + // must take place, which breaks the equivalence chain. 
+ const auto *CursorVTy = cast(Cursor->getType()); + if (CursorVTy->getElementCount().getKnownMinValue() < + IVTy->getElementCount().getKnownMinValue()) + break; + + // If the cursor has the same type as II, it is a viable replacement. + if (Cursor->getType() == IVTy) + EarliestReplacement = Cursor; + + auto *IntrinsicCursor = dyn_cast(Cursor); + + // If this is not an SVE conversion intrinsic, this is the end of the chain. + if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() == + Intrinsic::aarch64_sve_convert_to_svbool || + IntrinsicCursor->getIntrinsicID() == + Intrinsic::aarch64_sve_convert_from_svbool)) + break; + + Cursor = IntrinsicCursor->getOperand(0); + } + + // If no viable replacement in the conversion chain was found, there is + // nothing to do. + if (!EarliestReplacement) + return None; + + return IC.replaceInstUsesWith(II, EarliestReplacement); +} + static Optional instCombineSVELast(InstCombiner &IC, IntrinsicInst &II) { Value *Pg = II.getArgOperand(0); @@ -368,6 +448,8 @@ switch (IID) { default: break; + case Intrinsic::aarch64_sve_convert_from_svbool: + return instCombineConvertFromSVBool(IC, II); case Intrinsic::aarch64_sve_lasta: case Intrinsic::aarch64_sve_lastb: return instCombineSVELast(IC, II); diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp --- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp +++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp @@ -11,18 +11,13 @@ // // This pass performs the following optimizations: // -// - removes unnecessary reinterpret intrinsics -// (llvm.aarch64.sve.convert.[to|from].svbool), e.g: -// %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a) -// %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) -// // - removes unnecessary ptrue intrinsics (llvm.aarch64.sve.ptrue), e.g: // %1 = @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) // %2 = @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) // ; (%1 can be replaced with a reinterpret of %2) // -// - optimizes ptest 
intrinsics and phi instructions where the operands are -// being needlessly converted to and from svbool_t. +// - optimizes ptest intrinsics where the operands are being needlessly +// converted to and from svbool_t. // //===----------------------------------------------------------------------===// @@ -75,12 +70,9 @@ /// the functions themselves. bool optimizeFunctions(SmallSetVector &Functions); - static bool optimizeConvertFromSVBool(IntrinsicInst *I); static bool optimizePTest(IntrinsicInst *I); static bool optimizeVectorMul(IntrinsicInst *I); static bool optimizeTBL(IntrinsicInst *I); - - static bool processPhiNode(IntrinsicInst *I); }; } // end anonymous namespace @@ -197,17 +189,30 @@ Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy}, {MostEncompassingPTrue}); + bool ConvertFromCreated = false; for (auto *PTrue : PTrues) { auto *PTrueVTy = cast(PTrue->getType()); - Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator()); - auto *ConvertFromSVBool = - Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, - {PTrueVTy}, {ConvertToSVBool}); - PTrue->replaceAllUsesWith(ConvertFromSVBool); + // Only create the converts if the types are not already the same, otherwise + // just use the most encompassing ptrue. 
+ if (MostEncompassingPTrueVTy != PTrueVTy) { + ConvertFromCreated = true; + + Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator()); + auto *ConvertFromSVBool = + Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, + {PTrueVTy}, {ConvertToSVBool}); + PTrue->replaceAllUsesWith(ConvertFromSVBool); + } else + PTrue->replaceAllUsesWith(MostEncompassingPTrue); + PTrue->eraseFromParent(); } + // No convert.from.svbool was created, so ConvertToSVBool is unused; erase it. + if (!ConvertFromCreated) + ConvertToSVBool->eraseFromParent(); + return true; } @@ -294,51 +299,6 @@ return Changed; } -/// The function will remove redundant reinterprets casting in the presence -/// of the control flow -bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) { - - SmallVector Worklist; - auto RequiredType = X->getType(); - - auto *PN = dyn_cast(X->getArgOperand(0)); - assert(PN && "Expected Phi Node!"); - - // Don't create a new Phi unless we can remove the old one. - if (!PN->hasOneUse()) - return false; - - for (Value *IncValPhi : PN->incoming_values()) { - auto *Reinterpret = isReinterpretToSVBool(IncValPhi); - if (!Reinterpret || - RequiredType != Reinterpret->getArgOperand(0)->getType()) - return false; - } - - // Create the new Phi - LLVMContext &Ctx = PN->getContext(); - IRBuilder<> Builder(Ctx); - Builder.SetInsertPoint(PN); - PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues()); - Worklist.push_back(PN); - - for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) { - auto *Reinterpret = cast(PN->getIncomingValue(I)); - NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I)); - Worklist.push_back(Reinterpret); - } - - // Cleanup Phi Node and reinterprets - X->replaceAllUsesWith(NPN); - X->eraseFromParent(); - - for (auto &I : Worklist) - if (I->use_empty()) - I->eraseFromParent(); - - return true; -} - bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) { IntrinsicInst *Op1 = dyn_cast(I->getArgOperand(0)); IntrinsicInst *Op2 = 
dyn_cast(I->getArgOperand(1)); @@ -473,69 +433,12 @@ return true; } -bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) { - assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool && - "Unexpected opcode"); - - // If the reinterpret instruction operand is a PHI Node - if (isa(I->getArgOperand(0))) - return processPhiNode(I); - - SmallVector CandidatesForRemoval; - Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr; - - const auto *IVTy = cast(I->getType()); - - // Walk the chain of conversions. - while (Cursor) { - // If the type of the cursor has fewer lanes than the final result, zeroing - // must take place, which breaks the equivalence chain. - const auto *CursorVTy = cast(Cursor->getType()); - if (CursorVTy->getElementCount().getKnownMinValue() < - IVTy->getElementCount().getKnownMinValue()) - break; - - // If the cursor has the same type as I, it is a viable replacement. - if (Cursor->getType() == IVTy) - EarliestReplacement = Cursor; - - auto *IntrinsicCursor = dyn_cast(Cursor); - - // If this is not an SVE conversion intrinsic, this is the end of the chain. - if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() == - Intrinsic::aarch64_sve_convert_to_svbool || - IntrinsicCursor->getIntrinsicID() == - Intrinsic::aarch64_sve_convert_from_svbool)) - break; - - CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor); - Cursor = IntrinsicCursor->getOperand(0); - } - - // If no viable replacement in the conversion chain was found, there is - // nothing to do. 
- if (!EarliestReplacement) - return false; - - I->replaceAllUsesWith(EarliestReplacement); - I->eraseFromParent(); - - while (!CandidatesForRemoval.empty()) { - Instruction *Candidate = CandidatesForRemoval.pop_back_val(); - if (Candidate->use_empty()) - Candidate->eraseFromParent(); - } - return true; -} - bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) { IntrinsicInst *IntrI = dyn_cast(I); if (!IntrI) return false; switch (IntrI->getIntrinsicID()) { - case Intrinsic::aarch64_sve_convert_from_svbool: - return optimizeConvertFromSVBool(IntrI); case Intrinsic::aarch64_sve_fmul: case Intrinsic::aarch64_sve_mul: return optimizeVectorMul(IntrI); @@ -591,7 +494,6 @@ continue; switch (F.getIntrinsicID()) { - case Intrinsic::aarch64_sve_convert_from_svbool: case Intrinsic::aarch64_sve_ptest_any: case Intrinsic::aarch64_sve_ptest_first: case Intrinsic::aarch64_sve_ptest_last: diff --git a/llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll b/llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll --- a/llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll +++ b/llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll @@ -163,14 +163,13 @@ define @coalesce_test_promoted_ptrue(i32* %addr1, i16* %addr2) { ; CHECK-LABEL: @coalesce_test_promoted_ptrue( ; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.aarch64.sve.ld1.nxv4i32( [[TMP3]], i32* [[ADDR1:%.*]]) -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.aarch64.sve.ld1.nxv8i16( [[TMP5]], i16* [[ADDR2:%.*]]) -; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.aarch64.sve.ld1.nxv8i16( [[TMP1]], i16* [[ADDR2]]) -; 
CHECK-NEXT: ret [[TMP8]] +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.ld1.nxv4i32( [[TMP2]], i32* [[ADDR1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.aarch64.sve.ld1.nxv8i16( [[TMP4]], i16* [[ADDR2:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.aarch64.sve.ld1.nxv8i16( [[TMP1]], i16* [[ADDR2]]) +; CHECK-NEXT: ret [[TMP7]] ; %1 = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-reinterpret.ll rename from llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll rename to llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-reinterpret.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-reinterpret.ll @@ -1,9 +1,11 @@ -; RUN: opt -S -aarch64-sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s +; RUN: opt -S -instcombine < %s | FileCheck %s -define @reinterpret_test_h( %a) { -; OPT-LABEL: @reinterpret_test_h( -; OPT-NOT: convert -; OPT: ret %a +target triple = "aarch64" + +define @reinterpret_test_h( %a) #0 { +; CHECK-LABEL: @reinterpret_test_h( +; CHECK-NOT: convert +; CHECK: ret %a %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1) ret %2 @@ -11,20 +13,20 @@ ; Reinterprets are not redundant because the second reinterpret zeros the ; lanes that don't exist within its input. 
-define @reinterpret_test_h_rev( %a) { -; OPT-LABEL: @reinterpret_test_h_rev( -; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) -; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) -; OPT-NEXT: ret %2 +define @reinterpret_test_h_rev( %a) #0 { +; CHECK-LABEL: @reinterpret_test_h_rev( +; CHECK: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) +; CHECK-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) +; CHECK-NEXT: ret %2 %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) ret %2 } -define @reinterpret_test_w( %a) { -; OPT-LABEL: @reinterpret_test_w( -; OPT-NOT: convert -; OPT: ret %a +define @reinterpret_test_w( %a) #0 { +; CHECK-LABEL: @reinterpret_test_w( +; CHECK-NOT: convert +; CHECK: ret %a %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a) %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) ret %2 @@ -32,20 +34,20 @@ ; Reinterprets are not redundant because the second reinterpret zeros the ; lanes that don't exist within its input. 
-define @reinterpret_test_w_rev( %a) { -; OPT-LABEL: @reinterpret_test_w_rev( -; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) -; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) -; OPT-NEXT: ret %2 +define @reinterpret_test_w_rev( %a) #0 { +; CHECK-LABEL: @reinterpret_test_w_rev( +; CHECK: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) +; CHECK-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) +; CHECK-NEXT: ret %2 %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) ret %2 } -define @reinterpret_test_d( %a) { -; OPT-LABEL: @reinterpret_test_d( -; OPT-NOT: convert -; OPT: ret %a +define @reinterpret_test_d( %a) #0 { +; CHECK-LABEL: @reinterpret_test_d( +; CHECK-NOT: convert +; CHECK: ret %a %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1) ret %2 @@ -53,19 +55,19 @@ ; Reinterprets are not redundant because the second reinterpret zeros the ; lanes that don't exist within its input. 
-define @reinterpret_test_d_rev( %a) { -; OPT-LABEL: @reinterpret_test_d_rev( -; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) -; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) -; OPT-NEXT: ret %2 +define @reinterpret_test_d_rev( %a) #0 { +; CHECK-LABEL: @reinterpret_test_d_rev( +; CHECK: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) +; CHECK-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) +; CHECK-NEXT: ret %2 %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) ret %2 } -define @reinterpret_test_full_chain( %a) { -; OPT-LABEL: @reinterpret_test_full_chain( -; OPT: ret %a +define @reinterpret_test_full_chain( %a) #0 { +; CHECK-LABEL: @reinterpret_test_full_chain( +; CHECK: ret %a %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) @@ -77,11 +79,11 @@ ; The last two reinterprets are not necessary, since they are doing the same ; work as the first two. 
-define @reinterpret_test_partial_chain( %a) { -; OPT-LABEL: @reinterpret_test_partial_chain( -; OPT: %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) -; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) -; OPT-NEXT: ret %2 +define @reinterpret_test_partial_chain( %a) #0 { +; CHECK-LABEL: @reinterpret_test_partial_chain( +; CHECK: %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) +; CHECK-NEXT: %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) +; CHECK-NEXT: ret %2 %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) @@ -91,13 +93,13 @@ ; The chain cannot be reduced because of the second reinterpret, which causes ; zeroing. -define @reinterpret_test_irreducible_chain( %a) { -; OPT-LABEL: @reinterpret_test_irreducible_chain( -; OPT: %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) -; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) -; OPT-NEXT: %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) -; OPT-NEXT: %4 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %3) -; OPT-NEXT: ret %4 +define @reinterpret_test_irreducible_chain( %a) #0 { +; CHECK-LABEL: @reinterpret_test_irreducible_chain( +; CHECK: %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) +; CHECK-NEXT: %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) +; CHECK-NEXT: %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) +; CHECK-NEXT: %4 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %3) +; CHECK-NEXT: ret %4 %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) @@ -107,11 +109,11 @@ ; Here, the candidate list is larger than the number of instructions that we ; end 
up removing. -define @reinterpret_test_keep_some_candidates( %a) { -; OPT-LABEL: @reinterpret_test_keep_some_candidates( -; OPT: %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) -; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) -; OPT-NEXT: ret %2 +define @reinterpret_test_keep_some_candidates( %a) #0 { +; CHECK-LABEL: @reinterpret_test_keep_some_candidates( +; CHECK: %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) +; CHECK-NEXT: %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) +; CHECK-NEXT: ret %2 %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) @@ -119,13 +121,13 @@ ret %4 } -define @reinterpret_reductions(i32 %cond, %a, %b, %c) { -; OPT-LABEL: reinterpret_reductions -; OPT-NOT: convert -; OPT-NOT: phi -; OPT: phi [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ] -; OPT-NOT: convert -; OPT: ret +define @reinterpret_reductions(i32 %cond, %a, %b, %c) #0 { +; CHECK-LABEL: reinterpret_reductions +; CHECK-NOT: convert +; CHECK-NOT: phi +; CHECK: phi [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ] +; CHECK-NOT: convert +; CHECK: ret entry: switch i32 %cond, label %br_phi_c [ @@ -153,13 +155,13 @@ ; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1) ; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets. 
-define @reinterpret_reductions_1(i32 %cond, %a, %b, %c) { -; OPT-LABEL: reinterpret_reductions_1 -; OPT: convert -; OPT: phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] -; OPT-NOT: phi -; OPT: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) -; OPT: ret +define @reinterpret_reductions_1(i32 %cond, %a, %b, %c) #0 { +; CHECK-LABEL: reinterpret_reductions_1 +; CHECK: convert +; CHECK: phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] +; CHECK-NOT: phi +; CHECK: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) +; CHECK: ret entry: switch i32 %cond, label %br_phi_c [ @@ -187,13 +189,13 @@ ; No transform. Similar to the the test above, but here only two of the arguments need to ; be converted to svbool. -define @reinterpret_reductions_2(i32 %cond, %a, %b, %c) { -; OPT-LABEL: reinterpret_reductions_2 -; OPT: convert -; OPT: phi [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ] -; OPT-NOT: phi -; OPT: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) -; OPT: ret +define @reinterpret_reductions_2(i32 %cond, %a, %b, %c) #0 { +; CHECK-LABEL: reinterpret_reductions_2 +; CHECK: convert +; CHECK: phi [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ] +; CHECK-NOT: phi +; CHECK: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) +; CHECK: ret entry: switch i32 %cond, label %br_phi_c [ @@ -220,12 +222,11 @@ ; Similar to reinterpret_reductions but the reinterprets remain because the ; original phi cannot be removed (i.e. prefer reinterprets over multiple phis). 
-define @reinterpret_reductions3(i32 %cond, %a, %b, %c) { -; OPT-LABEL: reinterpret_reductions3 -; OPT: phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] -; OPT-NOT: phi -; OPT: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) -; OPT-NEXT: ret %pg +define @reinterpret_reductions3(i32 %cond, %a, %b, %c) #0 { +; CHECK-LABEL: reinterpret_reductions3 +; CHECK: phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] +; CHECK-NOT: phi +; CHECK: ret %pg entry: switch i32 %cond, label %br_phi_c [ @@ -257,3 +258,5 @@ declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv2i1() + +attributes #0 = { "target-features"="+sve" }