diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
--- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -59,6 +59,12 @@
                      LoopInfo *LI, const TargetTransformInfo *TTI,
                      SmallVectorImpl<WeakTrackingVH> &Dead);
 
+/// findCommonDominator - Find a point in code which dominates all given
+/// instructions. We can safely assume that, whatever fact we can prove at the
+/// found point, this fact is also true for each of the given instructions.
+Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions,
+                                 DominatorTree &DT);
+
 /// Collect information about induction variables that are used by sign/zero
 /// extend operations. This information is recorded by CollectExtend and provides
 /// the input to WidenIV.
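The declaration above is the only new public API in this change. For orientation, here is a minimal sketch of the intended calling pattern; the wrapper name `blockDominatingAllUsers` is hypothetical, invented for this note, and the PHI handling simply mirrors the InstCombine hunk below:

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;

// Hypothetical helper: find the block at whose end I's value must be
// available for every user, i.e. a legal position to sink I to.
static BasicBlock *blockDominatingAllUsers(Instruction *I, DominatorTree &DT) {
  SmallVector<Instruction *, 8> Users;
  for (Use &U : I->uses()) {
    // A PHI consumes the value at the end of the matching incoming block,
    // not at the PHI itself, so record that block's terminator instead.
    if (auto *PN = dyn_cast<PHINode>(U.getUser()))
      Users.push_back(PN->getIncomingBlock(U)->getTerminator());
    else
      Users.push_back(cast<Instruction>(U.getUser()));
  }
  if (Users.empty())
    return nullptr;
  // Whatever fact holds at the returned instruction holds for all of Users.
  return findCommonDominator(Users, DT)->getParent();
}
```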
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -102,6 +102,7 @@
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -3668,7 +3669,6 @@
 /// instruction past all of the instructions between it and the end of its
 /// block.
 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
-  assert(I->getSingleUndroppableUse() && "Invariants didn't hold!");
   BasicBlock *SrcBlock = I->getParent();
 
   // Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -3822,51 +3822,67 @@
     // See if we can trivially sink this instruction to its user if we can
     // prove that the successor is not executed more frequently than our block.
-    if (EnableCodeSinking)
+    if (EnableCodeSinking) {
+      BasicBlock *UserParent = nullptr;
+      BasicBlock *BB = I->getParent();
+
       if (Use *SingleUse = I->getSingleUndroppableUse()) {
-        BasicBlock *BB = I->getParent();
         Instruction *UserInst = cast<Instruction>(SingleUse->getUser());
-        BasicBlock *UserParent;
 
         // Get the block the use occurs in.
         if (PHINode *PN = dyn_cast<PHINode>(UserInst))
          UserParent = PN->getIncomingBlock(*SingleUse);
        else
          UserParent = UserInst->getParent();
+      } else {
+        SmallVector<Instruction *> Uses;
 
-        // Try sinking to another block. If that block is unreachable, then do
-        // not bother. SimplifyCFG should handle it.
-        if (UserParent != BB && DT.isReachableFromEntry(UserParent)) {
-          // See if the user is one of our successors that has only one
-          // predecessor, so that we don't have to split the critical edge.
-          bool ShouldSink = UserParent->getUniquePredecessor() == BB;
-          // Another option where we can sink is a block that ends with a
-          // terminator that does not pass control to other block (such as
-          // return or unreachable). In this case:
-          //   - I dominates the User (by SSA form);
-          //   - the User will be executed at most once.
-          // So sinking I down to User is always profitable or neutral.
-          if (!ShouldSink) {
-            auto *Term = UserParent->getTerminator();
-            ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term);
-          }
-          if (ShouldSink) {
-            assert(DT.dominates(BB, UserParent) &&
-                   "Dominance relation broken?");
-            // Okay, the CFG is simple enough, try to sink this instruction.
-            if (TryToSinkInstruction(I, UserParent)) {
-              LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
-              MadeIRChange = true;
-              // We'll add uses of the sunk instruction below, but since sinking
-              // can expose opportunities for it's *operands* add them to the
-              // worklist
-              for (Use &U : I->operands())
-                if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
-                  Worklist.push(OpI);
-            }
+        for (Use &U : I->uses()) {
+          if (PHINode *PN = dyn_cast<PHINode>(U.getUser()))
+            Uses.push_back(PN->getIncomingBlock(U)->getTerminator());
+          else
+            Uses.push_back(dyn_cast<Instruction>(U.getUser()));
+        }
+
+        if (!Uses.empty()) {
+          // Get the block where the common dominator of the users is located.
+          Instruction *CommonDominator = findCommonDominator(Uses, DT);
+          UserParent = CommonDominator->getParent();
+        }
+      }
+
+      // Try sinking to another block. If that block is unreachable, then do
+      // not bother. SimplifyCFG should handle it.
+      if (UserParent != BB && DT.isReachableFromEntry(UserParent)) {
+        // See if the user is one of our successors that has only one
+        // predecessor, so that we don't have to split the critical edge.
+        bool ShouldSink = UserParent->getUniquePredecessor() == BB;
+        // Another option where we can sink is a block that ends with a
+        // terminator that does not pass control to another block (such as
+        // return or unreachable). In this case:
+        //   - I dominates the User (by SSA form);
+        //   - the User will be executed at most once.
+        // So sinking I down to User is always profitable or neutral.
+        if (!ShouldSink) {
+          auto *Term = UserParent->getTerminator();
+          ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term);
+        }
+        if (ShouldSink) {
+          assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
+          // Okay, the CFG is simple enough, try to sink this instruction.
+          if (TryToSinkInstruction(I, UserParent)) {
+            LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
+            MadeIRChange = true;
+            // We'll add uses of the sunk instruction below, but since sinking
+            // can expose opportunities for its *operands* add them to the
+            // worklist
+            for (Use &U : I->operands())
+              if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
+                Worklist.push(OpI);
          }
        }
      }
+    }
 
     // Now that we have an instruction, try combining it to simplify it.
     Builder.SetInsertPoint(I);
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -99,24 +99,6 @@
 };
 }
 
-/// Find a point in code which dominates all given instructions. We can safely
-/// assume that, whatever fact we can prove at the found point, this fact is
-/// also true for each of the given instructions.
-static Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions,
-                                        DominatorTree &DT) {
-  Instruction *CommonDom = nullptr;
-  for (auto *Insn : Instructions)
-    if (!CommonDom || DT.dominates(Insn, CommonDom))
-      CommonDom = Insn;
-    else if (!DT.dominates(CommonDom, Insn))
-      // If there is no dominance relation, use common dominator.
-      CommonDom =
-          DT.findNearestCommonDominator(CommonDom->getParent(),
-                                        Insn->getParent())->getTerminator();
-  assert(CommonDom && "Common dominator not found?");
-  return CommonDom;
-}
-
 /// Fold an IV operand into its use. This removes increments of an
 /// aligned IV when used by a instruction that ignores the low bits.
 ///
@@ -940,6 +922,24 @@
   return Changed;
 }
 
+/// Find a point in code which dominates all given instructions. We can safely
+/// assume that, whatever fact we can prove at the found point, this fact is
+/// also true for each of the given instructions.
+Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions,
+                                 DominatorTree &DT) {
+  Instruction *CommonDom = nullptr;
+  for (auto *Insn : Instructions)
+    if (!CommonDom || DT.dominates(Insn, CommonDom))
+      CommonDom = Insn;
+    else if (!DT.dominates(CommonDom, Insn))
+      // If there is no dominance relation, use common dominator.
+      CommonDom = DT.findNearestCommonDominator(CommonDom->getParent(),
+                                                Insn->getParent())
+                      ->getTerminator();
+  assert(CommonDom && "Common dominator not found?");
+  return CommonDom;
+}
+
 } // namespace llvm
 
 //===----------------------------------------------------------------------===//
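The rest of the patch is test churn from instructions now being sunk toward their users. As a reading aid, the decision the rewritten block above makes can be condensed as follows; this is a sketch for illustration, not a drop-in excerpt, and `profitableToSink` is an invented name:

```cpp
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sinking I from BB into UserParent is profitable or neutral only when
// UserParent cannot execute more often than BB: either BB is UserParent's
// unique predecessor, or UserParent never passes control onward (it ends
// in return or unreachable), so it runs at most once.
static bool profitableToSink(BasicBlock *BB, BasicBlock *UserParent,
                             DominatorTree &DT) {
  if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
    return false;
  if (UserParent->getUniquePredecessor() == BB)
    return true;
  auto *Term = UserParent->getTerminator();
  return isa<ReturnInst>(Term) || isa<UnreachableInst>(Term);
}
```

The new `@test4` in `sink_instruction.ll` below exercises the multi-use path: once `%cond1` has been sunk, the remaining users of the load are dominated by `%split`, so the load follows it there.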
diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll
--- a/llvm/test/Transforms/InstCombine/assume.ll
+++ b/llvm/test/Transforms/InstCombine/assume.ll
@@ -302,9 +302,9 @@
 ; DEFAULT-LABEL: @nonnull3(
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
-; DEFAULT-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
 ; DEFAULT-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
 ; DEFAULT:       taken:
+; DEFAULT-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
 ; DEFAULT-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
 ; DEFAULT-NEXT:    ret i1 false
 ; DEFAULT:       not_taken:
diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
--- a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
@@ -19,10 +19,10 @@
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[MUL3]], [[TMP2]]
 ; CHECK-NEXT:    [[CONV4:%.*]] = trunc i64 [[AND]] to i32
 ; CHECK-NEXT:    [[TOBOOL7_NOT:%.*]] = icmp eq i32 [[CONV4]], 0
-; CHECK-NEXT:    [[PHITMP:%.*]] = zext i1 [[TOBOOL7_NOT]] to i32
+; CHECK-NEXT:    [[PHI_CAST:%.*]] = zext i1 [[TOBOOL7_NOT]] to i32
 ; CHECK-NEXT:    br label [[LOR_END]]
 ; CHECK:       lor.end:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[PHITMP]], [[LOR_RHS]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[PHI_CAST]], [[LOR_RHS]] ]
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
 entry:
@@ -56,9 +56,9 @@
 define void @PR33765(i8 %beth) {
 ; CHECK-LABEL: @PR33765(
-; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32
 ; CHECK-NEXT:    br i1 false, label [[IF_THEN9:%.*]], label [[IF_THEN9]]
 ; CHECK:       if.then9:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
 ; CHECK-NEXT:    [[TINKY:%.*]] = load i16, i16* @glob, align 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[MUL]] to i16
diff --git a/llvm/test/Transforms/InstCombine/intptr7.ll b/llvm/test/Transforms/InstCombine/intptr7.ll
--- a/llvm/test/Transforms/InstCombine/intptr7.ll
+++ b/llvm/test/Transforms/InstCombine/intptr7.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define void @matching_phi(i64 %a, float* %b, i1 %cond) {
 ; CHECK-LABEL: @matching_phi(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ADD_INT:%.*]] = add i64 [[A:%.*]], 1
-; CHECK-NEXT:    [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float*
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BBB:%.*]], label [[A:%.*]]
 ; CHECK:       A:
 ; CHECK-NEXT:    [[ADDB:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 2
 ; CHECK-NEXT:    br label [[C:%.*]]
 ; CHECK:       Bbb:
+; CHECK-NEXT:
[[ADD_INT:%.*]] = add i64 [[A:%.*]], 1 +; CHECK-NEXT: [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float* ; CHECK-NEXT: store float 1.000000e+01, float* [[ADD]], align 4 ; CHECK-NEXT: br label [[C]] ; CHECK: C: @@ -48,18 +48,20 @@ ; CHECK-LABEL: @no_matching_phi( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD_INT:%.*]] = add i64 [[A:%.*]], 1 -; CHECK-NEXT: [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float* ; CHECK-NEXT: [[ADDB:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 2 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[B:%.*]], label [[A:%.*]] ; CHECK: A: ; CHECK-NEXT: br label [[C:%.*]] ; CHECK: B: +; CHECK-NEXT: [[ADDB_INT:%.*]] = ptrtoint float* [[ADDB]] to i64 +; CHECK-NEXT: [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float* ; CHECK-NEXT: store float 1.000000e+01, float* [[ADD]], align 4 ; CHECK-NEXT: br label [[C]] ; CHECK: C: ; CHECK-NEXT: [[A_ADDR_03:%.*]] = phi float* [ [[ADDB]], [[A]] ], [ [[ADD]], [[B]] ] -; CHECK-NEXT: [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD]], [[A]] ], [ [[ADDB]], [[B]] ] -; CHECK-NEXT: [[I1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4 +; CHECK-NEXT: [[B_ADDR_02:%.*]] = phi i64 [ [[ADD_INT]], [[A]] ], [ [[ADDB_INT]], [[B]] ] +; CHECK-NEXT: [[I0:%.*]] = inttoptr i64 [[B_ADDR_02]] to float* +; CHECK-NEXT: [[I1:%.*]] = load float, float* [[I0]], align 4 ; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[I1]], 4.200000e+01 ; CHECK-NEXT: store float [[MUL_I]], float* [[A_ADDR_03]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll b/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll --- a/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll +++ b/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll @@ -11,17 +11,17 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TEXT:%.*]] = alloca [1 x i8], align 1 ; CHECK-NEXT: [[BUFF:%.*]] = alloca [1 x i8], align 1 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0 ; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: -; CHECK-NEXT: call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], [[META16:metadata !.*]], metadata !DIExpression()), [[DBG24:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] ; CHECK-NEXT: br label [[FIN:%.*]] ; CHECK: else: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP0]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP1]]) ; CHECK-NEXT: call void @foo(i8* [[TMP1]], i8* [[TMP0]]) diff --git a/llvm/test/Transforms/InstCombine/lifetime.ll b/llvm/test/Transforms/InstCombine/lifetime.ll --- a/llvm/test/Transforms/InstCombine/lifetime.ll +++ b/llvm/test/Transforms/InstCombine/lifetime.ll @@ -11,17 +11,17 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TEXT:%.*]] = alloca [1 x i8], align 1 ; CHECK-NEXT: [[BUFF:%.*]] = alloca [1 x i8], align 1 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0 ; CHECK-NEXT: br i1 
[[FLAG:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: -; CHECK-NEXT: call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], [[META16:metadata !.*]], metadata !DIExpression()), [[DBG24:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] ; CHECK-NEXT: br label [[FIN:%.*]] ; CHECK: else: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull [[TMP0]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull [[TMP1]]) ; CHECK-NEXT: call void @foo(i8* nonnull [[TMP1]], i8* nonnull [[TMP0]]) diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll --- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll +++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll @@ -15,7 +15,6 @@ ; CHECK-NEXT: [[I:%.*]] = load i8, i8* @var_7, align 1 ; CHECK-NEXT: [[I1:%.*]] = icmp eq i8 [[I]], -1 ; CHECK-NEXT: [[I4:%.*]] = load i16, i16* @var_0, align 2 -; CHECK-NEXT: [[I8:%.*]] = sext i16 [[I4]] to i32 ; CHECK-NEXT: br i1 [[I1]], label [[BB10:%.*]], label [[BB9:%.*]] ; CHECK: bb9: ; CHECK-NEXT: br label [[BB12:%.*]] @@ -31,6 +30,7 @@ ; CHECK-NEXT: [[STOREMERGE1:%.*]] = phi i32 [ [[I11]], [[BB10]] ], [ 1, [[BB9]] ] ; CHECK-NEXT: store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 0), align 4 ; CHECK-NEXT: store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 0), align 2 +; CHECK-NEXT: [[I8:%.*]] = sext i16 [[I4]] to i32 ; CHECK-NEXT: store i32 [[I8]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @arr_3, i64 0, i64 0), align 16 ; CHECK-NEXT: store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 1), align 4 ; CHECK-NEXT: store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 1), align 2 diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll --- a/llvm/test/Transforms/InstCombine/minmax-fold.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll @@ -537,10 +537,10 @@ ; (icmp slt smax(PositiveA, B) 2) -> (icmp eq B 1) define i32 @clamp_check_for_no_infinite_loop3(i32 %i) { ; CHECK-LABEL: @clamp_check_for_no_infinite_loop3( -; CHECK-NEXT: [[I2:%.*]] = icmp sgt i32 [[I:%.*]], 1 -; CHECK-NEXT: [[I3:%.*]] = select i1 [[I2]], i32 [[I]], i32 1 ; CHECK-NEXT: br i1 true, label [[TRUELABEL:%.*]], label [[FALSELABEL:%.*]] ; CHECK: truelabel: +; CHECK-NEXT: [[I2:%.*]] = icmp sgt i32 [[I:%.*]], 1 +; CHECK-NEXT: [[I3:%.*]] = select i1 [[I2]], i32 [[I]], i32 1 ; CHECK-NEXT: [[I5:%.*]] = icmp slt i32 [[I3]], 2 ; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], i32 [[I3]], i32 2 ; CHECK-NEXT: [[I7:%.*]] = shl nuw nsw i32 [[I6]], 2 diff --git a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll --- a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll +++ b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll @@ -14,11 +14,11 @@ ; CHECK-LABEL: @f( ; CHECK-NEXT: bb0: 
; CHECK-NEXT: [[T12:%.*]] = alloca [2 x i32], align 8 -; CHECK-NEXT: [[T12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[T12]], i16 0, i16 0 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: unreachable ; CHECK: bb2: +; CHECK-NEXT: [[T12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[T12]], i16 0, i16 0 ; CHECK-NEXT: [[T9:%.*]] = load i16*, i16** @b, align 2 ; CHECK-NEXT: store i16 0, i16* [[T9]], align 2 ; CHECK-NEXT: [[T10:%.*]] = load i32, i32* [[T12_SUB]], align 8 diff --git a/llvm/test/Transforms/InstCombine/pr46680.ll b/llvm/test/Transforms/InstCombine/pr46680.ll --- a/llvm/test/Transforms/InstCombine/pr46680.ll +++ b/llvm/test/Transforms/InstCombine/pr46680.ll @@ -13,10 +13,10 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I:%.*]] = load i64, i64* @d, align 8 ; CHECK-NEXT: [[I1:%.*]] = icmp eq i64 [[I]], 0 -; CHECK-NEXT: [[I2:%.*]] = load i64, i64* @a, align 8 -; CHECK-NEXT: [[I3:%.*]] = icmp ne i64 [[I2]], 0 ; CHECK-NEXT: br i1 [[I1]], label [[BB13:%.*]], label [[BB4:%.*]] ; CHECK: bb4: +; CHECK-NEXT: [[I2:%.*]] = load i64, i64* @a, align 8 +; CHECK-NEXT: [[I3:%.*]] = icmp ne i64 [[I2]], 0 ; CHECK-NEXT: [[I5:%.*]] = load i16, i16* [[ARG:%.*]], align 2 ; CHECK-NEXT: [[I6:%.*]] = trunc i16 [[I5]] to i8 ; CHECK-NEXT: store i8 [[I6]], i8* @c, align 1 diff --git a/llvm/test/Transforms/InstCombine/shift-by-signext.ll b/llvm/test/Transforms/InstCombine/shift-by-signext.ll --- a/llvm/test/Transforms/InstCombine/shift-by-signext.ll +++ b/llvm/test/Transforms/InstCombine/shift-by-signext.ll @@ -69,11 +69,11 @@ define i32 @t6_twoshifts(i32 %x, i8 %shamt) { ; CHECK-LABEL: @t6_twoshifts( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32 ; CHECK-NEXT: br label [[WORK:%.*]] ; CHECK: work: ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: end: +; CHECK-NEXT: [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32 ; CHECK-NEXT: [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]] ; CHECK-NEXT: [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_WIDE]] ; CHECK-NEXT: ret i32 [[R]] @@ -151,11 +151,11 @@ } define i32 @n12_twoshifts_and_extrause(i32 %x, i8 %shamt) { ; CHECK-LABEL: @n12_twoshifts_and_extrause( -; CHECK-NEXT: [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32 ; CHECK-NEXT: br label [[WORK:%.*]] ; CHECK: work: ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: end: +; CHECK-NEXT: [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32 ; CHECK-NEXT: [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]] ; CHECK-NEXT: [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_WIDE]] ; CHECK-NEXT: call void @use32(i32 [[SHAMT_WIDE]]) diff --git a/llvm/test/Transforms/InstCombine/sink_instruction.ll b/llvm/test/Transforms/InstCombine/sink_instruction.ll --- a/llvm/test/Transforms/InstCombine/sink_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_instruction.ll @@ -77,3 +77,35 @@ %sum.0 = phi i32 [ %add, %sw.bb ], [ 0, %entry ] ret i32 %sum.0 } + +declare void @f() + +declare void @g() + +declare void @use(i32) + +define void @test4(i32* %ptr, i1 %cond0) { +entry: +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 %cond0, label %split, label %end + %temp = load i32, i32* %ptr, align 4 + %cond1 = icmp eq i32 %temp, 0 + br i1 %cond0, label %split, label %end + +split: ; preds = %entry +; CHECK-LABEL: split: +; CHECK-NEXT: %temp = load i32, i32* %ptr, align 4 +; CHECK-NEXT: %cond1 = icmp eq i32 %temp, 0 + br i1 %cond1, label %body1, label %body2 + +body1: ; preds = %split + call void @f() + br label %end + +body2: ; preds = %split + call void @use(i32 %temp) + br label %end + 
+end: ; preds = %body2, %body1, %entry + ret void +} diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll --- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll @@ -6,12 +6,12 @@ define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %arg_3) { ; CHECK-A55-LABEL: @runtime_unroll_generic( ; CHECK-A55-NEXT: entry: -; CHECK-A55-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef -; CHECK-A55-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef -; CHECK-A55-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef ; CHECK-A55-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 ; CHECK-A55-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]] ; CHECK-A55: for.body6.preheader: +; CHECK-A55-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef +; CHECK-A55-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef +; CHECK-A55-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef ; CHECK-A55-NEXT: [[TMP0:%.*]] = add i32 [[ARG_0]], -1 ; CHECK-A55-NEXT: [[XTRAITER:%.*]] = and i32 [[ARG_0]], 3 ; CHECK-A55-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 diff --git a/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll b/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll --- a/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll +++ b/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll @@ -17,12 +17,12 @@ ; CHECK-NEXT: store i32 0, i32* [[X]], align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[REM]], 1 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY_1:%.*]], label [[WHILE_END]] ; CHECK: while.end: ; CHECK-NEXT: ret void ; CHECK: while.body.1: +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4 ; CHECK-NEXT: [[CMP1_1:%.*]] = icmp slt i32 [[TMP1]], 10 ; CHECK-NEXT: br i1 [[CMP1_1]], label [[IF_THEN_1:%.*]], label [[IF_END_1:%.*]] @@ -30,10 +30,10 @@ ; CHECK-NEXT: store i32 0, i32* [[INCDEC_PTR]], align 4 ; CHECK-NEXT: br label [[IF_END_1]] ; CHECK: if.end.1: -; CHECK-NEXT: [[INCDEC_PTR_1:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 ; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[REM]], 2 ; CHECK-NEXT: br i1 [[CMP_1]], label [[WHILE_BODY_2:%.*]], label [[WHILE_END]] ; CHECK: while.body.2: +; CHECK-NEXT: [[INCDEC_PTR_1:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[INCDEC_PTR_1]], align 4 ; CHECK-NEXT: [[CMP1_2:%.*]] = icmp slt i32 [[TMP2]], 10 ; CHECK-NEXT: br i1 [[CMP1_2]], label [[IF_THEN_2:%.*]], label [[WHILE_END]] @@ -77,8 +77,8 @@ ; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[L86_OFF]], 24 ; CHECK-NEXT: [[DOTNOT30:%.*]] = icmp ne i32 [[L86]], 25 ; CHECK-NEXT: [[SPEC_SELECT24:%.*]] = zext i1 [[DOTNOT30]] to i32 -; CHECK-NEXT: [[COMMON_RET31_OP:%.*]] = select i1 [[SWITCH]], i32 0, i32 [[SPEC_SELECT24]] -; CHECK-NEXT: ret i32 [[COMMON_RET31_OP]] +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[SWITCH]], i32 0, i32 [[SPEC_SELECT24]] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] ; entry: br label %for.body.i.i diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -19,11 +19,11 @@ ; CHECK-NEXT: br i1 [[CMP_NOT6]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 15 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934576 ; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_CRD]] @@ -52,7 +52,7 @@ ; CHECK-NEXT: store <8 x i16> [[TMP7]], <8 x i16>* [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]] @@ -72,7 +72,7 @@ ; CHECK-NEXT: store i16 [[TMP13]], i16* [[PDST_ADDR_07]], align 2 ; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_09]], -1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: while.end: ; CHECK-NEXT: ret void ; @@ -111,14 +111,14 @@ ; CHECK-NEXT: br i1 [[CMP_NOT6]], label [[WHILE_END:%.*]], label [[ITER_CHECK:%.*]] ; CHECK: iter.check: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 31 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934560 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[OFFSET:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer @@ -143,7 +143,7 @@ ; CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP11]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -179,7 +179,7 @@ ; CHECK-NEXT: store <8 x i8> [[TMP16]], <8 x i8>* [[TMP17]], align 2 ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX10]], 8 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC9]] -; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[TMP14]], [[N_VEC9]] ; CHECK-NEXT: br i1 [[CMP_N20]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]] @@ -199,7 +199,7 @@ ; CHECK-NEXT: store i8 [[TMP20]], i8* [[PDST_ADDR_07]], align 2 ; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_09]], -1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: while.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -1357,11 +1357,11 @@ ; CHECK-NEXT: br i1 [[GUARD]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 6 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP2]], -4 ; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[N_VEC]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1380,7 +1380,7 @@ ; CHECK-NEXT: [[TMP8]] = add i32 [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -20,14 +20,14 @@ ; CHECK-NEXT: br i1 [[CMP_NOT6]], label [[WHILE_END:%.*]], label [[ITER_CHECK:%.*]] ; CHECK: iter.check: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], 
label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 63 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934528 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFFSET:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer @@ -70,7 +70,7 @@ ; CHECK-NEXT: store <16 x i16> [[TMP13]], <16 x i16>* [[TMP20]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -106,7 +106,7 @@ ; CHECK-NEXT: store <8 x i16> [[TMP26]], <8 x i16>* [[TMP27]], align 2 ; CHECK-NEXT: [[INDEX_NEXT21]] = add nuw i64 [[INDEX20]], 8 ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT21]], [[N_VEC19]] -; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N30:%.*]] = icmp eq i64 [[TMP24]], [[N_VEC19]] ; CHECK-NEXT: br i1 [[CMP_N30]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]] @@ -126,7 +126,7 @@ ; CHECK-NEXT: store i16 [[TMP30]], i16* [[PDST_ADDR_07]], align 2 ; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_09]], -1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: while.end: ; CHECK-NEXT: ret void ; @@ -166,14 +166,14 @@ ; CHECK-NEXT: br i1 [[CMP_NOT6]], label [[WHILE_END:%.*]], label [[ITER_CHECK:%.*]] ; CHECK: iter.check: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 15 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 127 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934464 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[OFFSET:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i8> [[BROADCAST_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer @@ 
-216,7 +216,7 @@ ; CHECK-NEXT: store <32 x i8> [[TMP13]], <32 x i8>* [[TMP20]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -252,7 +252,7 @@ ; CHECK-NEXT: store <16 x i8> [[TMP26]], <16 x i8>* [[TMP27]], align 2 ; CHECK-NEXT: [[INDEX_NEXT21]] = add nuw i64 [[INDEX20]], 16 ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT21]], [[N_VEC19]] -; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N30:%.*]] = icmp eq i64 [[TMP24]], [[N_VEC19]] ; CHECK-NEXT: br i1 [[CMP_N30]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]] @@ -272,7 +272,7 @@ ; CHECK-NEXT: store i8 [[TMP30]], i8* [[PDST_ADDR_07]], align 2 ; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_09]], -1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], [[LOOP7:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: while.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -32,15 +32,15 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP2]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP1]], align 4, !alias.scope !0, !noalias !3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP1]], <16 x i32> undef), !alias.scope !3 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 
x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP3]], <16 x i32> undef), !alias.scope !3 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[PREDPHI]], i32 15 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -60,15 +60,15 @@ ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX14]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT19]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT21]], <8 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT21]], <8 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX14]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]] -; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]] +; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[WIDE_MASKED_GATHER22:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT19]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef) -; CHECK-NEXT: [[PREDPHI23:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[WIDE_MASKED_GATHER22]], <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT19]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER22:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT19]], i32 4, <8 x i1> [[TMP8]], <8 x i32> undef) +; CHECK-NEXT: [[PREDPHI23:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[WIDE_MASKED_GATHER22]], <8 x i32> ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[SMAX11]], [[N_VEC13]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI23]], i32 7 ; CHECK-NEXT: br i1 [[CMP_N16]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -76,10 +76,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH5_PREHEADER:%.*]], label [[DOTPREHEADER:%.*]] ; CHECK: .lr.ph5.preheader: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i32 0 @@ -136,10 +136,10 @@ ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 
[[N]], 0 ; CHECK-NEXT: br i1 [[TMP17]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]] ; CHECK: .lr.ph.preheader: -; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH10:%.*]] ; CHECK: vector.ph10: +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 ; CHECK-NEXT: [[N_RND_UP11:%.*]] = add nuw nsw i64 [[TMP19]], 4 ; CHECK-NEXT: [[N_VEC13:%.*]] = and i64 [[N_RND_UP11]], 8589934588 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i32 0 @@ -266,10 +266,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]] ; CHECK: .lr.ph.preheader: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -496,11 +496,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 2) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]] @@ -1297,11 +1297,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2) ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0 @@ -1385,11 +1385,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 5) ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -4 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6 ; CHECK-NEXT: br i1 
[[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_END:%.*]] = or i64 [[TMP3]], 3 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll @@ -300,22 +300,22 @@ define void @multiply_dont_hoist_phi(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]* %C) { ; CHECK-LABEL: @multiply_dont_hoist_phi( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A:%.*]], i64 0, i64 2 ; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 -; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B:%.*]], i64 0, i64 2 ; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 -; CHECK-NEXT: br label [[NEXT:%.*]] -; CHECK: next: ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] ; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll @@ -22,36 +22,36 @@ ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] 
; CHECK: inner.header: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP7:%.*]], [[INNER_LATCH]] ] -; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP7:%.*]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[INNER_BODY:%.*]] ; CHECK: inner.body: +; CHECK-NEXT: br label [[INNER_LATCH]] +; CHECK: inner.latch: ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x double>, <16 x double>* [[A:%.*]], i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP2]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP2]], i64 4 ; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[COLS_IV]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <16 x double>, <16 x double>* [[B:%.*]], i64 0, i64 [[TMP4]] -; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP5]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8 ; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP5]], i64 4 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[RESULT_VEC_0]]) -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP7]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP6]]) -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[RESULT_VEC_1]]) ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP9]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP8]]) -; CHECK-NEXT: br label [[INNER_LATCH]] -; CHECK: inner.latch: +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP2]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call contract <2 x double> 
@llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[RESULT_VEC_1]])
+; CHECK-NEXT: [[TMP7]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP6]])
+; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
+; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8
+; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[RESULT_VEC_0]])
+; CHECK-NEXT: [[TMP9]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP8]])
 ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2
 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_STEP]], 4
 ; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -62,10 +62,10 @@
 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], [[ROWS_IV]]
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr <16 x double>, <16 x double>* [[C:%.*]], i64 0, i64 [[TMP11]]
 ; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[TMP12]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[VEC_CAST21]], align 8
+; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[VEC_CAST21]], align 8
 ; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP12]], i64 4
 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast double* [[VEC_GEP22]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[VEC_CAST23]], align 8
+; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[VEC_CAST23]], align 8
 ; CHECK-NEXT: br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]]
 ; CHECK: cols.latch:
 ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2
@@ -104,40 +104,40 @@
 ; CHECK-NEXT: br label [[INNER_HEADER:%.*]]
 ; CHECK: inner.header:
 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
-; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
-; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
 ; CHECK-NEXT: br label [[INNER_BODY:%.*]]
 ; CHECK: inner.body:
+; CHECK-NEXT: br label [[INNER_LATCH]]
+; CHECK: inner.latch:
 ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INNER_IV]], 1
 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[A:%.*]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>*
 ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST]], align 8
-; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 2
-; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>*
-; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8
 ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[COLS_IV]], 2
 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[B:%.*]], i64 0, i64 [[TMP4]]
-; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
-; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8
 ; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr i64, i64* [[TMP5]], i64 4
 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast i64* [[VEC_GEP6]] to <2 x i64>*
 ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST7]], align 8
-; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]]
-; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP6]]
-; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]]
-; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]]
 ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]]
-; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP10]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]]
+; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP6]]
+; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 2
+; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>*
+; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8
 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]]
+; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]]
+; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8
+; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]]
+; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP10]]
+; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT: [[TMP13]] = add <2 x i64> [[TMP11]], [[TMP12]]
-; CHECK-NEXT: br label [[INNER_LATCH]]
-; CHECK: inner.latch:
 ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2
 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_STEP]], 4
 ; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -148,10 +148,10 @@
 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[ROWS_IV]]
 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[C:%.*]], i64 0, i64 [[TMP15]]
 ; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast i64* [[TMP16]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST21]], align 8
+; CHECK-NEXT: store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST21]], align 8
 ; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 2
 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast i64* [[VEC_GEP22]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST23]], align 8
+; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST23]], align 8
 ; CHECK-NEXT: br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]]
 ; CHECK: cols.latch:
 ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2
@@ -196,40 +196,40 @@
 ; CHECK-NEXT: br label [[INNER_HEADER:%.*]]
 ; CHECK: inner.header:
 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
-; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
-; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
 ; CHECK-NEXT: br label [[INNER_BODY:%.*]]
 ; CHECK: inner.body:
+; CHECK-NEXT: br label [[INNER_LATCH]]
+; CHECK: inner.latch:
 ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INNER_IV]], 2
 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[A:%.*]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>*
 ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST]], align 8
-; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 4
-; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>*
-; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8
 ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[COLS_IV]], 1
 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <16 x i64>, <16 x i64>* [[B:%.*]], i64 0, i64 [[TMP4]]
-; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
-; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8
 ; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2
 ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast i64* [[VEC_GEP6]] to <2 x i64>*
 ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST7]], align 8
-; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]]
-; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP6]]
-; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]]
-; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]]
 ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]]
-; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP10]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]]
+; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP6]]
+; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 4
+; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>*
+; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8
 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]]
+; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]]
+; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8
+; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]]
+; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP10]]
+; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT: [[TMP13]] = add <2 x i64> [[TMP11]], [[TMP12]]
-; CHECK-NEXT: br label [[INNER_LATCH]]
-; CHECK: inner.latch:
 ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2
 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_IV]], 0
 ; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -240,10 +240,10 @@
 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[ROWS_IV]]
 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <32 x i64>, <32 x i64>* [[C:%.*]], i64 0, i64 [[TMP15]]
 ; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast i64* [[TMP16]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST21]], align 8
+; CHECK-NEXT: store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST21]], align 8
 ; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 4
 ; CHECK-NEXT: [[VEC_CAST23:%.*]] = bitcast i64* [[VEC_GEP22]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST23]], align 8
+; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST23]], align 8
 ; CHECK-NEXT: br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]]
 ; CHECK: cols.latch:
 ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2
@@ -323,36 +323,36 @@
 ; CHECK-NEXT: br label [[INNER_HEADER:%.*]]
 ; CHECK: inner.header:
 ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
-; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP19:%.*]], [[INNER_LATCH]] ]
-; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT: [[RESULT_VEC_0:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT: [[RESULT_VEC_1:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP19:%.*]], [[INNER_LATCH]] ]
 ; CHECK-NEXT: br label [[INNER_BODY:%.*]]
 ; CHECK: inner.body:
+; CHECK-NEXT: br label [[INNER_LATCH]]
+; CHECK: inner.latch:
 ; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[INNER_IV]], 1
 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[ROWS_IV]]
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <4 x float>, <4 x float>* [[TMP5]], i64 0, i64 [[TMP13]]
-; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast float* [[TMP14]] to <2 x float>*
-; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST]], align 4
 ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, float* [[TMP14]], i64 2
 ; CHECK-NEXT: [[VEC_CAST8:%.*]] = bitcast float* [[VEC_GEP]] to <2 x float>*
 ; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST8]], align 4
 ; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[COLS_IV]], 1
 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP15]], [[INNER_IV]]
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr <4 x float>, <4 x float>* [[TMP11]], i64 0, i64 [[TMP16]]
-; CHECK-NEXT: [[VEC_CAST11:%.*]] = bitcast float* [[TMP17]] to <2 x float>*
-; CHECK-NEXT: [[COL_LOAD12:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST11]], align 4
 ; CHECK-NEXT: [[VEC_GEP13:%.*]] = getelementptr float, float* [[TMP17]], i64 2
 ; CHECK-NEXT: [[VEC_CAST14:%.*]] = bitcast float* [[VEC_GEP13]] to <2 x float>*
 ; CHECK-NEXT: [[COL_LOAD15:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST14]], align 4
-; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT]], <2 x float> [[RESULT_VEC_0]])
-; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP19]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT19]], <2 x float> [[TMP18]])
-; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT23]], <2 x float> [[RESULT_VEC_1]])
 ; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP21]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT26]], <2 x float> [[TMP20]])
-; CHECK-NEXT: br label [[INNER_LATCH]]
-; CHECK: inner.latch:
+; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast float* [[TMP14]] to <2 x float>*
+; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST]], align 4
+; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT23]], <2 x float> [[RESULT_VEC_1]])
+; CHECK-NEXT: [[TMP19]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT26]], <2 x float> [[TMP18]])
+; CHECK-NEXT: [[VEC_CAST11:%.*]] = bitcast float* [[TMP17]] to <2 x float>*
+; CHECK-NEXT: [[COL_LOAD12:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST11]], align 4
+; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT]], <2 x float> [[RESULT_VEC_0]])
+; CHECK-NEXT: [[TMP21]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT19]], <2 x float> [[TMP20]])
 ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2
 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_IV]], 0
 ; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -363,10 +363,10 @@
 ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[ROWS_IV]]
 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr <4 x float>, <4 x float>* [[C]], i64 0, i64 [[TMP23]]
 ; CHECK-NEXT: [[VEC_CAST28:%.*]] = bitcast float* [[TMP24]] to <2 x float>*
-; CHECK-NEXT: store <2 x float> [[TMP19]], <2 x float>* [[VEC_CAST28]], align 8
+; CHECK-NEXT: store <2 x float> [[TMP21]], <2 x float>* [[VEC_CAST28]], align 8
 ; CHECK-NEXT: [[VEC_GEP29:%.*]] = getelementptr float, float* [[TMP24]], i64 2
 ; CHECK-NEXT: [[VEC_CAST30:%.*]] = bitcast float* [[VEC_GEP29]] to <2 x float>*
-; CHECK-NEXT: store <2 x float> [[TMP21]], <2 x float>* [[VEC_CAST30]], align 8
+; CHECK-NEXT: store <2 x float> [[TMP19]], <2 x float>* [[VEC_CAST30]], align 8
 ; CHECK-NEXT: br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]]
 ; CHECK: cols.latch:
 ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2
diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll
--- a/llvm/test/Transforms/PGOProfile/chr.ll
+++ b/llvm/test/Transforms/PGOProfile/chr.ll
@@ -1381,10 +1381,6 @@
 ; CHECK-NEXT: br label [[BB1]]
 ; CHECK: bb1:
 ; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
-; CHECK-NEXT: [[V6:%.*]] = and i32 [[I0]], 2
-; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]]
-; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
-; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof [[PROF16]]
 ; CHECK-NEXT: call void @foo()
 ; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4
 ; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
@@ -1393,6 +1389,10 @@
 ; CHECK-NEXT: call void @foo()
 ; CHECK-NEXT: br label [[BB3]]
 ; CHECK: bb3:
+; CHECK-NEXT: [[V6:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]]
+; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof [[PROF16]]
 ; CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]]
 ; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof [[PROF16]]
 ; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[SUM3]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
@@ -90,10 +90,10 @@
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [225 x double]* [[A:%.*]] to <225 x double>*
 ; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I:%.*]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>*
 ; CHECK-NEXT: [[CMP212_NOT:%.*]] = icmp eq i32 [[I]], 0
 ; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
 ; CHECK: for.cond1.preheader.us.preheader:
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>*
 ; CHECK-NEXT: [[DOTPRE_PRE:%.*]] = load <225 x double>, <225 x double>* [[TMP1]], align 8
 ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
 ; CHECK: for.cond1.preheader.us:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll
@@ -139,9 +139,9 @@
 define i8 @k_bb(<4 x i8> %x) {
 ; CHECK-LABEL: @k_bb(
-; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
 ; CHECK-NEXT: br label [[BB1:%.*]]
 ; CHECK: bb1:
+; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
 ; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
 ; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i32 1
 ; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i32 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
@@ -139,9 +139,9 @@
 define i8 @k_bb(<4 x i8> %x) {
 ; CHECK-LABEL: @k_bb(
-; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
 ; CHECK-NEXT: br label [[BB1:%.*]]
 ; CHECK: bb1:
+; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
 ; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
 ; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i32 1
 ; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i32 2