diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -6647,7 +6647,7 @@ } static bool programUndefinedIfUndefOrPoison(const Value *V, - bool PoisonOnly); + bool PoisonOnly, const Instruction *Ctx = nullptr); static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC, @@ -6733,7 +6733,7 @@ I->hasMetadata(LLVMContext::MD_dereferenceable_or_null)) return true; - if (programUndefinedIfUndefOrPoison(V, PoisonOnly)) + if (programUndefinedIfUndefOrPoison(V, PoisonOnly, CtxI)) return true; // CxtI may be null or a cloned instruction. @@ -7056,7 +7056,7 @@ } static bool programUndefinedIfUndefOrPoison(const Value *V, - bool PoisonOnly) { + bool PoisonOnly, const Instruction *CtxI) { // We currently only look for uses of values within the same basic // block, as that makes it easier to guarantee that the uses will be // executed given that Inst is executed. @@ -7066,7 +7066,10 @@ // post-dominance. 
const BasicBlock *BB = nullptr; BasicBlock::const_iterator Begin; - if (const auto *Inst = dyn_cast<Instruction>(V)) { + if (CtxI) { + BB = CtxI->getParent(); + Begin = BB->begin(); + } else if (const auto *Inst = dyn_cast<Instruction>(V)) { BB = Inst->getParent(); Begin = Inst->getIterator(); Begin++; diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -484,10 +484,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[C]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: tail call void @use2(ptr nonnull dereferenceable(8) [[A]], ptr nonnull dereferenceable(8) [[B]]) #[[ATTR1]] +; CHECK-NEXT: tail call void @use2(ptr noundef nonnull dereferenceable(8) [[A]], ptr noundef nonnull dereferenceable(8) [[B]]) #[[ATTR1]] ; CHECK-NEXT: ret void ; CHECK: if.else: -; CHECK-NEXT: tail call void @use2(ptr nonnull dereferenceable(4) [[A]], ptr [[B]]) #[[ATTR1]] +; CHECK-NEXT: tail call void @use2(ptr noundef nonnull dereferenceable(4) [[A]], ptr [[B]]) #[[ATTR1]] ; CHECK-NEXT: ret void ; %cmp = icmp eq i8 %c, 0 diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -748,7 +748,7 @@ ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CGSCC: if.then: ; CGSCC-NEXT: tail call void @only_store(ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14:[0-9]+]] -; CGSCC-NEXT: tail call void @make_alias(ptr nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] +; CGSCC-NEXT: tail call void @make_alias(ptr nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: ; CGSCC-NEXT: 
[[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 @@ -909,7 +909,7 @@ ; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] ; CGSCC: l1: -; CGSCC-NEXT: tail call void @make_alias(ptr nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] +; CGSCC-NEXT: tail call void @make_alias(ptr nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[L2]] ; CGSCC: l2: ; CGSCC-NEXT: tail call void @only_store(ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -406,7 +406,7 @@ ; TUNIT-NEXT: [[TMP5C:%.*]] = getelementptr inbounds i32, ptr [[TMP5B]], i64 -1 ; TUNIT-NEXT: br label [[BB9]] ; TUNIT: bb6: -; TUNIT-NEXT: [[TMP7:%.*]] = tail call ptr @f2(ptr nofree nonnull readonly align 4 dereferenceable(4) [[ARG]]) #[[ATTR14]] +; TUNIT-NEXT: [[TMP7:%.*]] = tail call ptr @f2(ptr nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG]]) #[[ATTR14]] ; TUNIT-NEXT: ret ptr [[TMP7]] ; TUNIT: bb9: ; TUNIT-NEXT: [[TMP10:%.*]] = phi ptr [ [[TMP5C]], [[BB4]] ], [ inttoptr (i64 4 to ptr), [[BB:%.*]] ] @@ -428,7 +428,7 @@ ; CGSCC-NEXT: [[TMP5C:%.*]] = getelementptr inbounds i32, ptr [[TMP5B]], i64 -1 ; CGSCC-NEXT: br label [[BB9]] ; CGSCC: bb6: -; CGSCC-NEXT: [[TMP7:%.*]] = tail call ptr @f2(ptr nofree nonnull readonly align 4 dereferenceable(4) [[ARG]]) #[[ATTR14]] +; CGSCC-NEXT: [[TMP7:%.*]] = tail call ptr @f2(ptr nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG]]) #[[ATTR14]] ; CGSCC-NEXT: ret ptr [[TMP7]] ; CGSCC: bb9: ; CGSCC-NEXT: [[TMP10:%.*]] = phi ptr [ [[TMP5C]], [[BB4]] ], [ inttoptr (i64 4 to ptr), [[BB:%.*]] ] @@ -462,14 +462,14 @@ define internal ptr @f2(ptr %arg) { ; TUNIT: Function Attrs: nofree 
nosync nounwind memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@f2 -; TUNIT-SAME: (ptr nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR6]] { +; TUNIT-SAME: (ptr nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: [[TMP:%.*]] = tail call ptr @f1(ptr nofree readonly [[ARG]]) #[[ATTR14]] ; TUNIT-NEXT: ret ptr [[TMP]] ; ; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@f2 -; CGSCC-SAME: (ptr nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR5]] { +; CGSCC-SAME: (ptr nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: bb: ; CGSCC-NEXT: [[TMP:%.*]] = tail call ptr @f1(ptr nofree readonly [[ARG]]) #[[ATTR14]] ; CGSCC-NEXT: ret ptr [[TMP]] diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -76,12 +76,12 @@ ; TUNIT-NEXT: store i32 3, ptr [[R0]], align 4 ; TUNIT-NEXT: store i32 5, ptr [[R1]], align 4 ; TUNIT-NEXT: store i32 1, ptr [[W0]], align 4 -; TUNIT-NEXT: [[CALL:%.*]] = call ptr @internal_ret1_rrw(ptr nofree noundef nonnull align 4 dereferenceable(4) [[R0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R1]], ptr nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] -; TUNIT-NEXT: [[CALL1:%.*]] = call ptr @external_ret2_nrw(ptr nofree [[N0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R0]], ptr nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] -; TUNIT-NEXT: [[CALL2:%.*]] = call ptr @external_ret2_nrw(ptr nofree [[N0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R1]], ptr nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] -; TUNIT-NEXT: 
[[CALL3:%.*]] = call ptr @external_sink_ret2_nrw(ptr nofree [[N0]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], ptr nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR3]] -; TUNIT-NEXT: [[CALL4:%.*]] = call ptr @external_sink_ret2_nrw(ptr nofree [[N0]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], ptr nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR3]] -; TUNIT-NEXT: [[CALL5:%.*]] = call ptr @internal_ret0_nw(ptr nofree [[N0]], ptr nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; TUNIT-NEXT: [[CALL:%.*]] = call ptr @internal_ret1_rrw(ptr nofree noundef nonnull align 4 dereferenceable(4) [[R0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R1]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; TUNIT-NEXT: [[CALL1:%.*]] = call ptr @external_ret2_nrw(ptr nofree [[N0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; TUNIT-NEXT: [[CALL2:%.*]] = call ptr @external_ret2_nrw(ptr nofree [[N0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R1]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; TUNIT-NEXT: [[CALL3:%.*]] = call ptr @external_sink_ret2_nrw(ptr nofree [[N0]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], ptr nofree noundef nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR3]] +; TUNIT-NEXT: [[CALL4:%.*]] = call ptr @external_sink_ret2_nrw(ptr nofree [[N0]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], ptr nofree noundef nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR3]] +; TUNIT-NEXT: [[CALL5:%.*]] = call ptr @internal_ret0_nw(ptr nofree [[N0]], ptr nofree noundef nonnull 
align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] ; TUNIT-NEXT: br label [[RETURN]] ; TUNIT: return: ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[CALL5]], [[IF_END]] ], [ [[N0]], [[IF_THEN]] ] @@ -101,12 +101,12 @@ ; CGSCC-NEXT: store i32 3, ptr [[R0]], align 4 ; CGSCC-NEXT: store i32 5, ptr [[R1]], align 4 ; CGSCC-NEXT: store i32 1, ptr [[W0]], align 4 -; CGSCC-NEXT: [[CALL:%.*]] = call ptr @internal_ret1_rrw(ptr nofree noundef nonnull align 4 dereferenceable(4) [[R0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R1]], ptr nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] -; CGSCC-NEXT: [[CALL1:%.*]] = call ptr @external_ret2_nrw(ptr nofree [[N0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R0]], ptr nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] -; CGSCC-NEXT: [[CALL2:%.*]] = call ptr @external_ret2_nrw(ptr nofree [[N0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R1]], ptr nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] -; CGSCC-NEXT: [[CALL3:%.*]] = call ptr @external_sink_ret2_nrw(ptr nofree [[N0]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], ptr nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4:[0-9]+]] -; CGSCC-NEXT: [[CALL4:%.*]] = call ptr @external_sink_ret2_nrw(ptr nofree [[N0]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], ptr nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4]] -; CGSCC-NEXT: [[CALL5:%.*]] = call ptr @internal_ret0_nw(ptr nofree [[N0]], ptr nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] +; CGSCC-NEXT: [[CALL:%.*]] = call ptr @internal_ret1_rrw(ptr nofree noundef nonnull align 4 dereferenceable(4) [[R0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R1]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] +; CGSCC-NEXT: [[CALL1:%.*]] = call ptr @external_ret2_nrw(ptr nofree [[N0]], ptr nofree noundef nonnull 
align 4 dereferenceable(4) [[R0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] +; CGSCC-NEXT: [[CALL2:%.*]] = call ptr @external_ret2_nrw(ptr nofree [[N0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[R1]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] +; CGSCC-NEXT: [[CALL3:%.*]] = call ptr @external_sink_ret2_nrw(ptr nofree [[N0]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], ptr nofree noundef nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: [[CALL4:%.*]] = call ptr @external_sink_ret2_nrw(ptr nofree [[N0]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], ptr nofree noundef nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4]] +; CGSCC-NEXT: [[CALL5:%.*]] = call ptr @internal_ret0_nw(ptr nofree [[N0]], ptr nofree noundef nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: ; CGSCC-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[CALL5]], [[IF_END]] ], [ [[N0]], [[IF_THEN]] ] diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -2572,7 +2572,7 @@ ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4 -; TUNIT-NEXT: call void @free(ptr nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]] +; TUNIT-NEXT: call void @free(ptr noundef nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]] ; TUNIT-NEXT: ret i32 [[TMP0]] ; ; CGSCC-LABEL: define {{[^@]+}}@conditional_unknown_alloc @@ -2586,7 +2586,7 @@ ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: ; CGSCC-NEXT: [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4 -; CGSCC-NEXT: call void @free(ptr nonnull align 4 
dereferenceable(4) [[CALL]]) #[[ATTR23]] +; CGSCC-NEXT: call void @free(ptr noundef nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR23]] ; CGSCC-NEXT: ret i32 [[TMP0]] ; entry: diff --git a/llvm/test/Transforms/LICM/speculate-div.ll b/llvm/test/Transforms/LICM/speculate-div.ll --- a/llvm/test/Transforms/LICM/speculate-div.ll +++ b/llvm/test/Transforms/LICM/speculate-div.ll @@ -3,6 +3,7 @@ declare void @maythrow() declare void @use(i16) +declare void @use.i32(i32) define void @sdiv_not_ok(i16 %n, i16 noundef %xx) { ; CHECK-LABEL: @sdiv_not_ok( @@ -115,6 +116,31 @@ br label %loop } +define void @zext_sdiv_not_ok3_maybe_poison_denum(i32 noundef %nn, i16 %xx) { +; CHECK-LABEL: @zext_sdiv_not_ok3_maybe_poison_denum( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[N:%.*]] = and i32 [[NN:%.*]], 123 +; CHECK-NEXT: [[X:%.*]] = or i16 [[XX:%.*]], 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 [[X]] to i32 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: call void @maythrow() +; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[N]], [[ZEXT]] +; CHECK-NEXT: call void @use.i32(i32 [[DIV]]) +; CHECK-NEXT: br label [[LOOP]] +; +entry: + %n = and i32 %nn, 123 + %x = or i16 %xx, 1 + br label %loop +loop: + call void @maythrow() + %zext = zext i16 %x to i32 + %div = sdiv i32 %n, %zext + call void @use.i32(i32 %div) + br label %loop +} + define void @sdiv_not_ok3_maybe_poison_num(i16 %nn, i16 noundef %xx) { ; CHECK-LABEL: @sdiv_not_ok3_maybe_poison_num( ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -12,19 +12,19 @@ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 225 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) -; CHECK-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds <225 x double>, ptr [[A:%.*]], i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[MATRIXEXT:%.*]] = load double, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[MATRIXEXT:%.*]] = load double, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[CONV2:%.*]] = zext i32 [[I:%.*]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV2]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 225 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <225 x double>, ptr [[B:%.*]], i64 0, i64 [[TMP5]] -; CHECK-NEXT: [[MATRIXEXT4:%.*]] = load double, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 225 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <225 x double>, ptr [[B:%.*]], i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[MATRIXEXT4:%.*]] = load double, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[MUL:%.*]] = fmul double [[MATRIXEXT]], [[MATRIXEXT4]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[MATRIXEXT7:%.*]] = load double, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[MATRIXEXT7:%.*]] = load double, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]] -; CHECK-NEXT: store double [[SUB]], ptr [[TMP9]], align 8 +; CHECK-NEXT: store double [[SUB]], ptr [[TMP7]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -83,99 +83,39 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferenceable(1800) %A, ptr nonnull align 8 dereferenceable(1800) %B) { ; CHECK-LABEL: @matrix_extract_insert_loop( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP212_NOT:%.*]] = icmp eq i32 [[I:%.*]], 0 +; 
CHECK-NEXT: [[CMP210_NOT:%.*]] = icmp eq i32 [[I:%.*]], 0 ; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I]] to i64 -; CHECK-NEXT: br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK-NEXT: br i1 [[CMP210_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[I]], 225 +; CHECK-NEXT: [[J_012_US:%.*]] = phi i32 [ [[INC13_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CONV5_US:%.*]] = zext i32 [[J_012_US]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw i64 [[CONV5_US]], 15 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV6]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 225 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <225 x double>, ptr [[B:%.*]], i64 0, i64 [[CONV6]] ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[K_013_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ] -; CHECK-NEXT: [[CONV_US:%.*]] = zext i32 [[K_013_US]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[K_013_US]], 225 +; CHECK-NEXT: [[K_011_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ] +; CHECK-NEXT: [[CONV_US:%.*]] = zext i32 [[K_011_US]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV_US]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 225 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i64 0, i64 [[CONV_US]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i64 0, i64 [[TMP3]] ; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = load double, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = 
load <225 x double>, ptr [[B:%.*]], align 8 +; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = extractelement <225 x double> [[TMP6]], i64 [[TMP1]] ; CHECK-NEXT: [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[CONV_US]] -; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = load double, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = extractelement <225 x double> [[TMP6]], i64 [[TMP3]] ; CHECK-NEXT: [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]] -; CHECK-NEXT: store double [[SUB_US]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[K_013_US]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP3]] +; CHECK-NEXT: store double [[SUB_US]], ptr [[TMP7]], align 8 +; CHECK-NEXT: [[INC_US]] = add nuw i32 [[K_011_US]], 1 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i32 [[INC_US]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] +; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: -; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[CONV6]], 15 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[I]], 210 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP8]]) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP7]] -; CHECK-NEXT: br label [[FOR_BODY4_US_1:%.*]] -; CHECK: for.body4.us.1: -; CHECK-NEXT: [[K_013_US_1:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ] -; CHECK-NEXT: [[NARROW:%.*]] = add nuw nsw i32 [[K_013_US_1]], 15 -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[NARROW]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 [[K_013_US_1]], 210 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) -; CHECK-NEXT: [[TMP12:%.*]] = 
getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP10]] -; CHECK-NEXT: [[MATRIXEXT_US_1:%.*]] = load double, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US_1:%.*]] = load double, ptr [[TMP9]], align 8 -; CHECK-NEXT: [[MUL_US_1:%.*]] = fmul double [[MATRIXEXT_US_1]], [[MATRIXEXT8_US_1]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP10]] -; CHECK-NEXT: [[MATRIXEXT11_US_1:%.*]] = load double, ptr [[TMP13]], align 8 -; CHECK-NEXT: [[SUB_US_1:%.*]] = fsub double [[MATRIXEXT11_US_1]], [[MUL_US_1]] -; CHECK-NEXT: store double [[SUB_US_1]], ptr [[TMP13]], align 8 -; CHECK-NEXT: [[INC_US_1]] = add nuw nsw i32 [[K_013_US_1]], 1 -; CHECK-NEXT: [[CMP2_US_1:%.*]] = icmp ult i32 [[INC_US_1]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US_1]], label [[FOR_BODY4_US_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]] -; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.1: -; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[CONV6]], 30 -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i32 [[I]], 195 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]]) -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP14]] -; CHECK-NEXT: br label [[FOR_BODY4_US_2:%.*]] -; CHECK: for.body4.us.2: -; CHECK-NEXT: [[K_013_US_2:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ] -; CHECK-NEXT: [[NARROW17:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30 -; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[NARROW17]] to i64 -; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i32 [[K_013_US_2]], 195 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP18]]) -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP17]] -; CHECK-NEXT: [[MATRIXEXT_US_2:%.*]] = load double, ptr [[TMP19]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US_2:%.*]] = load double, ptr [[TMP16]], align 8 -; CHECK-NEXT: [[MUL_US_2:%.*]] = fmul double [[MATRIXEXT_US_2]], 
[[MATRIXEXT8_US_2]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP17]] -; CHECK-NEXT: [[MATRIXEXT11_US_2:%.*]] = load double, ptr [[TMP20]], align 8 -; CHECK-NEXT: [[SUB_US_2:%.*]] = fsub double [[MATRIXEXT11_US_2]], [[MUL_US_2]] -; CHECK-NEXT: store double [[SUB_US_2]], ptr [[TMP20]], align 8 -; CHECK-NEXT: [[INC_US_2]] = add nuw nsw i32 [[K_013_US_2]], 1 -; CHECK-NEXT: [[CMP2_US_2:%.*]] = icmp ult i32 [[INC_US_2]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US_2]], label [[FOR_BODY4_US_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]] -; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.2: -; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[CONV6]], 45 -; CHECK-NEXT: [[TMP22:%.*]] = icmp ult i32 [[I]], 180 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]]) -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP21]] -; CHECK-NEXT: br label [[FOR_BODY4_US_3:%.*]] -; CHECK: for.body4.us.3: -; CHECK-NEXT: [[K_013_US_3:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ] -; CHECK-NEXT: [[NARROW18:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45 -; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[NARROW18]] to i64 -; CHECK-NEXT: [[TMP25:%.*]] = icmp ult i32 [[K_013_US_3]], 180 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP25]]) -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP24]] -; CHECK-NEXT: [[MATRIXEXT_US_3:%.*]] = load double, ptr [[TMP26]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US_3:%.*]] = load double, ptr [[TMP23]], align 8 -; CHECK-NEXT: [[MUL_US_3:%.*]] = fmul double [[MATRIXEXT_US_3]], [[MATRIXEXT8_US_3]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP24]] -; CHECK-NEXT: [[MATRIXEXT11_US_3:%.*]] = load double, ptr [[TMP27]], align 8 -; CHECK-NEXT: [[SUB_US_3:%.*]] = fsub double [[MATRIXEXT11_US_3]], [[MUL_US_3]] -; 
CHECK-NEXT: store double [[SUB_US_3]], ptr [[TMP27]], align 8 -; CHECK-NEXT: [[INC_US_3]] = add nuw nsw i32 [[K_013_US_3]], 1 -; CHECK-NEXT: [[CMP2_US_3:%.*]] = icmp ult i32 [[INC_US_3]], [[I]] -; CHECK-NEXT: br i1 [[CMP2_US_3]], label [[FOR_BODY4_US_3]], label [[FOR_COND_CLEANUP]] +; CHECK-NEXT: [[INC13_US]] = add nuw nsw i32 [[J_012_US]], 1 +; CHECK-NEXT: [[CMP_US:%.*]] = icmp ult i32 [[J_012_US]], 3 +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll --- a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll @@ -71,14 +71,13 @@ ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_2]]) ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i32 0, i64 [[IDX_1]] -; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_1]] ; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i32 0, i64 [[IDX_2]] -; CHECK-NEXT: [[EXT_1:%.*]] = load double, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_2]] ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[IDX_1]] -; CHECK-NEXT: store double [[SUB]], ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x 
double>, ptr [[A]], i64 0, i64 [[IDX_1]] +; CHECK-NEXT: store double [[SUB]], ptr [[TMP0]], align 8 ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll --- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll @@ -113,9 +113,9 @@ ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] ; CHECK: loop: +; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 ; CHECK-NEXT: call void @maythrow() -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]] -; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: