diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -172,9 +172,23 @@
           return true;
         GS.StoredType = GlobalStatus::Stored;
       } else if (const auto *CB = dyn_cast<CallBase>(I)) {
-        if (!CB->isCallee(&U))
-          return true;
-        GS.IsLoaded = true;
+        const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+        if (II && II->getIntrinsicID() == Intrinsic::masked_load) {
+          // A masked load only reads through its pointer operand, so account
+          // for it like a plain non-atomic load instead of giving up on the
+          // global. Volatile accesses are still left alone.
+          if (II->isVolatile())
+            return true;
+          GS.IsLoaded = true;
+          GS.Ordering =
+              strongerOrdering(GS.Ordering, AtomicOrdering::NotAtomic);
+        } else {
+          // Any other call may only use the global as its callee; any other
+          // use (e.g. as an argument) bails out, like the fallback below.
+          if (!CB->isCallee(&U))
+            return true;
+          GS.IsLoaded = true;
+        }
       } else {
         return true; // Any other non-load instruction might take address!
       }
diff --git a/llvm/test/Transforms/GlobalOpt/masked-load-global.ll b/llvm/test/Transforms/GlobalOpt/masked-load-global.ll
--- a/llvm/test/Transforms/GlobalOpt/masked-load-global.ll
+++ b/llvm/test/Transforms/GlobalOpt/masked-load-global.ll
@@ -11,6 +11,7 @@
 
 ; Function Attrs: nofree norecurse nosync nounwind uwtable vscale_range(1,16)
 define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 {
+;
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
@@ -50,24 +51,20 @@
 ; CHECK-NEXT:    [[ARRAYIDX12_I:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* @lPtr, i64 0, i64 [[INDVARS_IV_I]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX12_I]], align 4
 ; CHECK-NEXT:    [[IDXPROM13_I:%.*]] = sext i32 [[TMP13]] to i64
-; CHECK-NEXT:    [[ARRAYIDX14_I:%.*]] = getelementptr inbounds [16 x double], [16 x double]* @psiPtr, i64 0, i64 [[IDXPROM13_I]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load double, double* [[ARRAYIDX14_I]], align 8
-; CHECK-NEXT:    [[MUL15_I:%.*]] = fmul contract double [[TMP12]], [[TMP14]]
+; CHECK-NEXT:    [[MUL15_I:%.*]] = fmul contract double [[TMP12]], 0.000000e+00
 ; CHECK-NEXT:    [[ARRAYIDX17_I:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* @uPtr, i64 0, i64 [[INDVARS_IV_I]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX17_I]], align 4
-; CHECK-NEXT:    [[IDXPROM18_I:%.*]] = sext i32 [[TMP15]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX17_I]], align 4
+; CHECK-NEXT:    [[IDXPROM18_I:%.*]] = sext i32 [[TMP14]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX19_I:%.*]] = getelementptr inbounds [16 x double], [16 x double]* @ApsiPtr, i64 0, i64 [[IDXPROM18_I]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load double, double* [[ARRAYIDX19_I]], align 8
-; CHECK-NEXT:    [[ADD_I:%.*]] = fadd contract double [[MUL15_I]], [[TMP16]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load double, double* [[ARRAYIDX19_I]], align 8
+; CHECK-NEXT:    [[ADD_I:%.*]] = fadd contract double [[MUL15_I]], [[TMP15]]
 ; CHECK-NEXT:    store double [[ADD_I]], double* [[ARRAYIDX19_I]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX21_I:%.*]] = getelementptr inbounds [16 x double], [16 x double]* @upperPtr, i64 0, i64 [[INDVARS_IV_I]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load double, double* [[ARRAYIDX21_I]], align 8
-; CHECK-NEXT:    [[ARRAYIDX25_I:%.*]] = getelementptr inbounds [16 x double], [16 x double]* @psiPtr, i64 0, i64 [[IDXPROM18_I]]
-; CHECK-NEXT:    [[TMP18:%.*]] = load double, double* [[ARRAYIDX25_I]], align 8
-; CHECK-NEXT:    [[MUL26_I:%.*]] = fmul contract double [[TMP17]], [[TMP18]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load double, double* [[ARRAYIDX21_I]], align 8
+; CHECK-NEXT:    [[MUL26_I:%.*]] = fmul contract double [[TMP16]], 0.000000e+00
 ; CHECK-NEXT:    [[ARRAYIDX30_I:%.*]] = getelementptr inbounds [16 x double], [16 x double]* @ApsiPtr, i64 0, i64 [[IDXPROM13_I]]
-; CHECK-NEXT:    [[TMP19:%.*]] = load double, double* [[ARRAYIDX30_I]], align 8
-; CHECK-NEXT:    [[ADD31_I:%.*]] = fadd contract double [[TMP19]], [[MUL26_I]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load double, double* [[ARRAYIDX30_I]], align 8
+; CHECK-NEXT:    [[ADD31_I:%.*]] = fadd contract double [[TMP17]], [[MUL26_I]]
 ; CHECK-NEXT:    store double [[ADD31_I]], double* [[ARRAYIDX30_I]], align 8
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_I]] = add nuw nsw i64 [[INDVARS_IV_I]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_I]], 16