Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -786,7 +786,10 @@ DebugPassStructure, /*VerifyEach*/ false, PrintPassOpts); SI.registerCallbacks(PIC, &FAM); - PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC); + bool AllowDataRaces = false; + if (LangOpts.getThreadModel() == LangOptions::ThreadModelKind::Single) + AllowDataRaces = true; + PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC, AllowDataRaces); // Enable verify-debuginfo-preserve-each for new PM. DebugifyEachInstrumentation Debugify; Index: llvm/include/llvm/Passes/PassBuilder.h =================================================================== --- llvm/include/llvm/Passes/PassBuilder.h +++ llvm/include/llvm/Passes/PassBuilder.h @@ -96,6 +96,7 @@ PipelineTuningOptions PTO; Optional PGOOpt; PassInstrumentationCallbacks *PIC; + bool AllowDataRaces; public: /// A struct to capture parsed pass pipeline names. @@ -114,7 +115,8 @@ explicit PassBuilder(TargetMachine *TM = nullptr, PipelineTuningOptions PTO = PipelineTuningOptions(), Optional PGOOpt = None, - PassInstrumentationCallbacks *PIC = nullptr); + PassInstrumentationCallbacks *PIC = nullptr, + bool AllowDataRaces = false); /// Cross register the analysis managers through their proxies. 
/// Index: llvm/include/llvm/Transforms/Scalar/LICM.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/LICM.h +++ llvm/include/llvm/Transforms/Scalar/LICM.h @@ -49,6 +49,7 @@ unsigned MssaOptCap; unsigned MssaNoAccForPromotionCap; bool AllowSpeculation; + bool AllowDataRaces; LICMOptions() : MssaOptCap(SetLicmMssaOptCap), @@ -60,6 +61,12 @@ : MssaOptCap(MssaOptCap), MssaNoAccForPromotionCap(MssaNoAccForPromotionCap), AllowSpeculation(AllowSpeculation) {} + + LICMOptions(unsigned MssaOptCap, unsigned MssaNoAccForPromotionCap, + bool AllowSpeculation, bool AllowDataRaces) + : MssaOptCap(MssaOptCap), + MssaNoAccForPromotionCap(MssaNoAccForPromotionCap), + AllowSpeculation(AllowSpeculation), AllowDataRaces(AllowDataRaces) {} }; /// Performs Loop Invariant Code Motion Pass. @@ -71,6 +78,10 @@ bool AllowSpeculation) : LICMPass(LICMOptions(MssaOptCap, MssaNoAccForPromotionCap, AllowSpeculation)) {} + LICMPass(unsigned MssaOptCap, unsigned MssaNoAccForPromotionCap, + bool AllowSpeculation, bool AllowDataRaces) + : LICMPass(LICMOptions(MssaOptCap, MssaNoAccForPromotionCap, + AllowSpeculation, AllowDataRaces)) {} LICMPass(LICMOptions Opts) : Opts(Opts) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -212,7 +212,7 @@ SmallVectorImpl &, SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *, bool AllowSpeculation); + OptimizationRemarkEmitter *, bool AllowSpeculation, bool AllowDataRaces); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. 
Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -390,8 +390,9 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, Optional PGOOpt, - PassInstrumentationCallbacks *PIC) - : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) { + PassInstrumentationCallbacks *PIC, bool AllowDataRaces) + : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC), + AllowDataRaces(AllowDataRaces) { if (TM) TM->registerPassBuilderCallbacks(*this); if (PIC && shouldPopulateClassToPassNames()) { Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -297,13 +297,13 @@ // after loop rotation. // TODO: Investigate promotion cap for O1. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/false)); + /*AllowSpeculation=*/false, AllowDataRaces)); LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/true)); + /*AllowSpeculation=*/true, AllowDataRaces)); LPM1.addPass(SimpleLoopUnswitchPass()); if (EnableLoopFlatten) LPM1.addPass(LoopFlattenPass()); @@ -479,14 +479,14 @@ // after loop rotation. // TODO: Investigate promotion cap for O1. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/false)); + /*AllowSpeculation=*/false, AllowDataRaces)); // Disable header duplication in loop rotation at -Oz. LPM1.addPass( LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. 
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/true)); + /*AllowSpeculation=*/true, AllowDataRaces)); LPM1.addPass( SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 && EnableO3NonTrivialUnswitching)); @@ -584,7 +584,7 @@ FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/true), + /*AllowSpeculation=*/true, AllowDataRaces), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); FPM.addPass(CoroElidePass()); @@ -1048,7 +1048,7 @@ ExtraPasses.addPass(InstCombinePass()); LoopPassManager LPM; LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/true)); + /*AllowSpeculation=*/true, AllowDataRaces)); LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); ExtraPasses.addPass( @@ -1117,7 +1117,7 @@ RequireAnalysisPass()); FPM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/true), + /*AllowSpeculation=*/true, AllowDataRaces), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); } @@ -1674,7 +1674,7 @@ FunctionPassManager MainFPM; MainFPM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/true), + /*AllowSpeculation=*/true, AllowDataRaces), /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); if (RunNewGVN) Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -109,6 +109,10 @@ "licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM")); +static cl::opt ThreadModelSingle("thread-model-single", cl::Hidden, + cl::init(false), + cl::desc("Allow data races in LICM pass")); 
+
 static cl::opt<uint32_t> MaxNumUsesTraversed(
     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
     cl::desc("Max num uses visited for identifying load "
@@ -190,10 +194,19 @@
         LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
         LicmAllowSpeculation(LicmAllowSpeculation) {}
 
+  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
+                          unsigned LicmMssaNoAccForPromotionCap,
+                          bool LicmAllowSpeculation, bool LicmAllowDataRaces)
+      : LicmMssaOptCap(LicmMssaOptCap),
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation),
+        LicmAllowDataRaces(LicmAllowDataRaces) {}
+
 private:
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
   bool LicmAllowSpeculation;
+  bool LicmAllowDataRaces = false; // The 3-argument constructor never sets it.
 };
 
 struct LegacyLICMPass : public LoopPass {
@@ -267,7 +280,7 @@
   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
 
   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
-                               Opts.AllowSpeculation);
+                               Opts.AllowSpeculation, Opts.AllowDataRaces);
   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
                       &AR.SE, AR.MSSA, &ORE))
     return PreservedAnalyses::all();
@@ -303,7 +316,7 @@
   OptimizationRemarkEmitter ORE(LN.getParent());
 
   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
-                               Opts.AllowSpeculation);
+                               Opts.AllowSpeculation, Opts.AllowDataRaces);
 
   Loop &OutermostLoop = LN.getOutermostLoop();
   bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
@@ -487,7 +500,8 @@
              collectPromotionCandidates(MSSA, AA, L)) {
           LocalPromoted |= promoteLoopAccessesToScalars(
               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
-              DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
+              DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation,
+              LicmAllowDataRaces);
         }
         Promoted |= LocalPromoted;
       } while (LocalPromoted);
@@ -1910,7 +1924,8 @@
     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo
*TLI, Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { + OptimizationRemarkEmitter *ORE, bool AllowSpeculation, + bool AllowDataRaces) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -2111,7 +2126,7 @@ // stores along paths which originally didn't have them without violating the // memory model. if (!SafeToInsertStore) { - if (IsKnownThreadLocalObject) + if (IsKnownThreadLocalObject || AllowDataRaces || ThreadModelSingle) SafeToInsertStore = true; else { Value *Object = getUnderlyingObject(SomePtr); Index: llvm/test/Transforms/LICM/promote-sink-store.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -thread-model-single -S %s | FileCheck %s + +@u = dso_local local_unnamed_addr global i32 0, align 4 +@v = dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree norecurse nosync nounwind uwtable +define dso_local void @f(ptr noalias nocapture noundef readonly %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) local_unnamed_addr { +; CHECK-LABEL: @f( +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP31:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr @v, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @u, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[V_PROMOTED:%.*]] = load i32, ptr @v, align 1 +; CHECK-NEXT: br label [[TMP9:%.*]] +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[V_PROMOTED]], [[TMP5]] ], [ [[TMP25:%.*]], [[TMP24:%.*]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ 0, [[TMP5]] ], [ [[TMP27:%.*]], [[TMP24]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP7]], 
[[TMP5]] ], [ [[TMP17:%.*]], [[TMP24]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP26:%.*]], [[TMP24]] ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17]] = add nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[TMP16]], label [[TMP18:%.*]], label [[TMP29:%.*]] +; CHECK: 18: +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP20]], 0 +; CHECK-NEXT: br i1 [[TMP21]], label [[TMP24]], label [[TMP22:%.*]] +; CHECK: 22: +; CHECK-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP13]], 1 +; CHECK-NEXT: br label [[TMP24]] +; CHECK: 24: +; CHECK-NEXT: [[TMP25]] = phi i32 [ [[TMP10]], [[TMP18]] ], [ [[TMP23]], [[TMP22]] ] +; CHECK-NEXT: [[TMP26]] = phi i32 [ [[TMP13]], [[TMP18]] ], [ [[TMP23]], [[TMP22]] ] +; CHECK-NEXT: [[TMP27]] = add nuw nsw i64 [[TMP11]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP27]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP28]], label [[TMP29]], label [[TMP9]] +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = phi i32 [ [[TMP25]], [[TMP24]] ], [ [[TMP10]], [[TMP9]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], [[TMP24]] ], [ [[TMP17]], [[TMP9]] ] +; CHECK-NEXT: store i32 [[TMP30]], ptr @v, align 1 +; CHECK-NEXT: store i32 [[DOTLCSSA]], ptr @u, align 4 +; CHECK-NEXT: br label [[TMP31]] +; CHECK: 31: +; CHECK-NEXT: ret void +; + %4 = icmp sgt i32 %2, 0 + br i1 %4, label %5, label %28 + +5: ; preds = %3 + %6 = load i32, ptr @v, align 4 + %7 = load i32, ptr @u, align 4 + %8 = zext i32 %2 to i64 + br label %9 + +9: ; preds = %5, %23 + %10 = phi i64 [ 0, %5 ], [ %25, %23 ] + %11 = phi i32 [ %7, %5 ], [ %16, %23 ] + %12 = phi i32 [ %6, %5 ], [ %24, %23 ] + %13 = getelementptr inbounds i32, ptr %0, i64 %10 + %14 
= load i32, ptr %13, align 4 + %15 = icmp eq i32 %14, 0 + %16 = add nsw i32 %11, 1 + br i1 %15, label %17, label %27 + +17: ; preds = %9 + %18 = getelementptr inbounds i32, ptr %1, i64 %10 + %19 = load i32, ptr %18, align 4 + %20 = icmp eq i32 %19, 0 + br i1 %20, label %23, label %21 + +21: ; preds = %17 + %22 = add nsw i32 %12, 1 + store i32 %22, ptr @v, align 4 + br label %23 + +23: ; preds = %17, %21 + %24 = phi i32 [ %12, %17 ], [ %22, %21 ] + %25 = add nuw nsw i64 %10, 1 + %26 = icmp eq i64 %25, %8 + br i1 %26, label %27, label %9 + +27: ; preds = %9, %23 + store i32 %16, ptr @u, align 4 + br label %28 + +28: ; preds = %27, %3 + ret void +} Index: llvm/test/Transforms/LICM/without-allow-data-race.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/without-allow-data-race.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -S %s | FileCheck %s + +@u = dso_local local_unnamed_addr global i32 0, align 4 +@v = dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree norecurse nosync nounwind uwtable +define dso_local void @f(ptr noalias nocapture noundef readonly %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) local_unnamed_addr { +; CHECK-LABEL: @f( +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP28:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr @v, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @u, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: br label [[TMP9:%.*]] +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ 0, [[TMP5]] ], [ [[TMP25:%.*]], [[TMP23:%.*]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ [[TMP7]], [[TMP5]] ], [ [[TMP16:%.*]], [[TMP23]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP24:%.*]], [[TMP23]] ] +; CHECK-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16]] = add nsw i32 [[TMP11]], 1 +; CHECK-NEXT: br i1 [[TMP15]], label [[TMP17:%.*]], label [[TMP27:%.*]] +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[TMP20]], label [[TMP23]], label [[TMP21:%.*]] +; CHECK: 21: +; CHECK-NEXT: [[TMP22:%.*]] = add nsw i32 [[TMP12]], 1 +; CHECK-NEXT: store i32 [[TMP22]], ptr @v, align 4 +; CHECK-NEXT: br label [[TMP23]] +; CHECK: 23: +; CHECK-NEXT: [[TMP24]] = phi i32 [ [[TMP12]], [[TMP17]] ], [ [[TMP22]], [[TMP21]] ] +; CHECK-NEXT: [[TMP25]] = add nuw nsw i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP26]], label [[TMP27]], label [[TMP9]] +; CHECK: 27: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP16]], [[TMP23]] ], [ [[TMP16]], [[TMP9]] ] +; CHECK-NEXT: store i32 [[DOTLCSSA]], ptr @u, align 4 +; CHECK-NEXT: br label [[TMP28]] +; CHECK: 28: +; CHECK-NEXT: ret void +; + %4 = icmp sgt i32 %2, 0 + br i1 %4, label %5, label %28 + +5: ; preds = %3 + %6 = load i32, ptr @v, align 4 + %7 = load i32, ptr @u, align 4 + %8 = zext i32 %2 to i64 + br label %9 + +9: ; preds = %5, %23 + %10 = phi i64 [ 0, %5 ], [ %25, %23 ] + %11 = phi i32 [ %7, %5 ], [ %16, %23 ] + %12 = phi i32 [ %6, %5 ], [ %24, %23 ] + %13 = getelementptr inbounds i32, ptr %0, i64 %10 + %14 = load i32, ptr %13, align 4 + %15 = icmp eq i32 %14, 0 + %16 = add nsw i32 %11, 1 + br i1 %15, label %17, label %27 + +17: ; preds = %9 + %18 = getelementptr inbounds i32, ptr %1, i64 %10 + %19 = load i32, ptr %18, align 4 + %20 = icmp eq i32 %19, 0 + br i1 %20, label %23, label %21 + +21: ; preds = %17 + %22 = add nsw i32 %12, 1 + 
store i32 %22, ptr @v, align 4 + br label %23 + +23: ; preds = %17, %21 + %24 = phi i32 [ %12, %17 ], [ %22, %21 ] + %25 = add nuw nsw i64 %10, 1 + %26 = icmp eq i64 %25, %8 + br i1 %26, label %27, label %9 + +27: ; preds = %9, %23 + store i32 %16, ptr @u, align 4 + br label %28 + +28: ; preds = %27, %3 + ret void +}