Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -769,6 +769,7 @@
   LoopInfo &LI;
   DebugLoc DL;
   int Alignment;
+  bool UnorderedAtomic;
   AAMDNodes AATags;
 
   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
@@ -794,10 +795,12 @@
                SmallVectorImpl<BasicBlock *> &LEB,
                SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
                AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
+               bool UnorderedAtomic,
                const AAMDNodes &AATags)
       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
         LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
-        LI(li), DL(dl), Alignment(alignment), AATags(AATags) {}
+        LI(li), DL(dl), Alignment(alignment),
+        UnorderedAtomic(UnorderedAtomic), AATags(AATags) {}
 
   bool isInstInList(Instruction *I,
                     const SmallVectorImpl<Instruction *> &) const override {
@@ -821,6 +824,8 @@
       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
       Instruction *InsertPos = LoopInsertPts[i];
       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+      if (UnorderedAtomic)
+        NewSI->setOrdering(AtomicOrdering::Unordered);
       NewSI->setAlignment(Alignment);
       NewSI->setDebugLoc(DL);
       if (AATags) NewSI->setAAMetadata(AATags);
@@ -889,6 +894,9 @@
   // We start with an alignment of one and try to find instructions that allow
   // us to prove better alignment.
   unsigned Alignment = 1;
+  // Keep track of which kinds of access we see.
+  bool SawUnorderedAtomic = false;
+  bool SawNotAtomic = false;
   AAMDNodes AATags;
 
   bool HasDedicatedExits = CurLoop->hasDedicatedExits();
@@ -916,16 +924,21 @@
       // it.
       if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) {
         assert(!Load->isVolatile() && "AST broken");
-        if (!Load->isSimple())
-          return Changed;
+        if (!Load->isUnordered())
+          return Changed;
+        SawUnorderedAtomic |= Load->isAtomic();
+        SawNotAtomic |= !Load->isAtomic();
       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
         // Stores *of* the pointer are not interesting, only stores *to* the
         // pointer.
         if (UI->getOperand(1) != ASIV)
           continue;
         assert(!Store->isVolatile() && "AST broken");
-        if (!Store->isSimple())
+        if (!Store->isUnordered())
           return Changed;
+        SawUnorderedAtomic |= Store->isAtomic();
+        SawNotAtomic |= !Store->isAtomic();
+
         // Don't sink stores from loops without dedicated block exits. Exits
         // containing indirect branches are not transformed by loop simplify,
         // make sure we catch that. An additional load may be generated in the
@@ -947,6 +960,8 @@
         if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
           GuaranteedToExecute = true;
           Alignment = InstAlignment;
+          assert((!Store->isAtomic() || Alignment > 0) &&
+                 "atomic alignment can't be zero");
         }
 
         if (!GuaranteedToExecute)
@@ -968,6 +983,13 @@
     }
   }
 
+  // If we found both an unordered atomic instruction and a non-atomic memory
+  // access, bail. We can't blindly promote non-atomic to atomic since we
+  // might not be able to lower the result. We can't downgrade since that
+  // would violate the memory model. Also, align 0 is an error for atomics.
+  if (SawUnorderedAtomic && SawNotAtomic)
+    return Changed;
+
   // If there isn't a guaranteed-to-execute instruction, we can't promote.
   if (!GuaranteedToExecute)
     return Changed;
@@ -1003,13 +1025,16 @@
   SSAUpdater SSA(&NewPHIs);
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
-                        InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
+                        InsertPts, PIC, *CurAST, *LI, DL, Alignment,
+                        SawUnorderedAtomic, AATags);
 
   // Set up the preheader to have a definition of the value.  It is the
   // live-out value from the preheader that uses in the loop will use.
   LoadInst *PreheaderLoad =
       new LoadInst(SomePtr, SomePtr->getName() + ".promoted",
                    Preheader->getTerminator());
+  if (SawUnorderedAtomic)
+    PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
   PreheaderLoad->setAlignment(Alignment);
   PreheaderLoad->setDebugLoc(DL);
   if (AATags) PreheaderLoad->setAAMetadata(AATags);
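
For readers unfamiliar with the API used above: the promotion path deliberately
builds plain loads/stores and then upgrades them via setOrdering, rather than
constructing atomic instructions directly. The following is a minimal,
self-contained sketch of that two-step pattern (my illustration, not part of
the patch; it assumes LLVM headers contemporary with this change, and the
module, function, and value names are invented for the demo):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    Module M("promo-demo", Ctx);
    IRBuilder<> B(Ctx);

    // void f(i32*): one block containing a promoted-style access pair.
    auto *FTy = FunctionType::get(B.getVoidTy(), {B.getInt32PtrTy()}, false);
    Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);
    B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
    Value *Ptr = &*F->arg_begin();

    // Create a plain load/store, then mark both unordered-atomic -- the same
    // two-step pattern used above for PreheaderLoad and NewSI.
    LoadInst *LI = B.CreateLoad(Ptr, "promoted");
    LI->setOrdering(AtomicOrdering::Unordered);
    LI->setAlignment(4); // atomic accesses require a non-zero alignment
    StoreInst *SI = B.CreateStore(LI, Ptr);
    SI->setOrdering(AtomicOrdering::Unordered);
    SI->setAlignment(4);
    B.CreateRetVoid();

    // Prints IR containing "load atomic i32 ... unordered, align 4".
    M.print(outs(), nullptr);
    return 0;
  }

The two-step construction is also why the Alignment bookkeeping matters: an
atomic access with align 0 is invalid IR, which is what the new assert and the
bail-out comment above are guarding against.
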
Index: test/Transforms/LICM/atomics.ll
===================================================================
--- test/Transforms/LICM/atomics.ll
+++ test/Transforms/LICM/atomics.ll
@@ -59,8 +59,7 @@
 ; CHECK-NEXT:   br label %loop
 }
 
-; Don't try to "sink" unordered stores yet; it is legal, but the machinery
-; isn't there.
+; We can sink an unordered store.
 define i32 @test4(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
 entry:
   br label %loop
@@ -74,6 +73,149 @@
 end:
   ret i32 %vala
 ; CHECK-LABEL: define i32 @test4(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NOT: store
+; CHECK-LABEL: end:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %vala
+; CHECK: store atomic i32 %[[LCSSAPHI]], i32* %x unordered, align 4
+}
+
+; We currently don't handle ordered atomics.
+define i32 @test5(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x release, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test5(
 ; CHECK: load atomic i32, i32* %y monotonic
 ; CHECK-NEXT: store atomic
 }
+
+; We currently don't touch volatiles.
+define i32 @test6(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store volatile i32 %vala, i32* %x, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test6(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store volatile
+}
+
+; We currently don't touch volatiles.
+define i32 @test6b(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic volatile i32 %vala, i32* %x unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test6b(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic volatile
+}
+
+; Mixing unordered atomics and normal loads/stores is
+; currently unimplemented.
+define i32 @test7(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  store i32 5, i32* %x
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test7(
+; CHECK: store i32 5, i32* %x
+; CHECK-NEXT: load atomic i32, i32* %y
+; CHECK-NEXT: store atomic i32
+}
+
+; Three provably noalias locations - we can sink normal and unordered, but
+; not monotonic.
+define i32 @test7b(i32* nocapture noalias %x, i32* nocapture %y, i32* noalias nocapture %z) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  store i32 5, i32* %x
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %z unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test7b(
+; CHECK: load atomic i32, i32* %y monotonic
+
+; CHECK-LABEL: end:
+; CHECK: store i32 5, i32* %x
+; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4
+}
+
+; We currently don't promote in the presence of a fence.
+define i32 @test8(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x unordered, align 4
+  fence release
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test8(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: fence
+}
+
+; Exact semantics of monotonic accesses are a bit vague in the C++ spec;
+; for the moment, be conservative and don't touch them.
+define i32 @test9(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x monotonic, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test9(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic i32 %vala, i32* %x monotonic, align 4
+}
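
Why the tests split the way they do: the patch relaxes the per-access
promotion guard from isSimple() to isUnordered(). Below is a paraphrase of
those predicates as free functions (my sketch only; the real definitions are
member functions on LoadInst/StoreInst in llvm/IR/Instructions.h):

  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Old guard: not volatile and not atomic at all.
  static bool isSimpleLoad(const LoadInst &L) {
    return !L.isVolatile() && !L.isAtomic();
  }

  // New guard: not volatile, and either non-atomic or atomic with the
  // weakest ("unordered") ordering. This admits the accesses in test4,
  // while still rejecting volatile (test6/test6b) and ordered atomic
  // (test5 release, test9 monotonic) accesses. The mixed case in test7
  // passes this per-access check but is rejected later by the
  // SawUnorderedAtomic && SawNotAtomic bail-out in LICM.cpp.
  static bool isUnorderedLoad(const LoadInst &L) {
    return !L.isVolatile() &&
           (L.getOrdering() == AtomicOrdering::NotAtomic ||
            L.getOrdering() == AtomicOrdering::Unordered);
  }
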
Index: test/Transforms/LICM/scalar_promote.ll
===================================================================
--- test/Transforms/LICM/scalar_promote.ll
+++ test/Transforms/LICM/scalar_promote.ll
@@ -185,6 +185,34 @@
 ; CHECK-NEXT:  store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0
 }
 
+define void @test7(i32 %i) {
+Entry:
+  br label %Loop
+; CHECK-LABEL: @test7(
+; CHECK: Entry:
+; CHECK-NEXT:   load atomic i32, i32* @X unordered, align 4
+; CHECK-NEXT:   br label %Loop
+
+
+Loop:		; preds = %Loop, %0
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]	; <i32> [#uses=1]
+  %x = load atomic i32, i32* @X unordered, align 4
+  %x2 = add i32 %x, 1
+  store atomic i32 %x2, i32* @X unordered, align 4
+  %Next = add i32 %j, 1
+  %cond = icmp eq i32 %Next, 0
+  br i1 %cond, label %Out, label %Loop
+
+Out:
+  ret void
+; CHECK: Out:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2
+; CHECK-NEXT: store atomic i32 %[[LCSSAPHI]], i32* @X unordered, align 4
+; CHECK-NEXT: ret void
+
+}
+
 !0 = !{!4, !4, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}
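
A note on exercising these tests: each FileCheck test file is driven by the
RUN line at its top, which none of these hunks touch. For LICM tests of this
vintage that line is typically of the form
"opt < %s -basicaa -licm -S | FileCheck %s" (an assumption based on the
convention of the era, since the RUN lines are outside the hunks shown);
the scalar_promote.ll test additionally relies on the global @X declared
earlier in that file.
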