Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp
@@ -917,6 +917,7 @@
   LoopInfo &LI;
   DebugLoc DL;
   int Alignment;
+  bool UnorderedAtomic;
   AAMDNodes AATags;
 
   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
@@ -940,10 +941,11 @@
                SmallVectorImpl<BasicBlock *> &LEB,
                SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
                AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
-               const AAMDNodes &AATags)
+               bool UnorderedAtomic, const AAMDNodes &AATags)
       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
         LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
-        LI(li), DL(std::move(dl)), Alignment(alignment), AATags(AATags) {}
+        LI(li), DL(std::move(dl)), Alignment(alignment),
+        UnorderedAtomic(UnorderedAtomic), AATags(AATags) {}
 
   bool isInstInList(Instruction *I,
                     const SmallVectorImpl<Instruction *> &) const override {
@@ -967,6 +969,8 @@
       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
       Instruction *InsertPos = LoopInsertPts[i];
       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+      if (UnorderedAtomic)
+        NewSI->setOrdering(AtomicOrdering::Unordered);
       NewSI->setAlignment(Alignment);
       NewSI->setDebugLoc(DL);
       if (AATags)
@@ -1057,6 +1061,9 @@
   // We start with an alignment of one and try to find instructions that allow
   // us to prove better alignment.
   unsigned Alignment = 1;
+  // Keep track of which types of access we see.
+  bool SawUnorderedAtomic = false;
+  bool SawNotAtomic = false;
   AAMDNodes AATags;
 
   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
@@ -1114,8 +1121,11 @@
       // it.
       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
         assert(!Load->isVolatile() && "AST broken");
-        if (!Load->isSimple())
+        if (!Load->isUnordered())
           return false;
+
+        SawUnorderedAtomic |= Load->isAtomic();
+        SawNotAtomic |= !Load->isAtomic();
 
         if (!DereferenceableInPH)
           DereferenceableInPH = isSafeToExecuteUnconditionally(
@@ -1126,9 +1136,12 @@
         if (UI->getOperand(1) != ASIV)
           continue;
         assert(!Store->isVolatile() && "AST broken");
-        if (!Store->isSimple())
+        if (!Store->isUnordered())
           return false;
 
+        SawUnorderedAtomic |= Store->isAtomic();
+        SawNotAtomic |= !Store->isAtomic();
+
         // If the store is guaranteed to execute, both properties are satisfied.
         // We may want to check if a store is guaranteed to execute even if we
         // already know that promotion is safe, since it may have higher
@@ -1181,6 +1194,12 @@
     }
   }
 
+  // If we found both an unordered atomic instruction and a non-atomic memory
+  // access, bail.  We can't blindly promote non-atomic to atomic since we
+  // might not be able to lower the result.  We can't downgrade since that
+  // would violate the memory model.  Also, align 0 is an error for atomics.
+  if (SawUnorderedAtomic && SawNotAtomic)
+    return false;
   // If we couldn't prove we can hoist the load, bail.
   if (!DereferenceableInPH)
     return false;
@@ -1224,12 +1243,15 @@
   SmallVector<PHINode *, 16> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
-                        InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
+                        InsertPts, PIC, *CurAST, *LI, DL, Alignment,
+                        SawUnorderedAtomic, AATags);
 
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.
   LoadInst *PreheaderLoad = new LoadInst(
       SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
+  if (SawUnorderedAtomic)
+    PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
   PreheaderLoad->setAlignment(Alignment);
   PreheaderLoad->setDebugLoc(DL);
   if (AATags)
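The functional core of the LICM.cpp change is the relaxed promotion guard: isSimple() admits only non-atomic, non-volatile accesses, while isUnordered() also admits atomics whose ordering is unordered. The two Saw* flags then record which flavors of access were seen so that mixed sets can be rejected later. A minimal sketch of that per-access classification, using the real LoadInst predicates but a hypothetical helper name:

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper (not part of the patch) mirroring the checks the
// legality scan above performs for each load; stores are handled the same way.
static bool classifyForPromotion(const LoadInst &LI, bool &SawUnorderedAtomic,
                                 bool &SawNotAtomic) {
  // Volatile accesses and atomics at monotonic or stronger block promotion.
  if (!LI.isUnordered())
    return false;
  SawUnorderedAtomic |= LI.isAtomic(); // unordered atomic access
  SawNotAtomic |= !LI.isAtomic();      // plain, non-atomic access
  return true;
}

If both flags end up set, the SawUnorderedAtomic && SawNotAtomic check bails out: blindly upgrading the plain accesses to atomic might not be lowerable on every target, and downgrading the atomic ones would violate the memory model.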
Index: llvm/trunk/test/Transforms/LICM/atomics.ll
===================================================================
--- llvm/trunk/test/Transforms/LICM/atomics.ll
+++ llvm/trunk/test/Transforms/LICM/atomics.ll
@@ -60,8 +60,7 @@
 ; CHECK-NEXT: br label %loop
 }
 
-; Don't try to "sink" unordered stores yet; it is legal, but the machinery
-; isn't there.
+; We can sink an unordered store.
 define i32 @test4(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
 entry:
   br label %loop
@@ -75,6 +74,149 @@
 end:
   ret i32 %vala
 ; CHECK-LABEL: define i32 @test4(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NOT: store
+; CHECK-LABEL: end:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %vala
+; CHECK: store atomic i32 %[[LCSSAPHI]], i32* %x unordered, align 4
+}
+
+; We currently don't handle ordered atomics.
+define i32 @test5(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x release, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test5(
 ; CHECK: load atomic i32, i32* %y monotonic
 ; CHECK-NEXT: store atomic
 }
+
+; We currently don't touch volatiles.
+define i32 @test6(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store volatile i32 %vala, i32* %x, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test6(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store volatile
+}
+
+; We currently don't touch volatiles.
+define i32 @test6b(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic volatile i32 %vala, i32* %x unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test6b(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic volatile
+}
+
+; Mixing unordered atomics and normal loads/stores is
+; currently unimplemented.
+define i32 @test7(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  store i32 5, i32* %x
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test7(
+; CHECK: store i32 5, i32* %x
+; CHECK-NEXT: load atomic i32, i32* %y
+; CHECK-NEXT: store atomic i32
+}
+
+; Three provably noalias locations - we can sink normal and unordered, but
+; not monotonic.
+define i32 @test7b(i32* nocapture noalias %x, i32* nocapture %y, i32* noalias nocapture %z) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  store i32 5, i32* %x
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %z unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test7b(
+; CHECK: load atomic i32, i32* %y monotonic
+
+; CHECK-LABEL: end:
+; CHECK: store i32 5, i32* %x
+; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4
+}
+
+
+define i32 @test8(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x unordered, align 4
+  fence release
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test8(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: fence
+}
+
+; Exact semantics of monotonic accesses are a bit vague in the C++ spec;
+; for the moment, be conservative and don't touch them.
+define i32 @test9(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x monotonic, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test9(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic i32 %vala, i32* %x monotonic, align 4
+}
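test4 and test7b cover the newly enabled sinking: the store is rebuilt in the exit block against an LCSSA phi of the loop value and keeps its unordered ordering, while tests 5 through 9 pin down what the patch deliberately leaves alone (ordered atomics, volatiles, loops containing fences, and mixed atomic/non-atomic access to one location). A self-contained sketch of the emission step behind the sunk stores, assuming LiveInValue, Ptr, InsertPos, and Alignment are the values the LoopPromoter computes in the hunk above:

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Illustrative only: mirrors the exit-block store emission added to
// LoopPromoter; the LCSSA phi lookup and the loop over exit blocks are
// omitted here.
static StoreInst *emitSunkStore(Value *LiveInValue, Value *Ptr,
                                Instruction *InsertPos, unsigned Alignment,
                                bool UnorderedAtomic) {
  StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
  if (UnorderedAtomic) // preserve atomicity; never downgrade an ordering
    NewSI->setOrdering(AtomicOrdering::Unordered);
  NewSI->setAlignment(Alignment); // align 0 would be invalid on an atomic
  return NewSI;
}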
Index: llvm/trunk/test/Transforms/LICM/scalar-promote.ll
===================================================================
--- llvm/trunk/test/Transforms/LICM/scalar-promote.ll
+++ llvm/trunk/test/Transforms/LICM/scalar-promote.ll
@@ -378,6 +378,33 @@
   ret i32 %ret
 }
 
+define void @test10(i32 %i) {
+Entry:
+  br label %Loop
+; CHECK-LABEL: @test10(
+; CHECK: Entry:
+; CHECK-NEXT: load atomic i32, i32* @X unordered, align 4
+; CHECK-NEXT: br label %Loop
+
+
+Loop:                                             ; preds = %Loop, %0
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]    ; <i32> [#uses=1]
+  %x = load atomic i32, i32* @X unordered, align 4
+  %x2 = add i32 %x, 1
+  store atomic i32 %x2, i32* @X unordered, align 4
+  %Next = add i32 %j, 1
+  %cond = icmp eq i32 %Next, 0
+  br i1 %cond, label %Out, label %Loop
+
+Out:
+  ret void
+; CHECK: Out:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2
+; CHECK-NEXT: store atomic i32 %[[LCSSAPHI]], i32* @X unordered, align 4
+; CHECK-NEXT: ret void
+
+}
+
 !0 = !{!4, !4, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}
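test10 exercises full scalar promotion over unordered atomics: the load is hoisted into Entry, the loop body runs on SSA values, and a single unordered store is sunk to Out behind an LCSSA phi. The preheader side of that rewrite, expressed with IRBuilder as a rough sketch (Preheader and GV stand in for the pass's actual values; this is not the pass code itself):

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Sketch of the preheader load the promoter would create for @test10.
static LoadInst *emitPreheaderLoad(BasicBlock *Preheader, Value *GV) {
  IRBuilder<> Builder(Preheader->getTerminator());
  LoadInst *Load = Builder.CreateLoad(GV, GV->getName() + ".promoted");
  // Every promoted access was an unordered atomic, so keep that ordering.
  Load->setOrdering(AtomicOrdering::Unordered);
  Load->setAlignment(4); // atomics need an explicit, non-zero alignment
  return Load;
}

The CHECK lines assume the file's usual driver; the RUN line at the top of scalar-promote.ll (outside this hunk) presumably pipes the file through opt with LICM enabled and into FileCheck.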