@@ -281,21 +281,31 @@ class EarlyCSE {
   /// that dominated values can succeed in their lookup.
   ScopedHTType AvailableValues;
 
-  /// \brief A scoped hash table of the current values of loads.
+  /// A scoped hash table of the current values of previously encountered
+  /// memory locations.
   ///
-  /// This allows us to get efficient access to dominating loads when we have
-  /// a fully redundant load. In addition to the most recent load, we keep
-  /// track of a generation count of the read, which is compared against the
-  /// current generation count. The current generation count is incremented
+  /// This allows us to get efficient access to dominating loads or stores when
+  /// we have a fully redundant load. In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count. The current generation count is incremented
   /// after every possibly writing memory operation, which ensures that we only
-  /// CSE loads with other loads that have no intervening store.
+  /// CSE loads with other loads that have no intervening store. Ordering
+  /// events (such as fences or atomic instructions) increment the generation
+  /// count as well; essentially, we model these as writes to all possible
+  /// locations. Note that atomic and/or volatile loads and stores can be
+  /// present in the table; it is the responsibility of the consumer to inspect
+  /// the atomicity/volatility if needed.
   struct LoadValue {
     Value *Data;
     unsigned Generation;
     int MatchingId;
-    LoadValue() : Data(nullptr), Generation(0), MatchingId(-1) {}
-    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId)
-        : Data(Data), Generation(Generation), MatchingId(MatchingId) {}
+    bool IsAtomic;
+    LoadValue()
+        : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+              bool IsAtomic)
+        : Data(Data), Generation(Generation), MatchingId(MatchingId),
+          IsAtomic(IsAtomic) {}
   };
   typedef RecyclingAllocator<BumpPtrAllocator,
                              ScopedHashTableVal<Value *, LoadValue>>
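
For readers unfamiliar with the scheme the comment above describes, here is a minimal standalone sketch of the generation-count idea. It is illustrative only: the names AvailabilityTable, Entry, noteClobber, recordAccess, and lookup are invented for this note, and the pass itself uses a ScopedHashTable keyed by Value* so that entries are popped when leaving a dominator-tree scope.

#include <unordered_map>

// Illustrative stand-ins for the pass's data structures (not LLVM code).
struct Entry {
  const void *Data;    // value produced by the dominating load/store
  unsigned Generation; // generation count at which it was recorded
  bool IsAtomic;       // whether the recorded access was atomic
};

class AvailabilityTable {
  std::unordered_map<const void *, Entry> Table;
  unsigned CurrentGeneration = 0;

public:
  // Any possibly-writing operation (store, call, fence, atomic op, ...)
  // bumps the generation, which implicitly invalidates every entry.
  void noteClobber() { ++CurrentGeneration; }

  // Record the value produced by a load of, or stored to, Ptr.
  void recordAccess(const void *Ptr, const void *Val, bool IsAtomic) {
    Table[Ptr] = Entry{Val, CurrentGeneration, IsAtomic};
  }

  // A later load of Ptr may reuse the recorded value only if no potential
  // write intervened (generations match) and the earlier access was at
  // least as strong as the load we want to eliminate (atomic vs. not).
  const void *lookup(const void *Ptr, bool NeedAtomic) const {
    auto It = Table.find(Ptr);
    if (It == Table.end())
      return nullptr;
    const Entry &E = It->second;
    if (E.Generation != CurrentGeneration)
      return nullptr;
    if (NeedAtomic && !E.IsAtomic)
      return nullptr;
    return E.Data;
  }
};

In the patch, recordAccess corresponds to AvailableLoads.insert, noteClobber to ++CurrentGeneration, and the lookup conditions mirror the checks added to processNode below.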
@@ -410,6 +420,42 @@ class EarlyCSE {
       }
       return Inst->isAtomic();
     }
+    bool isAtomic() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
+      }
+      return Inst->isAtomic();
+    }
+    bool isUnordered() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return true;
+      }
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isUnordered();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isUnordered();
+      }
+      // Conservative answer
+      return !Inst->isAtomic();
+    }
+
+    bool isVolatile() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
+      }
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isVolatile();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isVolatile();
+      }
+      // Conservative answer
+      return true;
+    }
+
+
     bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
       return (getPointerOperand() == Inst.getPointerOperand() &&
               getMatchingId() == Inst.getMatchingId());
@@ -561,20 +607,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     ParseMemoryInst MemInst(Inst, TTI);
     // If this is a non-volatile load, process it.
     if (MemInst.isValid() && MemInst.isLoad()) {
-      // Ignore volatile or ordered loads.
-      if (!MemInst.isSimple()) {
+      // Conservatively, we can't peek past the ordering implied by this
+      // operation, but we can add this load to our set of available values.
+      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
         LastStore = nullptr;
-        // Don't CSE across synchronization boundaries.
-        if (Inst->mayWriteToMemory())
-          ++CurrentGeneration;
-        continue;
+        ++CurrentGeneration;
       }
 
       // If we have an available version of this load, and if it is the right
       // generation, replace this instruction.
       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
       if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
-          InVal.MatchingId == MemInst.getMatchingId()) {
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing loads with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered() &&
+          // We can't replace an atomic load with one which isn't also atomic.
+          InVal.IsAtomic >= MemInst.isAtomic()) {
         Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
         if (Op != nullptr) {
           DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
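
As a hedged restatement of the legality check just added (the Avail struct and canReuseAvailableLoad helper below are hypothetical, invented for this note, not code from the patch), the conditions boil down to the predicate below. In particular, the bool comparison InVal.IsAtomic >= MemInst.isAtomic() simply encodes: if the load being removed is atomic, the available value must also come from an atomic access.

// Hypothetical restatement of the check above; not part of the patch.
struct Avail {
  void *Data;
  unsigned Generation;
  int MatchingId;
  bool IsAtomic;
};

static bool canReuseAvailableLoad(const Avail &InVal, unsigned CurrentGeneration,
                                  int MatchingId, bool IsVolatile,
                                  bool IsUnordered, bool IsAtomic) {
  return InVal.Data != nullptr &&
         InVal.Generation == CurrentGeneration && // no intervening may-write op
         InVal.MatchingId == MatchingId &&        // same kind of memory access
         !IsVolatile && IsUnordered &&            // dead load carries no ordering
         (InVal.IsAtomic || !IsAtomic);           // InVal.IsAtomic >= IsAtomic
}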
@@ -591,7 +639,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       // Otherwise, remember that we have this instruction.
       AvailableLoads.insert(
           MemInst.getPointerOperand(),
-          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
+          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                    MemInst.isAtomic()));
       LastStore = nullptr;
       continue;
     }
@@ -646,9 +695,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
 
     if (MemInst.isValid() && MemInst.isStore()) {
       // We do a trivial form of DSE if there are two stores to the same
-      // location with no intervening loads. Delete the earlier store.
+      // location with no intervening loads. Delete the earlier store. Note
+      // that we can delete an earlier simple store even if the following one
+      // is an ordered/volatile/atomic store.
       if (LastStore) {
         ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+        assert(LastStoreMemInst.isSimple() && "Violated invariant");
         if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
           DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                        << "  due to: " << *Inst << '\n');
@@ -667,11 +719,17 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       // the store.
       AvailableLoads.insert(
           MemInst.getPointerOperand(),
-          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
+          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                    MemInst.isAtomic()));
 
       // Remember that this was the last normal store we saw for DSE.
+      // Note that we can't delete an earlier atomic or volatile store in
+      // favor of a later one which isn't. We could in principle remove an
+      // earlier unordered store if the later one is also unordered.
       if (MemInst.isSimple())
         LastStore = Inst;
+      else
+        LastStore = nullptr;
     }
   }
 }
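
To summarise the DSE invariant these last two hunks establish, here is a sketch under stated assumptions (StoreDesc, canDeleteEarlierStore, and nextLastStore are invented for this note and do not appear in the patch): only a simple earlier store is ever a deletion candidate, and the later store's ordering or volatility does not matter because it overwrites the location either way.

// Hypothetical summary of the trivial-DSE rule; not part of the patch.
struct StoreDesc {
  bool IsSimple; // not atomic, not ordered, not volatile
};

// An earlier store may be deleted in favor of a later store to the same
// location only if the earlier one was simple and no load of the location
// intervened; the later store's ordering/volatility is irrelevant.
static bool canDeleteEarlierStore(const StoreDesc &Earlier, bool SameLocation,
                                  bool InterveningLoad) {
  return Earlier.IsSimple && SameLocation && !InterveningLoad;
}

// After visiting a store, it becomes the new DSE candidate only if it is
// simple; otherwise the candidate is cleared (the LastStore = nullptr above),
// which is why the added assert on LastStoreMemInst.isSimple() holds.
static const StoreDesc *nextLastStore(const StoreDesc *JustVisited) {
  return JustVisited->IsSimple ? JustVisited : nullptr;
}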