14
14
15
15
#include " llvm/Pass.h"
16
16
#include " llvm/Analysis/CFG.h"
17
+ #include " llvm/Analysis/TargetTransformInfo.h"
17
18
#include " llvm/ADT/SetOperations.h"
18
19
#include " llvm/ADT/Statistic.h"
19
20
#include " llvm/ADT/DenseSet.h"
@@ -56,6 +57,12 @@ static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
56
57
static cl::opt<bool > PrintBasePointers (" spp-print-base-pointers" , cl::Hidden,
57
58
cl::init (false ));
58
59
60
+ // Cost threshold measuring when it is profitable to rematerialize value instead
61
+ // of relocating it
62
+ static cl::opt<unsigned >
63
+ RematerializationThreshold (" spp-rematerialization-threshold" , cl::Hidden,
64
+ cl::init (6 ));
65
+
59
66
#ifdef XDEBUG
60
67
static bool ClobberNonLive = true ;
61
68
#else
@@ -78,6 +85,7 @@ struct RewriteStatepointsForGC : public FunctionPass {
78
85
// We add and rewrite a bunch of instructions, but don't really do much
79
86
// else. We could in theory preserve a lot more analyses here.
80
87
AU.addRequired <DominatorTreeWrapperPass>();
88
+ AU.addRequired <TargetTransformInfoWrapperPass>();
81
89
}
82
90
};
83
91
} // namespace
@@ -123,6 +131,7 @@ struct GCPtrLivenessData {
123
131
// types, then update all the second type to the first type
124
132
typedef DenseMap<Value *, Value *> DefiningValueMapTy;
125
133
typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
134
+ typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
126
135
127
136
struct PartiallyConstructedSafepointRecord {
128
137
// / The set of values known to be live accross this safepoint
@@ -138,6 +147,11 @@ struct PartiallyConstructedSafepointRecord {
138
147
// / Instruction to which exceptional gc relocates are attached
139
148
// / Makes it easier to iterate through them during relocationViaAlloca.
140
149
Instruction *UnwindToken;
150
+
151
+ // / Record live values we are rematerialized instead of relocating.
152
+ // / They are not included into 'liveset' field.
153
+ // / Maps rematerialized copy to it's original value.
154
+ RematerializedValueMapTy RematerializedValues;
141
155
};
142
156
}
143
157
@@ -1389,6 +1403,31 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
1389
1403
}
1390
1404
}
1391
1405
1406
+ // Helper function for the "relocationViaAlloca". Similar to the
1407
+ // "insertRelocationStores" but works for rematerialized values.
1408
+ static void
1409
+ insertRematerializationStores (
1410
+ RematerializedValueMapTy RematerializedValues,
1411
+ DenseMap<Value *, Value *> &AllocaMap,
1412
+ DenseSet<Value *> &VisitedLiveValues) {
1413
+
1414
+ for (auto RematerializedValuePair: RematerializedValues) {
1415
+ Instruction *RematerializedValue = RematerializedValuePair.first ;
1416
+ Value *OriginalValue = RematerializedValuePair.second ;
1417
+
1418
+ assert (AllocaMap.count (OriginalValue) &&
1419
+ " Can not find alloca for rematerialized value" );
1420
+ Value *Alloca = AllocaMap[OriginalValue];
1421
+
1422
+ StoreInst *Store = new StoreInst (RematerializedValue, Alloca);
1423
+ Store->insertAfter (RematerializedValue);
1424
+
1425
+ #ifndef NDEBUG
1426
+ VisitedLiveValues.insert (OriginalValue);
1427
+ #endif
1428
+ }
1429
+ }
1430
+
1392
1431
// / do all the relocation update via allocas and mem2reg
1393
1432
static void relocationViaAlloca (
1394
1433
Function &F, DominatorTree &DT, ArrayRef<Value *> live,
@@ -1406,17 +1445,38 @@ static void relocationViaAlloca(
1406
1445
// TODO-PERF: change data structures, reserve
1407
1446
DenseMap<Value *, Value *> allocaMap;
1408
1447
SmallVector<AllocaInst *, 200 > PromotableAllocas;
1448
+ // Used later to chack that we have enough allocas to store all values
1449
+ std::size_t NumRematerializedValues = 0 ;
1409
1450
PromotableAllocas.reserve (live.size ());
1410
1451
1452
+ // Emit alloca for "LiveValue" and record it in "allocaMap" and
1453
+ // "PromotableAllocas"
1454
+ auto emitAllocaFor = [&](Value *LiveValue) {
1455
+ AllocaInst *Alloca = new AllocaInst (LiveValue->getType (), " " ,
1456
+ F.getEntryBlock ().getFirstNonPHI ());
1457
+ allocaMap[LiveValue] = Alloca;
1458
+ PromotableAllocas.push_back (Alloca);
1459
+ };
1460
+
1411
1461
// emit alloca for each live gc pointer
1412
1462
for (unsigned i = 0 ; i < live.size (); i++) {
1413
- Value *liveValue = live[i];
1414
- AllocaInst *alloca = new AllocaInst (liveValue->getType (), " " ,
1415
- F.getEntryBlock ().getFirstNonPHI ());
1416
- allocaMap[liveValue] = alloca ;
1417
- PromotableAllocas.push_back (alloca );
1463
+ emitAllocaFor (live[i]);
1418
1464
}
1419
1465
1466
+ // emit allocas for rematerialized values
1467
+ for (size_t i = 0 ; i < records.size (); i++) {
1468
+ const struct PartiallyConstructedSafepointRecord &Info = records[i];
1469
+
1470
+ for (auto RematerializedValuePair: Info.RematerializedValues ) {
1471
+ Value *OriginalValue = RematerializedValuePair.second ;
1472
+ if (allocaMap.count (OriginalValue) != 0 )
1473
+ continue ;
1474
+
1475
+ emitAllocaFor (OriginalValue);
1476
+ ++NumRematerializedValues;
1477
+ }
1478
+ }
1479
+
1420
1480
// The next two loops are part of the same conceptual operation. We need to
1421
1481
// insert a store to the alloca after the original def and at each
1422
1482
// redefinition. We need to insert a load before each use. These are split
@@ -1444,6 +1504,10 @@ static void relocationViaAlloca(
1444
1504
visitedLiveValues);
1445
1505
}
1446
1506
1507
+ // Do similar thing with rematerialized values
1508
+ insertRematerializationStores (info.RematerializedValues , allocaMap,
1509
+ visitedLiveValues);
1510
+
1447
1511
if (ClobberNonLive) {
1448
1512
// As a debuging aid, pretend that an unrelocated pointer becomes null at
1449
1513
// the gc.statepoint. This will turn some subtle GC problems into
@@ -1548,7 +1612,7 @@ static void relocationViaAlloca(
1548
1612
}
1549
1613
}
1550
1614
1551
- assert (PromotableAllocas.size () == live.size () &&
1615
+ assert (PromotableAllocas.size () == live.size () + NumRematerializedValues &&
1552
1616
" we must have the same allocas with lives" );
1553
1617
if (!PromotableAllocas.empty ()) {
1554
1618
// apply mem2reg to promote alloca to SSA
@@ -1732,6 +1796,201 @@ static void splitVectorValues(Instruction *StatepointInst,
1732
1796
PromoteMemToReg (Allocas, DT);
1733
1797
}
1734
1798
1799
+ // Helper function for the "rematerializeLiveValues". It walks use chain
1800
+ // starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
1801
+ // values are visited (currently it is GEP's and casts). Returns true if it
1802
+ // sucessfully reached "BaseValue" and false otherwise.
1803
+ // Fills "ChainToBase" array with all visited values. "BaseValue" is not
1804
+ // recorded.
1805
+ static bool findRematerializableChainToBasePointer (
1806
+ SmallVectorImpl<Instruction*> &ChainToBase,
1807
+ Value *CurrentValue, Value *BaseValue) {
1808
+
1809
+ // We have found a base value
1810
+ if (CurrentValue == BaseValue) {
1811
+ return true ;
1812
+ }
1813
+
1814
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
1815
+ ChainToBase.push_back (GEP);
1816
+ return findRematerializableChainToBasePointer (ChainToBase,
1817
+ GEP->getPointerOperand (),
1818
+ BaseValue);
1819
+ }
1820
+
1821
+ if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
1822
+ Value *Def = CI->stripPointerCasts ();
1823
+
1824
+ // This two checks are basically similar. First one is here for the
1825
+ // consistency with findBasePointers logic.
1826
+ assert (!isa<CastInst>(Def) && " not a pointer cast found" );
1827
+ if (!CI->isNoopCast (CI->getModule ()->getDataLayout ()))
1828
+ return false ;
1829
+
1830
+ ChainToBase.push_back (CI);
1831
+ return findRematerializableChainToBasePointer (ChainToBase, Def, BaseValue);
1832
+ }
1833
+
1834
+ // Not supported instruction in the chain
1835
+ return false ;
1836
+ }
1837
+
1838
+ // Helper function for the "rematerializeLiveValues". Compute cost of the use
1839
+ // chain we are going to rematerialize.
1840
+ static unsigned
1841
+ chainToBasePointerCost (SmallVectorImpl<Instruction*> &Chain,
1842
+ TargetTransformInfo &TTI) {
1843
+ unsigned Cost = 0 ;
1844
+
1845
+ for (Instruction *Instr : Chain) {
1846
+ if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
1847
+ assert (CI->isNoopCast (CI->getModule ()->getDataLayout ()) &&
1848
+ " non noop cast is found during rematerialization" );
1849
+
1850
+ Type *SrcTy = CI->getOperand (0 )->getType ();
1851
+ Cost += TTI.getCastInstrCost (CI->getOpcode (), CI->getType (), SrcTy);
1852
+
1853
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
1854
+ // Cost of the address calculation
1855
+ Type *ValTy = GEP->getPointerOperandType ()->getPointerElementType ();
1856
+ Cost += TTI.getAddressComputationCost (ValTy);
1857
+
1858
+ // And cost of the GEP itself
1859
+ // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
1860
+ // allowed for the external usage)
1861
+ if (!GEP->hasAllConstantIndices ())
1862
+ Cost += 2 ;
1863
+
1864
+ } else {
1865
+ llvm_unreachable (" unsupported instruciton type during rematerialization" );
1866
+ }
1867
+ }
1868
+
1869
+ return Cost;
1870
+ }
1871
+
1872
+ // From the statepoint liveset pick values that are cheaper to recompute then to
1873
+ // relocate. Remove this values from the liveset, rematerialize them after
1874
+ // statepoint and record them in "Info" structure. Note that similar to
1875
+ // relocated values we don't do any user adjustments here.
1876
+ static void rematerializeLiveValues (CallSite CS,
1877
+ PartiallyConstructedSafepointRecord &Info,
1878
+ TargetTransformInfo &TTI) {
1879
+ const int ChainLengthThreshold = 10 ;
1880
+
1881
+ // Record values we are going to delete from this statepoint live set.
1882
+ // We can not di this in following loop due to iterator invalidation.
1883
+ SmallVector<Value *, 32 > LiveValuesToBeDeleted;
1884
+
1885
+ for (Value *LiveValue: Info.liveset ) {
1886
+ // For each live pointer find it's defining chain
1887
+ SmallVector<Instruction *, 3 > ChainToBase;
1888
+ assert (Info.PointerToBase .find (LiveValue) != Info.PointerToBase .end ());
1889
+ bool FoundChain =
1890
+ findRematerializableChainToBasePointer (ChainToBase,
1891
+ LiveValue,
1892
+ Info.PointerToBase [LiveValue]);
1893
+ // Nothing to do, or chain is too long
1894
+ if (!FoundChain ||
1895
+ ChainToBase.size () == 0 ||
1896
+ ChainToBase.size () > ChainLengthThreshold)
1897
+ continue ;
1898
+
1899
+ // Compute cost of this chain
1900
+ unsigned Cost = chainToBasePointerCost (ChainToBase, TTI);
1901
+ // TODO: We can also account for cases when we will be able to remove some
1902
+ // of the rematerialized values by later optimization passes. I.e if
1903
+ // we rematerialized several intersecting chains. Or if original values
1904
+ // don't have any uses besides this statepoint.
1905
+
1906
+ // For invokes we need to rematerialize each chain twice - for normal and
1907
+ // for unwind basic blocks. Model this by multiplying cost by two.
1908
+ if (CS.isInvoke ()) {
1909
+ Cost *= 2 ;
1910
+ }
1911
+ // If it's too expensive - skip it
1912
+ if (Cost >= RematerializationThreshold)
1913
+ continue ;
1914
+
1915
+ // Remove value from the live set
1916
+ LiveValuesToBeDeleted.push_back (LiveValue);
1917
+
1918
+ // Clone instructions and record them inside "Info" structure
1919
+
1920
+ // Walk backwards to visit top-most instructions first
1921
+ std::reverse (ChainToBase.begin (), ChainToBase.end ());
1922
+
1923
+ // Utility function which clones all instructions from "ChainToBase"
1924
+ // and inserts them before "InsertBefore". Returns rematerialized value
1925
+ // which should be used after statepoint.
1926
+ auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
1927
+ Instruction *LastClonedValue = nullptr ;
1928
+ Instruction *LastValue = nullptr ;
1929
+ for (Instruction *Instr: ChainToBase) {
1930
+ // Only GEP's and casts are suported as we need to be careful to not
1931
+ // introduce any new uses of pointers not in the liveset.
1932
+ // Note that it's fine to introduce new uses of pointers which were
1933
+ // otherwise not used after this statepoint.
1934
+ assert (isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));
1935
+
1936
+ Instruction *ClonedValue = Instr->clone ();
1937
+ ClonedValue->insertBefore (InsertBefore);
1938
+ ClonedValue->setName (Instr->getName () + " .remat" );
1939
+
1940
+ // If it is not first instruction in the chain then it uses previously
1941
+ // cloned value. We should update it to use cloned value.
1942
+ if (LastClonedValue) {
1943
+ assert (LastValue);
1944
+ ClonedValue->replaceUsesOfWith (LastValue, LastClonedValue);
1945
+ #ifndef NDEBUG
1946
+ // Assert that cloned instruction does not use any instructions
1947
+ // other than LastClonedValue
1948
+ for (auto OpValue: ClonedValue->operand_values ()) {
1949
+ if (isa<Instruction>(OpValue))
1950
+ assert (OpValue == LastClonedValue &&
1951
+ " unexpected use found in rematerialized value" );
1952
+ }
1953
+ #endif
1954
+ }
1955
+
1956
+ LastClonedValue = ClonedValue;
1957
+ LastValue = Instr;
1958
+ }
1959
+ assert (LastClonedValue);
1960
+ return LastClonedValue;
1961
+ };
1962
+
1963
+ // Different cases for calls and invokes. For invokes we need to clone
1964
+ // instructions both on normal and unwind path.
1965
+ if (CS.isCall ()) {
1966
+ Instruction *InsertBefore = CS.getInstruction ()->getNextNode ();
1967
+ assert (InsertBefore);
1968
+ Instruction *RematerializedValue = rematerializeChain (InsertBefore);
1969
+ Info.RematerializedValues [RematerializedValue] = LiveValue;
1970
+ } else {
1971
+ InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction ());
1972
+
1973
+ Instruction *NormalInsertBefore =
1974
+ Invoke->getNormalDest ()->getFirstInsertionPt ();
1975
+ Instruction *UnwindInsertBefore =
1976
+ Invoke->getUnwindDest ()->getFirstInsertionPt ();
1977
+
1978
+ Instruction *NormalRematerializedValue =
1979
+ rematerializeChain (NormalInsertBefore);
1980
+ Instruction *UnwindRematerializedValue =
1981
+ rematerializeChain (UnwindInsertBefore);
1982
+
1983
+ Info.RematerializedValues [NormalRematerializedValue] = LiveValue;
1984
+ Info.RematerializedValues [UnwindRematerializedValue] = LiveValue;
1985
+ }
1986
+ }
1987
+
1988
+ // Remove rematerializaed values from the live set
1989
+ for (auto LiveValue: LiveValuesToBeDeleted) {
1990
+ Info.liveset .erase (LiveValue);
1991
+ }
1992
+ }
1993
+
1735
1994
static bool insertParsePoints (Function &F, DominatorTree &DT, Pass *P,
1736
1995
SmallVectorImpl<CallSite> &toUpdate) {
1737
1996
#ifndef NDEBUG
@@ -1867,6 +2126,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
1867
2126
}
1868
2127
holders.clear ();
1869
2128
2129
+ // In order to reduce live set of statepoint we might choose to rematerialize
2130
+ // some values instead of relocating them. This is purelly an optimization and
2131
+ // does not influence correctness.
2132
+ TargetTransformInfo &TTI =
2133
+ P->getAnalysis <TargetTransformInfoWrapperPass>().getTTI (F);
2134
+
2135
+ for (size_t i = 0 ; i < records.size (); i++) {
2136
+ struct PartiallyConstructedSafepointRecord &info = records[i];
2137
+ CallSite &CS = toUpdate[i];
2138
+
2139
+ rematerializeLiveValues (CS, info, TTI);
2140
+ }
2141
+
1870
2142
// Now run through and replace the existing statepoints with new ones with
1871
2143
// the live variables listed. We do not yet update uses of the values being
1872
2144
// relocated. We have references to live variables that need to
0 commit comments