
Commit 5ac9179

Committed Mar 10, 2017
Move memory coercion functions from GVN.cpp to VNCoercion.cpp so they can be shared between GVN and NewGVN.
Summary: These are the functions used to determine when values of loads can be extracted from stores, etc., and to perform the necessary insertions to do this. There are no changes to the functions themselves except reformatting, and one case where memdep was informed of a removed load (which was pushed into the caller).

Reviewers: davide

Subscribers: mgorny, llvm-commits, Prazek

Differential Revision: https://reviews.llvm.org/D30478

llvm-svn: 297438
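For readers of this commit page: the routines being moved follow a two-step pattern, first an analyze* query that returns a byte offset (or -1), then a get* call that inserts the shifts/truncates/bitcasts to materialize the value. The helper below is an illustrative sketch only, not part of this patch; it assumes just the declarations added in VNCoercion.h, and the function name tryForwardStoreToLoad is hypothetical.

// Illustrative only: caller-side sketch of the analyze*/get* pairing that
// GVN uses today and that NewGVN can now share. Not part of the patch.
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/VNCoercion.h"

using namespace llvm;
using namespace llvm::VNCoercion;

// Hypothetical helper: try to replace load LI with a value extracted from the
// clobbering store DepSI, returning null if the bits cannot be forwarded.
static Value *tryForwardStoreToLoad(LoadInst *LI, StoreInst *DepSI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  // Step 1: does the stored value fully cover the loaded bits, and at what
  // byte offset into the store does the load start?
  int Offset = analyzeLoadFromClobberingStore(LI->getType(),
                                              LI->getPointerOperand(), DepSI);
  if (Offset == -1)
    return nullptr; // The store does not provide all the bits for the load.
  // Step 2: insert the instructions that extract and coerce the value.
  return getStoreValueForLoad(DepSI->getValueOperand(), (unsigned)Offset,
                              LI->getType(), LI, DL);
}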
1 parent 22645ee commit 5ac9179

File tree

4 files changed: +556 -447 lines changed

llvm/include/llvm/Transforms/Utils/VNCoercion.h

+96
@@ -0,0 +1,96 @@
+//===- VNCoercion.h - Value Numbering Coercion Utilities --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file / This file provides routines used by LLVM's value numbering passes to
+/// perform various forms of value extraction from memory when the types are not
+/// identical. For example, given
+///
+/// store i32 8, i32 *%foo
+/// %a = bitcast i32 *%foo to i16
+/// %val = load i16, i16 *%a
+///
+/// It possible to extract the value of the load of %a from the store to %foo.
+/// These routines know how to tell whether they can do that (the analyze*
+/// routines), and can also insert the necessary IR to do it (the get*
+/// routines).
+
+#ifndef LLVM_TRANSFORMS_UTILS_VNCOERCION_H
+#define LLVM_TRANSFORMS_UTILS_VNCOERCION_H
+#include "llvm/IR/IRBuilder.h"
+
+namespace llvm {
+class Function;
+class StoreInst;
+class LoadInst;
+class MemIntrinsic;
+class Instruction;
+class Value;
+class Type;
+class DataLayout;
+namespace VNCoercion {
+/// Return true if CoerceAvailableValueToLoadType would succeed if it was
+/// called.
+bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+                                     const DataLayout &DL);
+
+/// If we saw a store of a value to memory, and then a load from a must-aliased
+/// pointer of a different type, try to coerce the stored value to the loaded
+/// type. LoadedTy is the type of the load we want to replace. IRB is
+/// IRBuilder used to insert new instructions.
+///
+/// If we can't do it, return null.
+Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+                                      IRBuilder<> &IRB, const DataLayout &DL);
+
+/// This function determines whether a value for the pointer LoadPtr can be
+/// extracted from the store at DepSI.
+///
+/// On success, it returns the offset into DepSI that extraction would start.
+/// On failure, it returns -1.
+int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
+                                   StoreInst *DepSI);
+
+/// This function determines whether a value for the pointer LoadPtr can be
+/// extracted from the load at DepLI.
+///
+/// On success, it returns the offset into DepLI that extraction would start.
+/// On failure, it returns -1.
+int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
+                                  const DataLayout &DL);
+
+/// This function determines whether a value for the pointer LoadPtr can be
+/// extracted from the memory intrinsic at DepMI.
+///
+/// On success, it returns the offset into DepMI that extraction would start.
+/// On failure, it returns -1.
+int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
+                                     MemIntrinsic *DepMI, const DataLayout &DL);
+
+/// If analyzeLoadFromClobberingStore returned an offset, this function can be
+/// used to actually perform the extraction of the bits from the store. It
+/// inserts instructions to do so at InsertPt, and returns the extracted value.
+Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
+                            Instruction *InsertPt, const DataLayout &DL);
+
+/// If analyzeLoadFromClobberingLoad returned an offset, this function can be
+/// used to actually perform the extraction of the bits from the load, including
+/// any necessary load widening. It inserts instructions to do so at InsertPt,
+/// and returns the extracted value.
+Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
+                           Instruction *InsertPt);
+
+/// If analyzeLoadFromClobberingMemInst returned an offset, this function can be
+/// used to actually perform the extraction of the bits from the memory
+/// intrinsic. It inserts instructions to do so at InsertPt, and returns the
+/// extracted value.
+Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+                              Type *LoadTy, Instruction *InsertPt,
+                              const DataLayout &DL);
+}
+}
+#endif
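One detail implied by these declarations: coerceAvailableValueToLoadType asserts its precondition rather than failing gracefully, so callers are expected to check canCoerceMustAliasedValueToLoad first. A minimal hedged sketch follows; the wrapper coerceIfPossible and its parameter names are hypothetical, only the two VNCoercion calls come from this header.

// Illustrative only: guard the coercion with the query it asserts on.
#include "llvm/Transforms/Utils/VNCoercion.h"

using namespace llvm;
using namespace llvm::VNCoercion;

// Hypothetical helper: coerce StoredVal to LoadTy at the given insertion
// point, or return null when the types cannot be reconciled (aggregates, or a
// store narrower than the load).
static Value *coerceIfPossible(Value *StoredVal, Type *LoadTy,
                               Instruction *InsertPt, const DataLayout &DL) {
  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
    return nullptr; // Coercion would hit the assert; bail out instead.
  IRBuilder<> IRB(InsertPt);
  return coerceAvailableValueToLoadType(StoredVal, LoadTy, IRB, DL);
}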

‎llvm/lib/Transforms/Scalar/GVN.cpp

+19 -447
@@ -36,7 +36,6 @@
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -51,9 +50,12 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/VNCoercion.h"
+
 #include <vector>
 using namespace llvm;
 using namespace llvm::gvn;
+using namespace llvm::VNCoercion;
 using namespace PatternMatch;
 
 #define DEBUG_TYPE "gvn"
@@ -692,442 +694,6 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
 }
 
 
-/// Return true if CoerceAvailableValueToLoadType will succeed.
-static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
-                                            Type *LoadTy,
-                                            const DataLayout &DL) {
-  // If the loaded or stored value is an first class array or struct, don't try
-  // to transform them.  We need to be able to bitcast to integer.
-  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
-      StoredVal->getType()->isStructTy() ||
-      StoredVal->getType()->isArrayTy())
-    return false;
-
-  // The store has to be at least as big as the load.
-  if (DL.getTypeSizeInBits(StoredVal->getType()) <
-        DL.getTypeSizeInBits(LoadTy))
-    return false;
-
-  return true;
-}
-
-/// If we saw a store of a value to memory, and
-/// then a load from a must-aliased pointer of a different type, try to coerce
-/// the stored value.  LoadedTy is the type of the load we want to replace.
-/// IRB is IRBuilder used to insert new instructions.
-///
-/// If we can't do it, return null.
-static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
-                                             IRBuilder<> &IRB,
-                                             const DataLayout &DL) {
-  assert(CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
-         "precondition violation - materialization can't fail");
-
-  if (auto *C = dyn_cast<Constant>(StoredVal))
-    if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
-      StoredVal = FoldedStoredVal;
-
-  // If this is already the right type, just return it.
-  Type *StoredValTy = StoredVal->getType();
-
-  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
-  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
-
-  // If the store and reload are the same size, we can always reuse it.
-  if (StoredValSize == LoadedValSize) {
-    // Pointer to Pointer -> use bitcast.
-    if (StoredValTy->getScalarType()->isPointerTy() &&
-        LoadedTy->getScalarType()->isPointerTy()) {
-      StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy);
-    } else {
-      // Convert source pointers to integers, which can be bitcast.
-      if (StoredValTy->getScalarType()->isPointerTy()) {
-        StoredValTy = DL.getIntPtrType(StoredValTy);
-        StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
-      }
-
-      Type *TypeToCastTo = LoadedTy;
-      if (TypeToCastTo->getScalarType()->isPointerTy())
-        TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
-
-      if (StoredValTy != TypeToCastTo)
-        StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo);
-
-      // Cast to pointer if the load needs a pointer type.
-      if (LoadedTy->getScalarType()->isPointerTy())
-        StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy);
-    }
-
-    if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
-      if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
-        StoredVal = FoldedStoredVal;
-
-    return StoredVal;
-  }
-
-  // If the loaded value is smaller than the available value, then we can
-  // extract out a piece from it.  If the available value is too small, then we
-  // can't do anything.
-  assert(StoredValSize >= LoadedValSize &&
-         "CanCoerceMustAliasedValueToLoad fail");
-
-  // Convert source pointers to integers, which can be manipulated.
-  if (StoredValTy->getScalarType()->isPointerTy()) {
-    StoredValTy = DL.getIntPtrType(StoredValTy);
-    StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
-  }
-
-  // Convert vectors and fp to integer, which can be manipulated.
-  if (!StoredValTy->isIntegerTy()) {
-    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
-    StoredVal = IRB.CreateBitCast(StoredVal, StoredValTy);
-  }
-
-  // If this is a big-endian system, we need to shift the value down to the low
-  // bits so that a truncate will work.
-  if (DL.isBigEndian()) {
-    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
-                        DL.getTypeStoreSizeInBits(LoadedTy);
-    StoredVal = IRB.CreateLShr(StoredVal, ShiftAmt, "tmp");
-  }
-
-  // Truncate the integer to the right size now.
-  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
-  StoredVal = IRB.CreateTrunc(StoredVal, NewIntTy, "trunc");
-
-  if (LoadedTy != NewIntTy) {
-    // If the result is a pointer, inttoptr.
-    if (LoadedTy->getScalarType()->isPointerTy())
-      StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr");
-    else
-      // Otherwise, bitcast.
-      StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast");
-  }
-
-  if (auto *C = dyn_cast<Constant>(StoredVal))
-    if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
-      StoredVal = FoldedStoredVal;
-
-  return StoredVal;
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering memory write (store,
-/// memset, memcpy, memmove).  This means that the write *may* provide bits used
-/// by the load but we can't be sure because the pointers don't mustalias.
-///
-/// Check this case to see if there is anything more we can do before we give
-/// up.  This returns -1 if we have to give up, or a byte number in the stored
-/// value of the piece that feeds the load.
-static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
-                                          Value *WritePtr,
-                                          uint64_t WriteSizeInBits,
-                                          const DataLayout &DL) {
-  // If the loaded or stored value is a first class array or struct, don't try
-  // to transform them.  We need to be able to bitcast to integer.
-  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
-    return -1;
-
-  int64_t StoreOffset = 0, LoadOffset = 0;
-  Value *StoreBase =
-      GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
-  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
-  if (StoreBase != LoadBase)
-    return -1;
-
-  // If the load and store are to the exact same address, they should have been
-  // a must alias.  AA must have gotten confused.
-  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
-  // to a load from the base of the memset.
-
-  // If the load and store don't overlap at all, the store doesn't provide
-  // anything to the load.  In this case, they really don't alias at all, AA
-  // must have gotten confused.
-  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
-
-  if ((WriteSizeInBits & 7) | (LoadSize & 7))
-    return -1;
-  uint64_t StoreSize = WriteSizeInBits / 8;  // Convert to bytes.
-  LoadSize /= 8;
-
-
-  bool isAAFailure = false;
-  if (StoreOffset < LoadOffset)
-    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
-  else
-    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
-
-  if (isAAFailure)
-    return -1;
-
-  // If the Load isn't completely contained within the stored bits, we don't
-  // have all the bits to feed it.  We could do something crazy in the future
-  // (issue a smaller load then merge the bits in) but this seems unlikely to be
-  // valuable.
-  if (StoreOffset > LoadOffset ||
-      StoreOffset+StoreSize < LoadOffset+LoadSize)
-    return -1;
-
-  // Okay, we can do this transformation.  Return the number of bytes into the
-  // store that the load is.
-  return LoadOffset-StoreOffset;
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store.
-static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
-                                          StoreInst *DepSI) {
-  // Cannot handle reading from store of first-class aggregate yet.
-  if (DepSI->getValueOperand()->getType()->isStructTy() ||
-      DepSI->getValueOperand()->getType()->isArrayTy())
-    return -1;
-
-  const DataLayout &DL = DepSI->getModule()->getDataLayout();
-  Value *StorePtr = DepSI->getPointerOperand();
-  uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
-  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
-                                        StorePtr, StoreSize, DL);
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being clobbered by another load.  See if
-/// the other load can feed into the second load.
-static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
-                                         LoadInst *DepLI, const DataLayout &DL){
-  // Cannot handle reading from store of first-class aggregate yet.
-  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
-    return -1;
-
-  Value *DepPtr = DepLI->getPointerOperand();
-  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
-  int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
-  if (R != -1) return R;
-
-  // If we have a load/load clobber an DepLI can be widened to cover this load,
-  // then we should widen it!
-  int64_t LoadOffs = 0;
-  const Value *LoadBase =
-      GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
-  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
-
-  unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
-      LoadBase, LoadOffs, LoadSize, DepLI);
-  if (Size == 0) return -1;
-
-  // Check non-obvious conditions enforced by MDA which we rely on for being
-  // able to materialize this potentially available value
-  assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
-  assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
-
-  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
-}
-
-
-
-static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
-                                            MemIntrinsic *MI,
-                                            const DataLayout &DL) {
-  // If the mem operation is a non-constant size, we can't handle it.
-  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
-  if (!SizeCst) return -1;
-  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
-
-  // If this is memset, we just need to see if the offset is valid in the size
-  // of the memset..
-  if (MI->getIntrinsicID() == Intrinsic::memset)
-    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
-                                          MemSizeInBits, DL);
-
-  // If we have a memcpy/memmove, the only case we can handle is if this is a
-  // copy from constant memory.  In that case, we can read directly from the
-  // constant memory.
-  MemTransferInst *MTI = cast<MemTransferInst>(MI);
-
-  Constant *Src = dyn_cast<Constant>(MTI->getSource());
-  if (!Src) return -1;
-
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
-  if (!GV || !GV->isConstant()) return -1;
-
-  // See if the access is within the bounds of the transfer.
-  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
-                                              MI->getDest(), MemSizeInBits, DL);
-  if (Offset == -1)
-    return Offset;
-
-  unsigned AS = Src->getType()->getPointerAddressSpace();
-  // Otherwise, see if we can constant fold a load from the constant with the
-  // offset applied as appropriate.
-  Src = ConstantExpr::getBitCast(Src,
-                                 Type::getInt8PtrTy(Src->getContext(), AS));
-  Constant *OffsetCst =
-      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
-  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
-                                       OffsetCst);
-  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
-  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
-    return Offset;
-  return -1;
-}
-
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store.  This means
-/// that the store provides bits used by the load but we the pointers don't
-/// mustalias.  Check this case to see if there is anything more we can do
-/// before we give up.
-static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
-                                   Type *LoadTy,
-                                   Instruction *InsertPt, const DataLayout &DL){
-  LLVMContext &Ctx = SrcVal->getType()->getContext();
-
-  uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
-  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
-
-  IRBuilder<> Builder(InsertPt);
-
-  // Compute which bits of the stored value are being used by the load.  Convert
-  // to an integer type to start with.
-  if (SrcVal->getType()->getScalarType()->isPointerTy())
-    SrcVal = Builder.CreatePtrToInt(SrcVal,
-        DL.getIntPtrType(SrcVal->getType()));
-  if (!SrcVal->getType()->isIntegerTy())
-    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
-
-  // Shift the bits to the least significant depending on endianness.
-  unsigned ShiftAmt;
-  if (DL.isLittleEndian())
-    ShiftAmt = Offset*8;
-  else
-    ShiftAmt = (StoreSize-LoadSize-Offset)*8;
-
-  if (ShiftAmt)
-    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
-
-  if (LoadSize != StoreSize)
-    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
-
-  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering load.  This means
-/// that the load *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias.  Check this case to see if there is
-/// anything more we can do before we give up.
-static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
-                                  Type *LoadTy, Instruction *InsertPt,
-                                  GVN &gvn) {
-  const DataLayout &DL = SrcVal->getModule()->getDataLayout();
-  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
-  // widen SrcVal out to a larger load.
-  unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
-  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
-  if (Offset+LoadSize > SrcValStoreSize) {
-    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
-    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
-    // If we have a load/load clobber an DepLI can be widened to cover this
-    // load, then we should widen it to the next power of 2 size big enough!
-    unsigned NewLoadSize = Offset+LoadSize;
-    if (!isPowerOf2_32(NewLoadSize))
-      NewLoadSize = NextPowerOf2(NewLoadSize);
-
-    Value *PtrVal = SrcVal->getPointerOperand();
-
-    // Insert the new load after the old load.  This ensures that subsequent
-    // memdep queries will find the new load.  We can't easily remove the old
-    // load completely because it is already in the value numbering table.
-    IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
-    Type *DestPTy =
-      IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
-    DestPTy = PointerType::get(DestPTy,
-                               PtrVal->getType()->getPointerAddressSpace());
-    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
-    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
-    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
-    NewLoad->takeName(SrcVal);
-    NewLoad->setAlignment(SrcVal->getAlignment());
-
-    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
-    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
-
-    // Replace uses of the original load with the wider load.  On a big endian
-    // system, we need to shift down to get the relevant bits.
-    Value *RV = NewLoad;
-    if (DL.isBigEndian())
-      RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
-    RV = Builder.CreateTrunc(RV, SrcVal->getType());
-    SrcVal->replaceAllUsesWith(RV);
-
-    // We would like to use gvn.markInstructionForDeletion here, but we can't
-    // because the load is already memoized into the leader map table that GVN
-    // tracks.  It is potentially possible to remove the load from the table,
-    // but then there all of the operations based on it would need to be
-    // rehashed.  Just leave the dead load around.
-    gvn.getMemDep().removeInstruction(SrcVal);
-    SrcVal = NewLoad;
-  }
-
-  return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
-}
-
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering mem intrinsic.
-static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
-                                     Type *LoadTy, Instruction *InsertPt,
-                                     const DataLayout &DL){
-  LLVMContext &Ctx = LoadTy->getContext();
-  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8;
-
-  IRBuilder<> Builder(InsertPt);
-
-  // We know that this method is only called when the mem transfer fully
-  // provides the bits for the load.
-  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
-    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
-    // independently of what the offset is.
-    Value *Val = MSI->getValue();
-    if (LoadSize != 1)
-      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
-
-    Value *OneElt = Val;
-
-    // Splat the value out to the right number of bits.
-    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
-      // If we can double the number of bytes set, do it.
-      if (NumBytesSet*2 <= LoadSize) {
-        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
-        Val = Builder.CreateOr(Val, ShVal);
-        NumBytesSet <<= 1;
-        continue;
-      }
-
-      // Otherwise insert one byte at a time.
-      Value *ShVal = Builder.CreateShl(Val, 1*8);
-      Val = Builder.CreateOr(OneElt, ShVal);
-      ++NumBytesSet;
-    }
-
-    return CoerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
-  }
-
-  // Otherwise, this is a memcpy/memmove from a constant global.
-  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
-  Constant *Src = cast<Constant>(MTI->getSource());
-  unsigned AS = Src->getType()->getPointerAddressSpace();
-
-  // Otherwise, see if we can constant fold a load from the constant with the
-  // offset applied as appropriate.
-  Src = ConstantExpr::getBitCast(Src,
-                                 Type::getInt8PtrTy(Src->getContext(), AS));
-  Constant *OffsetCst =
-      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
-  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
-                                       OffsetCst);
-  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
-  return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
-}
 
 
 /// Given a set of loads specified by ValuesPerBlock,
@@ -1173,7 +739,7 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
   if (isSimpleValue()) {
     Res = getSimpleValue();
     if (Res->getType() != LoadTy) {
-      Res = GetStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
+      Res = getStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
 
       DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
                    << *getSimpleValue() << '\n'
@@ -1184,14 +750,20 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
     if (Load->getType() == LoadTy && Offset == 0) {
       Res = Load;
     } else {
-      Res = GetLoadValueForLoad(Load, Offset, LoadTy, InsertPt, gvn);
-
+      Res = getLoadValueForLoad(Load, Offset, LoadTy, InsertPt);
+      // We would like to use gvn.markInstructionForDeletion here, but we can't
+      // because the load is already memoized into the leader map table that GVN
+      // tracks.  It is potentially possible to remove the load from the table,
+      // but then there all of the operations based on it would need to be
+      // rehashed.  Just leave the dead load around.
+      gvn.getMemDep().removeInstruction(Load);
       DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
                    << *getCoercedLoadValue() << '\n'
-                   << *Res << '\n' << "\n\n\n");
+                   << *Res << '\n'
+                   << "\n\n\n");
     }
   } else if (isMemIntrinValue()) {
-    Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
+    Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
                                  InsertPt, DL);
     DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                  << " " << *getMemIntrinValue() << '\n'
@@ -1260,7 +832,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
      // Can't forward from non-atomic to atomic without violating memory model.
      if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
        int Offset =
-          AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
+          analyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
        if (Offset != -1) {
          Res = AvailableValue::get(DepSI->getValueOperand(), Offset);
          return true;
@@ -1278,7 +850,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
      // Can't forward from non-atomic to atomic without violating memory model.
      if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
        int Offset =
-          AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
+          analyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
 
        if (Offset != -1) {
          Res = AvailableValue::getLoad(DepLI, Offset);
@@ -1291,7 +863,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
      if (Address && !LI->isAtomic()) {
-        int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+        int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                       DepMI, DL);
        if (Offset != -1) {
          Res = AvailableValue::getMI(DepMI, Offset);
@@ -1336,7 +908,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
    // different types if we have to. If the stored value is larger or equal to
    // the loaded value, we can reuse it.
    if (S->getValueOperand()->getType() != LI->getType() &&
-        !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+        !canCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                         LI->getType(), DL))
      return false;
 
@@ -1353,7 +925,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
    // If the stored value is larger or equal to the loaded value, we can reuse
    // it.
    if (LD->getType() != LI->getType() &&
-        !CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
+        !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
      return false;
 
    // Can't forward from non-atomic to atomic without violating memory model.

‎llvm/lib/Transforms/Utils/CMakeLists.txt

+1
@@ -54,6 +54,7 @@ add_llvm_library(LLVMTransformUtils
   UnifyFunctionExitNodes.cpp
   Utils.cpp
   ValueMapper.cpp
+  VNCoercion.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
llvm/lib/Transforms/Utils/VNCoercion.cpp

+440
@@ -0,0 +1,440 @@
+#include "llvm/Transforms/Utils/VNCoercion.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "vncoerce"
+namespace llvm {
+namespace VNCoercion {
+
+/// Return true if coerceAvailableValueToLoadType will succeed.
+bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+                                     const DataLayout &DL) {
+  // If the loaded or stored value is an first class array or struct, don't try
+  // to transform them. We need to be able to bitcast to integer.
+  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+      StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+    return false;
+
+  // The store has to be at least as big as the load.
+  if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
+    return false;
+
+  return true;
+}
+
+/// If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value.  LoadedTy is the type of the load we want to replace.
+/// IRB is IRBuilder used to insert new instructions.
+///
+/// If we can't do it, return null.
+Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+                                      IRBuilder<> &IRB, const DataLayout &DL) {
+  assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
+         "precondition violation - materialization can't fail");
+
+  if (auto *C = dyn_cast<Constant>(StoredVal))
+    if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+      StoredVal = FoldedStoredVal;
+
+  // If this is already the right type, just return it.
+  Type *StoredValTy = StoredVal->getType();
+
+  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
+  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
+
+  // If the store and reload are the same size, we can always reuse it.
+  if (StoredValSize == LoadedValSize) {
+    // Pointer to Pointer -> use bitcast.
+    if (StoredValTy->getScalarType()->isPointerTy() &&
+        LoadedTy->getScalarType()->isPointerTy()) {
+      StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy);
+    } else {
+      // Convert source pointers to integers, which can be bitcast.
+      if (StoredValTy->getScalarType()->isPointerTy()) {
+        StoredValTy = DL.getIntPtrType(StoredValTy);
+        StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
+      }
+
+      Type *TypeToCastTo = LoadedTy;
+      if (TypeToCastTo->getScalarType()->isPointerTy())
+        TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
+
+      if (StoredValTy != TypeToCastTo)
+        StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo);
+
+      // Cast to pointer if the load needs a pointer type.
+      if (LoadedTy->getScalarType()->isPointerTy())
+        StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy);
+    }
+
+    if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
+      if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+        StoredVal = FoldedStoredVal;
+
+    return StoredVal;
+  }
+
+  // If the loaded value is smaller than the available value, then we can
+  // extract out a piece from it.  If the available value is too small, then we
+  // can't do anything.
+  assert(StoredValSize >= LoadedValSize &&
+         "canCoerceMustAliasedValueToLoad fail");
+
+  // Convert source pointers to integers, which can be manipulated.
+  if (StoredValTy->getScalarType()->isPointerTy()) {
+    StoredValTy = DL.getIntPtrType(StoredValTy);
+    StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
+  }
+
+  // Convert vectors and fp to integer, which can be manipulated.
+  if (!StoredValTy->isIntegerTy()) {
+    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
+    StoredVal = IRB.CreateBitCast(StoredVal, StoredValTy);
+  }
+
+  // If this is a big-endian system, we need to shift the value down to the low
+  // bits so that a truncate will work.
+  if (DL.isBigEndian()) {
+    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
+                        DL.getTypeStoreSizeInBits(LoadedTy);
+    StoredVal = IRB.CreateLShr(StoredVal, ShiftAmt, "tmp");
+  }
+
+  // Truncate the integer to the right size now.
+  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
+  StoredVal = IRB.CreateTrunc(StoredVal, NewIntTy, "trunc");
+
+  if (LoadedTy != NewIntTy) {
+    // If the result is a pointer, inttoptr.
+    if (LoadedTy->getScalarType()->isPointerTy())
+      StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr");
+    else
+      // Otherwise, bitcast.
+      StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast");
+  }
+
+  if (auto *C = dyn_cast<Constant>(StoredVal))
+    if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+      StoredVal = FoldedStoredVal;
+
+  return StoredVal;
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering memory write (store,
+/// memset, memcpy, memmove).  This means that the write *may* provide bits used
+/// by the load but we can't be sure because the pointers don't mustalias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up.  This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
+                                          Value *WritePtr,
+                                          uint64_t WriteSizeInBits,
+                                          const DataLayout &DL) {
+  // If the loaded or stored value is a first class array or struct, don't try
+  // to transform them.  We need to be able to bitcast to integer.
+  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+    return -1;
+
+  int64_t StoreOffset = 0, LoadOffset = 0;
+  Value *StoreBase =
+      GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
+  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
+  if (StoreBase != LoadBase)
+    return -1;
+
+  // If the load and store are to the exact same address, they should have been
+  // a must alias.  AA must have gotten confused.
+  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
+  // to a load from the base of the memset.
+
+  // If the load and store don't overlap at all, the store doesn't provide
+  // anything to the load.  In this case, they really don't alias at all, AA
+  // must have gotten confused.
+  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
+
+  if ((WriteSizeInBits & 7) | (LoadSize & 7))
+    return -1;
+  uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
+  LoadSize /= 8;
+
+  bool isAAFailure = false;
+  if (StoreOffset < LoadOffset)
+    isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset;
+  else
+    isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset;
+
+  if (isAAFailure)
+    return -1;
+
+  // If the Load isn't completely contained within the stored bits, we don't
+  // have all the bits to feed it.  We could do something crazy in the future
+  // (issue a smaller load then merge the bits in) but this seems unlikely to be
+  // valuable.
+  if (StoreOffset > LoadOffset ||
+      StoreOffset + StoreSize < LoadOffset + LoadSize)
+    return -1;
+
+  // Okay, we can do this transformation.  Return the number of bytes into the
+  // store that the load is.
+  return LoadOffset - StoreOffset;
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
+                                   StoreInst *DepSI) {
+  // Cannot handle reading from store of first-class aggregate yet.
+  if (DepSI->getValueOperand()->getType()->isStructTy() ||
+      DepSI->getValueOperand()->getType()->isArrayTy())
+    return -1;
+
+  const DataLayout &DL = DepSI->getModule()->getDataLayout();
+  Value *StorePtr = DepSI->getPointerOperand();
+  uint64_t StoreSize =
+      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
+                                        DL);
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load.  See if
+/// the other load can feed into the second load.
+int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
+                                  const DataLayout &DL) {
+  // Cannot handle reading from store of first-class aggregate yet.
+  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+    return -1;
+
+  Value *DepPtr = DepLI->getPointerOperand();
+  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+  int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
+  if (R != -1)
+    return R;
+
+  // If we have a load/load clobber an DepLI can be widened to cover this load,
+  // then we should widen it!
+  int64_t LoadOffs = 0;
+  const Value *LoadBase =
+      GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
+  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+
+  unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
+      LoadBase, LoadOffs, LoadSize, DepLI);
+  if (Size == 0)
+    return -1;
+
+  // Check non-obvious conditions enforced by MDA which we rely on for being
+  // able to materialize this potentially available value
+  assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
+  assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
+
+  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
+}
+
+int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
+                                     MemIntrinsic *MI, const DataLayout &DL) {
+  // If the mem operation is a non-constant size, we can't handle it.
+  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+  if (!SizeCst)
+    return -1;
+  uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
+
+  // If this is memset, we just need to see if the offset is valid in the size
+  // of the memset..
+  if (MI->getIntrinsicID() == Intrinsic::memset)
+    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+                                          MemSizeInBits, DL);
+
+  // If we have a memcpy/memmove, the only case we can handle is if this is a
+  // copy from constant memory.  In that case, we can read directly from the
+  // constant memory.
+  MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+  Constant *Src = dyn_cast<Constant>(MTI->getSource());
+  if (!Src)
+    return -1;
+
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
+  if (!GV || !GV->isConstant())
+    return -1;
+
+  // See if the access is within the bounds of the transfer.
+  int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+                                              MemSizeInBits, DL);
+  if (Offset == -1)
+    return Offset;
+
+  unsigned AS = Src->getType()->getPointerAddressSpace();
+  // Otherwise, see if we can constant fold a load from the constant with the
+  // offset applied as appropriate.
+  Src =
+      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+  Constant *OffsetCst =
+      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+                                       OffsetCst);
+  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
+    return Offset;
+  return -1;
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.  This means
+/// that the store provides bits used by the load but we the pointers don't
+/// mustalias.  Check this case to see if there is anything more we can do
+/// before we give up.
+Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
+                            Instruction *InsertPt, const DataLayout &DL) {
+  LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+  uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
+
+  IRBuilder<> Builder(InsertPt);
+
+  // Compute which bits of the stored value are being used by the load.  Convert
+  // to an integer type to start with.
+  if (SrcVal->getType()->getScalarType()->isPointerTy())
+    SrcVal =
+        Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
+  if (!SrcVal->getType()->isIntegerTy())
+    SrcVal =
+        Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
+
+  // Shift the bits to the least significant depending on endianness.
+  unsigned ShiftAmt;
+  if (DL.isLittleEndian())
+    ShiftAmt = Offset * 8;
+  else
+    ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
+
+  if (ShiftAmt)
+    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
+
+  if (LoadSize != StoreSize)
+    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize * 8));
+
+  return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering load.  This means
+/// that the load *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias.  Check this case to see if there is
+/// anything more we can do before we give up.
+Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
+                           Instruction *InsertPt) {
+
+  const DataLayout &DL = SrcVal->getModule()->getDataLayout();
+  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+  // widen SrcVal out to a larger load.
+  unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
+  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+  if (Offset + LoadSize > SrcValStoreSize) {
+    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
+    // If we have a load/load clobber an DepLI can be widened to cover this
+    // load, then we should widen it to the next power of 2 size big enough!
+    unsigned NewLoadSize = Offset + LoadSize;
+    if (!isPowerOf2_32(NewLoadSize))
+      NewLoadSize = NextPowerOf2(NewLoadSize);
+
+    Value *PtrVal = SrcVal->getPointerOperand();
+
+    // Insert the new load after the old load.  This ensures that subsequent
+    // memdep queries will find the new load.  We can't easily remove the old
+    // load completely because it is already in the value numbering table.
+    IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+    Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+    DestPTy =
+        PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
+    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+    NewLoad->takeName(SrcVal);
+    NewLoad->setAlignment(SrcVal->getAlignment());
+
+    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+    // Replace uses of the original load with the wider load.  On a big endian
+    // system, we need to shift down to get the relevant bits.
+    Value *RV = NewLoad;
+    if (DL.isBigEndian())
+      RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
+    RV = Builder.CreateTrunc(RV, SrcVal->getType());
+    SrcVal->replaceAllUsesWith(RV);
+
+    SrcVal = NewLoad;
+  }
+
+  return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+                              Type *LoadTy, Instruction *InsertPt,
+                              const DataLayout &DL) {
+  LLVMContext &Ctx = LoadTy->getContext();
+  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;
+
+  IRBuilder<> Builder(InsertPt);
+
+  // We know that this method is only called when the mem transfer fully
+  // provides the bits for the load.
+  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+    // independently of what the offset is.
+    Value *Val = MSI->getValue();
+    if (LoadSize != 1)
+      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize * 8));
+
+    Value *OneElt = Val;
+
+    // Splat the value out to the right number of bits.
+    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
+      // If we can double the number of bytes set, do it.
+      if (NumBytesSet * 2 <= LoadSize) {
+        Value *ShVal = Builder.CreateShl(Val, NumBytesSet * 8);
+        Val = Builder.CreateOr(Val, ShVal);
+        NumBytesSet <<= 1;
+        continue;
+      }
+
+      // Otherwise insert one byte at a time.
+      Value *ShVal = Builder.CreateShl(Val, 1 * 8);
+      Val = Builder.CreateOr(OneElt, ShVal);
+      ++NumBytesSet;
+    }
+
+    return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
+  }
+
+  // Otherwise, this is a memcpy/memmove from a constant global.
+  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+  Constant *Src = cast<Constant>(MTI->getSource());
+  unsigned AS = Src->getType()->getPointerAddressSpace();
+
+  // Otherwise, see if we can constant fold a load from the constant with the
+  // offset applied as appropriate.
+  Src =
+      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+  Constant *OffsetCst =
+      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+                                       OffsetCst);
+  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+  return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
+}
+} // namespace VNCoercion
+} // namespace llvm
