Changeset View
Changeset View
Standalone View
Standalone View
lib/Target/NVPTX/NVPTXLowerAlloca.cpp
Show All 17 Lines | |||||
// %A = alloca i32 | // %A = alloca i32 | ||||
// %Local = addrspacecast i32* %A to i32 addrspace(5)* | // %Local = addrspacecast i32* %A to i32 addrspace(5)* | ||||
// %Generic = addrspacecast i32 addrspace(5)* %Local to i32* | // %Generic = addrspacecast i32 addrspace(5)* %Local to i32* | ||||
// store i32 0, i32* %Generic ; emits st.local.u32 | // store i32 0, i32* %Generic ; emits st.local.u32 | ||||
// | // | ||||
// And we will rely on NVPTXInferAddressSpaces to combine the last two | // And we will rely on NVPTXInferAddressSpaces to combine the last two | ||||
// instructions. | // instructions. | ||||
// | // | ||||
// In the case of OpenMP shared variables, perform the same transformation as | |||||
// for local variables but using the shared address space. | |||||
// | |||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
#include "NVPTX.h" | #include "NVPTX.h" | ||||
#include "NVPTXUtilities.h" | #include "NVPTXUtilities.h" | ||||
#include "llvm/IR/Function.h" | #include "llvm/IR/Function.h" | ||||
#include "llvm/IR/Instructions.h" | #include "llvm/IR/Instructions.h" | ||||
#include "llvm/IR/IntrinsicInst.h" | #include "llvm/IR/IntrinsicInst.h" | ||||
#include "llvm/IR/Module.h" | #include "llvm/IR/Module.h" | ||||
Show All 32 Lines | if (skipBasicBlock(BB)) | ||||
return false; | return false; | ||||
bool Changed = false; | bool Changed = false; | ||||
for (auto &I : BB) { | for (auto &I : BB) { | ||||
if (auto allocaInst = dyn_cast<AllocaInst>(&I)) { | if (auto allocaInst = dyn_cast<AllocaInst>(&I)) { | ||||
Changed = true; | Changed = true; | ||||
auto PTy = dyn_cast<PointerType>(allocaInst->getType()); | auto PTy = dyn_cast<PointerType>(allocaInst->getType()); | ||||
auto ETy = PTy->getElementType(); | auto ETy = PTy->getElementType(); | ||||
auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL); | |||||
auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, ""); | // In the CUDA case, this is always a local address. | ||||
auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC); | // In offloading to a device using OpenMP this may be an | ||||
// address allocated in the shared memory of the device. | |||||
auto *AddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL); | |||||
bool PtrIsStored = ptrIsStored(allocaInst); | |||||
bool RequiresSharedMemory = | |||||
BB.getParent()->hasFnAttribute("has-nvptx-shared-depot"); | |||||
// Handle shared args: currently shared args are declared as | |||||
// an alloca in LLVM-IR code generation and lowered to | |||||
// shared memory. | |||||
if (PtrIsStored && RequiresSharedMemory) | |||||
AddrTy = PointerType::get(ETy, ADDRESS_SPACE_SHARED); | |||||
auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, AddrTy, ""); | |||||
auto *GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC); | |||||
auto NewASCToGeneric = new AddrSpaceCastInst(NewASCToLocal, | auto NewASCToGeneric = new AddrSpaceCastInst(NewASCToLocal, | ||||
GenericAddrTy, ""); | GenericAddrTy, ""); | ||||
NewASCToLocal->insertAfter(allocaInst); | NewASCToLocal->insertAfter(allocaInst); | ||||
NewASCToGeneric->insertAfter(NewASCToLocal); | NewASCToGeneric->insertAfter(NewASCToLocal); | ||||
// If a value is shared then the additional conversions are required for | |||||
// correctness. | |||||
if (PtrIsStored && RequiresSharedMemory) { | |||||
allocaInst->replaceAllUsesWith(NewASCToGeneric); | |||||
NewASCToLocal->setOperand(0, allocaInst); | |||||
continue; | |||||
} | |||||
for (Value::use_iterator UI = allocaInst->use_begin(), | for (Value::use_iterator UI = allocaInst->use_begin(), | ||||
UE = allocaInst->use_end(); | UE = allocaInst->use_end(); | ||||
UI != UE; ) { | UI != UE; ) { | ||||
// Check Load, Store, GEP, and BitCast Uses on alloca and make them | // Check Load, Store, GEP, and BitCast Uses on alloca and make them | ||||
// use the converted generic address, in order to expose non-generic | // use the converted generic address, in order to expose non-generic | ||||
// addrspacecast to NVPTXInferAddressSpaces. For other types | // addrspacecast to NVPTXInferAddressSpaces. For other types | ||||
// of instructions this is unnecessary and may introduce redundant | // of instructions this is unnecessary and may introduce redundant | ||||
// address cast. | // address cast. | ||||
const auto &AllocaUse = *UI++; | const auto &AllocaUse = *UI++; | ||||
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser()); | auto LI = dyn_cast<LoadInst>(AllocaUse.getUser()); | ||||
if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) { | if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) { | ||||
LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric); | LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric); | ||||
continue; | continue; | ||||
} | } | ||||
auto SI = dyn_cast<StoreInst>(AllocaUse.getUser()); | auto SI = dyn_cast<StoreInst>(AllocaUse.getUser()); | ||||
if (SI && SI->getPointerOperand() == allocaInst && !SI->isVolatile()) { | if (SI && !SI->isVolatile()){ | ||||
SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric); | unsigned Idx; | ||||
if (SI->getPointerOperand() == allocaInst) | |||||
Idx = SI->getPointerOperandIndex(); | |||||
else if (SI->getValueOperand() == allocaInst) | |||||
Idx = 0; | |||||
else | |||||
continue; | continue; | ||||
SI->setOperand(Idx, NewASCToGeneric); | |||||
} | } | ||||
auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser()); | auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser()); | ||||
if (GI && GI->getPointerOperand() == allocaInst) { | if (GI && GI->getPointerOperand() == allocaInst) { | ||||
GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric); | GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric); | ||||
continue; | continue; | ||||
} | } | ||||
auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser()); | auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser()); | ||||
if (BI && BI->getOperand(0) == allocaInst) { | if (BI && BI->getOperand(0) == allocaInst) { | ||||
Show All 12 Lines |