Index: lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -38,6 +38,7 @@
 #include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
+#include "AMDGPU.h"
 #include <cassert>
 #include <iterator>
@@ -53,6 +54,7 @@
   DivergenceAnalysis *DA = nullptr;
   Module *Mod = nullptr;
   bool HasUnsafeFPMath = false;
+  AMDGPUAS AMDGPUASI;
 
   /// \brief Copies exact/nsw/nuw flags (if any) from binary operation \p I to
   /// binary operation \p V.
@@ -133,6 +135,7 @@
   bool visitInstruction(Instruction &I) { return false; }
 
   bool visitBinaryOperator(BinaryOperator &I);
+  bool visitLoadInst(LoadInst &I);
   bool visitICmpInst(ICmpInst &I);
   bool visitSelectInst(SelectInst &I);
 
@@ -441,6 +444,50 @@
   return Changed;
 }
 
+/// Widen a uniform scalar load from the constant address space into a load of
+/// the type returned by getI32Ty(), followed by a truncate back to the
+/// original type.
+///
+/// The rewrite is only attempted for simple (non-volatile, non-atomic) loads
+/// whose alignment is at least 4 bytes: the widened access reads bytes beyond
+/// the original value, so an under-aligned load could touch memory outside
+/// the underlying object.
+/// NOTE(review): this assumes the promoted type is exactly 32 bits wide, as
+/// the name getI32Ty suggests -- confirm against that helper.
+///
+/// \returns true if \p I was replaced and erased, false otherwise.
+bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
+  Type *Ty = I.getType();
+
+  // Vector loads are left alone; among scalars, only the types accepted by
+  // needsPromotionToI32() are candidates.
+  if (Ty->isVectorTy() || !needsPromotionToI32(Ty))
+    return false;
+
+  if (I.getPointerAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
+    return false;
+
+  // Never widen volatile/atomic accesses, and require 4-byte alignment so the
+  // extra bytes read by the wider load stay inside the same aligned dword.
+  if (!I.isSimple() || I.getAlignment() < 4)
+    return false;
+
+  if (!DA->isUniform(&I))
+    return false;
+
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+  Type *I32Ty = getI32Ty(Builder, Ty);
+  Type *PtrTy = PointerType::get(I32Ty, I.getPointerAddressSpace());
+  Value *WidePtr = Builder.CreateBitCast(I.getPointerOperand(), PtrTy);
+  // Pass the loaded type explicitly. CreateLoad(Ptr, PtrTy) would select the
+  // (Value *, bool isVolatile) overload and silently emit a volatile load.
+  Value *WideLoad = Builder.CreateLoad(I32Ty, WidePtr);
+  Value *Narrow = Builder.CreateTrunc(WideLoad, Ty);
+
+  I.replaceAllUsesWith(Narrow);
+  I.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
   bool Changed = false;
 