Index: llvm/trunk/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ llvm/trunk/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -45,8 +45,6 @@ void getAnalysisUsage(AnalysisUsage &AU) const override {} private: - Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV, - IRBuilder<> &Builder); Value *remapConstant(Module *M, Function *F, Constant *C, IRBuilder<> &Builder); Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F, @@ -156,46 +154,6 @@ return true; } -Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F, - GlobalVariable *GV, - IRBuilder<> &Builder) { - PointerType *GVType = GV->getType(); - Value *CVTA = nullptr; - - // See if the address space conversion requires the operand to be bitcast - // to i8 addrspace(n)* first. - EVT ExtendedGVType = EVT::getEVT(GV->getValueType(), true); - if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) { - // A bitcast to i8 addrspace(n)* on the operand is needed. - LLVMContext &Context = M->getContext(); - unsigned int AddrSpace = GVType->getAddressSpace(); - Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace); - CVTA = Builder.CreateBitCast(GV, DestTy, "cvta"); - // Insert the address space conversion. - Type *ResultType = - PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC); - Function *CVTAFunction = Intrinsic::getDeclaration( - M, Intrinsic::nvvm_ptr_global_to_gen, {ResultType, DestTy}); - CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta"); - // Another bitcast from i8 * to <the element type of GVType> * is - // required. - DestTy = - PointerType::get(GV->getValueType(), llvm::ADDRESS_SPACE_GENERIC); - CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta"); - } else { - // A simple CVTA is enough. 
- SmallVector<Type *, 2> ParamTypes; - ParamTypes.push_back(PointerType::get(GV->getValueType(), - llvm::ADDRESS_SPACE_GENERIC)); - ParamTypes.push_back(GVType); - Function *CVTAFunction = Intrinsic::getDeclaration( - M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes); - CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta"); - } - - return CVTA; -} - Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C, IRBuilder<> &Builder) { // If the constant C has been converted already in the given function F, just @@ -207,17 +165,17 @@ Value *NewValue = C; if (isa<GlobalVariable>(C)) { - // If the constant C is a global variable and is found in GVMap, generate a - // set set of instructions that convert the clone of C with the global - // address space specifier to a generic pointer. - // The constant C cannot be used here, as it will be erased from the - // module eventually. And the clone of C with the global address space - // specifier cannot be used here either, as it will affect the types of - // other instructions in the function. Hence, this address space conversion - // is required. + // If the constant C is a global variable and is found in GVMap, substitute + // + // addrspacecast GVMap[C] to addrspace(0) + // + // for our use of C. 
GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C)); if (I != GVMap.end()) { - NewValue = getOrInsertCVTA(M, F, I->second, Builder); + GlobalVariable *GV = I->second; + NewValue = Builder.CreateAddrSpaceCast( + GV, + PointerType::get(GV->getValueType(), llvm::ADDRESS_SPACE_GENERIC)); } } else if (isa<ConstantAggregate>(C)) { // If any element in the constant vector or aggregate C is or uses a global Index: llvm/trunk/test/CodeGen/NVPTX/access-non-generic.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/access-non-generic.ll +++ llvm/trunk/test/CodeGen/NVPTX/access-non-generic.ll @@ -5,13 +5,6 @@ @array = internal addrspace(3) global [10 x float] zeroinitializer, align 4 @scalar = internal addrspace(3) global float 0.000000e+00, align 4 -@generic_scalar = internal global float 0.000000e+00, align 4 - -define float @ld_from_shared() { - %1 = addrspacecast float* @generic_scalar to float addrspace(3)* - %2 = load float, float addrspace(3)* %1 - ret float %2 -} ; Verifies nvptx-favor-non-generic correctly optimizes generic address space ; usage to non-generic address space usage for the patterns we claim to handle: Index: llvm/trunk/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll +++ llvm/trunk/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll @@ -16,11 +16,11 @@ ;CHECK-LABEL: @func() ;CHECK-SAME: !dbg [[FUNCNODE:![0-9]+]] entry: -; References to the variables must be converted back to generic address space via llvm intrinsic call -; CHECK-DAG: call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8({{.*}} addrspace(1)* @.str +; References to the variables must be converted back to generic address space. 
+; CHECK-DAG: addrspacecast ([4 x i8] addrspace(1)* @.str to [4 x i8]*) %0 = load i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), align 1 call void @extfunc(i8 signext %0) -; CHECK-DAG: call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)* @static_var +; CHECK-DAG: addrspacecast (i8 addrspace(1)* @static_var to i8*) %1 = load i8, i8* @static_var, align 1 call void @extfunc(i8 signext %1) ret void Index: llvm/trunk/test/CodeGen/NVPTX/generic-to-nvvm.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/generic-to-nvvm.ll +++ llvm/trunk/test/CodeGen/NVPTX/generic-to-nvvm.ll @@ -7,14 +7,17 @@ ; CHECK: .global .align 4 .u32 myglobal = 42; @myglobal = internal global i32 42, align 4 -; CHECK: .global .align 4 .u32 myconst = 42; -@myconst = internal constant i32 42, align 4 +; CHECK: .global .align 4 .u32 myconst = 420; +@myconst = internal constant i32 420, align 4 define void @foo(i32* %a, i32* %b) { -; CHECK: cvta.global.u32 +; Expect one load -- @myconst isn't loaded from, because we know its value +; statically. +; CHECK: ld.global.u32 +; CHECK: st.global.u32 +; CHECK: st.global.u32 %ld1 = load i32, i32* @myglobal -; CHECK: cvta.global.u32 %ld2 = load i32, i32* @myconst store i32 %ld1, i32* %a store i32 %ld2, i32* %b