Index: lib/IR/ConstantFold.cpp =================================================================== --- lib/IR/ConstantFold.cpp +++ lib/IR/ConstantFold.cpp @@ -529,7 +529,10 @@ // Try hard to fold cast of cast because they are often eliminable. if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy)) return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy); - } else if (CE->getOpcode() == Instruction::GetElementPtr) { + } else if (CE->getOpcode() == Instruction::GetElementPtr && + // Do not fold addrspacecast (gep 0, .., 0). It might make the + // addrspacecast uncanonicalized. + opc != Instruction::AddrSpaceCast) { // If all of the indexes in the GEP are null values, there is no pointer // adjustment going on. We might as well cast the source pointer. bool isAllNull = true; Index: lib/IR/Constants.cpp =================================================================== --- lib/IR/Constants.cpp +++ lib/IR/Constants.cpp @@ -1698,6 +1698,17 @@ assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) && "Invalid constantexpr addrspacecast!"); + // Canonicalize addrspacecasts between different pointer types by first + // bitcasting the pointer type and then converting the address space. + PointerType *SrcScalarTy = cast(C->getType()->getScalarType()); + PointerType *DstScalarTy = cast(DstTy->getScalarType()); + Type *DstElemTy = DstScalarTy->getElementType(); + if (SrcScalarTy->getElementType() != DstElemTy) { + Type *MidTy = PointerType::get(DstElemTy, SrcScalarTy->getAddressSpace()); + if (DstTy->isVectorTy()) // Handle vectors of pointers. 
+ MidTy = VectorType::get(MidTy, DstTy->getVectorNumElements()); + C = getBitCast(C, MidTy); + } return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy); } Index: test/Assembler/addrspacecast-alias.ll =================================================================== --- test/Assembler/addrspacecast-alias.ll +++ test/Assembler/addrspacecast-alias.ll @@ -4,4 +4,4 @@ @i = internal addrspace(1) global i8 42 @ia = alias internal addrspacecast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(3)*) -; CHECK: @ia = alias internal addrspacecast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(3)*) +; CHECK: @ia = alias internal addrspacecast (i8 addrspace(2)* addrspace(1)* bitcast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(1)*) to i8 addrspace(2)* addrspace(3)*) Index: test/CodeGen/NVPTX/access-non-generic.ll =================================================================== --- test/CodeGen/NVPTX/access-non-generic.ll +++ test/CodeGen/NVPTX/access-non-generic.ll @@ -74,13 +74,13 @@ ret float %sum5 } -; Verifies nvptx-favor-non-generic keeps addrspacecasts between pointers of -; different element types. +; When hoisting an addrspacecast between different pointer types, replace the +; addrspacecast with a bitcast. 
define i32 @ld_int_from_float() { ; IR-LABEL: @ld_int_from_float -; IR: addrspacecast +; IR: bitcast ; PTX-LABEL: ld_int_from_float( -; PTX: cvta.shared.u{{(32|64)}} +; PTX: ld.shared.u{{(32|64)}} %1 = load i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4 ret i32 %1 } Index: test/Other/constant-fold-gep.ll =================================================================== --- test/Other/constant-fold-gep.ll +++ test/Other/constant-fold-gep.ll @@ -457,7 +457,7 @@ %p = getelementptr inbounds i8* addrspacecast ([4 x i8] addrspace(12)* @p12 to i8*), i32 2 ret i8* %p -; OPT: ret i8* getelementptr (i8* addrspacecast ([4 x i8] addrspace(12)* @p12 to i8*), i32 2) +; OPT: ret i8* getelementptr (i8* addrspacecast (i8 addrspace(12)* getelementptr inbounds ([4 x i8] addrspace(12)* @p12, i32 0, i32 0) to i8*), i32 2) } define i8* @same_addrspace() nounwind noinline { Index: test/Transforms/InstCombine/constant-fold-address-space-pointer.ll =================================================================== --- test/Transforms/InstCombine/constant-fold-address-space-pointer.ll +++ test/Transforms/InstCombine/constant-fold-address-space-pointer.ll @@ -230,3 +230,13 @@ %b = load i32 addrspace(1)* %a, align 4 ret i32 %b } + +@shared_mem = external addrspace(3) global [0 x i8] + +define float @canonicalize_addrspacecast(i32 %i) { +; CHECK-LABEL: @canonicalize_addrspacecast +; CHECK-NEXT: getelementptr inbounds float* addrspacecast (float addrspace(3)* bitcast ([0 x i8] addrspace(3)* @shared_mem to float addrspace(3)*) to float*), i32 %i + %p = getelementptr inbounds float* addrspacecast ([0 x i8] addrspace(3)* @shared_mem to float*), i32 %i + %v = load float* %p + ret float %v +}