Index: lib/Analysis/ConstantFolding.cpp
===================================================================
--- lib/Analysis/ConstantFolding.cpp
+++ lib/Analysis/ConstantFolding.cpp
@@ -1137,6 +1137,19 @@
           return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
         }
       }
+
+      // Fold a compare whose first operand is an addrspacecast by comparing
+      // the cast's source pointer against null of the source pointer type:
+      //   icmp (addrspacecast x), null -> icmp x, null
+      // The second operand is not read here, so this is only correct when an
+      // enclosing check has already established that it is null.
+      // NOTE(review): this also assumes the cast maps null to null; the
+      // LangRef lets addrspacecast change the pointer value -- confirm.
+      if (CE0->getOpcode() == Instruction::AddrSpaceCast) {
+        Constant *C = CE0->getOperand(0);
+        Constant *Null = Constant::getNullValue(C->getType());
+        return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
+      }
     }
 
     if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
Index: test/Transforms/InstCombine/OptimizeAddrspaceCast.ll
===================================================================
--- test/Transforms/InstCombine/OptimizeAddrspaceCast.ll
+++ test/Transforms/InstCombine/OptimizeAddrspaceCast.ll
@@ -0,0 +1,28 @@
+; Test to make sure that the addrspacecast is constant folded correctly
+; during the InstCombine pass.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+
+@testKernel.some_value = internal addrspace(3) global i32 undef, align 4
+
+; Function Attrs: nounwind
+define void @testKernel(i32 addrspace(1)* %results) nounwind {
+  %1 = alloca i32 addrspace(1)*, align 8
+  %ptr = alloca i32 addrspace(4)*, align 8
+  %lptr = alloca i32 addrspace(3)*, align 8
+  store i32 addrspace(1)* %results, i32 addrspace(1)** %1, align 8
+  store i32 7, i32 addrspace(3)* @testKernel.some_value, align 4
+  store i32 addrspace(4)* null, i32 addrspace(4)** %ptr, align 8 ; %ptr holds a null addrspace(4) pointer
+  store i32 addrspace(3)* @testKernel.some_value, i32 addrspace(3)** %lptr, align 8 ; %lptr holds the addrspace(3) global's address
+  %2 = load i32 addrspace(4)*, i32 addrspace(4)** %ptr, align 8
+  %3 = load i32 addrspace(3)*, i32 addrspace(3)** %lptr, align 8
+  %4 = addrspacecast i32 addrspace(3)* %3 to i32 addrspace(4)* ; cast the value reloaded from %lptr to addrspace(4)
+  %5 = icmp ne i32 addrspace(4)* %2, %4 ; compare the value reloaded from %ptr with the cast result
+  %6 = zext i1 %5 to i32 ; widened compare result, stored below through the pointer reloaded from %1
+  %7 = load i32 addrspace(1)*, i32 addrspace(1)** %1, align 8
+  %8 = getelementptr inbounds i32, i32 addrspace(1)* %7, i64 0
+  store i32 %6, i32 addrspace(1)* %8, align 4
+; CHECK: store i32 1, i32 addrspace(1)* %results
+  ret void
+}