diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -271,6 +271,31 @@ (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy)) Res = 0; + // Don't combine an inttoptr followed by a bitcast to another pointer type if + // the intermediate pointer has multiple uses. Such a combine is very + // unfriendly to later passes like ScalarEvolutionAnalysis and + // LoadStoreVectorizer. For example, this may change the IR from: + // + // %p1 = inttoptr %addr to i32 * + // %i = load i32, i32 * %p1 + // %p2 = bitcast i32 * %p1 to float * + // %p3 = getelementptr float, float * %p2, i64 1 + // %f = load float, float * %p3 + // + // into: + // + // %p1 = inttoptr %addr to i32 * + // %p2 = inttoptr %addr to float * + // %i = load i32, i32 * %p1 + // %p3 = getelementptr float, float * %p2, i64 1 + // %f = load float, float * %p3 + // + // This causes the above-mentioned passes to fail to reason that the two + // pointers are consecutive, and thus to fail to vectorize the two loads. 
+ if (firstOp == Instruction::IntToPtr && Res == Instruction::IntToPtr && + !CI1->hasOneUse()) + Res = 0; + return Instruction::CastOps(Res); } diff --git a/llvm/test/Transforms/InstCombine/inttoptr_followed_by_bitcast.ll b/llvm/test/Transforms/InstCombine/inttoptr_followed_by_bitcast.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/inttoptr_followed_by_bitcast.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +define i32 @inttoptr_followed_by_bitcast(i32 %i0, i32 %i1, float %i2) { +; CHECK-LABEL: @inttoptr_followed_by_bitcast( +; CHECK: [[PTR1:%.*]] = inttoptr i64 [[I:%.*]] to i32 addrspace(1)* +; CHECK: [[PTR2:%.*]] = bitcast i32 addrspace(1)* [[PTR1]] to float addrspace(1)* +; CHECK: [[F:%.*]] = load float, float addrspace(1)* [[PTR2]], align 4 + + %i3 = zext i32 %i0 to i64 + %i4 = shl i32 %i1, 3 + %i5 = and i32 %i4, -64 + %i6 = zext i32 %i5 to i64 + %i7 = add nuw nsw i64 %i3, %i6 + + %ip = inttoptr i64 %i7 to i32 addrspace(1)* + %fp = bitcast i32 addrspace(1)* %ip to float addrspace(1)* + %if0 = load float, float addrspace(1)* %fp, align 4 + + %ip2 = getelementptr i32, i32 addrspace(1)* %ip, i64 1 + %i8 = load i32, i32 addrspace(1)* %ip2, align 4 + + %mul0 = fmul reassoc nnan nsz arcp contract afn float %if0, %i2 + + %ip3 = bitcast i32 addrspace(1)* %ip to float addrspace(1)* + store float %mul0, float addrspace(1)* %ip3, align 4 + + ret i32 %i8 +}