diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6295,6 +6295,39 @@ return Constant::getNullValue(ReturnType); break; } + case Intrinsic::ptrmask: { + // Fail loudly if ptrmask ever gains support for vector operands. + // TODO: If vector types are supported the logic that checks if the mask is + // useless should be updated to use generic constants. + assert(!Op0->getType()->isVectorTy() && !Op1->getType()->isVectorTy() && + "These simplifications where written at a time when ptrmask did not " + "support vector types and may not work for vectors"); + if (auto R = trySimplifyingAndLikeUsingOp(Op1, Q, Op0->getType())) + return R; + + if (auto R = trySimplifyingAndLikeUsingOp(Op0, Q, Op0->getType())) + return R; + + if (Op1->getType()->getScalarSizeInBits() == + Q.DL.getPointerTypeSizeInBits(Op0->getType())) { + if (match(Op1, m_PtrToInt(m_Specific(Op0)))) + return Op0; + + if (match(Op1, m_AllOnes())) + return Op0; + + const APInt *C; + if (match(Op1, m_APInt(C))) { + KnownBits PtrKnown = + computeKnownBits(Op0, Q.DL, /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT); + // See if we are only masking off bits we know are already zero due to + // alignment. 
+ if ((*C | PtrKnown.Zero).isAllOnes()) + return Op0; + } + } + break; + } case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::umax: diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll @@ -211,8 +211,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffff00000000(ptr addrspace(3) %src.ptr) { ; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffff00000000( -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 0) -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1 +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) null, align 1 ; CHECK-NEXT: ret i8 [[LOAD]] ; %cast = addrspacecast ptr addrspace(3) %src.ptr to ptr @@ -320,8 +319,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(ptr addrspace(3) %src.ptr) { ; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff( -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 -1) -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1 +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR:%.*]], align 1 ; CHECK-NEXT: ret i8 [[LOAD]] ; %cast = addrspacecast ptr addrspace(3) %src.ptr to ptr @@ -333,7 +331,7 @@ ; Make sure non-constant masks can also be handled. 
define i8 @ptrmask_cast_local_to_flat_load_range_mask(ptr addrspace(3) %src.ptr, ptr addrspace(1) %mask.ptr) { ; CHECK-LABEL: @ptrmask_cast_local_to_flat_load_range_mask( -; CHECK-NEXT: [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range !0 +; CHECK-NEXT: [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[LOAD_MASK]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP2]], align 1 diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll --- a/llvm/test/Transforms/InstCombine/align-addr.ll +++ b/llvm/test/Transforms/InstCombine/align-addr.ll @@ -186,8 +186,7 @@ ; TODO: Should be able to drop the ptrmask define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) { ; CHECK-LABEL: @ptrmask_align8_ptr_align8( -; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8) -; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 8 +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 8 ; CHECK-NEXT: ret <16 x i8> [[LOAD]] ; %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8) @@ -199,8 +198,7 @@ ; TODO: Should be able to drop the ptrmask define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) { ; CHECK-LABEL: @ptrmask_align8_ptr_align16( -; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8) -; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 16 +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 16 ; CHECK-NEXT: ret <16 x i8> [[LOAD]] ; %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8) diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll --- 
a/llvm/test/Transforms/InstSimplify/ptrmask.ll +++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll @@ -7,8 +7,7 @@ define ptr @ptrmask_simplify_poison_mask(ptr %p) { ; CHECK-LABEL: define ptr @ptrmask_simplify_poison_mask ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 poison) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr poison ; %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 poison) ret ptr %r @@ -17,8 +16,7 @@ define ptr @ptrmask_simplify_undef_mask(ptr %p) { ; CHECK-LABEL: define ptr @ptrmask_simplify_undef_mask ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[P]], i32 undef) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr null ; %r = call ptr @llvm.ptrmask.p0.i32(ptr %p, i32 undef) ret ptr %r @@ -27,8 +25,7 @@ define ptr @ptrmask_simplify_0_mask(ptr %p) { ; CHECK-LABEL: define ptr @ptrmask_simplify_0_mask ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 0) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr null ; %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 0) ret ptr %r @@ -37,8 +34,7 @@ define ptr @ptrmask_simplify_1s_mask(ptr %p) { ; CHECK-LABEL: define ptr @ptrmask_simplify_1s_mask ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -1) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr [[P]] ; %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -1) ret ptr %r @@ -57,8 +53,7 @@ define ptr @ptrmask_simplify_poison_ptr(i64 %m) { ; CHECK-LABEL: define ptr @ptrmask_simplify_poison_ptr ; CHECK-SAME: (i64 [[M:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr poison, i64 [[M]]) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr poison ; %r = call ptr @llvm.ptrmask.p0.i64(ptr poison, i64 %m) ret ptr %r @@ -67,8 +62,7 @@ define ptr @ptrmask_simplify_undef_ptr(i32 %m) { ; CHECK-LABEL: define ptr @ptrmask_simplify_undef_ptr ; 
CHECK-SAME: (i32 [[M:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr undef, i32 [[M]]) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr null ; %r = call ptr @llvm.ptrmask.p0.i32(ptr undef, i32 %m) ret ptr %r @@ -77,8 +71,7 @@ define ptr @ptrmask_simplify_null_ptr(i64 %m) { ; CHECK-LABEL: define ptr @ptrmask_simplify_null_ptr ; CHECK-SAME: (i64 [[M:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr null, i64 [[M]]) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr null ; %r = call ptr @llvm.ptrmask.p0.i64(ptr null, i64 %m) ret ptr %r @@ -87,9 +80,7 @@ define ptr @ptrmask_simplify_ptrmask(ptr %p) { ; CHECK-LABEL: define ptr @ptrmask_simplify_ptrmask ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[M:%.*]] = ptrtoint ptr [[P]] to i64 -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 [[M]]) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr [[P]] ; %m = ptrtoint ptr %p to i64 %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m) @@ -111,8 +102,7 @@ define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) { ; CHECK-LABEL: define ptr @ptrmask_simplify_aligned_unused ; CHECK-SAME: (ptr align 64 [[P:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr [[P]] ; %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64) ret ptr %r @@ -133,8 +123,7 @@ ; CHECK-SAME: (ptr [[P:%.*]]) { ; CHECK-NEXT: [[PM0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64) ; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 32 -; CHECK-NEXT: [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32) -; CHECK-NEXT: ret ptr [[R]] +; CHECK-NEXT: ret ptr [[PGEP]] ; %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64) %pgep = getelementptr i8, ptr %pm0, i64 32