diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2005,6 +2005,79 @@
   return true;
 }
 
+// Try to emit llvm.ptrmask instead of inttoptr(and(ptrtoint(P), C)).
+//
+// \p E is the operand of an integer-to-pointer cast and \p DestTy is the
+// pointer type that cast produces. The rewrite only fires when E has the
+// shape `(integer)P & C`, where C is an integer constant expression that
+// keeps every pointer bit the data layout treats as meaningful.
+//
+// Returns the masked pointer, bitcast to the LLVM type of \p DestTy when the
+// types differ, or nullptr when the pattern does not match. No IR is emitted
+// on the nullptr path, so the caller can fall back to the generic lowering.
+static llvm::Value *tryToGeneratePtrMask(Expr *E, QualType &DestTy,
+                                         CodeGenFunction &CGF,
+                                         ScalarExprEmitter *Emitter,
+                                         CGBuilderTy &Builder) {
+  auto *BO = dyn_cast<BinaryOperator>(E->IgnoreParens());
+  if (!BO || BO->getOpcode() != BinaryOperatorKind::BO_And)
+    return nullptr;
+
+  auto *CE = dyn_cast<CastExpr>(BO->getLHS()->IgnoreParens());
+  if (!CE || CE->getCastKind() != CK_PointerToIntegral)
+    return nullptr;
+
+  // The operand of a pointer-to-integer cast need not have a pointee type
+  // (std::nullptr_t does not); without one there is no address space to
+  // query, so leave such casts to the generic lowering.
+  QualType PointeeTy = CE->getSubExpr()->getType()->getPointeeType();
+  if (PointeeTy.isNull())
+    return nullptr;
+
+  // The bit arithmetic below is done in 64 bits. Give up on data layouts
+  // whose widest pointer does not fit; this also keeps the shift amount
+  // below in range.
+  ASTContext &Ctx = CGF.getContext();
+  auto &DL = CGF.CGM.getDataLayout();
+  const unsigned MaxPtrBits = DL.getMaxPointerSizeInBits();
+  if (MaxPtrBits == 0 || MaxPtrBits > 64)
+    return nullptr;
+
+  // Check if the RHS of the AND is a constant that only masks away high and/or
+  // low bits that must be zero when accessing it, because of ABI alignment
+  // requirements or a restriction of the meaningful bits of a pointer
+  // through the data layout.
+  //
+  // DataLayout is indexed by target address space, so translate the
+  // language-level address space of the pointee rather than casting it.
+  const unsigned AddrSpace =
+      Ctx.getTargetAddressSpace(PointeeTy.getAddressSpace());
+  const uint64_t AllOnes = ~uint64_t(0);
+  const uint64_t MeaningfulPtrBits =
+      (AllOnes >> (64 - MaxPtrBits)) &
+      (AllOnes << llvm::Log2_64(DL.getPointerABIAlignment(AddrSpace)));
+
+  // APInt::getZExtValue() asserts on values needing more than 64 bits (e.g.
+  // an __int128 mask), so reject those before reading the constant.
+  llvm::APSInt CV;
+  if (!BO->getRHS()->isIntegerConstantExpr(CV, Ctx) ||
+      CV.getActiveBits() > 64 ||
+      (CV.getZExtValue() & MeaningfulPtrBits) != MeaningfulPtrBits)
+    return nullptr;
+
+  // We can generate a ptrmask call. Now visit the relevant operands and emit
+  // the call.
+  Value *Ptr = Emitter->Visit(const_cast<Expr *>(CE->getSubExpr()));
+  Value *Const = Emitter->Visit(const_cast<Expr *>(BO->getRHS()));
+  auto *PtrMask = Builder.CreateIntrinsic(
+      llvm::Intrinsic::ptrmask,
+      {Ptr->getType(), Ptr->getType(), Const->getType()}, {Ptr, Const});
+  auto DestLLVMTy = Emitter->ConvertType(DestTy);
+  if (DestLLVMTy == Ptr->getType())
+    return PtrMask;
+  return Builder.CreateBitCast(PtrMask, DestLLVMTy);
+}
+
 // VisitCastExpr - Emit code for an explicit or implicit cast.  Implicit casts
 // have to handle a more broad range of conversions than explicit casts, as they
 // handle things like function to ptr-to-function decay etc.
@@ -2212,8 +2285,12 @@
     return Visit(const_cast<Expr*>(E));
 
   case CK_IntegralToPointer: {
-    Value *Src = Visit(const_cast<Expr*>(E));
+    // Try to generate llvm.ptrmask instead of inttoptr(and(ptrtoint, C)) if
+    // possible.
+    if (Value *PtrMask = tryToGeneratePtrMask(E, DestTy, CGF, this, Builder))
+      return PtrMask;
 
+    Value *Src = Visit(const_cast<Expr *>(E));
     // First, convert to the correct width so that we control the kind of
     // extension.
     auto DestLLVMTy = ConvertType(DestTy);
@@ -3248,11 +3325,10 @@
   //   The index is not pointer-sized.
   //   The pointer type is not byte-sized.
   //
-  if (BinaryOperator::isNullPointerArithmeticExtension(CGF.getContext(),
-                                                       op.Opcode,
-                                                       expr->getLHS(),
-                                                       expr->getRHS()))
+  if (BinaryOperator::isNullPointerArithmeticExtension(
+          CGF.getContext(), op.Opcode, expr->getLHS(), expr->getRHS())) {
     return CGF.Builder.CreateIntToPtr(index, pointer->getType());
+  }
 
   if (width != DL.getTypeSizeInBits(PtrTy)) {
     // Zero-extend or sign-extend the pointer value according to
diff --git a/clang/test/CodeGen/x86-ptrmask.c b/clang/test/CodeGen/x86-ptrmask.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/x86-ptrmask.c
@@ -0,0 +1,59 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64-macos-apple -emit-llvm %s -o - | FileCheck %s
+
+#include <stdint.h>
+
+// CHECK-LABEL: define i8* @strip_2_low_bits(i8* %p) #0 {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    %p.addr = alloca i8*, align 8
+// CHECK-NEXT:    store i8* %p, i8** %p.addr, align 8
+// CHECK-NEXT:    %0 = load i8*, i8** %p.addr, align 8
+// CHECK-NEXT:    %1 = call i8* @llvm.ptrmask.p0i8.p0i8.i64(i8* %0, i64 -4)
+// CHECK-NEXT:    ret i8* %1
+// CHECK-NEXT:  }
+char *strip_2_low_bits(char *p) {
+  return (char *)(((intptr_t)p) & ~((1 << 2) - 1));
+}
+
+// CHECK-LABEL: define i8* @strip_3_low_bits(i8* %p) #0 {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    %p.addr = alloca i8*, align 8
+// CHECK-NEXT:    store i8* %p, i8** %p.addr, align 8
+// CHECK-NEXT:    %0 = load i8*, i8** %p.addr, align 8
+// CHECK-NEXT:    %1 = call i8* @llvm.ptrmask.p0i8.p0i8.i64(i8* %0, i64 -8)
+// CHECK-NEXT:    ret i8* %1
+// CHECK-NEXT:  }
+char *strip_3_low_bits(char *p) {
+  return (char *)(((intptr_t)p) & ~((1 << 3) - 1));
+}
+
+// CHECK-LABEL: define i8* @strip_3_low_bits_const_add(i8* %p) #0 {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    %p.addr = alloca i8*, align 8
+// CHECK-NEXT:    store i8* %p, i8** %p.addr, align 8
+// CHECK-NEXT:    %0 = load i8*, i8** %p.addr, align 8
+// CHECK-NEXT:    %1 = call i8* @llvm.ptrmask.p0i8.p0i8.i64(i8* %0, i64 -8)
+// CHECK-NEXT:    ret i8* %1
+// CHECK-NEXT:  }
+char *strip_3_low_bits_const_add(char *p) {
+  return (char *)(((intptr_t)(p)) & ~((1 << (1 + 2)) - 1));
+}
+
+// Just make sure we do not emit ptrmask calls when it is not valid.
+// CHECK-LABEL: define i8* @strip_4_low_bits(i8* %p) #0 {
+// CHECK-NOT:     @llvm.ptrmask
+char *strip_4_low_bits(char *p) {
+  return (char *)(((intptr_t)p) & ~((1 << 4) - 1));
+}
+
+// CHECK-LABEL: define i8* @strip_4_low_bits_const_add(i8* %p) #0 {
+// CHECK-NOT:     @llvm.ptrmask
+char *strip_4_low_bits_const_add(char *p) {
+  return (char *)(((intptr_t)p) & ~((1 << (2 + 2)) - 1));
+}
+
+// CHECK-LABEL: define i8* @strip_variable(i8* %p, i64 %m) #0 {
+// CHECK-NOT:     @llvm.ptrmask
+char *strip_variable(char *p, intptr_t m) {
+  return (char *)(((intptr_t)p) & m);
+}