Index: lib/CodeGen/CGExprScalar.cpp
===================================================================
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -2671,6 +2671,37 @@
   unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth();
   auto &DL = CGF.CGM.getDataLayout();
   auto PtrTy = cast<llvm::PointerType>(pointer->getType());
+
+  // Some versions of glibc and gcc use idioms (particularly in their malloc
+  // routines) that add a pointer-sized integer (known to be a pointer value)
+  // to a null pointer in order to cast the value back to an integer or as
+  // part of a pointer alignment algorithm. This is undefined behavior, but
+  // we'd like to be able to compile programs that use it.
+  //
+  // Normally, we'd generate a GEP with a null-pointer base here in response
+  // to that code, but it's also UB to dereference a pointer created that
+  // way. Instead (as an acknowledged hack to tolerate the idiom) we will
+  // generate a direct cast of the integer value to a pointer.
+  //
+  // The idiom (p = nullptr + N) is not met if any of the following are true:
+  //
+  //   The operation is subtraction.
+  //   The index is not pointer-sized.
+  //   The pointee type is not a character (byte-sized) type.
+  //   The index operand is a constant.
+  //
+  if (isa<llvm::ConstantPointerNull>(pointer) && !isSubtraction &&
+      (width == DL.getTypeSizeInBits(PtrTy)) &&
+      !isa<llvm::Constant>(index)) {
+    // The pointer type might come back as null, so it's deferred until here.
+    const PointerType *pointerType
+      = pointerOperand->getType()->getAs<PointerType>();
+    if (pointerType && pointerType->getPointeeType()->isCharType()) {
+      // (nullptr + N) -> inttoptr N to <type>
+      return CGF.Builder.CreateIntToPtr(index, pointer->getType());
+    }
+  }
+
   if (width != DL.getTypeSizeInBits(PtrTy)) {
     // Zero-extend or sign-extend the pointer value according to
     // whether the index is signed or not.
Index: test/CodeGen/nullptr-arithmetic.c
===================================================================
--- test/CodeGen/nullptr-arithmetic.c
+++ test/CodeGen/nullptr-arithmetic.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -S %s -emit-llvm -o - | FileCheck %s
+
+#include <stdint.h>
+
+// This test is meant to verify code that handles the 'p = nullptr + n' idiom
+// used by some versions of glibc and gcc. This is undefined behavior but
+// it is intended there to act like a conversion from a pointer-sized integer
+// to a pointer, and we would like to tolerate that.
+
+#define NULLPTRI8 ((int8_t*)0)
+
+// This should get the inttoptr instruction.
+int8_t *test1(intptr_t n) {
+  return NULLPTRI8 + n;
+}
+// CHECK-LABEL: test1
+// CHECK: inttoptr
+// CHECK-NOT: getelementptr
+
+// This doesn't meet the idiom because the offset type isn't pointer-sized.
+int8_t *test2(int16_t n) {
+  return NULLPTRI8 + n;
+}
+// CHECK-LABEL: test2
+// CHECK: getelementptr
+// CHECK-NOT: inttoptr
+
+// This doesn't meet the idiom because the offset is constant.
+int8_t *test3() {
+  return NULLPTRI8 + 16;
+}
+// CHECK-LABEL: test3
+// CHECK: getelementptr
+// CHECK-NOT: inttoptr
+
+// This doesn't meet the idiom because the element type is larger than a byte.
+int16_t *test4(intptr_t n) {
+  return (int16_t*)0 + n;
+}
+// CHECK-LABEL: test4
+// CHECK: getelementptr
+// CHECK-NOT: inttoptr
+
+// This doesn't meet the idiom because the offset is subtracted.
+int8_t* test5(intptr_t n) {
+  return NULLPTRI8 - n;
+}
+// CHECK-LABEL: test5
+// CHECK: getelementptr
+// CHECK-NOT: inttoptr
+