Index: lib/Analysis/ConstantFolding.cpp =================================================================== --- lib/Analysis/ConstantFolding.cpp +++ lib/Analysis/ConstantFolding.cpp @@ -327,10 +327,25 @@ const DataLayout &DL) { do { Type *SrcTy = C->getType(); + uint64_t DestSize = DL.getTypeSizeInBits(DestTy); + uint64_t SrcSize = DL.getTypeSizeInBits(SrcTy); + if (SrcSize < DestSize) + return nullptr; + + // Catch the obvious splat cases (since all-zeros can coerce non-integral + // pointers legally). + if (C->isNullValue() && !DestTy->isX86_MMXTy()) + return Constant::getNullValue(DestTy); + if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() && + !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types! + return Constant::getAllOnesValue(DestTy); // If the type sizes are the same and a cast is legal, just directly // cast the constant. - if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) { + // But be careful not to coerce non-integral pointers illegally. + if (SrcSize == DestSize && + DL.isNonIntegralPointerType(SrcTy->getScalarType()) == + DL.isNonIntegralPointerType(DestTy->getScalarType())) { Instruction::CastOps Cast = Instruction::BitCast; // If we are going from a pointer to int or vice versa, we spell the cast // differently. Index: lib/Transforms/Utils/VNCoercion.cpp =================================================================== --- lib/Transforms/Utils/VNCoercion.cpp +++ lib/Transforms/Utils/VNCoercion.cpp @@ -319,21 +319,17 @@ if (Offset == -1) return Offset; - // Don't coerce non-integral pointers to integers or vice versa, and the - // memtransfer is implicitly a raw byte code - if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) - // TODO: Can allow nullptrs from constant zeros - return -1; - unsigned AS = Src->getType()->getPointerAddressSpace(); // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. 
-  Src =
-      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
-  Constant *OffsetCst =
-      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
-  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
-                                       OffsetCst);
+  if (Offset) {
+    Src =
+        ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+    Constant *OffsetCst =
+        ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+    Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+                                         OffsetCst);
+  }
   Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
   if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
     return Offset;
@@ -500,16 +496,18 @@
   // Otherwise, this is a memcpy/memmove from a constant global.
   MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
   Constant *Src = cast<Constant>(MTI->getSource());
-  unsigned AS = Src->getType()->getPointerAddressSpace();
+  unsigned AS = Src->getType()->getPointerAddressSpace();
 
   // Otherwise, see if we can constant fold a load from the constant with the
   // offset applied as appropriate.
-  Src =
-      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
-  Constant *OffsetCst =
-      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
-  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
-                                       OffsetCst);
+  if (Offset) {
+    Src =
+        ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+    Constant *OffsetCst =
+        ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+    Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+                                         OffsetCst);
+  }
   Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
   return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
 }
Index: test/Transforms/GVN/non-integral-pointers.ll
===================================================================
--- test/Transforms/GVN/non-integral-pointers.ll
+++ test/Transforms/GVN/non-integral-pointers.ll
@@ -169,7 +169,14 @@
   ret i8 addrspace(4)* %ref
 }
 
+
+
 @NonZeroConstant = constant <4 x i64> <i64 3, i64 3, i64 3, i64 3>
+@NonZeroConstant2 = constant <4 x i64 addrspace(4)*> <
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)>
 @ZeroConstant = constant <4 x i64> zeroinitializer
@@ -190,6 +197,54 @@
   ret i8 addrspace(4)* %ref
 }
 
+define i64 addrspace(4)* @neg_forward_memcopy2(i64 addrspace(4)* addrspace(4)* %loc) {
+; CHECK-LABEL: @neg_forward_memcopy2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]]
+; CHECK-NEXT:    ret i64 addrspace(4)* [[REF]]
+;
+entry:
+  %loc.bc = bitcast i64 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
+  %ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc
+  ret i64 addrspace(4)* %ref
+}
+
+; TODO: missed optimization
+define i8 addrspace(4)* @forward_memcopy(i8 addrspace(4)* addrspace(4)* %loc) {
+; CHECK-LABEL: @forward_memcopy(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
+; CHECK-NEXT:    ret i8 addrspace(4)* [[REF]]
+;
+entry:
+  %loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
+  %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
+  ret i8 addrspace(4)* %ref
+}
+
+define i64 addrspace(4)* @forward_memcopy2(i64 addrspace(4)* addrspace(4)* %loc) {
+; CHECK-LABEL: @forward_memcopy2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false)
+; CHECK-NEXT:    ret i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)
+;
+entry:
+  %loc.bc = bitcast i64 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
+  %ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc
+  ret i64 addrspace(4)* %ref
+}
+
 define <1 x i8 addrspace(4)*> @neg_forward_memcpy_vload(<1 x i8 addrspace(4)*> addrspace(4)* %loc) {
 ; CHECK-LABEL: @neg_forward_memcpy_vload(
 ; CHECK-NEXT:  entry:
@@ -206,16 +261,62 @@
   ret <1 x i8 addrspace(4)*> %ref
 }
 
+define <4 x i64 addrspace(4)*> @neg_forward_memcpy_vload2(<4 x i64 addrspace(4)*> addrspace(4)* %loc) {
+; CHECK-LABEL: @neg_forward_memcpy_vload2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 32, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    ret <4 x i64 addrspace(4)*> [[REF]]
+;
+entry:
+  %loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
+  %ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc
+  ret <4 x i64 addrspace(4)*> %ref
+}
+
+define <4 x i64> @neg_forward_memcpy_vload3(<4 x i64> addrspace(4)* %loc) {
+; CHECK-LABEL: @neg_forward_memcpy_vload3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    ret <4 x i64> [[REF]]
+;
+entry:
+  %loc.bc = bitcast <4 x i64> addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
+  %ref = load <4 x i64>, <4 x i64> addrspace(4)* %loc
+  ret <4 x i64> %ref
+}
+
+define <1 x i64 addrspace(4)*> @forward_memcpy_vload3(<4 x i64 addrspace(4)*> addrspace(4)* %loc) {
+; CHECK-LABEL: @forward_memcpy_vload3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
+; CHECK-NEXT:    ret <1 x i64 addrspace(4)*> <i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)>
+;
+entry:
+  %loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
+  %ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc
+  %val = extractelement <4 x i64 addrspace(4)*> %ref, i32 0
+  %ret = insertelement <1 x i64 addrspace(4)*> undef, i64 addrspace(4)* %val, i32 0
+  ret <1 x i64 addrspace(4)*> %ret
+}
 
 ; Can forward since we can do so w/o breaking types
-; TODO: missed optimization
 define i8 addrspace(4)* @forward_memcpy_zero(i8 addrspace(4)* addrspace(4)* %loc) {
 ; CHECK-LABEL: @forward_memcpy_zero(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @ZeroConstant to i8*), i64 8, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]]
-; CHECK-NEXT:    ret i8 addrspace(4)* [[REF]]
+; CHECK-NEXT:    ret i8 addrspace(4)* null
 ;
 entry:
   %loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
Index: test/Transforms/GlobalOpt/evaluate-call-errors.ll
===================================================================
--- test/Transforms/GlobalOpt/evaluate-call-errors.ll
+++ test/Transforms/GlobalOpt/evaluate-call-errors.ll
@@ -65,7 +65,7 @@
 }
 
 define internal %struct.Foo* @_ZL3foov() {
-  ret %struct.Foo* null
+  ret %struct.Foo* getelementptr (%struct.Foo, %struct.Foo *null, i32 1)
 }
 
 define linkonce_odr void @_ZN1QC2Ev(%struct.Q*) unnamed_addr align 2 {
@@ -73,7 +73,7 @@
   store %struct.Q* %0, %struct.Q** %2, align 8
   %3 = load %struct.Q*, %struct.Q** %2, align 8
   %4 = getelementptr inbounds %struct.Q, %struct.Q* %3, i32 0, i32 0
-  %5 = call i32 bitcast (i32 (i32)* @_ZL3baz3Foo to i32 (%struct.Foo*)*)(%struct.Foo* null)
+  %5 = call i32 bitcast (i32 (i32)* @_ZL3baz3Foo to i32 (%struct.Foo*)*)(%struct.Foo* getelementptr (%struct.Foo, %struct.Foo *null, i32 1))
   store i32 %5, i32* %4, align 4
   ret void
 }