Index: lib/Analysis/ConstantFolding.cpp
===================================================================
--- lib/Analysis/ConstantFolding.cpp
+++ lib/Analysis/ConstantFolding.cpp
@@ -98,6 +98,9 @@
   if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
       !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
     return Constant::getAllOnesValue(DestTy);
+  if (DL.isNonIntegralPointerType(C->getType()->getScalarType()) !=
+      DL.isNonIntegralPointerType(DestTy->getScalarType()))
+    return nullptr;
 
   if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
     // Handle a vector->scalar integer/fp cast.
@@ -323,10 +326,25 @@
                                                const DataLayout &DL) {
   do {
     Type *SrcTy = C->getType();
+    uint64_t DestSize = DL.getTypeSizeInBits(DestTy);
+    uint64_t SrcSize = DL.getTypeSizeInBits(SrcTy);
+    if (SrcSize < DestSize)
+      return nullptr;
+
+    // Catch the obvious splat cases (since all-zeros can coerce non-integral
+    // pointers legally).
+    if (C->isNullValue() && !DestTy->isX86_MMXTy())
+      return Constant::getNullValue(DestTy);
+    if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
+        !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
+      return Constant::getAllOnesValue(DestTy);
 
     // If the type sizes are the same and a cast is legal, just directly
     // cast the constant.
-    if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
+    // But be careful not to coerce non-integral pointers illegally.
+    if (SrcSize == DestSize &&
+        DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
+            DL.isNonIntegralPointerType(DestTy->getScalarType())) {
       Instruction::CastOps Cast = Instruction::BitCast;
       // If we are going from a pointer to int or vice versa, we spell the cast
       // differently.
Index: lib/Transforms/Scalar/GVN.cpp
===================================================================
--- lib/Transforms/Scalar/GVN.cpp
+++ lib/Transforms/Scalar/GVN.cpp
@@ -878,11 +878,12 @@
 
   const DataLayout &DL = LI->getModule()->getDataLayout();
 
+  Instruction *DepInst = DepInfo.getInst();
   if (DepInfo.isClobber()) {
     // If the dependence is to a store that writes to a superset of the bits
     // read by the load, we can extract the bits we need for the load from the
     // stored value.
-    if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+    if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
       // Can't forward from non-atomic to atomic without violating memory model.
       if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
         int Offset =
@@ -898,7 +899,7 @@
     //    load i32* P
     //    load i8* (P+1)
     // if we have this, replace the later with an extraction from the former.
-    if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+    if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
       // If this is a clobber and L is the first instruction in its block, then
       // we have the first instruction in the entry block.
       // Can't forward from non-atomic to atomic without violating memory model.
@@ -915,7 +916,7 @@
 
     // If the clobbering value is a memset/memcpy/memmove, see if we can
     // forward a value on from it.
-    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
       if (Address && !LI->isAtomic()) {
         int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                       DepMI, DL);
@@ -929,8 +930,7 @@
 
     LLVM_DEBUG(
        // fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load "; LI->printAsOperand(dbgs()); - Instruction *I = DepInfo.getInst(); - dbgs() << " is clobbered by " << *I << '\n';); + dbgs() << " is clobbered by " << *DepInst << '\n';); if (ORE->allowExtraAnalysis(DEBUG_TYPE)) reportMayClobberedLoad(LI, DepInfo, DT, ORE); @@ -938,8 +938,6 @@ } assert(DepInfo.isDef() && "follows from above"); - Instruction *DepInst = DepInfo.getInst(); - // Loading the allocation -> undef. if (isa(DepInst) || isMallocLikeFn(DepInst, TLI) || // Loading immediately after lifetime begin -> undef. @@ -958,8 +956,7 @@ // Reject loads and stores that are to the same address but are of // different types if we have to. If the stored value is larger or equal to // the loaded value, we can reuse it. - if (S->getValueOperand()->getType() != LI->getType() && - !canCoerceMustAliasedValueToLoad(S->getValueOperand(), + if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(), DL)) return false; @@ -975,8 +972,7 @@ // If the types mismatch and we can't handle it, reject reuse of the load. // If the stored value is larger or equal to the loaded value, we can reuse // it. - if (LD->getType() != LI->getType() && - !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) + if (!canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) return false; // Can't forward from non-atomic to atomic without violating memory model. Index: lib/Transforms/Utils/VNCoercion.cpp =================================================================== --- lib/Transforms/Utils/VNCoercion.cpp +++ lib/Transforms/Utils/VNCoercion.cpp @@ -11,16 +11,18 @@ namespace llvm { namespace VNCoercion { -/// Return true if coerceAvailableValueToLoadType will succeed. -bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, +bool canCoerceMustAliasedValueToLoad(Type *StoredTy, Type *LoadTy, const DataLayout &DL) { + if (StoredTy == LoadTy) + return true; + // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. if (LoadTy->isStructTy() || LoadTy->isArrayTy() || - StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) + StoredTy->isStructTy() || StoredTy->isArrayTy()) return false; - uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType()); + uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy); // The store size must be byte-aligned to support future type casts. if (llvm::alignTo(StoreSize, 8) != StoreSize) @@ -31,19 +33,24 @@ return false; // Don't coerce non-integral pointers to integers or vice versa. - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) != - DL.isNonIntegralPointerType(LoadTy->getScalarType())) { - // As a special case, allow coercion of memset used to initialize - // an array w/null. Despite non-integral pointers not generally having a - // specific bit pattern, we do assume null is zero. - if (auto *CI = dyn_cast(StoredVal)) - return CI->isNullValue(); + if (DL.isNonIntegralPointerType(StoredTy->getScalarType()) != + DL.isNonIntegralPointerType(LoadTy->getScalarType())) return false; - } - + return true; } +/// Return true if coerceAvailableValueToLoadType will succeed. 
+bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+                                     const DataLayout &DL) {
+  Type *StoredTy = StoredVal->getType();
+  if (auto *CI = dyn_cast<Constant>(StoredVal))
+    if (CI->isNullValue())
+      if (StoredTy == LoadTy || DL.getTypeSizeInBits(StoredTy) >= DL.getTypeSizeInBits(LoadTy))
+        return true;
+  return canCoerceMustAliasedValueToLoad(StoredTy, LoadTy, DL);
+}
+
 template <class T, class HelperClass>
 static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
                                                HelperClass &Helper,
@@ -160,11 +167,6 @@
                                           Value *WritePtr,
                                           uint64_t WriteSizeInBits,
                                           const DataLayout &DL) {
-  // If the loaded or stored value is a first class array or struct, don't try
-  // to transform them.  We need to be able to bitcast to integer.
-  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
-    return -1;
-
   int64_t StoreOffset = 0, LoadOffset = 0;
   Value *StoreBase =
       GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
@@ -214,21 +216,9 @@
 int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                    StoreInst *DepSI, const DataLayout &DL) {
   auto *StoredVal = DepSI->getValueOperand();
-
-  // Cannot handle reading from store of first-class aggregate yet.
-  if (StoredVal->getType()->isStructTy() ||
-      StoredVal->getType()->isArrayTy())
+  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
     return -1;
 
-  // Don't coerce non-integral pointers to integers or vice versa.
-  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
-      DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
-    // Allow casts of zero values to null as a special case
-    auto *CI = dyn_cast<Constant>(StoredVal);
-    if (!CI || !CI->isNullValue())
-      return -1;
-  }
-
   Value *StorePtr = DepSI->getPointerOperand();
   uint64_t StoreSize =
       DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
@@ -241,39 +231,36 @@
 /// the other load can feed into the second load.
 int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
                                   const DataLayout &DL) {
-  // Cannot handle reading from store of first-class aggregate yet.
-  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
-    return -1;
-
-  // Don't coerce non-integral pointers to integers or vice versa.
-  if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) !=
-      DL.isNonIntegralPointerType(LoadTy->getScalarType()))
-    return -1;
-
   Value *DepPtr = DepLI->getPointerOperand();
-  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
-  int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
-  if (R != -1)
-    return R;
+  if (canCoerceMustAliasedValueToLoad(DepLI->getType(), LoadTy, DL)) {
+    uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+    int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
+    if (R != -1)
+      return R;
+  }
 
   // If we have a load/load clobber an DepLI can be widened to cover this load,
   // then we should widen it!
-  int64_t LoadOffs = 0;
-  const Value *LoadBase =
-      GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
-  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+  if (canCoerceMustAliasedValueToLoad(LoadTy, DepLI->getType(), DL)) {
+    int64_t LoadOffs = 0;
+    const Value *LoadBase =
+        GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
+    unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+
+    unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
+        LoadBase, LoadOffs, LoadSize, DepLI);
+    if (Size == 0)
+      return -1;
 
-  unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
-      LoadBase, LoadOffs, LoadSize, DepLI);
-  if (Size == 0)
-    return -1;
+    // Check non-obvious conditions enforced by MDA which we rely on for being
+    // able to materialize this potentially available value
+    assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
+    assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
 
-  // Check non-obvious conditions enforced by MDA which we rely on for being
-  // able to materialize this potentially available value
-  assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
-  assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
+    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
+  }
 
-  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
+  return -1;
 }
 
 int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
@@ -287,13 +274,16 @@
   // If this is memset, we just need to see if the offset is valid in the size
   // of the memset..
   if (MI->getIntrinsicID() == Intrinsic::memset) {
-    if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
-      auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue());
-      if (!CI || !CI->isZero())
-        return -1;
-    }
-    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
-                                          MemSizeInBits, DL);
+    Value *StoredVal = cast<MemSetInst>(MI)->getValue();
+    if (auto *CI = dyn_cast<Constant>(StoredVal))
+      if (CI->isNullValue())
+        return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+                                              MemSizeInBits, DL);
+    Type *StoreTy = IntegerType::get(LoadTy->getContext(), MemSizeInBits);
+    if (canCoerceMustAliasedValueToLoad(StoreTy, LoadTy, DL))
+      return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+                                            MemSizeInBits, DL);
+    return -1;
   }
 
   // If we have a memcpy/memmove, the only case we can handle is if this is a
@@ -306,7 +296,7 @@
     return -1;
 
   GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
-  if (!GV || !GV->isConstant())
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return -1;
 
   // See if the access is within the bounds of the transfer.
@@ -315,21 +305,17 @@
   if (Offset == -1)
     return Offset;
 
-  // Don't coerce non-integral pointers to integers or vice versa, and the
-  // memtransfer is implicitly a raw byte code
-  if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
-    // TODO: Can allow nullptrs from constant zeros
-    return -1;
-
   unsigned AS = Src->getType()->getPointerAddressSpace();
   // Otherwise, see if we can constant fold a load from the constant with the
   // offset applied as appropriate.
-  Src =
-      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
-  Constant *OffsetCst =
-      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
-  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
-                                       OffsetCst);
+  if (Offset) {
+    Src =
+        ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+    Constant *OffsetCst =
+        ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+    Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+                                         OffsetCst);
+  }
   Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
   if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
     return Offset;
@@ -341,6 +327,9 @@
                                      HelperClass &Helper,
                                      const DataLayout &DL) {
   LLVMContext &Ctx = SrcVal->getType()->getContext();
+  if (auto *CI = dyn_cast<Constant>(GetUnderlyingObject(SrcVal, DL)))
+    if (CI->isNullValue())
+      return Constant::getNullValue(LoadTy);
 
   // If two pointers are in the same address space, they have the same size,
   // so we don't need to do any truncation, etc. This avoids introducing
@@ -468,6 +457,12 @@
     // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
     // independently of what the offset is.
     T *Val = cast<T>(MSI->getValue());
+    if (auto *CI = dyn_cast<Constant>(Val)) {
+      // memset(P, '\0', 1234) -> just directly create the null value for *P
+      // by-passing any later validity checks
+      if (CI->isNullValue())
+        return Constant::getNullValue(LoadTy);
+    }
     if (LoadSize != 1)
       Val =
           Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
@@ -496,16 +491,18 @@
   // Otherwise, this is a memcpy/memmove from a constant global.
   MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
   Constant *Src = cast<Constant>(MTI->getSource());
-  unsigned AS = Src->getType()->getPointerAddressSpace();
+  unsigned AS = Src->getType()->getPointerAddressSpace();
 
   // Otherwise, see if we can constant fold a load from the constant with the
   // offset applied as appropriate.
-  Src =
-      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
-  Constant *OffsetCst =
-      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
-  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
-                                       OffsetCst);
+  if (Offset) {
+    Src =
+        ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+    Constant *OffsetCst =
+        ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+    Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+                                         OffsetCst);
+  }
   Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
   return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
 }
Index: test/Transforms/GVN/non-integral-pointers.ll
===================================================================
--- test/Transforms/GVN/non-integral-pointers.ll
+++ test/Transforms/GVN/non-integral-pointers.ll
@@ -169,53 +169,105 @@
   ret i8 addrspace(4)* %ref
 }
 
+
+
 @NonZeroConstant = constant <4 x i64> <i64 3, i64 3, i64 3, i64 3>
+@NonZeroConstant2 = constant <4 x i64 addrspace(4)*> <
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)>
 @ZeroConstant = constant <4 x i64> zeroinitializer
 
 
 ; Can't forward as the load might be dead.  (Pretend we wrote out the alwaysfalse idiom above.)
-define i8 addrspace(4)* @neg_forward_memcopy(i8 addrspace(4)* addrspace(4)* %loc) {
+define i64 addrspace(4)* @neg_forward_memcopy(i64 addrspace(4)* addrspace(4)* %loc) {
 ; CHECK-LABEL: @neg_forward_memcopy(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]]
-; CHECK-NEXT:    ret i8 addrspace(4)* [[REF]]
+; CHECK-NEXT:    [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]]
+; CHECK-NEXT:    ret i64 addrspace(4)* [[REF]]
 ;
 entry:
-  %loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
+  %loc.bc = bitcast i64 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
   %src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
   call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
-  %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
-  ret i8 addrspace(4)* %ref
+  %ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc
+  ret i64 addrspace(4)* %ref
 }
 
-define <1 x i8 addrspace(4)*> @neg_forward_memcpy_vload(<1 x i8 addrspace(4)*> addrspace(4)* %loc) {
+define i64 addrspace(4)* @forward_memcopy(i64 addrspace(4)* addrspace(4)* %loc) {
+; CHECK-LABEL: @forward_memcopy(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false)
+; CHECK-NEXT:    ret i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)
+;
+entry:
+  %loc.bc = bitcast i64 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
+  %ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc
+  ret i64 addrspace(4)* %ref
+}
+
+define <4 x i64 addrspace(4)*> @neg_forward_memcpy_vload(<4 x i64 addrspace(4)*> addrspace(4)* %loc) {
 ; CHECK-LABEL: @neg_forward_memcpy_vload(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <1 x i8 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
-; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load <1 x i8 addrspace(4)*>, <1 x i8 addrspace(4)*> addrspace(4)* [[LOC]]
-; CHECK-NEXT:    ret <1 x i8 addrspace(4)*> [[REF]]
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 32, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    ret <4 x i64 addrspace(4)*> [[REF]]
 ;
 entry:
-  %loc.bc = bitcast <1 x i8 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
+  %loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
   %src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
-  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
-  %ref = load <1 x i8 addrspace(4)*>, <1 x i8 addrspace(4)*> addrspace(4)* %loc
-  ret <1 x i8 addrspace(4)*> %ref
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
+  %ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc
+  ret <4 x i64 addrspace(4)*> %ref
 }
 
+define <4 x i64> @neg_forward_memcpy_vload2(<4 x i64> addrspace(4)* %loc) {
+; CHECK-LABEL: @neg_forward_memcpy_vload2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    ret <4 x i64> [[REF]]
+;
+entry:
+  %loc.bc = bitcast <4 x i64> addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
+  %ref = load <4 x i64>, <4 x i64> addrspace(4)* %loc
+  ret <4 x i64> %ref
+}
+
+define <1 x i64 addrspace(4)*> @forward_memcpy_vload2(<4 x i64 addrspace(4)*> addrspace(4)* %loc) {
+; CHECK-LABEL: @forward_memcpy_vload2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
+; CHECK-NEXT:    ret <1 x i64 addrspace(4)*> <i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)>
+;
+entry:
+  %loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
+  %ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc
+  %val = extractelement <4 x i64 addrspace(4)*> %ref, i32 0
+  %ret = insertelement <1 x i64 addrspace(4)*> undef, i64 addrspace(4)* %val, i32 0
+  ret <1 x i64 addrspace(4)*> %ret
+}
 
 ; Can forward since we can do so w/o breaking types
-; TODO: missed optimization
 define i8 addrspace(4)* @forward_memcpy_zero(i8 addrspace(4)* addrspace(4)* %loc) {
 ; CHECK-LABEL: @forward_memcpy_zero(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @ZeroConstant to i8*), i64 8, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]]
-; CHECK-NEXT:    ret i8 addrspace(4)* [[REF]]
+; CHECK-NEXT:    ret i8 addrspace(4)* null
 ;
 entry:
   %loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
Index: test/Transforms/GlobalOpt/evaluate-call-errors.ll
===================================================================
--- test/Transforms/GlobalOpt/evaluate-call-errors.ll
+++ test/Transforms/GlobalOpt/evaluate-call-errors.ll
@@ -65,7 +65,7 @@
 }
 
 define internal %struct.Foo* @_ZL3foov() {
-  ret %struct.Foo* null
+  ret %struct.Foo* getelementptr (%struct.Foo, %struct.Foo *null, i32 1)
 }
 
 define linkonce_odr void @_ZN1QC2Ev(%struct.Q*) unnamed_addr align 2 {
@@ -73,7 +73,7 @@
   store %struct.Q* %0, %struct.Q** %2, align 8
   %3 = load %struct.Q*, %struct.Q** %2, align 8
   %4 = getelementptr inbounds %struct.Q, %struct.Q* %3, i32 0, i32 0
-  %5 = call i32 bitcast (i32 (i32)* @_ZL3baz3Foo to i32 (%struct.Foo*)*)(%struct.Foo* null)
+  %5 = call i32 bitcast (i32 (i32)* @_ZL3baz3Foo to i32 (%struct.Foo*)*)(%struct.Foo* getelementptr (%struct.Foo, %struct.Foo *null, i32 1))
   store i32 %5, i32* %4, align 4
   ret void
 }