diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -9,22 +9,18 @@
 namespace llvm {
 namespace VNCoercion {
 
-static bool isFirstClassAggregateOrScalableType(Type *Ty) {
-  return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty);
+static bool isBitCastable(Type *Ty) {
+  return !(Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty));
 }
 
-/// Return true if coerceAvailableValueToLoadType will succeed.
-bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
-                                     const DataLayout &DL) {
-  Type *StoredTy = StoredVal->getType();
-
+bool canCoerceMustAliasedTypeToLoad(Type *StoredTy, Type *LoadTy,
+                                    const DataLayout &DL) {
   if (StoredTy == LoadTy)
     return true;
 
   // If the loaded/stored value is a first class array/struct, or scalable type,
   // don't try to transform them. We need to be able to bitcast to integer.
-  if (isFirstClassAggregateOrScalableType(LoadTy) ||
-      isFirstClassAggregateOrScalableType(StoredTy))
+  if (!isBitCastable(LoadTy) || !isBitCastable(StoredTy))
     return false;
 
   uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedSize();
@@ -40,29 +36,40 @@
   bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
   bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
   // Don't coerce non-integral pointers to integers or vice versa.
-  if (StoredNI != LoadNI) {
-    // As a special case, allow coercion of memset used to initialize
-    // an array w/null. Despite non-integral pointers not generally having a
-    // specific bit pattern, we do assume null is zero.
-    if (auto *CI = dyn_cast<Constant>(StoredVal))
-      return CI->isNullValue();
+  if (StoredNI != LoadNI)
     return false;
-  } else if (StoredNI && LoadNI &&
-             StoredTy->getPointerAddressSpace() !=
-                 LoadTy->getPointerAddressSpace()) {
+  else if (StoredNI && LoadNI &&
+           StoredTy->getPointerAddressSpace() !=
+               LoadTy->getPointerAddressSpace())
     return false;
-  }
-
   // The implementation below uses inttoptr for vectors of unequal size; we
   // can't allow this for non integral pointers. We could teach it to extract
   // exact subvectors if desired.
   if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize())
     return false;
 
   return true;
 }
 
+/// Return true if coerceAvailableValueToLoadType will succeed.
+bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+                                     const DataLayout &DL) {
+  Type *StoredTy = StoredVal->getType();
+  // Unlike canCoerceMustAliasedTypeToLoad, we know the stored value here, not
+  // just its type. This lets us special-case an all-zero store: as long as the
+  // store is at least as large as the load, the caller needs no further
+  // size/offset checks to materialize zeros for the load.
+  if (auto *CI = dyn_cast<Constant>(StoredVal))
+    // All callers are expected to be able to detect and handle the zeros case.
+    if (CI->isNullValue())
+      if (isa<ScalableVectorType>(StoredTy) ==
+              isa<ScalableVectorType>(LoadTy) &&
+          DL.getTypeSizeInBits(StoredTy).getKnownMinSize() >=
+              DL.getTypeSizeInBits(LoadTy).getKnownMinSize())
+        return true;
+  return canCoerceMustAliasedTypeToLoad(StoredTy, LoadTy, DL);
+}
+
 template <class T, class HelperClass>
 static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
                                                HelperClass &Helper,
@@ -72,8 +79,11 @@
   if (auto *C = dyn_cast<Constant>(StoredVal))
     StoredVal = ConstantFoldConstant(C, DL);
 
-  // If this is already the right type, just return it.
+  // If this is already the right type, just return it before querying the
+  // fixed size below, which is not valid for scalable vectors.
   Type *StoredValTy = StoredVal->getType();
+  if (StoredValTy == LoadedTy)
+    return StoredVal;
 
   uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedSize();
   uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedSize();
@@ -111,7 +121,7 @@
   // extract out a piece from it. If the available value is too small, then we
   // can't do anything.
   assert(StoredValSize >= LoadedValSize &&
-         "canCoerceMustAliasedValueToLoad fail");
+         "canCoerceMustAliasedTypeToLoad fail");
 
   // Convert source pointers to integers, which can be manipulated.
   if (StoredValTy->isPtrOrPtrVectorTy()) {
@@ -177,11 +187,6 @@
                                           Value *WritePtr,
                                           uint64_t WriteSizeInBits,
                                           const DataLayout &DL) {
-  // If the loaded/stored value is a first class array/struct, or scalable type,
-  // don't try to transform them. We need to be able to bitcast to integer.
-  if (isFirstClassAggregateOrScalableType(LoadTy))
-    return -1;
-
   int64_t StoreOffset = 0, LoadOffset = 0;
   Value *StoreBase =
       GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
@@ -215,8 +220,9 @@
                                    StoreInst *DepSI, const DataLayout &DL) {
   auto *StoredVal = DepSI->getValueOperand();
 
-  // Cannot handle reading from store of first-class aggregate or scalable type.
-  if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
+  // We need to be able to look at the size of the result.
+  if (isa<ScalableVectorType>(StoredVal->getType()) ||
+      isa<ScalableVectorType>(LoadTy))
     return -1;
 
   if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
@@ -224,7 +230,7 @@
 
   Value *StorePtr = DepSI->getPointerOperand();
   uint64_t StoreSize =
-      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedSize();
+      DL.getTypeSizeInBits(StoredVal->getType()).getFixedSize();
   return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
                                         DL);
 }
@@ -319,21 +325,26 @@
 /// the other load can feed into the second load.
 int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
                                   const DataLayout &DL) {
-  // Cannot handle reading from store of first-class aggregate yet.
-  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
-    return -1;
+  Value *DepPtr = DepLI->getPointerOperand();
 
-  if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
+  // We need to be able to look at the size of the result.
+  if (isa<ScalableVectorType>(DepLI->getType()) ||
+      isa<ScalableVectorType>(LoadTy))
     return -1;
 
-  Value *DepPtr = DepLI->getPointerOperand();
-  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedSize();
-  int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
-  if (R != -1)
-    return R;
+  if (canCoerceMustAliasedTypeToLoad(DepLI->getType(), LoadTy, DL)) {
+    uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedSize();
+    int R =
+        analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
+    if (R != -1)
+      return R;
+  }
 
-  // If we have a load/load clobber an DepLI can be widened to cover this load,
+  // If we have a load/load clobber and DepLI can be widened to cover this load,
   // then we should widen it!
+  if (!canCoerceMustAliasedTypeToLoad(LoadTy, DepLI->getType(), DL))
+    return -1;
+
   int64_t LoadOffs = 0;
   const Value *LoadBase =
       GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
@@ -360,16 +371,25 @@
     return -1;
   uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
 
+  // We need to be able to look at the size of the result.
+  if (isa<ScalableVectorType>(LoadTy))
+    return -1;
+
   // If this is memset, we just need to see if the offset is valid in the size
   // of the memset..
   if (MI->getIntrinsicID() == Intrinsic::memset) {
-    if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
-      auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue());
-      if (!CI || !CI->isZero())
-        return -1;
-    }
-    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
-                                          MemSizeInBits, DL);
+    Value *StoredVal = cast<MemSetInst>(MI)->getValue();
+    bool CanCast = ([&]() {
+      if (auto *CI = dyn_cast<ConstantInt>(StoredVal))
+        if (CI->isNullValue())
+          return true;
+      Type *StoreTy = IntegerType::get(LoadTy->getContext(), MemSizeInBits);
+      return canCoerceMustAliasedTypeToLoad(StoreTy, LoadTy, DL);
+    })();
+    if (CanCast)
+      return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+                                            MemSizeInBits, DL);
+    return -1;
   }
 
   // If we have a memcpy/memmove, the only case we can handle is if this is a
@@ -404,6 +424,9 @@
                                      HelperClass &Helper,
                                      const DataLayout &DL) {
   LLVMContext &Ctx = SrcVal->getType()->getContext();
+  if (auto *CI = dyn_cast<Constant>(getUnderlyingObject(SrcVal)))
+    if (CI->isNullValue())
+      return Constant::getNullValue(LoadTy);
 
   // If two pointers are in the same address space, they have the same size,
   // so we don't need to do any truncation, etc. This avoids introducing
@@ -534,6 +557,14 @@
     // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
     // independently of what the offset is.
     T *Val = cast<T>(MSI->getValue());
+    if (auto *CI = dyn_cast<ConstantInt>(Val)) {
+      // memset(P, '\0', 1234) -> directly materialize the null value of the
+      // load's type for the use of *P, bypassing the later bitcast-based
+      // splat handling used to build this constant.
+      if (CI->isNullValue())
+        return Constant::getNullValue(LoadTy);
+    }
+
     if (LoadSize != 1)
       Val =
           Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
diff --git a/llvm/test/Transforms/GVN/non-integral-pointers.ll b/llvm/test/Transforms/GVN/non-integral-pointers.ll
--- a/llvm/test/Transforms/GVN/non-integral-pointers.ll
+++ b/llvm/test/Transforms/GVN/non-integral-pointers.ll
@@ -108,6 +108,21 @@
   ret i8 addrspace(4)* %ref
 }
 
+define [2 x i8 addrspace(4)*] @forward_memset_zero_fca([2 x i8 addrspace(4)*] addrspace(4)* %loc) {
+; CHECK-LABEL: @forward_memset_zero_fca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast [2 x i8 addrspace(4)*] addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    ret [2 x i8 addrspace(4)*] zeroinitializer
+;
+entry:
+  %loc.bc = bitcast [2 x i8 addrspace(4)*] addrspace(4)* %loc to i8 addrspace(4)*
+  call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8 0, i64 16, i1 false)
+  %ref = load [2 x i8 addrspace(4)*], [2 x i8 addrspace(4)*] addrspace(4)* %loc
+  ret [2 x i8 addrspace(4)*] %ref
+}
+
 ; Can't forward as the load might be dead.  (Pretend we wrote out the alwaysfalse idiom above.)
 define i8 addrspace(4)* @neg_forward_store(i8 addrspace(4)* addrspace(4)* %loc) {
 ; CHECK-LABEL: @neg_forward_store(
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -139,8 +139,8 @@
 ; Analyze Load from clobbering Store.
 
-define <vscale x 4 x i32> @store_forward_to_load(<vscale x 4 x i32>* %p) {
-; CHECK-LABEL: @store_forward_to_load(
+define <vscale x 4 x i32> @nullstore_forward_to_load(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @nullstore_forward_to_load(
 ; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]], align 16
 ; CHECK-NEXT:    ret <vscale x 4 x i32> zeroinitializer
 ;
@@ -149,6 +149,29 @@
   ret <vscale x 4 x i32> %load
 }
 
+define <vscale x 4 x float> @nullstore_forward_to_load_casted(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @nullstore_forward_to_load_casted(
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
+;
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
+  %conv = bitcast <vscale x 4 x i32>* %p to <vscale x 4 x float>*
+  %load = load <vscale x 4 x float>, <vscale x 4 x float>* %conv
+  ret <vscale x 4 x float> %load
+}
+
+define <vscale x 2 x i32> @nullstore_forward_to_load_trunc(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @nullstore_forward_to_load_trunc(
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]], align 16
+; CHECK-NEXT:    ret <vscale x 2 x i32> zeroinitializer
+;
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
+  %conv = bitcast <vscale x 4 x i32>* %p to <vscale x 2 x i32>*
+  %load = load <vscale x 2 x i32>, <vscale x 2 x i32>* %conv
+  ret <vscale x 2 x i32> %load
+}
+
 define <vscale x 4 x i32> @store_forward_to_load_sideeffect(<vscale x 4 x i32>* %p) {
 ; CHECK-LABEL: @store_forward_to_load_sideeffect(
 ; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]], align 16
@@ -177,6 +200,30 @@
   ret i32 %load
 }
 
+define <vscale x 4 x float> @store_forward_to_load_casted(<vscale x 4 x i32>* %p, <vscale x 4 x i32> %v) {
+; CHECK-LABEL: @store_forward_to_load_casted(
+; CHECK-NEXT:    store <vscale x 4 x i32> %v, <vscale x 4 x i32>* [[P:%.*]], align 16
+; CHECK-NEXT:    %conv = bitcast <vscale x 4 x i32>* %p to <vscale x 4 x float>*
+; CHECK-NEXT:    %load = load <vscale x 4 x float>, <vscale x 4 x float>* %conv
+; CHECK-NEXT:    ret <vscale x 4 x float> %load
+;
+  store <vscale x 4 x i32> %v, <vscale x 4 x i32>* %p
+  %conv = bitcast <vscale x 4 x i32>* %p to <vscale x 4 x float>*
+  %load = load <vscale x 4 x float>, <vscale x 4 x float>* %conv
+  ret <vscale x 4 x float> %load
+}
+
+define <vscale x 4 x i32> @store_forward_to_load(<vscale x 4 x i32>* %p, <vscale x 4 x i32> %v) {
+; CHECK-LABEL: @store_forward_to_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> %v, <vscale x 4 x i32>* [[P:%.*]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x i32> %v
+;
+  store <vscale x 4 x i32> %v, <vscale x 4 x i32>* %p
+  %load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  ret <vscale x 4 x i32> %load
+}
+
 ; Analyze Load from clobbering MemInst.
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
@@ -194,6 +241,20 @@
   ret i32 %load
 }
 
+define <vscale x 4 x i32> @memset_clobber_load_vscale(<vscale x 4 x i32> *%p) {
+; CHECK-LABEL: @memset_clobber_load_vscale(
+; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 0, i64 128, i1 false)
+; CHECK-NEXT:    %load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+; CHECK-NEXT:    ret <vscale x 4 x i32> %load
+;
+  %conv = bitcast <vscale x 4 x i32>* %p to i8*
+  call void @llvm.memset.p0i8.i64(i8* %conv, i8 0, i64 128, i1 false)
+  %load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  ret <vscale x 4 x i32> %load
+}
+
 define i32 @memset_clobber_load_vscaled_base(<vscale x 4 x i32> *%p) {
 ; CHECK-LABEL: @memset_clobber_load_vscaled_base(
 ; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
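
Illustration only, not part of the patch: a minimal C++ sketch of how a caller might use the two entry points after this change. It assumes the new canCoerceMustAliasedTypeToLoad is declared in llvm/Transforms/Utils/VNCoercion.h next to the existing Value-based function (the header change is not shown in this diff); the helper names couldWidenLoad and canForwardStoreToLoad are hypothetical.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/VNCoercion.h"

using namespace llvm;

// Only the types are known here (e.g. asking whether DepLI could be widened to
// also cover LI), so use the purely type-based query, which rejects
// aggregates and scalable vectors outright.
static bool couldWidenLoad(LoadInst *DepLI, LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return VNCoercion::canCoerceMustAliasedTypeToLoad(LI->getType(),
                                                    DepLI->getType(), DL);
}

// The stored value itself is known here, so the Value-based wrapper applies;
// besides the type-based check, it also accepts an all-zero store whose known
// minimum size covers the load, which is the scalable-vector case this patch
// enables (the materialization path then returns Constant::getNullValue).
static bool canForwardStoreToLoad(StoreInst *SI, LoadInst *LI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  return VNCoercion::canCoerceMustAliasedValueToLoad(SI->getValueOperand(),
                                                     LI->getType(), DL);
}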