diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp --- a/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -30,7 +30,7 @@ uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedSize(); // The store size must be byte-aligned to support future type casts. - if (llvm::alignTo(StoreSize, 8) != StoreSize) + if (StoreSize & 7) return false; // The store has to be at least as big as the load. @@ -125,13 +125,13 @@ StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy); } - // If this is a big-endian system, we need to shift the value down to the low + // If this is a big-endian system, we need to shift the bytes down to the low // bits so that a truncate will work. if (DL.isBigEndian()) { - uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedSize() - - DL.getTypeStoreSizeInBits(LoadedTy).getFixedSize(); + uint64_t ShiftBytes = DL.getTypeStoreSize(StoredValTy).getFixedSize() - + DL.getTypeStoreSize(LoadedTy).getFixedSize(); StoredVal = Helper.CreateLShr( - StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt)); + StoredVal, ConstantInt::get(StoredVal->getType(), ShiftBytes * 8)); } // Truncate the integer to the right size now. @@ -189,17 +189,16 @@ if (StoreBase != LoadBase) return -1; - uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize(); - - if ((WriteSizeInBits & 7) | (LoadSize & 7)) + if (WriteSizeInBits & 7) // Shifting the input is not handled. return -1; - uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes. - LoadSize /= 8; + uint64_t StoreSize = (WriteSizeInBits + 7) / 8; // Convert to bytes. + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize(); + LoadSize = (LoadSize + 7) / 8; - // If the Load isn't completely contained within the stored bits, we don't - // have all the bits to feed it. We could do something crazy in the future - // (issue a smaller load then merge the bits in) but this seems unlikely to be - // valuable. + // If the loaded bytes aren't completely contained within the stored bytes, + // we don't have all the bits to feed it. We could do something crazy in the + // future (issue a smaller load then merge the bits in) but this seems + // unlikely to be valuable. if (StoreOffset > LoadOffset || StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize)) return -1; @@ -286,7 +285,7 @@ // This is the size of the load to try. Start with the next larger power of // two. - unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U; + unsigned NewLoadByteSize = (LI->getType()->getPrimitiveSizeInBits() + 7) / 8U; NewLoadByteSize = NextPowerOf2(NewLoadByteSize); while (true) { @@ -414,29 +413,29 @@ return SrcVal; } - uint64_t StoreSize = - (DL.getTypeSizeInBits(SrcVal->getType()).getFixedSize() + 7) / 8; - uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedSize() + 7) / 8; + uint64_t StoreSize = DL.getTypeSizeInBits(SrcVal->getType()).getFixedSize(); + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize(); // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (SrcVal->getType()->isPtrOrPtrVectorTy()) SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); if (!SrcVal->getType()->isIntegerTy()) - SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); + SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize)); - // Shift the bits to the least significant depending on endianness. - unsigned ShiftAmt; + // Shift the bytes to the least significant bits so that a truncate will + // work, depending on the endian. + unsigned ShiftBytes; if (DL.isLittleEndian()) - ShiftAmt = Offset * 8; + ShiftBytes = Offset; else - ShiftAmt = (StoreSize - LoadSize - Offset) * 8; - if (ShiftAmt) - SrcVal = Helper.CreateLShr(SrcVal, - ConstantInt::get(SrcVal->getType(), ShiftAmt)); + ShiftBytes = (StoreSize - LoadSize) / 8 - Offset; + if (ShiftBytes) + SrcVal = Helper.CreateLShr( + SrcVal, ConstantInt::get(SrcVal->getType(), ShiftBytes * 8)); if (LoadSize != StoreSize) - SrcVal = Helper.CreateTruncOrBitCast(SrcVal, - IntegerType::get(Ctx, LoadSize * 8)); + SrcVal = + Helper.CreateTruncOrBitCast(SrcVal, IntegerType::get(Ctx, LoadSize)); return SrcVal; } @@ -526,7 +525,7 @@ Type *LoadTy, HelperClass &Helper, const DataLayout &DL) { LLVMContext &Ctx = LoadTy->getContext(); - uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8; + uint64_t LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize(); // We know that this method is only called when the mem transfer fully // provides the bits for the load. diff --git a/llvm/test/Transforms/GVN/pr10820.ll b/llvm/test/Transforms/GVN/pr10820.ll --- a/llvm/test/Transforms/GVN/pr10820.ll +++ b/llvm/test/Transforms/GVN/pr10820.ll @@ -1,8 +1,5 @@ -; RUN: opt < %s -basic-aa -gvn -S | FileCheck %s - -target datalayout = -"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" +; RUN: opt -mtriple="x86_64-unknown-linux-gnu" --data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" < %s -basic-aa -gvn -S | FileCheck %s +; RUN: opt -mtriple="powerpc64-unknown-linux-gnu" --data-layout="E-m:e-i64:64-n32:64" < %s -basic-aa -gvn -S | FileCheck %s @g = external global i31 @@ -16,3 +13,17 @@ store i31 %0, i31* undef, align 1 unreachable } + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) + +define i7 @memset_forward(i7 *%p) { +; CHECK-LABEL: @memset_forward( +; CHECK-NEXT: [[CONV:%.*]] = bitcast i7* [[P:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 -120, i64 1, i1 false) +; CHECK-NEXT: ret i7 8 +; + %conv = bitcast i7* %p to i8* + call void @llvm.memset.p0i8.i64(i8* %conv, i8 -120, i64 1, i1 false) + %load = load i7, i7* %p + ret i7 %load +} diff --git a/llvm/test/Transforms/NewGVN/pr10820-xfail.ll b/llvm/test/Transforms/NewGVN/pr10820-xfail.ll deleted file mode 100644 --- a/llvm/test/Transforms/NewGVN/pr10820-xfail.ll +++ /dev/null @@ -1,19 +0,0 @@ -; XFAIL: * -; RUN: opt < %s -basic-aa -newgvn -S | FileCheck %s -; NewGVN fails this due to missing load coercion -target datalayout = -"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -@g = external global i31 - -define void @main() nounwind uwtable { -entry: -; CHECK: store i32 - store i32 402662078, i32* bitcast (i31* @g to i32*), align 8 -; CHECK-NOT: load i31 - %0 = load i31, i31* @g, align 8 -; CHECK: store i31 - store i31 %0, i31* undef, align 1 - unreachable -} diff --git a/llvm/test/Transforms/NewGVN/pr10820.ll b/llvm/test/Transforms/NewGVN/pr10820.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/pr10820.ll @@ -0,0 +1,29 @@ +; RUN: opt -mtriple="x86_64-unknown-linux-gnu" --data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" < %s -basic-aa -newgvn -S | FileCheck %s +; RUN: opt -mtriple="powerpc64-unknown-linux-gnu" --data-layout="E-m:e-i64:64-n32:64" < %s -basic-aa -newgvn -S | FileCheck %s + +@g = external global i31 + +define void @main() nounwind uwtable { +entry: +; CHECK: store i32 + store i32 402662078, i32* bitcast (i31* @g to i32*), align 8 +; CHECK-NOT: load i31 + %0 = load i31, i31* @g, align 8 +; CHECK: store i31 + store i31 %0, i31* undef, align 1 + unreachable +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) + +define i7 @memset_forward(i7 *%p) { +; CHECK-LABEL: @memset_forward( +; CHECK-NEXT: [[CONV:%.*]] = bitcast i7* [[P:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 -120, i64 1, i1 false) +; CHECK-NEXT: ret i7 8 +; + %conv = bitcast i7* %p to i8* + call void @llvm.memset.p0i8.i64(i8* %conv, i8 -120, i64 1, i1 false) + %load = load i7, i7* %p + ret i7 %load +}