diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp --- a/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -30,7 +30,7 @@ uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedSize(); // The store size must be byte-aligned to support future type casts. - if (llvm::alignTo(StoreSize, 8) != StoreSize) + if (StoreSize & 7) return false; // The store has to be at least as big as the load. @@ -121,13 +121,13 @@ StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy); } - // If this is a big-endian system, we need to shift the value down to the low + // If this is a big-endian system, we need to shift the bytes down to the low // bits so that a truncate will work. if (DL.isBigEndian()) { - uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedSize() - - DL.getTypeStoreSizeInBits(LoadedTy).getFixedSize(); + uint64_t ShiftAmt = DL.getTypeStoreSize(StoredValTy).getFixedSize() - + DL.getTypeStoreSize(LoadedTy).getFixedSize(); StoredVal = Helper.CreateLShr( - StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt)); + StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt * 8)); } // Truncate the integer to the right size now. @@ -195,10 +195,10 @@ // must have gotten confused. uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize(); - if ((WriteSizeInBits & 7) | (LoadSize & 7)) + if (WriteSizeInBits & 7) return -1; uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes. - LoadSize /= 8; + LoadSize = (LoadSize + 7) / 8; bool isAAFailure = false; if (StoreOffset < LoadOffset) @@ -299,7 +299,7 @@ // This is the size of the load to try. Start with the next larger power of // two. 
- unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U; + unsigned NewLoadByteSize = (LI->getType()->getPrimitiveSizeInBits() + 7) / 8U; NewLoadByteSize = NextPowerOf2(NewLoadByteSize); while (true) { @@ -436,29 +436,29 @@ return SrcVal; } - uint64_t StoreSize = - (DL.getTypeSizeInBits(SrcVal->getType()).getFixedSize() + 7) / 8; - uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedSize() + 7) / 8; + uint64_t StoreSize = DL.getTypeSizeInBits(SrcVal->getType()).getFixedSize(); + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize(); // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (SrcVal->getType()->isPtrOrPtrVectorTy()) SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); if (!SrcVal->getType()->isIntegerTy()) - SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); + SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize)); - // Shift the bits to the least significant depending on endianness. + // Shift the loaded bytes down to the low bits (endian-dependent) so that a + // truncate will work. Sizes are in bits here; Offset and ShiftAmt in bytes. 
unsigned ShiftAmt; if (DL.isLittleEndian()) - ShiftAmt = Offset * 8; + ShiftAmt = Offset; else - ShiftAmt = (StoreSize - LoadSize - Offset) * 8; + ShiftAmt = (StoreSize + 7) / 8 - (LoadSize + 7) / 8 - Offset; if (ShiftAmt) - SrcVal = Helper.CreateLShr(SrcVal, - ConstantInt::get(SrcVal->getType(), ShiftAmt)); + SrcVal = Helper.CreateLShr( + SrcVal, ConstantInt::get(SrcVal->getType(), ShiftAmt * 8)); if (LoadSize != StoreSize) - SrcVal = Helper.CreateTruncOrBitCast(SrcVal, - IntegerType::get(Ctx, LoadSize * 8)); + SrcVal = + Helper.CreateTruncOrBitCast(SrcVal, IntegerType::get(Ctx, LoadSize)); return SrcVal; } @@ -548,7 +548,7 @@ Type *LoadTy, HelperClass &Helper, const DataLayout &DL) { LLVMContext &Ctx = LoadTy->getContext(); - uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8; + uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedSize() + 7) / 8; // We know that this method is only called when the mem transfer fully // provides the bits for the load. diff --git a/llvm/test/Transforms/GVN/pr10820.ll b/llvm/test/Transforms/GVN/pr10820.ll --- a/llvm/test/Transforms/GVN/pr10820.ll +++ b/llvm/test/Transforms/GVN/pr10820.ll @@ -1,8 +1,6 @@ ; RUN: opt < %s -basic-aa -gvn -S | FileCheck %s - -target datalayout = -"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" +; RUN: opt -mtriple="x86_64-unknown-linux-gnu" --data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" < %s -basic-aa -gvn -S | FileCheck %s +; RUN: opt -mtriple="powerpc64-unknown-linux-gnu" --data-layout="E-m:e-i64:64-n32:64" < %s -basic-aa -gvn -S | FileCheck %s @g = external global i31 @@ -16,3 +14,17 @@ store i31 %0, i31* undef, align 1 unreachable } + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) + +define i7 
@memset_forward(i7 *%p) { +; CHECK-LABEL: @memset_forward( +; CHECK-NEXT: [[CONV:%.*]] = bitcast i7* [[P:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 -120, i64 1, i1 false) +; CHECK-NEXT: ret i7 8 +; + %conv = bitcast i7* %p to i8* + call void @llvm.memset.p0i8.i64(i8* %conv, i8 -120, i64 1, i1 false) + %load = load i7, i7* %p + ret i7 %load +} diff --git a/llvm/test/Transforms/NewGVN/pr10820-xfail.ll b/llvm/test/Transforms/NewGVN/pr10820-xfail.ll deleted file mode 100644 --- a/llvm/test/Transforms/NewGVN/pr10820-xfail.ll +++ /dev/null @@ -1,19 +0,0 @@ -; XFAIL: * -; RUN: opt < %s -basic-aa -newgvn -S | FileCheck %s -; NewGVN fails this due to missing load coercion -target datalayout = -"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -@g = external global i31 - -define void @main() nounwind uwtable { -entry: -; CHECK: store i32 - store i32 402662078, i32* bitcast (i31* @g to i32*), align 8 -; CHECK-NOT: load i31 - %0 = load i31, i31* @g, align 8 -; CHECK: store i31 - store i31 %0, i31* undef, align 1 - unreachable -} diff --git a/llvm/test/Transforms/NewGVN/pr10820.ll b/llvm/test/Transforms/NewGVN/pr10820.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/pr10820.ll @@ -0,0 +1,15 @@ +; RUN: opt -mtriple="x86_64-unknown-linux-gnu" --data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" < %s -basic-aa -newgvn -S | FileCheck %s +; RUN: opt -mtriple="powerpc64-unknown-linux-gnu" --data-layout="E-m:e-i64:64-n32:64" < %s -basic-aa -newgvn -S | FileCheck %s + +@g = external global i31 + +define void @main() nounwind uwtable { +entry: +; CHECK: store i32 + store i32 402662078, i32* bitcast (i31* @g to i32*), align 8 +; CHECK-NOT: load i31 + %0 = load i31, 
i31* @g, align 8 +; CHECK: store i31 + store i31 %0, i31* undef, align 1 + unreachable +}