diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -456,8 +456,15 @@
 
       // Check to see if this stored value is of the same byte-splattable value.
       Value *StoredByte = isBytewiseValue(StoredVal, DL);
+      // Undef is compatible with any byte value. If the previously stored
+      // value is undef, adopt the newly stored byte (which may itself be
+      // undef); conversely, if the newly stored byte is undef, treat it as the
+      // previously stored value so the comparison below does not bail out.
       if (isa<UndefValue>(ByteVal) && StoredByte)
         ByteVal = StoredByte;
+      else if (StoredByte && isa<UndefValue>(StoredByte))
+        StoredByte = ByteVal;
+
       if (ByteVal != StoredByte)
         break;
 
diff --git a/llvm/test/Transforms/MemCpyOpt/merge-undef-memset.ll b/llvm/test/Transforms/MemCpyOpt/merge-undef-memset.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/merge-undef-memset.ll
@@ -0,0 +1,63 @@
+; Check that alternating stores of a byte value and undef merge into one memset.
+; RUN: opt < %s -memcpyopt -opaque-pointers -S -verify-memoryssa | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%S = type { { i8, i8 }, { i8, i8 }, { i8, i8 }, { i8, i8 }, { i8, i8 }, { i8, i8 }, { i8, i8 }, { i8, i8 } }
+
+define void @alternating(ptr %0) {
+; CHECK-LABEL: @alternating(
+; CHECK-NEXT: start
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 %0, i8 0, i64 16, i1 false)
+; CHECK-NEXT: ret void
+start:
+  store i8 0, ptr %0, align 1
+  %1 = getelementptr inbounds { i8, i8 }, ptr %0, i32 0, i32 1
+  store i8 undef, ptr %1, align 1
+  %2 = getelementptr inbounds %S, ptr %0, i32 0, i32 1
+  store i8 0, ptr %2, align 1
+  %3 = getelementptr inbounds { i8, i8 }, ptr %2, i32 0, i32 1
+  store i8 undef, ptr %3, align 1
+  %4 = getelementptr inbounds %S, ptr %0, i32 0, i32 2
+  store i8 0, ptr %4, align 1
+  %5 = getelementptr inbounds { i8, i8 }, ptr %4, i32 0, i32 1
+  store i8 undef, ptr %5, align 1
+  %6 = getelementptr inbounds %S, ptr %0, i32 0, i32 3
+  store i8 0, ptr %6, align 1
+  %7 = getelementptr inbounds { i8, i8 }, ptr %6, i32 0, i32 1
+  store i8 undef, ptr %7, align 1
+  %8 = getelementptr inbounds %S, ptr %0, i32 0, i32 4
+  store i8 0, ptr %8, align 1
+  %9 = getelementptr inbounds { i8, i8 }, ptr %8, i32 0, i32 1
+  store i8 undef, ptr %9, align 1
+  %10 = getelementptr inbounds %S, ptr %0, i32 0, i32 5
+  store i8 0, ptr %10, align 1
+  %11 = getelementptr inbounds { i8, i8 }, ptr %10, i32 0, i32 1
+  store i8 undef, ptr %11, align 1
+  %12 = getelementptr inbounds %S, ptr %0, i32 0, i32 6
+  store i8 0, ptr %12, align 1
+  %13 = getelementptr inbounds { i8, i8 }, ptr %12, i32 0, i32 1
+  store i8 undef, ptr %13, align 1
+  %14 = getelementptr inbounds %S, ptr %0, i32 0, i32 7
+  store i8 0, ptr %14, align 1
+  %15 = getelementptr inbounds { i8, i8 }, ptr %14, i32 0, i32 1
+  store i8 undef, ptr %15, align 1
+  ret void
+}
+
+declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)