diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6868,8 +6868,13 @@
       Addr = Builder.CreateGEP(
           SplitStoreType, Addr,
           ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
-    Builder.CreateAlignedStore(V, Addr,
-                               Upper ? SI.getAlign() / 2 : SI.getAlign());
+    MaybeAlign Alignment = SI.getAlign();
+    if (Upper && Alignment) {
+      // When the original store is aligned, the lower part has the same
+      // alignment. Find the best alignment for the upper part.
+      Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
+    }
+    Builder.CreateAlignedStore(V, Addr, Alignment);
   };
 
   CreateSplitStore(LValue, false);
diff --git a/llvm/test/CodeGen/X86/split-store-unaligned.ll b/llvm/test/CodeGen/X86/split-store-unaligned.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/split-store-unaligned.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -mtriple=x86_64-unknown-unknown -force-split-store -S < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-w64-windows-gnu"
+
+define void @ttml_read_coords(float %x, i64* %p) {
+; CHECK-LABEL: @ttml_read_coords(
+; CHECK-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; CHECK-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; CHECK-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; CHECK-NEXT:    store i32 [[B]], i32* [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
+; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 1
+; CHECK-NEXT:    ret void
+;
+  %b = bitcast float %x to i32
+  %z = zext i32 0 to i64
+  %s = shl nuw nsw i64 %z, 32
+  %z2 = zext i32 %b to i64
+  %o = or i64 %s, %z2
+  store i64 %o, i64* %p, align 1
+  ret void
+}
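
Note (illustration only, not part of the patch): a minimal standalone sketch of the alignment arithmetic the new code relies on, assuming LLVM's llvm/Support/Alignment.h API (the Align overload of commonAlignment). The upper half of the split i64 store sits HalfValBitSize / 8 = 4 bytes past the original pointer, so the best alignment that can be claimed for it is the largest power of two dividing both the original alignment and that offset; the lower half simply keeps the original alignment.

// Sketch only (assumes llvm/Support/Alignment.h); shows the values
// commonAlignment() produces for the upper half of a split i64 store
// (byte offset 4).
#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  using llvm::Align;
  using llvm::commonAlignment;

  // Largest power of two dividing both the original alignment and offset 4.
  assert(commonAlignment(Align(16), 4).value() == 4); // over-aligned i64 store
  assert(commonAlignment(Align(8), 4).value() == 4);  // naturally aligned i64 store
  assert(commonAlignment(Align(4), 4).value() == 4);
  assert(commonAlignment(Align(1), 4).value() == 1);  // the `align 1` case in the new test
  return 0;
}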