Index: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -49,12 +50,18 @@ STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastOther , "Number of other instrs removed"); STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); +STATISTIC(NumModifiedStores, "Number of stores modified"); static cl::opt EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE")); +static cl::opt +EnablePartialStoreMerging("enable-dse-partial-store-merging", + cl::init(true), cl::Hidden, + cl::desc("Enable partial store merging in DSE")); + //===----------------------------------------------------------------------===// // Helper functions @@ -287,14 +294,22 @@ } namespace { -enum OverwriteResult { OW_Begin, OW_Complete, OW_End, OW_Unknown }; +enum OverwriteResult { + OW_Begin, + OW_Complete, + OW_End, + OW_PartialEarlierWithFullLater, + OW_Unknown +}; } /// Return 'OW_Complete' if a store to the 'Later' location completely /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the -/// beginning of the 'Earlier' location is overwritten by 'Later', or -/// 'OW_Unknown' if nothing can be determined. +/// beginning of the 'Earlier' location is overwritten by 'Later'. +/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was +/// overwritten by a latter (smaller) store which doesn't write outside the big +/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined. static OverwriteResult isOverwrite(const MemoryLocation &Later, const MemoryLocation &Earlier, const DataLayout &DL, @@ -427,6 +442,19 @@ } } + // Check for an earlier store which writes to all the memory locations that + // the later store writes to. + if (EnablePartialStoreMerging && LaterOff >= EarlierOff && + int64_t(EarlierOff + Earlier.Size) > LaterOff && + uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) { + DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" << EarlierOff + << ", " << int64_t(EarlierOff + Earlier.Size) + << ") by a later store [" << LaterOff << ", " + << int64_t(LaterOff + Later.Size) << ")\n"); + // TODO: Maybe come up with a better name? + return OW_PartialEarlierWithFullLater; + } + // Another interesting case is if the later store overwrites the end of the // earlier store. // @@ -1094,6 +1122,8 @@ // If we find a write that is a) removable (i.e., non-volatile), b) is // completely obliterated by the store to 'Loc', and c) which we know that // 'Inst' doesn't load from, then we can remove it. + // Also try to merge two stores if a later one only touches memory written + // to by the earlier one. if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { int64_t InstWriteOffset, DepWriteOffset; @@ -1123,6 +1153,72 @@ bool IsOverwriteEnd = (OR == OW_End); MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, InstWriteOffset, LaterSize, IsOverwriteEnd); + } else if (EnablePartialStoreMerging && + OR == OW_PartialEarlierWithFullLater) { + auto *Earlier = dyn_cast(DepWrite); + auto *Later = dyn_cast(Inst); + if (Earlier && isa(Earlier->getValueOperand()) && + Later && isa(Later->getValueOperand())) { + // If the store we find is: + // a) partially overwritten by the store to 'Loc' + // b) the later store is fully contained in the earlier one and + // c) they both have a constant value + // Merge the two stores, replacing the earlier store's value with a + // merge of both values. + // TODO: Deal with other constant types (vectors, etc), and probably + // some mem intrinsics (if needed) + + APInt EarlierValue = + cast(Earlier->getValueOperand())->getValue(); + APInt LaterValue = + cast(Later->getValueOperand())->getValue(); + unsigned LaterBits = LaterValue.getBitWidth(); + assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth()); + LaterValue = LaterValue.zext(EarlierValue.getBitWidth()); + + // Offset of the smaller store inside the larger store + unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8; + unsigned LShiftAmount = + DL.isBigEndian() + ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits + : BitOffsetDiff; + APInt Mask = + APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount, + LShiftAmount + LaterBits); + // Clear the bits we'll be replacing, then OR with the smaller + // store, shifted appropriately. + APInt Merged = + (EarlierValue & ~Mask) | (LaterValue << LShiftAmount); + DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *DepWrite + << "\n Later: " << *Inst + << "\n Merged Value: " << Merged << '\n'); + + auto *SI = new StoreInst( + ConstantInt::get(Earlier->getValueOperand()->getType(), Merged), + Earlier->getPointerOperand(), false, Earlier->getAlignment(), + Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); + + unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, + LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_nontemporal}; + SI->copyMetadata(*DepWrite, MDToKeep); + ++NumModifiedStores; + + // Remove earlier, wider, store + size_t Idx = InstrOrdering.lookup(DepWrite); + InstrOrdering.erase(DepWrite); + InstrOrdering.insert(std::make_pair(SI, Idx)); + + // Delete the old stores and now-dead instructions that feed them. + deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering); + deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, + &InstrOrdering); + MadeChange = true; + + // We erased DepWrite and Inst (Loc); start over. + break; + } } } Index: llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll =================================================================== --- llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll +++ llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -dse -S | FileCheck %s +; RUN: opt < %s -basicaa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Ensure that the dead store is deleted in this case. It is wholely Index: llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll =================================================================== --- llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll +++ llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dse < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-bgq-linux" Index: llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll =================================================================== --- llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll +++ llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll @@ -0,0 +1,173 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128" + +define void @byte_by_byte_replacement(i32 *%ptr) { +; CHECK-LABEL: @byte_by_byte_replacement( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 151653132, i32* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + ;; This store's value should be modified as it should be better to use one + ;; larger store than several smaller ones. + ;; store will turn into 0x090A0B0C == 151653132 + store i32 305419896, i32* %ptr ; 0x12345678 + %bptr = bitcast i32* %ptr to i8* + %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1 + %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2 + %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 + + ;; We should be able to merge these four stores with the i32 above + ; value (and bytes) stored before ; 0x12345678 + store i8 9, i8* %bptr ; 09 + store i8 10, i8* %bptr1 ; 0A + store i8 11, i8* %bptr2 ; 0B + store i8 12, i8* %bptr3 ; 0C + ; 0x090A0B0C + + ret void +} + +define void @word_replacement(i64 *%ptr) { +; CHECK-LABEL: @word_replacement( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 72638273700655232, i64* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + store i64 72623859790382856, i64* %ptr ; 0x0102030405060708 + + %wptr = bitcast i64* %ptr to i16* + %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 + %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 + %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 + + ;; We should be able to merge these two stores with the i64 one above + ; value (and bytes) stored before ; 0x0102030405060708 + store i16 4128, i16* %wptr1 ; 1020 + store i16 28800, i16* %wptr3 ; 7080 + ; 0x0102102005067080 + + ret void +} + + +define void @differently_sized_replacements(i64 *%ptr) { +; CHECK-LABEL: @differently_sized_replacements( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 289077004501059343, i64* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f + + %bptr = bitcast i64* %ptr to i8* + %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6 + %wptr = bitcast i64* %ptr to i16* + %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 + %dptr = bitcast i64* %ptr to i32* + + ;; We should be able to merge all these stores with the i64 one above + ; value (and bytes) stored before ; 0x08090a0b0c0d0e0f + store i8 7, i8* %bptr6 ; 07 + store i16 1541, i16* %wptr2 ; 0605 + store i32 67305985, i32* %dptr ; 04030201 + ; 0x040302010605070f + ret void +} + + +define void @multiple_replacements_to_same_byte(i64 *%ptr) { +; CHECK-LABEL: @multiple_replacements_to_same_byte( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 289077004602248719, i64* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f + + %bptr = bitcast i64* %ptr to i8* + %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 + %wptr = bitcast i64* %ptr to i16* + %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 + %dptr = bitcast i64* %ptr to i32* + + ;; We should be able to merge all these stores with the i64 one above + ; value (and bytes) stored before ; 0x08090a0b0c0d0e0f + store i8 7, i8* %bptr3 ; 07 + store i16 1541, i16* %wptr1 ; 0605 + store i32 67305985, i32* %dptr ; 04030201 + ; 0x040302010c0d0e0f + ret void +} + +define void @merged_merges(i64 *%ptr) { +; CHECK-LABEL: @merged_merges( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 289081428418563599, i64* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f + + %bptr = bitcast i64* %ptr to i8* + %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 + %wptr = bitcast i64* %ptr to i16* + %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 + %dptr = bitcast i64* %ptr to i32* + + ;; We should be able to merge all these stores with the i64 one above + ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f + store i32 67305985, i32* %dptr ; 04030201 + store i16 1541, i16* %wptr1 ; 0605 + store i8 7, i8* %bptr3 ; 07 + ; 0x040306070c0d0e0f + ret void +} + +define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) { +; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8* +; CHECK-NEXT: [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1 +; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3 +; CHECK-NEXT: [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32* +; CHECK-NEXT: [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64* +; CHECK-NEXT: store i32 1234, i32* [[DPTR]], align 1 +; CHECK-NEXT: store i64 5678, i64* [[QPTR]], align 1 +; CHECK-NEXT: ret i8 0 +; +entry: + + store i64 0, i64* %ptr + + %bptr = bitcast i64* %ptr to i8* + %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1 + %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 + %dptr = bitcast i8* %bptrm1 to i32* + %qptr = bitcast i8* %bptr3 to i64* + + store i32 1234, i32* %dptr, align 1 + store i64 5678, i64* %qptr, align 1 + + ret i8 0 +} + +;; Test case from PR31777 +%union.U = type { i64 } + +define void @foo(%union.U* nocapture %u) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0 +; CHECK-NEXT: store i64 11821949021847552, i64* [[I]], align 8 +; CHECK-NEXT: ret void +; +entry: + %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0 + store i64 0, i64* %i, align 8 + %s = bitcast %union.U* %u to i16* + store i16 42, i16* %s, align 8 + ret void +} Index: llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll =================================================================== --- llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll +++ llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll @@ -0,0 +1,220 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" + +define void @byte_by_byte_replacement(i32 *%ptr) { +; CHECK-LABEL: @byte_by_byte_replacement( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 202050057, i32* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + ;; This store's value should be modified as it should be better to use one + ;; larger store than several smaller ones. + ;; store will turn into 0x0C0B0A09 == 202050057 + store i32 305419896, i32* %ptr ; 0x12345678 + %bptr = bitcast i32* %ptr to i8* + %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1 + %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2 + %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 + + ;; We should be able to merge these four stores with the i32 above + ; value (and bytes) stored before ; 0x12345678 + store i8 9, i8* %bptr ; 09 + store i8 10, i8* %bptr1 ; 0A + store i8 11, i8* %bptr2 ; 0B + store i8 12, i8* %bptr3 ; 0C + ; 0x0C0B0A09 + ret void +} + +define void @word_replacement(i64 *%ptr) { +; CHECK-LABEL: @word_replacement( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 8106482645252179720, i64* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + store i64 72623859790382856, i64* %ptr ; 0x0102030405060708 + + %wptr = bitcast i64* %ptr to i16* + %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 + %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 + %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 + + ;; We should be able to merge these two stores with the i64 one above + ; value (not bytes) stored before ; 0x0102030405060708 + store i16 4128, i16* %wptr1 ; 1020 + store i16 28800, i16* %wptr3 ; 7080 + ; 0x7080030410200708 + ret void +} + + +define void @differently_sized_replacements(i64 *%ptr) { +; CHECK-LABEL: @differently_sized_replacements( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 578437695752307201, i64* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f + + %bptr = bitcast i64* %ptr to i8* + %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6 + %wptr = bitcast i64* %ptr to i16* + %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 + %dptr = bitcast i64* %ptr to i32* + + ;; We should be able to merge all these stores with the i64 one above + ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f + store i8 7, i8* %bptr6 ; 07 + store i16 1541, i16* %wptr2 ; 0605 + store i32 67305985, i32* %dptr ; 04030201 + ; 0x0807060504030201 + ret void +} + + +define void @multiple_replacements_to_same_byte(i64 *%ptr) { +; CHECK-LABEL: @multiple_replacements_to_same_byte( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 579005069522043393, i64* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f + + %bptr = bitcast i64* %ptr to i8* + %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 + %wptr = bitcast i64* %ptr to i16* + %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 + %dptr = bitcast i64* %ptr to i32* + + ;; We should be able to merge all these stores with the i64 one above + ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f + store i8 7, i8* %bptr3 ; 07 + store i16 1541, i16* %wptr1 ; 0605 + store i32 67305985, i32* %dptr ; 04030201 + ; 0x08090a0b04030201 + ret void +} + +define void @merged_merges(i64 *%ptr) { +; CHECK-LABEL: @merged_merges( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 579005069572506113, i64* [[PTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f + + %bptr = bitcast i64* %ptr to i8* + %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 + %wptr = bitcast i64* %ptr to i16* + %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 + %dptr = bitcast i64* %ptr to i32* + + ;; We should be able to merge all these stores with the i64 one above + ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f + store i32 67305985, i32* %dptr ; 04030201 + store i16 1541, i16* %wptr1 ; 0605 + store i8 7, i8* %bptr3 ; 07 + ; 0x08090a0b07050201 + ret void +} + +define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) { +; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8* +; CHECK-NEXT: [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1 +; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3 +; CHECK-NEXT: [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32* +; CHECK-NEXT: [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64* +; CHECK-NEXT: store i32 1234, i32* [[DPTR]], align 1 +; CHECK-NEXT: store i64 5678, i64* [[QPTR]], align 1 +; CHECK-NEXT: ret i8 0 +; +entry: + + ; Also check that alias.scope metadata doesn't get dropped + store i64 0, i64* %ptr, !alias.scope !32 + + %bptr = bitcast i64* %ptr to i8* + %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1 + %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 + %dptr = bitcast i8* %bptrm1 to i32* + %qptr = bitcast i8* %bptr3 to i64* + + store i32 1234, i32* %dptr, align 1 + store i64 5678, i64* %qptr, align 1 + + ret i8 0 +} + +;; Test case from PR31777 +%union.U = type { i64 } + +define void @foo(%union.U* nocapture %u) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0 +; CHECK-NEXT: store i64 42, i64* [[I]], align 8 +; CHECK-NEXT: ret void +; +entry: + %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0 + store i64 0, i64* %i, align 8, !dbg !22, !tbaa !26, !noalias !30, !nontemporal !29 + %s = bitcast %union.U* %u to i16* + store i16 42, i16* %s, align 8 + ret void +} + +; Don't crash by operating on stale data if we merge (kill) the last 2 stores. + +define void @PR34074(i32* %x, i64* %y) { +; CHECK-LABEL: @PR34074( +; CHECK-NEXT: store i64 42, i64* %y +; CHECK-NEXT: store i32 4, i32* %x +; CHECK-NEXT: ret void +; + store i64 42, i64* %y ; independent store + %xbc = bitcast i32* %x to i8* + store i32 0, i32* %x ; big store of constant + store i8 4, i8* %xbc ; small store with mergeable constant + ret void +} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 306512)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "me.cpp", directory: "/compiler-explorer") +!2 = !{} +!7 = distinct !DISubprogram(name: "foo", linkageName: "foo(U*)", scope: !1, file: !1, line: 9, type: !8, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !20) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "U", file: !1, line: 4, size: 64, elements: !12, identifier: "typeinfo name for U") +!12 = !{!13, !17} +!13 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !11, file: !1, line: 5, baseType: !14, size: 64) +!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", file: !15, line: 55, baseType: !16) +!15 = !DIFile(filename: "/usr/include/stdint.h", directory: "/compiler-explorer") +!16 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) +!17 = !DIDerivedType(tag: DW_TAG_member, name: "s", scope: !11, file: !1, line: 6, baseType: !18, size: 16) +!18 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", file: !15, line: 49, baseType: !19) +!19 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) +!20 = !{!21} +!21 = !DILocalVariable(name: "u", arg: 1, scope: !7, file: !1, line: 9, type: !10) +!22 = !DILocation(line: 10, column: 8, scope: !7) + +!26 = !{!27, !27, i64 0} +!27 = !{!"omnipotent char", !28, i64 0} +!28 = !{!"Simple C++ TBAA"} + +!29 = !{i32 1} + +; Domains and scopes which might alias +!30 = !{!30} +!31 = !{!31, !30} + +!32 = !{!32} +!33 = !{!33, !32}