Index: lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
===================================================================
--- lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -641,21 +641,23 @@
   if (BlockingStoresDispSizeMap.size() <= 1)
     return;
 
-  int64_t PrevDisp = BlockingStoresDispSizeMap.begin()->first;
-  unsigned PrevSize = BlockingStoresDispSizeMap.begin()->second;
-  SmallVector<int64_t, 2> ForRemoval;
-  for (auto DispSizePair = std::next(BlockingStoresDispSizeMap.begin());
+  SmallVector<std::pair<int64_t, unsigned>, 0> DispSizeStack;
+  for (auto DispSizePair = BlockingStoresDispSizeMap.begin();
        DispSizePair != BlockingStoresDispSizeMap.end(); ++DispSizePair) {
     int64_t CurrDisp = DispSizePair->first;
     unsigned CurrSize = DispSizePair->second;
-    if (CurrDisp + CurrSize <= PrevDisp + PrevSize) {
-      ForRemoval.push_back(PrevDisp);
+    while (DispSizeStack.size()) {
+      int64_t PrevDisp = DispSizeStack.back().first;
+      unsigned PrevSize = DispSizeStack.back().second;
+      if (CurrDisp + CurrSize > PrevDisp + PrevSize)
+        break;
+      DispSizeStack.pop_back();
     }
-    PrevDisp = CurrDisp;
-    PrevSize = CurrSize;
+    DispSizeStack.push_back(*DispSizePair);
   }
-  for (auto Disp : ForRemoval)
-    BlockingStoresDispSizeMap.erase(Disp);
+  BlockingStoresDispSizeMap.clear();
+  for (auto Disp : DispSizeStack)
+    BlockingStoresDispSizeMap.insert(Disp);
 }
 
 bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {
Index: test/CodeGen/X86/pr38743.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/pr38743.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s
+
+%0 = type { %1 }
+%1 = type { %2 }
+%2 = type { %3 }
+%3 = type { %4 }
+%4 = type { %5 }
+%5 = type { i64, i64, i8* }
+%6 = type { %7, [23 x i8] }
+%7 = type { i8 }
+
+@.str.16 = external dso_local unnamed_addr constant [16 x i8], align 1
+@.str.17 = external dso_local unnamed_addr constant [12 x i8], align 1
+@.str.18 = external dso_local unnamed_addr constant [15 x i8], align 1
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #0
+
+define void @pr38743() #1 align 2 {
+bb:
+  %tmp = alloca %0, align 16
+  %tmp1 = bitcast %0* %tmp to i8*
+  switch i32 undef, label %bb11 [
+    i32 1, label %bb2
+    i32 4, label %bb5
+    i32 2, label %bb5
+    i32 3, label %bb8
+  ]
+
+bb2:                                              ; preds = %bb
+  %tmp3 = bitcast %0* %tmp to %6*
+  %tmp4 = getelementptr inbounds %6, %6* %tmp3, i64 0, i32 1, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp4, i8* align 1 getelementptr inbounds ([16 x i8], [16 x i8]* @.str.16, i64 0, i64 0), i64 15, i1 false)
+  br label %bb12
+
+bb5:                                              ; preds = %bb, %bb
+  %tmp6 = bitcast %0* %tmp to %6*
+  %tmp7 = getelementptr inbounds %6, %6* %tmp6, i64 0, i32 1, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp7, i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str.17, i64 0, i64 0), i64 11, i1 false)
+  br label %bb12
+
+bb8:                                              ; preds = %bb
+  %tmp9 = bitcast %0* %tmp to %6*
+  %tmp10 = getelementptr inbounds %6, %6* %tmp9, i64 0, i32 1, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp10, i8* align 1 getelementptr inbounds ([15 x i8], [15 x i8]* @.str.18, i64 0, i64 0), i64 14, i1 false)
+  br label %bb12
+
+bb11:                                             ; preds = %bb
+  unreachable
+
+bb12:                                             ; preds = %bb8, %bb5, %bb2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 undef, i8* nonnull align 16 %tmp1, i64 24, i1 false) #2
+  ret void
+}
+
+attributes #0 = { argmemonly nounwind }
+attributes #1 = { "target-features"="+sse,+sse2,+sse3,+sse4.2" }
+attributes #2 = { nounwind }