Patch summary (two files):

* lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp: the loop over
  BlockingStoresDispSizeMap used to compare each entry only with the entry
  immediately before it, erasing that previous entry when
  CurrDisp + CurrSize <= PrevDisp + PrevSize. It now keeps a stack of
  surviving entries and pops every entry on top of the stack for which that
  condition holds before pushing the current one; the map is then cleared
  and rebuilt from the stack.
* test/CodeGen/X86/pr38743.ll: new llc/FileCheck test; per its NOTE line the
  assertions were autogenerated by utils/update_llc_test_checks.py.

NOTE(review): the template arguments on the two SmallVector declarations
(SmallVector<int64_t, 2> and SmallVector<std::pair<int64_t, unsigned>, 0>)
were reconstructed; confirm against the upstream revision before applying.

Index: llvm/trunk/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ llvm/trunk/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -641,21 +641,22 @@
   if (BlockingStoresDispSizeMap.size() <= 1)
     return;
 
-  int64_t PrevDisp = BlockingStoresDispSizeMap.begin()->first;
-  unsigned PrevSize = BlockingStoresDispSizeMap.begin()->second;
-  SmallVector<int64_t, 2> ForRemoval;
-  for (auto DispSizePair = std::next(BlockingStoresDispSizeMap.begin());
-       DispSizePair != BlockingStoresDispSizeMap.end(); ++DispSizePair) {
-    int64_t CurrDisp = DispSizePair->first;
-    unsigned CurrSize = DispSizePair->second;
-    if (CurrDisp + CurrSize <= PrevDisp + PrevSize) {
-      ForRemoval.push_back(PrevDisp);
+  SmallVector<std::pair<int64_t, unsigned>, 0> DispSizeStack;
+  for (auto DispSizePair : BlockingStoresDispSizeMap) {
+    int64_t CurrDisp = DispSizePair.first;
+    unsigned CurrSize = DispSizePair.second;
+    while (DispSizeStack.size()) {
+      int64_t PrevDisp = DispSizeStack.back().first;
+      unsigned PrevSize = DispSizeStack.back().second;
+      if (CurrDisp + CurrSize > PrevDisp + PrevSize)
+        break;
+      DispSizeStack.pop_back();
     }
-    PrevDisp = CurrDisp;
-    PrevSize = CurrSize;
+    DispSizeStack.push_back(DispSizePair);
   }
-  for (auto Disp : ForRemoval)
-    BlockingStoresDispSizeMap.erase(Disp);
+  BlockingStoresDispSizeMap.clear();
+  for (auto Disp : DispSizeStack)
+    BlockingStoresDispSizeMap.insert(Disp);
 }
 
 bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {
Index: llvm/trunk/test/CodeGen/X86/pr38743.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr38743.ll
+++ llvm/trunk/test/CodeGen/X86/pr38743.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+%0 = type { %1 }
+%1 = type { %2 }
+%2 = type { %3 }
+%3 = type { %4 }
+%4 = type { %5 }
+%5 = type { i64, i64, i8* }
+%6 = type { %7, [23 x i8] }
+%7 = type { i8 }
+
+@.str.16 = external dso_local unnamed_addr constant [16 x i8], align 1
+@.str.17 = external dso_local unnamed_addr constant [12 x i8], align 1
+@.str.18 = external dso_local unnamed_addr constant [15 x i8], align 1
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #0
+
+define void @pr38743() #1 align 2 {
+; CHECK-LABEL: pr38743:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    cmpl $3, %eax
+; CHECK-NEXT:    je .LBB0_4
+; CHECK-NEXT:  # %bb.1: # %bb
+; CHECK-NEXT:    cmpl $1, %eax
+; CHECK-NEXT:    je .LBB0_2
+; CHECK-NEXT:  # %bb.3: # %bb5
+; CHECK-NEXT:    movzwl .str.17+{{.*}}(%rip), %eax
+; CHECK-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-NEXT:    jmp .LBB0_5
+; CHECK-NEXT:  .LBB0_4: # %bb8
+; CHECK-NEXT:    movq .str.18+{{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-NEXT:    jmp .LBB0_5
+; CHECK-NEXT:  .LBB0_2: # %bb2
+; CHECK-NEXT:    movq .str.16+{{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-NEXT:  .LBB0_5: # %bb12
+; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    movq %rax, (%rax)
+; CHECK-NEXT:    movb -{{[0-9]+}}(%rsp), %al
+; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    movzwl -{{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %esi
+; CHECK-NEXT:    movb -{{[0-9]+}}(%rsp), %dil
+; CHECK-NEXT:    movb %al, (%rax)
+; CHECK-NEXT:    movq %rcx, 1(%rax)
+; CHECK-NEXT:    movw %dx, 9(%rax)
+; CHECK-NEXT:    movl %esi, 11(%rax)
+; CHECK-NEXT:    movb %dil, 15(%rax)
+; CHECK-NEXT:    retq
+bb:
+  %tmp = alloca %0, align 16
+  %tmp1 = bitcast %0* %tmp to i8*
+  switch i32 undef, label %bb11 [
+    i32 1, label %bb2
+    i32 4, label %bb5
+    i32 2, label %bb5
+    i32 3, label %bb8
+  ]
+
+bb2:                                              ; preds = %bb
+  %tmp3 = bitcast %0* %tmp to %6*
+  %tmp4 = getelementptr inbounds %6, %6* %tmp3, i64 0, i32 1, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp4, i8* align 1 getelementptr inbounds ([16 x i8], [16 x i8]* @.str.16, i64 0, i64 0), i64 15, i1 false)
+  br label %bb12
+
+bb5:                                              ; preds = %bb, %bb
+  %tmp6 = bitcast %0* %tmp to %6*
+  %tmp7 = getelementptr inbounds %6, %6* %tmp6, i64 0, i32 1, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp7, i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str.17, i64 0, i64 0), i64 10, i1 false)
+  br label %bb12
+
+bb8:                                              ; preds = %bb
+  %tmp9 = bitcast %0* %tmp to %6*
+  %tmp10 = getelementptr inbounds %6, %6* %tmp9, i64 0, i32 1, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp10, i8* align 1 getelementptr inbounds ([15 x i8], [15 x i8]* @.str.18, i64 0, i64 0), i64 14, i1 false)
+  br label %bb12
+
+bb11:                                             ; preds = %bb
+  unreachable
+
+bb12:                                             ; preds = %bb8, %bb5, %bb2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 undef, i8* nonnull align 16 %tmp1, i64 24, i1 false) #2
+  ret void
+}
+
+attributes #0 = { argmemonly nounwind }
+attributes #1 = { "target-features"="+sse,+sse2,+sse3,+sse4.2" }
+attributes #2 = { nounwind }