Index: llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp =================================================================== --- llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp +++ llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp @@ -721,7 +721,7 @@ // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. - SmallVector Regs; + SmallVector Regs; for (int rx : used) { assert(LiveRegs && "no space allocated for live registers"); const LiveReg &LR = LiveRegs[rx]; @@ -731,16 +731,11 @@ continue; } // Sorted insertion. - bool Inserted = false; - for (SmallVectorImpl::iterator i = Regs.begin(), e = Regs.end(); - i != e && !Inserted; ++i) { - if (LR.Def < i->Def) { - Inserted = true; - Regs.insert(i, LR); - } - } - if (!Inserted) - Regs.push_back(LR); + auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR, + [](const LiveReg *LHS, const LiveReg *RHS) { + return LHS->Def < RHS->Def; + }); + Regs.insert(I, &LR); } // doms are now sorted in order of appearance. Try to merge them all, giving @@ -748,14 +743,14 @@ DomainValue *dv = nullptr; while (!Regs.empty()) { if (!dv) { - dv = Regs.pop_back_val().Value; + dv = Regs.pop_back_val()->Value; // Force the first dv to match the current instruction. dv->AvailableDomains = dv->getCommonDomains(available); assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } - DomainValue *Latest = Regs.pop_back_val().Value; + DomainValue *Latest = Regs.pop_back_val()->Value; // Skip already merged values. if (Latest == dv || Latest->Next) continue; Index: llvm/trunk/test/CodeGen/X86/pr30284.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr30284.ll +++ llvm/trunk/test/CodeGen/X86/pr30284.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=avx512dq | FileCheck %s + +define void @f_f___un_3C_unf_3E_un_3C_unf_3E_() { +; CHECK-LABEL: f_f___un_3C_unf_3E_un_3C_unf_3E_: +; CHECK: # BB#0: +; CHECK-NEXT: vmovapd 0, %zmm0 +; CHECK-NEXT: vmovapd 64, %zmm1 +; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [0,16,0,16,0,16,0,16,0,16,0,16,0,16,0,16] +; CHECK-NEXT: vorpd %zmm2, %zmm0, %zmm0 {%k1} +; CHECK-NEXT: vorpd %zmm2, %zmm1, %zmm1 {%k1} +; CHECK-NEXT: vmovapd %zmm1, 64 +; CHECK-NEXT: vmovapd %zmm0, 0 +; CHECK-NEXT: retl + %a_load22 = load <16 x i64>, <16 x i64>* null, align 1 + %bitop = or <16 x i64> %a_load22, + %v.i = load <16 x i64>, <16 x i64>* null + %v1.i41 = select <16 x i1> undef, <16 x i64> %bitop, <16 x i64> %v.i + store <16 x i64> %v1.i41, <16 x i64>* null + ret void +}