Index: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/MachineScheduler.cpp
+++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp
@@ -1434,12 +1434,15 @@
   // Check if either the dest or source is local. If it's live across a back
   // edge, it's not local. Note that if both vregs are live across the back
   // edge, we cannot successfully constrain the copy without cyclic scheduling.
-  unsigned LocalReg = DstReg;
-  unsigned GlobalReg = SrcReg;
+  // If both the copy's source and dest are local live intervals, then we
+  // should treat the dest as the global for the purpose of adding
+  // constraints. This adds edges from the source's other uses to the copy.
+  unsigned LocalReg = SrcReg;
+  unsigned GlobalReg = DstReg;
   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
-    LocalReg = SrcReg;
-    GlobalReg = DstReg;
+    LocalReg = DstReg;
+    GlobalReg = SrcReg;
     LocalLI = &LIS->getInterval(LocalReg);
     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
       return;
Index: llvm/trunk/test/CodeGen/X86/pr21792.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr21792.ll
+++ llvm/trunk/test/CodeGen/X86/pr21792.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=x86_64-linux -mcpu=corei7 < %s | FileCheck %s
+; This fixes a missing case in the MI scheduler's constrainLocalCopy exposed by
+; PR21792
+
+@stuff = external constant [256 x double], align 16
+
+define void @func(<4 x float> %vx) {
+entry:
+  %tmp2 = bitcast <4 x float> %vx to <2 x i64>
+  %and.i = and <2 x i64> %tmp2,
+  %tmp3 = bitcast <2 x i64> %and.i to <4 x i32>
+  %index.sroa.0.0.vec.extract = extractelement <4 x i32> %tmp3, i32 0
+  %idx.ext = sext i32 %index.sroa.0.0.vec.extract to i64
+  %add.ptr = getelementptr inbounds i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext
+  %tmp4 = bitcast i8* %add.ptr to double*
+  %index.sroa.0.4.vec.extract = extractelement <4 x i32> %tmp3, i32 1
+  %idx.ext5 = sext i32 %index.sroa.0.4.vec.extract to i64
+  %add.ptr6 = getelementptr inbounds i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext5
+  %tmp5 = bitcast i8* %add.ptr6 to double*
+  %index.sroa.0.8.vec.extract = extractelement <4 x i32> %tmp3, i32 2
+  %idx.ext14 = sext i32 %index.sroa.0.8.vec.extract to i64
+  %add.ptr15 = getelementptr inbounds i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext14
+  %tmp6 = bitcast i8* %add.ptr15 to double*
+  %index.sroa.0.12.vec.extract = extractelement <4 x i32> %tmp3, i32 3
+  %idx.ext19 = sext i32 %index.sroa.0.12.vec.extract to i64
+  %add.ptr20 = getelementptr inbounds i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext19
+  %tmp7 = bitcast i8* %add.ptr20 to double*
+  %add.ptr46 = getelementptr inbounds i8* bitcast (double* getelementptr inbounds ([256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext
+  %tmp16 = bitcast i8* %add.ptr46 to double*
+  %add.ptr51 = getelementptr inbounds i8* bitcast (double* getelementptr inbounds ([256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext5
+  %tmp17 = bitcast i8* %add.ptr51 to double*
+  call void @toto(double* %tmp4, double* %tmp5, double* %tmp6, double* %tmp7, double* %tmp16, double* %tmp17)
+  ret void
+; CHECK-LABEL: func:
+; CHECK: pextrq $1, %xmm0,
+; CHECK-NEXT: movd %xmm0, %r[[AX:..]]
+; CHECK-NEXT: movslq %e[[AX]],
+; CHECK-NEXT: sarq $32, %r[[AX]]
+}
+
+declare void @toto(double*, double*, double*, double*, double*, double*)
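The functional change above swaps the initial role assignment in constrainLocalCopy: the copy's source is now tried first as the local live interval, so when both vregs are local the dest is treated as the global and the scheduler adds edges from the source's other uses to the copy. Below is a minimal standalone sketch of that selection logic; pickCopyRoles and the isLocal callback are illustrative names for this note, not LLVM API.

#include <functional>
#include <optional>
#include <utility>

// Returns {LocalReg, GlobalReg} for a copy, or std::nullopt when neither
// vreg is local to the scheduling region -- both are then live across the
// back edge, and the copy cannot be constrained without cyclic scheduling.
std::optional<std::pair<unsigned, unsigned>>
pickCopyRoles(unsigned SrcReg, unsigned DstReg,
              const std::function<bool(unsigned)> &isLocal) {
  // Try the source as the local interval first: when both vregs are local,
  // this treats the dest as the global, adding edges from the source's
  // other uses to the copy.
  if (isLocal(SrcReg))
    return std::make_pair(SrcReg, DstReg);
  // Otherwise fall back to the dest as the local interval.
  if (isLocal(DstReg))
    return std::make_pair(DstReg, SrcReg);
  return std::nullopt; // matches the early return in the hunk above
}

Before this change the dest was tried first, so in the both-local case the copy was constrained against the dest's uses instead; that is the case PR21792 exposed.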
Index: llvm/trunk/test/CodeGen/X86/vector-idiv.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv.ll
+++ llvm/trunk/test/CodeGen/X86/vector-idiv.ll
@@ -841,19 +841,18 @@
 ; SSE-LABEL: test8:
 ; SSE: # BB#0:
 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
-; SSE-NEXT: movdqa %xmm2, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: pand %xmm0, %xmm1
 ; SSE-NEXT: movdqa %xmm0, %xmm3
 ; SSE-NEXT: psrad $31, %xmm3
 ; SSE-NEXT: pand %xmm2, %xmm3
-; SSE-NEXT: paddd %xmm1, %xmm3
 ; SSE-NEXT: movdqa %xmm0, %xmm1
 ; SSE-NEXT: pmuludq %xmm2, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; SSE-NEXT: pmuludq %xmm2, %xmm4
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; SSE-NEXT: psrad $31, %xmm2
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: paddd %xmm2, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE-NEXT: pmuludq %xmm4, %xmm2
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
 ; SSE-NEXT: psubd %xmm3, %xmm1
 ; SSE-NEXT: paddd %xmm0, %xmm1
Index: llvm/trunk/test/CodeGen/X86/widen_load-2.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/widen_load-2.ll
+++ llvm/trunk/test/CodeGen/X86/widen_load-2.ll
@@ -76,10 +76,9 @@
 ; CHECK: pmovzxwd (%{{.*}}), %[[R0:xmm[0-9]+]]
 ; CHECK-NEXT: pmovzxwd (%{{.*}}), %[[R1:xmm[0-9]+]]
 ; CHECK-NEXT: paddd %[[R0]], %[[R1]]
-; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
-; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
-; CHECK-NEXT: pmovzxdq %[[R0]], %[[R0]]
 ; CHECK-NEXT: pextrw $4, %[[R1]], 4(%{{.*}})
+; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
+; CHECK-NEXT: pmovzxdq %[[R1]], %[[R0]]
 ; CHECK-NEXT: movd %[[R0]], (%{{.*}})
 %a = load %i16vec3* %ap, align 16
 %b = load %i16vec3* %bp, align 16
@@ -144,10 +143,9 @@
 ; CHECK: pmovzxbd (%{{.*}}), %[[R0:xmm[0-9]+]]
 ; CHECK-NEXT: pmovzxbd (%{{.*}}), %[[R1:xmm[0-9]+]]
 ; CHECK-NEXT: paddd %[[R0]], %[[R1]]
-; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
-; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
-; CHECK-NEXT: pmovzxwq %[[R0]], %[[R0]]
 ; CHECK-NEXT: pextrb $8, %[[R1]], 2(%{{.*}})
+; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
+; CHECK-NEXT: pmovzxwq %[[R1]], %[[R0]]
 ; CHECK-NEXT: movd %[[R0]], %e[[R2:[abcd]]]x
 ; CHECK-NEXT: movw %[[R2]]x, (%{{.*}})
 %a = load %i8vec3* %ap, align 16
@@ -206,10 +204,9 @@
 ; CHECK-NEXT: pinsrd $2, %e[[R0]]x, %[[X1]]
 ; CHECK-NEXT: pextrd $3, %[[X0]], %e[[R0:[abcd]]]x
 ; CHECK-NEXT: pinsrd $3, %e[[R0]]x, %[[X1]]
-; CHECK-NEXT: movdqa %[[X1]], %[[X2:xmm[0-9]+]]
-; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[X2]]
-; CHECK-NEXT: pmovzxwq %[[X2]], %[[X3:xmm[0-9]+]]
 ; CHECK-NEXT: pextrb $8, %[[X1]], 2(%{{.*}})
+; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[X1]]
+; CHECK-NEXT: pmovzxwq %[[X1]], %[[X3:xmm[0-9]+]]
 ; CHECK-NEXT: movd %[[X3]], %e[[R0:[abcd]]]x
 ; CHECK-NEXT: movw %[[R0]]x, (%{{.*}})
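The vector-idiv.ll and widen_load-2.ll updates are scheduling churn from the same change: each expansion is unchanged in substance, but the instructions are reordered and one movdqa register copy per sequence disappears, apparently because the copy's source can now be constrained to die at the copy. For reference, the splat constant 2454267027 (0x92492493) in test8 is the standard signed magic number for division by 7; assuming test8 divides <4 x i32> by 7 (the IR itself is not shown in the hunk), here is a scalar sketch of what the pmuludq / psrad $31 / pand sequence computes. It assumes arithmetic right shift of negative integers, as on x86.

#include <cassert>
#include <cstdint>

// Signed high multiply built from an unsigned 32x32->64 multiply -- the job
// pmuludq plus the psrad $31 / pand fixups do in the SSE sequence above:
//   mulhs(a, b) = mulhu(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0)
static int32_t mulhs_via_mulhu(int32_t a, int32_t b) {
  uint32_t hi = (uint32_t)(((uint64_t)(uint32_t)a * (uint32_t)b) >> 32);
  hi -= (uint32_t)(a >> 31) & (uint32_t)b; // fixup when a < 0
  hi -= (uint32_t)(b >> 31) & (uint32_t)a; // fixup when b < 0 (the psubd)
  return (int32_t)hi;
}

static int32_t sdiv_by_7(int32_t x) {
  int32_t q = mulhs_via_mulhu(x, (int32_t)0x92492493); // magic 2454267027
  q += x;                            // magic is negative, so add x back (paddd)
  q >>= 2;                           // shift amount for divisor 7
  q += (int32_t)((uint32_t)q >> 31); // round the quotient toward zero
  return q;
}

int main() {
  for (int32_t x = -1000; x <= 1000; ++x)
    assert(sdiv_by_7(x) == x / 7);
  return 0;
}

The shift and rounding steps correspond to instructions past the context shown in the hunk; only the multiply-high portion and its sign fixups are visible above.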