Compute phiHasVGPROperands() once, ahead of the two-incoming-block check, and
skip that uniform-branch shortcut when the PHI has a VGPR operand, so such a
PHI is no longer exempted by that check. The later moveToVALU() condition
reuses the cached result. Adds a regression test.

NOTE(review): this patch was recovered from a whitespace-collapsed copy. Line
breaks and leading indentation were reconstructed, and the template arguments
on the SmallSet context line were restored as <unsigned, 8> -- confirm against
the target file, or apply with `patch -l` if whitespace does not match.

Index: lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -604,7 +604,8 @@
 
         // We don't need to fix the PHI if the common dominator of the
         // two incoming blocks terminates with a uniform branch.
-        if (MI.getNumExplicitOperands() == 5) {
+        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
           MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
           MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
 
@@ -649,8 +650,7 @@
         // is no chance for values to be over-written.
 
         SmallSet<unsigned, 8> Visited;
-        if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
-            !phiHasBreakDef(MI, MRI, Visited)) {
+        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
           DEBUG(dbgs() << "Fixing PHI: " << MI);
           TII->moveToVALU(MI);
         }
Index: test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}vgpr_to_sgpr_phi:
+; GCN: BB0_1:
+; GCN: v_add_i32_e32
+; GCN: s_branch BB0_1
+define void @vgpr_to_sgpr_phi(i32* nocapture %arg) local_unnamed_addr #0 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb8, %bb
+  %tmp = phi i32 [ 8, %bb ], [ %tmp9, %bb8 ]
+  %tmp2 = add nsw i32 %tmp, -1
+  %tmp3 = getelementptr inbounds i32, i32* %arg, i32 %tmp2
+  br i1 undef, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb1
+  %tmp5 = load i32, i32* %tmp3, align 4
+  %tmp6 = tail call i32 @llvm.amdgcn.ubfe.i32(i32 %tmp5, i32 16, i32 8) #2
+  %tmp7 = zext i32 %tmp6 to i64
+  br label %bb8
+
+bb8:                                              ; preds = %bb4, %bb1
+  %tmp9 = add nsw i32 %tmp, 2
+  br label %bb1
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { nounwind readnone }