Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td @@ -1952,8 +1952,7 @@ } def SI_ELSE : PseudoInstSI < - (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target), - [(set i64:$dst, (int_amdgcn_else i64:$src, bb:$target))]> { + (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target, i1imm:$execfix)> { let Constraints = "$src = $dst"; } @@ -2132,6 +2131,11 @@ let Predicates = [isGCN] in { +def : Pat< + (int_amdgcn_else i64:$src, bb:$target), + (SI_ELSE $src, $target, 0) +>; + def : Pat < (int_AMDGPU_kilp), (SI_KILL 0xbf800000) Index: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -84,7 +84,7 @@ bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB); void If(MachineInstr &MI); - void Else(MachineInstr &MI, bool ExecModified); + void Else(MachineInstr &MI); void Break(MachineInstr &MI); void IfBreak(MachineInstr &MI); void ElseBreak(MachineInstr &MI); @@ -252,7 +252,7 @@ MI.eraseFromParent(); } -void SILowerControlFlow::Else(MachineInstr &MI, bool ExecModified) { +void SILowerControlFlow::Else(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); unsigned Dst = MI.getOperand(0).getReg(); @@ -262,7 +262,7 @@ TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst) .addReg(Src); // Saved EXEC - if (ExecModified) { + if (MI.getOperand(3).getImm() != 0) { // Adjust the saved exec to account for the modifications during the flow // block that contains the ELSE. This can happen when WQM mode is switched // off. @@ -427,7 +427,6 @@ MachineBasicBlock *EmptyMBBAtEnd = nullptr; MachineBasicBlock::iterator I, Next; - bool ExecModified = false; for (I = MBB.begin(); I != MBB.end(); I = Next) { Next = std::next(I); @@ -438,9 +437,6 @@ if (TII->isFLAT(MI)) NeedFlat = true; - if (I->modifiesRegister(AMDGPU::EXEC, TRI)) - ExecModified = true; - switch (MI.getOpcode()) { default: break; case AMDGPU::SI_IF: @@ -449,7 +445,7 @@ break; case AMDGPU::SI_ELSE: - Else(MI, ExecModified); + Else(MI); break; case AMDGPU::SI_BREAK: Index: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -434,6 +434,9 @@ State = Needs; } + + if (MI.getOpcode() == AMDGPU::SI_ELSE && State == StateExact) + MI.getOperand(3).setImm(1); } if ((BI.OutNeeds & StateWQM) && State != StateWQM) { Index: llvm/trunk/test/CodeGen/AMDGPU/else.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/else.ll +++ llvm/trunk/test/CodeGen/AMDGPU/else.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: {{^}}else_no_execfix: +; CHECK: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 [[DST:s\[[0-9]+:[0-9]+\]]], +; CHECK-NEXT: s_xor_b64 exec, exec, [[DST]] +; CHECK-NEXT: ; mask branch +define amdgpu_ps float @else_no_execfix(i32 %z, float %v) { +main_body: + %cc = icmp sgt i32 %z, 5 + br i1 %cc, label %if, label %else + +if: + %v.if = fmul float %v, 2.0 + br label %end + +else: + %v.else = fmul float %v, 3.0 + br label %end + +end: + %r = phi float [ %v.if, %if ], [ %v.else, %else ] + ret float %r +} + +; CHECK-LABEL: {{^}}else_execfix_leave_wqm: +; CHECK: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 [[DST:s\[[0-9]+:[0-9]+\]]], +; CHECK-NEXT: s_and_b64 exec, exec, +; CHECK-NEXT: s_and_b64 [[DST]], exec, [[DST]] +; CHECK-NEXT: s_xor_b64 exec, exec, [[DST]] +; CHECK-NEXT: ; mask branch +define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) { +main_body: + %cc = icmp sgt i32 %z, 5 + br i1 %cc, label %if, label %else + +if: + %v.if = fmul float %v, 2.0 + br label %end + +else: + %c = fmul float %v, 3.0 + %c.i = bitcast float %c to i32 + %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) + %v.else = extractelement <4 x float> %tex, i32 0 + br label %end + +end: + %r = phi float [ %v.if, %if ], [ %v.else, %else ] + call void @llvm.amdgcn.buffer.store.f32(float %r, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0) + ret void +} + +declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind + +declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone