diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -722,6 +722,7 @@
   let hasSideEffects = 0;
   let SALU = 1;
   let SOPK = 1;
+  let FixedSize = 1;
   let SchedRW = [WriteSALU];
   let UseNamedOperandTable = 1;
   string Mnemonic = opName;
diff --git a/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll b/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
@@ -0,0 +1,99 @@
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -debug-only=branch-relaxation %s -o - 2>&1 | FileCheck %s
+; The -debug-only option exists only in assertion-enabled builds.
+; REQUIRES: asserts
+
+; Branch relaxation's debug dump must report these block offsets and sizes for
+; a function containing SOPK instructions such as s_waitcnt_vscnt.
+; CHECK: Basic blocks after relaxation
+; CHECK: %bb.0 offset=00000000 size=0x6c
+; CHECK: %bb.1 offset=0000006c size=0x3c
+; CHECK: %bb.2 offset=000000a8 size=0x2c
+; CHECK: %bb.3 offset=000000d4 size=0
+; CHECK: %bb.4 offset=000000d4 size=0x28
+
+; CHECK: foo:
+; Function Attrs: convergent noinline nounwind
+define void @foo(i32 %a, i32 %b, float %c, float %d) unnamed_addr #8 {
+; CHECK-NEXT: bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: s_or_saveexec_b32 s4, -1
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: s_mov_b32 exec_lo, s4
+; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: v_sqrt_f32_e64 v2, v2
+; CHECK-NEXT: v_and_b32_e64 v1, 1, v1
+; CHECK-NEXT: v_cmp_eq_u32_e64 s5, v1, 1
+; CHECK-NEXT: s_mov_b32 s4, -1
+; CHECK-NEXT: s_xor_b32 s4, s5, s4
+; CHECK-NEXT: v_writelane_b32 v0, s4, 0
+; CHECK-NEXT: v_writelane_b32 v0, s4, 1
+; CHECK-NEXT: s_mov_b32 s4, exec_lo
+; CHECK-NEXT: v_writelane_b32 v0, s4, 2
+; CHECK-NEXT: s_and_b32 s4, s4, s5
+; CHECK-NEXT: s_mov_b32 exec_lo, s4
+; CHECK-NEXT: s_cbranch_execz .LBB0_2
+  %1 = and i32 %a, 50331648
+  %2 = icmp eq i32 %b, 0
+  %3 = fadd reassoc nnan ninf nsz contract float %c, %d
+  %4 = fcmp olt float %c, 0x39F0000000000000
+  %5 = trunc i32 %a to i1
+  %6 = select i1 %5, float 0x41F0000000000000, float 1.000000e+00
+  %7 = fmul float %6, %6
+  %8 = tail call i1 @llvm.amdgcn.class.f32(float %7, i32 608)
+  %9 = select i1 %5, float 0x3EF0000000000000, float 1.000000e+00
+  %10 = tail call float @llvm.sqrt.f32(float %c)
+  %11 = trunc i32 %b to i1
+  br i1 %11, label %bb.1, label %bb.2
+
+bb.1:
+; CHECK-NEXT: %bb.1:
+; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: v_readlane_b32 s4, v0, 0
+; CHECK-NEXT: s_mov_b32 s5, 0x1f8
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_cmp_class_f32_e64 s5, v1, s5
+; CHECK-NEXT: s_andn2_b32 s4, s4, exec_lo
+; CHECK-NEXT: s_and_b32 s5, s5, exec_lo
+; CHECK-NEXT: s_or_b32 s4, s4, s5
+; CHECK-NEXT: v_writelane_b32 v0, s4, 1
+  %12 = tail call i1 @llvm.amdgcn.class.f32(float %d, i32 504)
+  %13 = fcmp reassoc nnan ninf nsz contract ogt float %10, 0.000000e+00
+  %or.cond3.i.i = select i1 %12, i1 true, i1 false
+  br i1 %or.cond3.i.i, label %bb.2, label %bb.3
+
+bb.2:
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: v_readlane_b32 s4, v0, 2
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; CHECK-NEXT: v_readlane_b32 s5, v0, 1
+; CHECK-NEXT: s_mov_b32 s4, exec_lo
+; CHECK-NEXT: v_writelane_b32 v0, s4, 3
+; CHECK-NEXT: s_and_b32 s4, s4, s5
+; CHECK-NEXT: s_mov_b32 exec_lo, s4
+; CHECK-NEXT: s_cbranch_execz .LBB0_4
+  %14 = fdiv reassoc nnan ninf nsz contract float 1.000000e+00, %c
+  %15 = fcmp olt float %c, 0x39F0000000000000
+  %16 = fmul float %14, %d
+  %k = trunc i32 %b to i1
+  %17 = select i1 %k, float %16, float %c
+  br label %bb.3
+
+bb.3:
+; CHECK-NEXT: %bb.3:
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: v_readlane_b32 s4, v0, 3
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; CHECK-NEXT: s_or_saveexec_b32 s4, -1
+; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b32 exec_lo, s4
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+  %18 = fadd reassoc nnan ninf nsz contract float %c, %d
+  %19 = fadd reassoc nnan ninf nsz contract float %18, %c
+  ret void
+}
+
+declare i1 @llvm.amdgcn.class.f32(float, i32)
+declare float @llvm.sqrt.f32(float)