Index: lib/Target/R600/SIInstrInfo.td
===================================================================
--- lib/Target/R600/SIInstrInfo.td
+++ lib/Target/R600/SIInstrInfo.td
@@ -362,12 +362,12 @@
   : SOPC_Helper;
 
 class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
-  op, (outs SReg_32:$dst), (ins i16imm:$src0),
+  op, (outs SReg_32:$dst), (ins u16imm:$src0),
   opName#" $dst, $src0", pattern
 >;
 
 class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
-  op, (outs SReg_64:$dst), (ins i16imm:$src0),
+  op, (outs SReg_64:$dst), (ins u16imm:$src0),
   opName#" $dst, $src0", pattern
 >;
Index: lib/Target/R600/SILowerControlFlow.cpp
===================================================================
--- lib/Target/R600/SILowerControlFlow.cpp
+++ lib/Target/R600/SILowerControlFlow.cpp
@@ -586,6 +586,8 @@
       MachineBasicBlock::iterator Start = MBB.getFirstNonPHI();
       const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32);
 
+      assert(isInt<16>(StackOffset) && isInt<16>(StackSizeBytes));
+
       BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO)
               .addImm(StackOffset);
Index: lib/Target/R600/SIShrinkInstructions.cpp
===================================================================
--- lib/Target/R600/SIShrinkInstructions.cpp
+++ lib/Target/R600/SIShrinkInstructions.cpp
@@ -189,6 +189,19 @@
       Next = std::next(I);
       MachineInstr &MI = *I;
 
+      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
+      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
+        const MachineOperand &Src = MI.getOperand(1);
+
+        // TODO: Handle FPImm?
+        if (Src.isImm()) {
+          if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src)) {
+            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+            continue;
+          }
+        }
+      }
+
       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
         continue;
Index: test/CodeGen/R600/flat-address-space.ll
===================================================================
--- test/CodeGen/R600/flat-address-space.ll
+++ test/CodeGen/R600/flat-address-space.ll
@@ -156,8 +156,8 @@
 ; Check for prologue initializing special SGPRs pointing to scratch.
 ; CHECK-LABEL: {{^}}store_flat_scratch:
 ; CHECK: S_MOVK_I32 flat_scratch_lo, 0
-; CHECK-NO-PROMOTE: S_MOVK_I32 flat_scratch_hi, 40
-; CHECK-PROMOTE: S_MOVK_I32 flat_scratch_hi, 0
+; CHECK-NO-PROMOTE: S_MOVK_I32 flat_scratch_hi, 0x28{{$}}
+; CHECK-PROMOTE: S_MOVK_I32 flat_scratch_hi, 0x0{{$}}
 ; CHECK: FLAT_STORE_DWORD
 ; CHECK: S_BARRIER
 ; CHECK: FLAT_LOAD_DWORD
Index: test/CodeGen/R600/or.ll
===================================================================
--- test/CodeGen/R600/or.ll
+++ test/CodeGen/R600/or.ll
@@ -116,7 +116,7 @@
 ; SI-LABEL: {{^}}vector_or_i64_loadimm:
 ; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
-; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x146f
+; SI-DAG: S_MOVK_I32 [[HI_S_IMM:s[0-9]+]], 0x146f
 ; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
 ; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
 ; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
Index: test/CodeGen/R600/s_movk_i32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/s_movk_i32.ll
@@ -0,0 +1,184 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}s_movk_i32_k0:
+; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k1:
+; SI-DAG: S_MOVK_I32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k2:
+; SI-DAG: S_MOVK_I32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 64{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k3:
+; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k4:
+; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k5:
+; SI-DAG: S_MOVK_I32 [[LO_S_IMM:s[0-9]+]], 0xffef{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0xff00ffff{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k6:
+; SI-DAG: S_MOVK_I32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 63{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k7:
+; SI-DAG: S_MOVK_I32 [[LO_S_IMM:s[0-9]+]], 0x2000{{$}}
+; SI-DAG: S_MOVK_I32 [[HI_S_IMM:s[0-9]+]], 0x4000{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 70368744185856 ; ((1 << 13)) | ((1 << 14) << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+
+; SI-LABEL: {{^}}s_movk_i32_k8:
+; SI-DAG: S_MOVK_I32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k9:
+; SI-DAG: S_MOVK_I32 [[LO_S_IMM:s[0-9]+]], 0x8001{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k10:
+; SI-DAG: S_MOVK_I32 [[LO_S_IMM:s[0-9]+]], 0x8888{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k11:
+; SI-DAG: S_MOVK_I32 [[LO_S_IMM:s[0-9]+]], 0x8fff{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k12:
+; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0xffff7001{{$}}
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: S_ENDPGM
+define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
Index: test/CodeGen/R600/smrd.ll
===================================================================
--- test/CodeGen/R600/smrd.ll
+++ test/CodeGen/R600/smrd.ll
@@ -24,7 +24,7 @@
 ; SMRD load with an offset greater than the largest possible immediate.
 ; CHECK-LABEL: {{^}}smrd2:
-; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 0x400
+; CHECK: S_MOVK_I32 s[[OFFSET:[0-9]]], 0x400
 ; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
 ; CHECK: S_ENDPGM
 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
@@ -81,7 +81,7 @@
 ; largets possible immediate.
 ; immediate offset.
 ; CHECK-LABEL: {{^}}smrd_load_const2:
-; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 0x400
+; CHECK: S_MOVK_I32 s[[OFFSET:[0-9]]], 0x400
 ; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
 define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body: