Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp
===================================================================
--- lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -194,6 +194,18 @@
   return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
 }
 
+/// \returns true if the constant in \p Src should be replaced with a bitreverse
+/// of an inline immediate.
+static bool isReverseInlineImm(const SIInstrInfo *TII,
+                               const MachineOperand &Src,
+                               int32_t &ReverseImm) {
+  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src, 4))
+    return false;
+
+  ReverseImm = reverseBits(static_cast<uint32_t>(Src.getImm()));
+  return ReverseImm >= -16 && ReverseImm <= 64;
+}
+
 /// Copy implicit register operands from specified instruction to this
 /// instruction that are not part of the instruction definition.
 static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
@@ -240,14 +252,11 @@
       MachineOperand &Src = MI.getOperand(1);
       if (Src.isImm() &&
           TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
-        int64_t Imm = Src.getImm();
-        if (isInt<32>(Imm) && !TII->isInlineConstant(Src, 4)) {
-          int32_t ReverseImm = reverseBits(static_cast<uint32_t>(Imm));
-          if (ReverseImm >= -16 && ReverseImm <= 64) {
-            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
-            Src.setImm(ReverseImm);
-            continue;
-          }
+        int32_t ReverseImm;
+        if (isReverseInlineImm(TII, Src, ReverseImm)) {
+          MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
+          Src.setImm(ReverseImm);
+          continue;
         }
       }
     }
@@ -318,10 +327,17 @@
 
       // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
       if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
-        const MachineOperand &Src = MI.getOperand(1);
+        MachineOperand &Src = MI.getOperand(1);
 
-        if (Src.isImm() && isKImmOperand(TII, Src))
-          MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+        if (Src.isImm()) {
+          int32_t ReverseImm;
+          if (isKImmOperand(TII, Src))
+            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+          else if (isReverseInlineImm(TII, Src, ReverseImm)) {
+            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
+            Src.setImm(ReverseImm);
+          }
+        }
 
         continue;
       }
Index: test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
===================================================================
--- test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
+++ test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
@@ -156,3 +156,66 @@
   store i64 508, i64 addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: {{^}}s_materialize_0_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0{{$}}
+define void @s_materialize_0_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_1_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 1{{$}}
+define void @s_materialize_1_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 1)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_neg1_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, -1{{$}}
+define void @s_materialize_neg1_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -1)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_signbit_i32:
+; GCN: s_brev_b32 s{{[0-9]+}}, 1{{$}}
+define void @s_materialize_signbit_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -2147483648)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_64_i32:
+; GCN: s_brev_b32 s{{[0-9]+}}, 64{{$}}
+define void @s_materialize_rev_64_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 33554432)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_65_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0x82000000{{$}}
+define void @s_materialize_rev_65_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -2113929216)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_neg16_i32:
+; GCN: s_brev_b32 s{{[0-9]+}}, -16{{$}}
+define void @s_materialize_rev_neg16_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 268435455)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_neg17_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0xf7ffffff{{$}}
+define void @s_materialize_rev_neg17_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -134217729)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_1.0_i32:
+; GCN: s_movk_i32 s{{[0-9]+}}, 0x1fc{{$}}
+define void @s_materialize_rev_1.0_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 508)
+  ret void
+}
Index: test/CodeGen/AMDGPU/fcopysign.f32.ll
===================================================================
--- test/CodeGen/AMDGPU/fcopysign.f32.ll
+++ test/CodeGen/AMDGPU/fcopysign.f32.ll
@@ -1,8 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-
 declare float @llvm.copysign.f32(float, float) nounwind readnone
 declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone
 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
@@ -15,7 +14,7 @@
 ; VI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0x30
 ; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
 ; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
-; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2
 ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
Index: test/CodeGen/AMDGPU/fcopysign.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fcopysign.f64.ll
+++ test/CodeGen/AMDGPU/fcopysign.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 
 declare double @llvm.copysign.f64(double, double) nounwind readnone
@@ -12,7 +12,7 @@
 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
 ; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
 ; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
-; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2
 ; GCN-DAG: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
 ; GCN-DAG: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
 ; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
Index: test/CodeGen/AMDGPU/llvm.round.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; FUNC-LABEL: {{^}}round_f64:
 ; SI: s_endpgm
@@ -20,7 +20,7 @@
 
 ; SI-DAG: v_cmp_eq_i32
 
-; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
+; SI-DAG: s_brev_b32 [[BFIMASK:s[0-9]+]], -2{{$}}
 
 ; SI-DAG: v_cmp_gt_i32
 ; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]]
Index: test/CodeGen/AMDGPU/llvm.round.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.round.ll
+++ test/CodeGen/AMDGPU/llvm.round.ll
@@ -4,7 +4,7 @@
 
 ; FUNC-LABEL: {{^}}round_f32:
 ; SI-DAG: s_load_dword [[SX:s[0-9]+]]
-; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff
+; SI-DAG: s_brev_b32 [[K:s[0-9]+]], -2{{$}}
 ; SI-DAG: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]
 ; SI-DAG: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
 ; SI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
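
For reference, the condition the patch introduces can be reproduced outside the pass. Below is a minimal standalone C++ sketch (not part of the patch; reverseBits32 and fitsAsReversedInline are illustrative names, not LLVM APIs) of the isReverseInlineImm() check: a 32-bit mov of a literal is only rewritten to s_brev_b32 / v_bfrev_b32 when the bit-reversed value lands in the integer inline-constant range [-16, 64], which is why 0x7fffffff (reversed: -2) becomes free while 0x82000000 (reversed: 65) keeps its literal in the tests above.

#include <cstdint>
#include <cstdio>

// Reverse the bit order of a 32-bit value (bit 0 swaps with bit 31, etc.).
static uint32_t reverseBits32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

// True if Imm, once bit-reversed, lands in the integer inline-constant range
// [-16, 64] and can therefore be materialized with s_brev_b32 / v_bfrev_b32
// instead of a 32-bit literal.
static bool fitsAsReversedInline(int32_t Imm, int32_t &ReverseImm) {
  ReverseImm = static_cast<int32_t>(reverseBits32(static_cast<uint32_t>(Imm)));
  return ReverseImm >= -16 && ReverseImm <= 64;
}

int main() {
  int32_t Rev;
  // 0x7fffffff reverses to 0xfffffffe == -2, so the fcopysign/round tests now
  // expect "s_brev_b32 ..., -2" instead of "s_mov_b32 ..., 0x7fffffff".
  printf("%d %d\n", fitsAsReversedInline(0x7fffffff, Rev), Rev);            // 1 -2
  // 0x82000000 reverses to 65, just outside [-16, 64], so the literal stays
  // (see s_materialize_rev_65_i32).
  printf("%d %d\n", fitsAsReversedInline(INT32_MIN + 0x2000000, Rev), Rev); // 0 65
  return 0;
}

Running the sketch prints "1 -2" and "0 65", matching the s_brev_b32 operands and kept literals checked by the updated tests.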