Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp
===================================================================
--- lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -226,6 +226,30 @@
         continue;
       }
 
+      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
+        // If this has a literal constant source that is the bit-reversed
+        // value of an inline immediate, replace it with a bitreverse of that
+        // inline immediate. This saves 4 bytes in the common case of
+        // materializing sign bits.
+
+        // Test if we are after regalloc. We only want to do this after any
+        // optimizations happen because this will confuse them.
+        // XXX - not exactly a check for post-regalloc run.
+        MachineOperand &Src = MI.getOperand(1);
+        if (Src.isImm() &&
+            TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
+          int64_t Imm = Src.getImm();
+          if (isInt<32>(Imm) && !TII->isInlineConstant(Src, 4)) {
+            int32_t ReverseImm = reverseBits(static_cast<int32_t>(Imm));
+            if (ReverseImm >= -16 && ReverseImm <= 64) {
+              MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
+              Src.setImm(ReverseImm);
+              continue;
+            }
+          }
+        }
+      }
+
       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
         continue;
 
Index: test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
@@ -0,0 +1,158 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Test that materialized constants that are the bit reversal of inline
+; immediates are replaced with bfrev of the inline immediate to save
+; code size.
+
+; GCN-LABEL: {{^}}materialize_0_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[K]]
+define void @materialize_0_i32(i32 addrspace(1)* %out) {
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_0_i64:
+; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], 0{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+define void @materialize_0_i64(i64 addrspace(1)* %out) {
+  store i64 0, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_neg1_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], -1{{$}}
+; GCN: buffer_store_dword [[K]]
+define void @materialize_neg1_i32(i32 addrspace(1)* %out) {
+  store i32 -1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_neg1_i64:
+; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
+; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], -1{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+define void @materialize_neg1_i64(i64 addrspace(1)* %out) {
+  store i64 -1, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_signbit_i32:
+; GCN: v_bfrev_b32_e32 [[K:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_dword [[K]]
+define void @materialize_signbit_i32(i32 addrspace(1)* %out) {
+  store i32 -2147483648, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_signbit_i64:
+; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
+; GCN-DAG: v_bfrev_b32_e32 v[[HIK:[0-9]+]], 1{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+define void @materialize_signbit_i64(i64 addrspace(1)* %out) {
+  store i64 -9223372036854775808, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_neg16_i32:
+; GCN: v_bfrev_b32_e32 [[K:v[0-9]+]], -16{{$}}
+; GCN: buffer_store_dword [[K]]
+define void @materialize_rev_neg16_i32(i32 addrspace(1)* %out) {
+  store i32 268435455, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_neg16_i64:
+; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
+; GCN-DAG: v_bfrev_b32_e32 v[[HIK:[0-9]+]], -16{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+define void @materialize_rev_neg16_i64(i64 addrspace(1)* %out) {
+  store i64 1152921504606846975, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_neg17_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xf7ffffff{{$}}
+; GCN: buffer_store_dword [[K]]
+define void @materialize_rev_neg17_i32(i32 addrspace(1)* %out) {
+  store i32 -134217729, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_neg17_i64:
+; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], 0xf7ffffff{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+define void @materialize_rev_neg17_i64(i64 addrspace(1)* %out) {
+  store i64 -576460752303423489, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_64_i32:
+; GCN: v_bfrev_b32_e32 [[K:v[0-9]+]], 64{{$}}
+; GCN: buffer_store_dword [[K]]
+define void @materialize_rev_64_i32(i32 addrspace(1)* %out) {
+  store i32 33554432, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_64_i64:
+; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
+; GCN-DAG: v_bfrev_b32_e32 v[[HIK:[0-9]+]], 64{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+define void @materialize_rev_64_i64(i64 addrspace(1)* %out) {
+  store i64 144115188075855872, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_65_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x82000000{{$}}
+; GCN: buffer_store_dword [[K]]
+define void @materialize_rev_65_i32(i32 addrspace(1)* %out) {
+  store i32 -2113929216, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_65_i64:
+; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], 0x82000000{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+define void @materialize_rev_65_i64(i64 addrspace(1)* %out) {
+  store i64 -9079256848778919936, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_3_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], -2.0{{$}}
+; GCN: buffer_store_dword [[K]]
+define void @materialize_rev_3_i32(i32 addrspace(1)* %out) {
+  store i32 -1073741824, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_3_i64:
+; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], -2.0{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+define void @materialize_rev_3_i64(i64 addrspace(1)* %out) {
+  store i64 -4611686018427387904, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_1.0_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1fc{{$}}
+; GCN: buffer_store_dword [[K]]
+define void @materialize_rev_1.0_i32(i32 addrspace(1)* %out) {
+  store i32 508, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}materialize_rev_1.0_i64:
+; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0x1fc{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], 0{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+define void @materialize_rev_1.0_i64(i64 addrspace(1)* %out) {
+  store i64 508, i64 addrspace(1)* %out
+  ret void
+}
Index: test/CodeGen/AMDGPU/fmaxnum.ll
===================================================================
--- test/CodeGen/AMDGPU/fmaxnum.ll
+++ test/CodeGen/AMDGPU/fmaxnum.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare float @llvm.maxnum.f32(float, float) #0
 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
@@ -207,7 +207,7 @@
 
 ; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0
 ; SI-NOT: v_max_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; SI: buffer_store_dword [[REG]]
 
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
@@ -221,7 +221,7 @@
 
 ; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0
 ; SI-NOT: v_max_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; SI: buffer_store_dword [[REG]]
 
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
Index: test/CodeGen/AMDGPU/fminnum.ll
===================================================================
--- test/CodeGen/AMDGPU/fminnum.ll
+++ test/CodeGen/AMDGPU/fminnum.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare float @llvm.minnum.f32(float, float) #0
@@ -206,7 +206,7 @@
 
 ; FUNC-LABEL: @constant_fold_fmin_f32_n0_p0
 ; SI-NOT: v_min_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; SI: buffer_store_dword [[REG]]
 
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
@@ -220,7 +220,7 @@
 
 ; FUNC-LABEL: @constant_fold_fmin_f32_n0_n0
 ; SI-NOT: v_min_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; SI: buffer_store_dword [[REG]]
 
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
Index: test/CodeGen/AMDGPU/fneg-fabs.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg-fabs.f64.ll
+++ test/CodeGen/AMDGPU/fneg-fabs.f64.ll
@@ -44,7 +44,7 @@
 }
 
 ; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f64:
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
 ; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
 define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
   %bc = bitcast i64 %in to double
@@ -57,7 +57,7 @@
 ; FUNC-LABEL: {{^}}fneg_fabs_f64:
 ; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
 ; SI: s_load_dwordx2
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
 ; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
 ; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
@@ -69,7 +69,7 @@
 }
 
 ; FUNC-LABEL: {{^}}fneg_fabs_v2f64:
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
 ; SI-NOT: 0x80000000
 ; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
 ; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
@@ -81,7 +81,7 @@
 }
 
 ; FUNC-LABEL: {{^}}fneg_fabs_v4f64:
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
 ; SI-NOT: 0x80000000
 ; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
 ; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
Index: test/CodeGen/AMDGPU/imm.ll
===================================================================
--- test/CodeGen/AMDGPU/imm.ll
+++ test/CodeGen/AMDGPU/imm.ll
@@ -23,7 +23,7 @@
 
 ; CHECK-LABEL: {{^}}store_imm_neg_0.0_i64:
 ; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
-; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x80000000
+; CHECK-DAG: v_bfrev_b32_e32 v[[HI_VREG:[0-9]+]], 1{{$}}
 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
 define void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
   store i64 -9223372036854775808, i64 addrspace(1) *%out
@@ -31,7 +31,7 @@
 }
 
 ; CHECK-LABEL: {{^}}store_inline_imm_neg_0.0_i32:
-; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; CHECK: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) {
   store i32 -2147483648, i32 addrspace(1)* %out
@@ -47,7 +47,7 @@
 }
 
 ; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32:
-; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; CHECK: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
   store float -0.0, float addrspace(1)* %out
@@ -520,7 +520,7 @@
 
 ; CHECK-LABEL: {{^}}store_literal_imm_neg_0.0_f64:
 ; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
-; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x80000000
+; CHECK-DAG: v_bfrev_b32_e32 v[[HI_VREG:[0-9]+]], 1{{$}}
 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
 define void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) {
   store double -0.0, double addrspace(1)* %out