Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -597,6 +597,14 @@ VT == MVT::f64; } +/// Return true if v_cndmask_b32 will support fabs/fneg source modifiers for the +/// type for ISD::SELECT. +LLVM_READONLY +static bool selectSupportsSourceMods(const SDNode *N) { + // TODO: Only applies if select will be vector + return N->getValueType(0) == MVT::f32; +} + // Most FP instructions support source modifiers, but this could be refined // slightly. LLVM_READONLY @@ -631,8 +639,7 @@ } } case ISD::SELECT: - // TODO: Only applies if select will be vector - return N->getValueType(0) == MVT::f32; + return selectSupportsSourceMods(N); default: return true; } @@ -3742,7 +3749,8 @@ // TODO: Support vector constants. ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); - if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) { + if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS && + !selectSupportsSourceMods(N.getNode())) { SDLoc SL(N); // If one side is an fneg/fabs and the other is a constant, we can push the // fneg/fabs down. If it's an fabs, the constant needs to be non-negative. 
Index: llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll +++ llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll @@ -205,8 +205,8 @@ ; GCN: buffer_load_dword [[X:v[0-9]+]] ; GCN: buffer_load_dword [[Y:v[0-9]+]] -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc -; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]] +; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}} +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 { %x = load volatile float, ptr addrspace(1) undef %y = load volatile float, ptr addrspace(1) undef @@ -225,8 +225,8 @@ ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0 -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc -; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]] +; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}} +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 { %x = load volatile float, ptr addrspace(1) undef %y = load volatile float, ptr addrspace(1) undef @@ -639,8 +639,8 @@ ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0 -; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]] -; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]] +; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]] +; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 { %x = load volatile float, ptr addrspace(1) undef %y = load volatile float, ptr addrspace(1) undef @@ -659,8 +659,8 @@ ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0 -; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]] -; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]] 
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]] +; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 { %x = load volatile float, ptr addrspace(1) undef %y = load volatile float, ptr addrspace(1) undef @@ -677,8 +677,8 @@ ; GCN: buffer_load_dword [[X:v[0-9]+]] ; GCN: buffer_load_dword [[Y:v[0-9]+]] -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc -; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]] +; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}} +; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 { %x = load volatile float, ptr addrspace(1) undef %y = load volatile float, ptr addrspace(1) undef @@ -696,9 +696,9 @@ ; GCN: buffer_load_dword [[Y:v[0-9]+]] ; GCN: s_cmp_lg_u32 -; GCN: s_cselect_b64 vcc, -1, 0 -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc -; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]] +; GCN: s_cselect_b64 [[VCC:.*]], -1, 0 +; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, [[VCC]] +; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 { %x = load volatile float, ptr addrspace(1) undef %y = load volatile float, ptr addrspace(1) undef