Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2689,8 +2689,91 @@
   return SDValue();
 }
 
+// Rewrite a select of two unary nodes as one unary node of a select:
+//
+// select Cond, (Op x), (Op y) -> Op (select Cond, x, y)
+//
+// N1 and N2 are expected to both be Op nodes; only their first operands are
+// used. The new select is added to the combiner worklist.
+static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI,
+                                         unsigned Op,
+                                         const SDLoc &SL,
+                                         SDValue Cond,
+                                         SDValue N1,
+                                         SDValue N2) {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N1.getValueType();
+
+  SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond,
+                                  N1.getOperand(0), N2.getOperand(0));
+  DCI.AddToWorklist(NewSelect.getNode());
+  return DAG.getNode(Op, SL, VT, NewSelect);
+}
+
+// Pull a free FP operation out of a select so it may fold into uses.
+//
+// select c, (fneg x), (fneg y) -> fneg (select c, x, y)
+// select c, (fneg x), k -> fneg (select c, x, (fneg k))
+//
+// select c, (fabs x), (fabs y) -> fabs (select c, x, y)
+// select c, (fabs x), +k -> fabs (select c, x, k)
+//
+// The constant forms are also matched with the select operands commuted.
+// N is the select being combined. Returns the replacement value, or an empty
+// SDValue if no fold applies.
+static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
+                                    SDValue N) {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Cond = N.getOperand(0);
+  SDValue LHS = N.getOperand(1);
+  SDValue RHS = N.getOperand(2);
+
+  EVT VT = N.getValueType();
+  if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
+      (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
+    return distributeOpThroughSelect(DCI, LHS.getOpcode(),
+                                     SDLoc(N), Cond, LHS, RHS);
+  }
+
+  bool Inv = false;
+  if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {
+    std::swap(LHS, RHS);
+    Inv = true;
+  }
+
+  // TODO: Support vector constants.
+  ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
+  if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) {
+    SDLoc SL(N);
+    // If one side is an fneg/fabs and the other is a constant, we can push the
+    // fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
+    SDValue NewLHS = LHS.getOperand(0);
+    SDValue NewRHS = RHS;
+
+    // TODO: Skip for operations where other combines can absorb the fneg.
+
+    if (LHS.getOpcode() == ISD::FNEG)
+      NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+    else if (CRHS->isNegative())
+      return SDValue();
+
+    if (Inv)
+      std::swap(NewLHS, NewRHS);
+
+    SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
+                                    Cond, NewLHS, NewRHS);
+    DCI.AddToWorklist(NewSelect.getNode());
+    return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
+  }
+
+  return SDValue();
+}
+
 SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
+  if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
+    return Folded;
+
   SDValue Cond = N->getOperand(0);
   if (Cond.getOpcode() != ISD::SETCC)
     return SDValue();
Index: test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
===================================================================
--- test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -18,7 +18,7 @@
 ; VI: v_add_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -1.0
 ; VI: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, |v{{[0-9]+}}|
 ; VI: v_cndmask_b32_e32
-; VI: v_add_f32_e32
+; VI: v_add_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
 ; VI: v_mul_f32_e32
 ; VI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
 define void @multiple_fadd_use_test_f32(float addrspace(1)* %out, float %x, float %y, float %z) #0 {
@@ -113,7 +113,7 @@
 ; VI: v_add_f16_e64 v{{[0-9]+}}, s{{[0-9]+}}, -1.0
 ; VI: v_cmp_gt_f16_e64 vcc, |v{{[0-9]+}}|, |v{{[0-9]+}}|
 ; VI: v_cndmask_b32_e32
-; VI: v_add_f16_e32
+; VI: v_add_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
 ; VI: v_mul_f16_e32
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
 define void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
=================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll @@ -0,0 +1,727 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; GCN-LABEL: {{^}}add_select_fabs_fabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc +; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]] +define void @add_select_fabs_fabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fabs.y = call float @llvm.fabs.f32(float %y) + %select = select i1 %cmp, float %fabs.x, float %fabs.y + %add = fadd float %select, %z + store float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] +; GCN: buffer_load_dword [[W:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc +; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]] +; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]] +define void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %w = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fabs.y = call float @llvm.fabs.f32(float %y) + %select = select i1 %cmp, float 
%fabs.x, float %fabs.y + %add0 = fadd float %select, %z + %add1 = fadd float %fabs.x, %w + store volatile float %add0, float addrspace(1)* undef + store volatile float %add1, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc +; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]] +; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]] + +; GCN: buffer_store_dword [[ADD]] +; GCN: buffer_store_dword [[X_ABS]] +define void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fabs.y = call float @llvm.fabs.f32(float %y) + %select = select i1 %cmp, float %fabs.x, float %fabs.y + %add0 = fadd float %select, %z + store volatile float %add0, float addrspace(1)* undef + store volatile float %fabs.x, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] +; GCN: buffer_load_dword [[W:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc +; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]] +; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]] +define void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %w = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float 
%x) + %fabs.y = call float @llvm.fabs.f32(float %y) + %select = select i1 %cmp, float %fabs.x, float %fabs.y + %add0 = fadd float %select, %z + %add1 = fadd float %fabs.y, %w + store volatile float %add0, float addrspace(1)* undef + store volatile float %add1, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fabs_var_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_ABS]], vcc +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]] +define void @add_select_fabs_var_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %select = select i1 %cmp, float %fabs.x, float %y + %add = fadd float %select, %z + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fabs_negk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]] +define void @add_select_fabs_negk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs = call float @llvm.fabs.f32(float %x) + %select = select i1 %cmp, float %fabs, float -1.0 + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; FIXME: fabs should fold away +; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] + +; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s +; GCN: v_add_f32_e64 
v{{[0-9]+}}, |[[SELECT]]|, [[X]] +define void @add_select_fabs_negk_negk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float -2.0, float -1.0 + %fabs = call float @llvm.fabs.f32(float %select) + %add = fadd float %fabs, %x + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_posk_posk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] + +; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]] +define void @add_select_posk_posk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 2.0, float 1.0 + %add = fadd float %select, %x + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_negk_fabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]] +; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0 +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]] +define void @add_select_negk_fabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs = call float @llvm.fabs.f32(float %x) + %select = select i1 %cmp, float -1.0, float %fabs + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000 + +; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]] +; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0 +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[FABS_X]], vcc +; GCN: v_add_f32_e32 
v{{[0-9]+}}, [[Y]], [[SELECT]] +define void @add_select_negliteralk_fabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs = call float @llvm.fabs.f32(float %x) + %select = select i1 %cmp, float -1024.0, float %fabs + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fabs_posk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc +; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]] +define void @add_select_fabs_posk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + + %cmp = icmp eq i32 %c, 0 + %fabs = call float @llvm.fabs.f32(float %x) + %select = select i1 %cmp, float %fabs, float 1.0 + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_posk_fabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0 +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc +; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]] +define void @add_select_posk_fabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs = call float @llvm.fabs.f32(float %x) + %select = select i1 %cmp, float 1.0, float %fabs + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fneg_fneg_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc +; GCN: v_subrev_f32_e32 
v{{[0-9]+}}, [[SELECT]], [[Z]] +define void @add_select_fneg_fneg_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %fneg.y = fsub float -0.0, %y + %select = select i1 %cmp, float %fneg.x, float %fneg.y + %add = fadd float %select, %z + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] +; GCN: buffer_load_dword [[W:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc +; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]] +; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[X]], [[W]] +define void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %w = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %fneg.y = fsub float -0.0, %y + %select = select i1 %cmp, float %fneg.x, float %fneg.y + %add0 = fadd float %select, %z + %add1 = fadd float %fneg.x, %w + store volatile float %add0, float addrspace(1)* undef + store volatile float %add1, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]] +; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc +; GCN-DAG: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[SELECT]], [[Z]] + +; GCN: buffer_store_dword [[ADD]] +; GCN: buffer_store_dword [[NEG_X]] +define void 
@add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %fneg.y = fsub float -0.0, %y + %select = select i1 %cmp, float %fneg.x, float %fneg.y + %add0 = fadd float %select, %z + store volatile float %add0, float addrspace(1)* undef + store volatile float %fneg.x, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] +; GCN: buffer_load_dword [[W:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc +; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]] +; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[Y]], [[W]] +define void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %w = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %fneg.y = fsub float -0.0, %y + %select = select i1 %cmp, float %fneg.x, float %fneg.y + %add0 = fadd float %select, %z + %add1 = fadd float %fneg.y, %w + store volatile float %add0, float addrspace(1)* undef + store volatile float %add1, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fneg_var_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_NEG]], vcc +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]] +define void @add_select_fneg_var_f32(i32 %c) #0 { + %x = load volatile float, float 
addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %select = select i1 %cmp, float %fneg.x, float %y + %add = fadd float %select, %z + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fneg_negk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc +; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] +define void @add_select_fneg_negk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %select = select i1 %cmp, float %fneg.x, float -1.0 + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32: +; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983 +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc +; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] +define void @add_select_fneg_inv2pi_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000 + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32: +; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983 +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc +; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc + +; GCN: v_subrev_f32_e32 
v{{[0-9]+}}, [[SELECT]], [[Y]] +define void @add_select_fneg_neginv2pi_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000 + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_negk_negk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] + +; GCN: v_cmp_eq_u32_e64 +; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]] +define void @add_select_negk_negk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float -2.0, float -1.0 + %add = fadd float %select, %x + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32: +; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000 +; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000 +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]] + +; GCN: v_cmp_eq_u32_e64 +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]] +define void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float -2048.0, float -4096.0 + %add = fadd float %select, %x + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] + +; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s +; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]] +define void @add_select_fneg_negk_negk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float -2.0, float -1.0 + 
%fneg.x = fsub float -0.0, %select + %add = fadd float %fneg.x, %x + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_negk_fneg_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0 +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc +; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] +define void @add_select_negk_fneg_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %select = select i1 %cmp, float -1.0, float %fneg.x + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fneg_posk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc +; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] +define void @add_select_fneg_posk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %select = select i1 %cmp, float %fneg.x, float 1.0 + %add = fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_posk_fneg_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0 +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc +; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] +define void @add_select_posk_fneg_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.0, %x + %select = select i1 %cmp, float 1.0, float %fneg.x + %add = 
fadd float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN-DAG: v_or_b32_e32 [[X_NEG_ABS:v[0-9]+]], 0x80000000, [[X]] +; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG_ABS]], vcc +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]] +define void @add_select_negfabs_fabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x + %fabs.y = call float @llvm.fabs.f32(float %y) + %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y + %add = fadd float %select, %z + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN-DAG: v_or_b32_e32 [[Y_NEG_ABS:v[0-9]+]], 0x80000000, [[Y]] +; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG_ABS]], [[X_ABS]], vcc +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]] +define void @add_select_fabs_negfabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fabs.y = call float @llvm.fabs.f32(float %y) + %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y + %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y + %add = fadd float %select, %z + store 
volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_neg_fabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN-DAG: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]] +; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG]], vcc +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]] +define void @add_select_neg_fabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.000000e+00, %x + %fabs.y = call float @llvm.fabs.f32(float %y) + %select = select i1 %cmp, float %fneg.x, float %fabs.y + %add = fadd float %select, %z + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_fabs_neg_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]] +; GCN-DAG: v_xor_b32_e32 [[Y_NEG:v[0-9]+]], 0x80000000, [[Y]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG]], [[X_ABS]], vcc +; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]] +define void @add_select_fabs_neg_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fneg.y = fsub float -0.000000e+00, %y + %select = select i1 %cmp, float %fabs.x, float %fneg.y + %add = fadd float %select, %z + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_neg_negfabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword 
[[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X]], vcc +; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]] +define void @add_select_neg_negfabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fneg.x = fsub float -0.000000e+00, %x + %fabs.y = call float @llvm.fabs.f32(float %y) + %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y + %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y + %add = fadd float %select, %z + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}add_select_negfabs_neg_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: buffer_load_dword [[Z:v[0-9]+]] + +; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[X_ABS]], [[Y]], vcc +; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]] +define void @add_select_negfabs_neg_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %z = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x + %fneg.y = fsub float -0.000000e+00, %y + %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x + %add = fadd float %select, %z + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN-DAG: v_cmp_eq_u32_e64 vcc, +; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc +; GCN: 
v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]] +define void @mul_select_negfabs_posk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x + %select = select i1 %cmp, float %fneg.fabs.x, float 4.0 + %add = fmul float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0 +; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc +; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]] +define void @mul_select_posk_negfabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x + %select = select i1 %cmp, float 4.0, float %fneg.fabs.x + %add = fmul float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc +; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]] +define void @mul_select_negfabs_negk_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x + %select = select i1 %cmp, float %fneg.fabs.x, float -4.0 + %add = fmul float %select, %y + store volatile float %add, float addrspace(1)* undef + ret 
void +} + +; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32: +; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN: buffer_load_dword [[Y:v[0-9]+]] + +; GCN: v_cmp_ne_u32_e64 vcc +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc +; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]] +define void @mul_select_negk_negfabs_f32(i32 %c) #0 { + %x = load volatile float, float addrspace(1)* undef + %y = load volatile float, float addrspace(1)* undef + %cmp = icmp eq i32 %c, 0 + %fabs.x = call float @llvm.fabs.f32(float %x) + %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x + %select = select i1 %cmp, float -4.0, float %fneg.fabs.x + %add = fmul float %select, %y + store volatile float %add, float addrspace(1)* undef + ret void +} + +declare float @llvm.fabs.f32(float) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone }