Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -230,6 +230,7 @@
   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
 
   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
@@ -2285,6 +2286,18 @@
   return isNoNanSrc(Src);
 }
 
+// Like SelectVOP3Mods, but source modifiers are only folded when the operand
+// is f32. An operand of any other type is passed through unchanged with a zero
+// modifier mask, and that case always matches.
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
+                                            SDValue &SrcMods) const {
+  if (In.getValueType() == MVT::f32)
+    return SelectVOP3Mods(In, Src, SrcMods);
+  Src = In;
+  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+  return true;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
     return false;
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1140,6 +1140,8 @@
 def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
 // VOP3Mods, but the input source is known to never be NaN.
 def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
+// VOP3Mods, but only allowed for f32 operands.
+def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
 
 def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
 
Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
@@ -791,8 +791,8 @@
 
 multiclass SelectPat <ValueType vt> {
   def : GCNPat <
-    (vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
-                          (VOP3Mods vt:$src2, i32:$src2_mods))),
+    (vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods),
+                          (VOP3Mods_f32 vt:$src2, i32:$src2_mods))),
     (V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0)
   >;
 }
Index: llvm/trunk/test/CodeGen/AMDGPU/v_cndmask.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/v_cndmask.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/v_cndmask.ll
@@ -3,6 +3,9 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global,+WavefrontSize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare half @llvm.fabs.f16(half)
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
 
 ; GCN-LABEL: {{^}}v_cnd_nan_nosgpr:
 ; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
@@ -416,5 +419,50 @@
   ret void
 }
 
+; Source modifiers abs/neg only work for f32
+
+; GCN-LABEL: {{^}}v_cndmask_abs_neg_f16:
+; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}},
+define amdgpu_kernel void @v_cndmask_abs_neg_f16(half addrspace(1)* %out, i32 %c, half addrspace(1)* %fptr) #0 {
+  %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %f.gep = getelementptr half, half addrspace(1)* %fptr, i32 %idx
+  %f = load half, half addrspace(1)* %f.gep
+  %f.abs = call half @llvm.fabs.f16(half %f)
+  %f.neg = fneg half %f
+  %setcc = icmp ne i32 %c, 0
+  %select = select i1 %setcc, half %f.abs, half %f.neg
+  store half %select, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_cndmask_abs_neg_f32:
+; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, |v{{[0-9]+}}|,
+define amdgpu_kernel void @v_cndmask_abs_neg_f32(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
+  %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
+  %f = load float, float addrspace(1)* %f.gep
+  %f.abs = call float @llvm.fabs.f32(float %f)
+  %f.neg = fneg float %f
+  %setcc = icmp ne i32 %c, 0
+  %select = select i1 %setcc, float %f.abs, float %f.neg
+  store float %select, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_cndmask_abs_neg_f64:
+; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}},
+; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}},
+define amdgpu_kernel void @v_cndmask_abs_neg_f64(double addrspace(1)* %out, i32 %c, double addrspace(1)* %fptr) #0 {
+  %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %f.gep = getelementptr double, double addrspace(1)* %fptr, i32 %idx
+  %f = load double, double addrspace(1)* %f.gep
+  %f.abs = call double @llvm.fabs.f64(double %f)
+  %f.neg = fneg double %f
+  %setcc = icmp ne i32 %c, 0
+  %select = select i1 %setcc, double %f.abs, double %f.neg
+  store double %select, double addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }