Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -420,9 +420,10 @@ int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding -int FP32_NEG_ONE = 0xbf800000; int FP32_ONE = 0x3f800000; +int FP32_NEG_ONE = 0xbf800000; int FP64_ONE = 0x3ff0000000000000; +int FP64_NEG_ONE = 0xbff0000000000000; } def CONST : Constants; Index: lib/Target/AMDGPU/R600ISelLowering.h =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.h +++ lib/Target/AMDGPU/R600ISelLowering.h @@ -72,7 +72,8 @@ SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -122,6 +122,7 @@ setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); @@ -832,13 +833,18 @@ return; case ISD::FP_TO_UINT: if (N->getValueType(0) == MVT::i1) { - Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG)); return; } // Fall-through. 
Since we don't care about out of bounds values // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint // considers some extra cases which are not necessary here. case ISD::FP_TO_SINT: { + if (N->getValueType(0) == MVT::i1) { + Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG)); + return; + } + SDValue Result; if (expandFP_TO_SINT(N, Result, DAG)) Results.push_back(Result); @@ -1052,15 +1058,24 @@ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF); } -SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { +SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode( + ISD::SETCC, + DL, + MVT::i1, + Op, DAG.getConstantFP(1.0f, DL, MVT::f32), + DAG.getCondCode(ISD::SETEQ)); +} + +SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode( ISD::SETCC, DL, MVT::i1, - Op, DAG.getConstantFP(0.0f, DL, MVT::f32), - DAG.getCondCode(ISD::SETNE) - ); + Op, DAG.getConstantFP(-1.0f, DL, MVT::f32), + DAG.getCondCode(ISD::SETEQ)); } SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -3391,6 +3391,16 @@ (V_CNDMASK_B32_e64 0, -1, $src), sub1) >; +class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat < + (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))), + (i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)) +>; + +def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>; +def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>; +def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>; +def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>; + // If we need to perform a logical operation on i1 values, we need to // use vector comparisons since there is only one SCC register. 
Vector // comparisions still write to a pair of SGPRs, so treat these as Index: test/CodeGen/AMDGPU/fp_to_sint.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fp_to_sint.f64.ll +++ test/CodeGen/AMDGPU/fp_to_sint.f64.ll @@ -1,7 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #1 +declare double @llvm.fabs.f64(double) #1 ; FUNC-LABEL: @fp_to_sint_f64_i32 ; SI: v_cvt_i32_f64_e32 @@ -54,3 +55,23 @@ store i64 %cast, i64 addrspace(1)* %out, align 8 ret void } + +; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}} +define void @fp_to_sint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %conv = fptosi double %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}| +define void @fp_to_sint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %in.fabs = call double @llvm.fabs.f64(double %in) + %conv = fptosi double %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/fp_to_sint.ll =================================================================== --- test/CodeGen/AMDGPU/fp_to_sint.ll +++ test/CodeGen/AMDGPU/fp_to_sint.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC -declare float @llvm.fabs.f32(float) #0 +declare float @llvm.fabs.f32(float) #1 ; 
FUNC-LABEL: {{^}}fp_to_sint_i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -17,7 +17,7 @@ ; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs: ; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { - %in.fabs = call float @llvm.fabs.f32(float %in) #0 + %in.fabs = call float @llvm.fabs.f32(float %in) %conv = fptosi float %in.fabs to i32 store i32 %conv, i32 addrspace(1)* %out ret void @@ -227,4 +227,26 @@ ret void } -attributes #0 = { nounwind readnone } +; FUNC-LABEL: {{^}}fp_to_sint_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}} + +; EG: AND_INT +; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, literal.y, +; EG-NEXT: -1082130432(-1.000000e+00) +define void @fp_to_sint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %conv = fptosi float %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_sint_fabs_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{[0-9]+}}| +define void @fp_to_sint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %in.fabs = call float @llvm.fabs.f32(float %in) + %conv = fptosi float %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/fp_to_uint.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fp_to_uint.f64.ll +++ test/CodeGen/AMDGPU/fp_to_uint.f64.ll @@ -1,7 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #1 +declare double @llvm.fabs.f64(double) #1 ; SI-LABEL: {{^}}fp_to_uint_i32_f64: ; SI: v_cvt_u32_f64_e32 @@ -68,3 +69,23 @@ store <4 x i64> 
%cast, <4 x i64> addrspace(1)* %out, align 32 ret void } + +; FUNC-LABEL: {{^}}fp_to_uint_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{\[[0-9]+:[0-9]+\]}} +define void @fp_to_uint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %conv = fptoui double %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{\[[0-9]+:[0-9]+\]}}| +define void @fp_to_uint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %in.fabs = call double @llvm.fabs.f64(double %in) + %conv = fptoui double %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/fp_to_uint.ll =================================================================== --- test/CodeGen/AMDGPU/fp_to_uint.ll +++ test/CodeGen/AMDGPU/fp_to_uint.ll @@ -1,6 +1,8 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC + +declare float @llvm.fabs.f32(float) #1 ; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32: ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -215,3 +217,27 @@ store <4 x i64> %conv, <4 x i64> addrspace(1)* %out ret void } + + +; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}} + +; EG: AND_INT +; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, 1.0, +define void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %conv = fptoui 
float %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{[0-9]+}}| +define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %in.fabs = call float @llvm.fabs.f32(float %in) + %conv = fptoui float %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone }