Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -53,9 +53,6 @@
                              const SDLoc &DL,
                              EVT VT) const;
 
-  /// \brief Custom lowering for ISD::ConstantFP.
-  SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
-
   /// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
   SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
 
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -276,7 +276,7 @@
   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 
   // F16 - Constant Actions.
-  setOperationAction(ISD::ConstantFP, MVT::f16, Custom);
+  setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
 
   // F16 - Load/Store Actions.
   setOperationAction(ISD::LOAD, MVT::f16, Promote);
@@ -1840,9 +1840,6 @@
   case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
   case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
   case ISD::TRAP: return lowerTRAP(Op, DAG);
-
-  case ISD::ConstantFP:
-    return lowerConstantFP(Op, DAG);
   case ISD::FP_ROUND:
     return lowerFP_ROUND(Op, DAG);
   }
@@ -2047,15 +2044,6 @@
                      DAG.getNode(ISD::FTRUNC, DL, VT, Op);
 }
 
-SDValue SITargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const {
-  if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(Op)) {
-    return DAG.getConstant(FP->getValueAPF().bitcastToAPInt().getZExtValue(),
-                           SDLoc(Op), MVT::i32);
-  }
-
-  return SDValue();
-}
-
 SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   assert(Op.getValueType() == MVT::f16 &&
          "Do not know how to custom lower FP_ROUND for non-f16 type");
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -712,12 +712,25 @@
   (S_MOV_B32 imm:$imm)
 >;
 
+// FIXME: Workaround for ordering issue with peephole optimizer where
+// a register class copy interferes with immediate folding. Should
+// use s_mov_b32, which can be shrunk to s_movk_i32
+def : Pat <
+  (VGPRImm<(f16 fpimm)>:$imm),
+  (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
+>;
+
 def : Pat <
   (f32 fpimm:$imm),
   (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
+def : Pat <
+  (f16 fpimm:$imm),
+  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
+>;
+
 def : Pat <
   (i32 frameindex:$fi),
   (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi)))
 >;
Index: test/CodeGen/AMDGPU/br_cc.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/br_cc.f16.ll
+++ test/CodeGen/AMDGPU/br_cc.f16.ll
@@ -46,7 +46,7 @@
 ; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
 
 ; SI:  v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
-; VI:  v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; VI:  v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
 
 ; GCN: s_cbranch_vccnz
 ; GCN: one{{$}}
@@ -82,7 +82,7 @@
 ; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 
 ; SI:  v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI:  v_cmp_nge_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
+; VI:  v_cmp_ngt_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
 
 ; GCN: s_cbranch_vccnz
 ; GCN: one{{$}}