Index: include/llvm/CodeGen/ValueTypes.h
===================================================================
--- include/llvm/CodeGen/ValueTypes.h
+++ include/llvm/CodeGen/ValueTypes.h
@@ -89,6 +89,19 @@
       return VecTy;
     }
 
+    /// Return the type converted to an equivalently sized integer or vector
+    /// with integer element type. Similar to changeVectorElementTypeToInteger,
+    /// but also handles scalars.
+    EVT changeTypeToInteger() const {
+      if (isVector())
+        return changeVectorElementTypeToInteger();
+
+      if (isSimple())
+        return MVT::getIntegerVT(getSizeInBits());
+
+      return changeExtendedTypeToInteger();
+    }
+
     /// isSimple - Test if the given EVT is simple (as opposed to being
     /// extended).
     bool isSimple() const {
@@ -342,6 +355,7 @@
     // Methods for handling the Extended-type case in functions above.
     // These are all out-of-line to prevent users of this header file
     // from having a dependency on Type.h.
+    EVT changeExtendedTypeToInteger() const;
     EVT changeExtendedVectorElementTypeToInteger() const;
     static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
     static EVT getExtendedVectorVT(LLVMContext &C, EVT VT,
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1095,7 +1095,8 @@
       break;
     }
     case TargetLowering::Expand:
-      if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
+      EVT DestVT = Node->getValueType(0);
+      if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
         // If the source type is not legal, see if there is a legal extload to
         // an intermediate type that we can then extend further.
         EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
@@ -1114,6 +1115,23 @@
           Chain = Load.getValue(1);
           break;
         }
+
+        // Handle the special case of fp16 extloads. EXTLOAD doesn't have the
+        // normal undefined upper bits behavior to allow using an in-reg extend
+        // with the illegal FP type, so load as an integer and do the
+        // from-integer conversion.
+        if (SrcVT.getScalarType() == MVT::f16) {
+          EVT ISrcVT = SrcVT.changeTypeToInteger();
+          EVT IDestVT = DestVT.changeTypeToInteger();
+          EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+
+          SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
+                                          Chain, Ptr, ISrcVT,
+                                          LD->getMemOperand());
+          Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+          Chain = Result.getValue(1);
+          break;
+        }
       }
 
       assert(!SrcVT.isVector() &&
Index: lib/IR/ValueTypes.cpp
===================================================================
--- lib/IR/ValueTypes.cpp
+++ lib/IR/ValueTypes.cpp
@@ -19,6 +19,11 @@
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
+EVT EVT::changeExtendedTypeToInteger() const {
+  LLVMContext &Context = LLVMTy->getContext();
+  return getIntegerVT(Context, getSizeInBits());
+}
+
 EVT EVT::changeExtendedVectorElementTypeToInteger() const {
   LLVMContext &Context = LLVMTy->getContext();
   EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits());
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -461,12 +461,6 @@
   return TII->isInlineConstant(Imm);
 }
 
-static EVT toIntegerVT(EVT VT) {
-  if (VT.isVector())
-    return VT.changeVectorElementTypeToInteger();
-  return MVT::getIntegerVT(VT.getSizeInBits());
-}
-
 SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                          SDLoc SL, SDValue Chain,
                                          unsigned Offset, bool Signed) const {
@@ -490,30 +484,12 @@
 
   unsigned Align = DL.getABITypeAlignment(Ty);
 
-  if (VT != MemVT && VT.isFloatingPoint()) {
-    // Do an integer load and convert.
-    // FIXME: This is mostly because load legalization after type legalization
-    // doesn't handle FP extloads.
-    assert(VT.getScalarType() == MVT::f32 &&
-           MemVT.getScalarType() == MVT::f16);
-
-    EVT IVT = toIntegerVT(VT);
-    EVT MemIVT = toIntegerVT(MemVT);
-    SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD,
-                               IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
-                               false, // isVolatile
-                               true, // isNonTemporal
-                               true, // isInvariant
-                               Align); // Alignment
-    SDValue Ops[] = {
-      DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load),
-      Load.getValue(1)
-    };
-
-    return DAG.getMergeValues(Ops, SL);
-  }
-
   ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+  // Floating-point memory types are neither sign- nor zero-extended; request
+  // a plain extending load and leave fp16 expansion to load legalization.
+  if (MemVT.isFloatingPoint())
+    ExtTy = ISD::EXTLOAD;
+
   return DAG.getLoad(ISD::UNINDEXED, ExtTy,
                      VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
                      false, // isVolatile
Index: test/CodeGen/AMDGPU/half.ll
===================================================================
--- test/CodeGen/AMDGPU/half.ll
+++ test/CodeGen/AMDGPU/half.ll
@@ -112,12 +112,24 @@
 }
 
 ; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
+; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
+; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
+; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
+; GCN: buffer_store_dwordx2 [[RESULT]]
 define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
   %ext = fpext half %arg to double
   store double %ext, double addrspace(1)* %out
   ret void
 }
+
 ; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
 define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
   %ext = fpext <2 x half> %arg to <2 x double>
   store <2 x double> %ext, <2 x double> addrspace(1)* %out
@@ -125,6 +137,16 @@
 }
 
 ; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
 define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
   %ext = fpext <3 x half> %arg to <3 x double>
   store <3 x double> %ext, <3 x double> addrspace(1)* %out
@@ -132,6 +154,19 @@
 }
 
 ; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
 define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
   %ext = fpext <4 x half> %arg to <4 x double>
   store <4 x double> %ext, <4 x double> addrspace(1)* %out
@@ -139,6 +174,37 @@
 }
 
 ; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+
+; GCN: s_endpgm
 define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
   %ext = fpext <8 x half> %arg to <8 x double>
   store <8 x double> %ext, <8 x double> addrspace(1)* %out
@@ -194,6 +260,12 @@
 }
 
 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
+; GCN: s_endpgm
 define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
   %val = load <2 x half>, <2 x half> addrspace(1)* %in
   %cvt = fpext <2 x half> %val to <2 x float>
@@ -246,6 +318,15 @@
 }
 
 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
+; GCN-DAG: v_cvt_f64_f32_e32 [[CVT2:v\[[0-9]+:[0-9]+\]]], v[[CVT0]]
+; GCN-DAG: v_cvt_f64_f32_e32 [[CVT3:v\[[0-9]+:[0-9]+\]]], v[[CVT1]]
+; GCN-DAG: buffer_store_dwordx2 [[CVT2]]
+; GCN-DAG: buffer_store_dwordx2 [[CVT3]]
+; GCN: s_endpgm
 define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
   %val = load <2 x half>, <2 x half> addrspace(1)* %in
   %cvt = fpext <2 x half> %val to <2 x double>