# PPCISelLowering.cpp: legalization actions for f16 used as a storage-only type.
#
# When Subtarget.isISA3_0() is true, extending loads from f16 to f32/f64
# (ISD::EXTLOAD) and truncating stores from f32/f64 to f16 are marked Legal.
# Otherwise those same extending loads and truncating stores are marked Expand,
# and ISD::FP16_TO_FP / ISD::FP_TO_FP16 are marked Expand for both f32 and f64.
#
# NOTE(review): the Legal branch is gated on isISA3_0(), while the matching
# selection patterns in PPCInstrVSX.td (XSCVHPDP/LXSIHZX, XSCVDPHP/STXSIHX)
# appear to sit next to HasP9Vector-predicated code; the enclosing predicate is
# not visible in this patch -- confirm the two conditions agree (e.g. for an
# ISA 3.0 subtarget with the P9 vector feature disabled).
# NOTE(review): this patch text has lost its original line breaks and
# indentation; the hunk below is kept exactly as received.
Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -166,6 +166,23 @@ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } + if (Subtarget.isISA3_0()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal); + setTruncStoreAction(MVT::f64, MVT::f16, Legal); + setTruncStoreAction(MVT::f32, MVT::f16, Legal); + } else { + // No extending loads from f16 or HW conversions back and forth. + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + } + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. 
# PPCInstrVSX.td: selection patterns for f16 extending loads / truncating stores.
#
# First hunk: adds two PatFrags, extloadf16 (an extload with MemoryVT = f16) and
# truncstoref16 (a truncstore with MemoryVT = f16).
# Second hunk: adds four anonymous patterns built on those fragments:
#   f64 extloadf16      -> XSCVHPDP (LXSIHZX addr)
#   truncstoref16 f64   -> STXSIHX (XSCVDPHP src), addr
#   f32 extloadf16      -> same as f64, result copied to register class VSSRC
#   truncstoref16 f32   -> same as f64, source first copied to register class VSFRC
#
# test/CodeGen/PowerPC/handle-f16-storage-type.ll: new test with four functions
# (loadd, loadf, stored, storef) run through llc twice: -mcpu=pwr8 checked with
# prefix P8 (expects calls to __gnu_h2f_ieee, __truncdfhf2, __gnu_f2h_ieee) and
# -mcpu=pwr9 checked with the default CHECK prefix (expects lxsihzx/xscvhpdp and
# xscvdphp/stxsihx).
#
# NOTE(review): this patch text has lost its original line breaks and
# indentation; the hunks below are kept on the physical lines as received.
Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -111,6 +111,15 @@ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = f16; +} +def truncstoref16 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = f16; +} multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, @@ -3252,6 +3261,16 @@ def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; + // Load/convert and convert/store patterns for f16. + def : Pat<(f64 (extloadf16 xoaddr:$src)), + (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; + def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; + def : Pat<(f32 (extloadf16 xoaddr:$src)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; + def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; + let Predicates = [IsBigEndian, HasP9Vector] in { // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), Index: test/CodeGen/PowerPC/handle-f16-storage-type.ll =================================================================== --- test/CodeGen/PowerPC/handle-f16-storage-type.ll +++ test/CodeGen/PowerPC/handle-f16-storage-type.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=P8 +; RUN: llc -mcpu=pwr9 
-mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s +; Function Attrs: nounwind readonly +define dso_local double @loadd(i16* nocapture readonly %a) local_unnamed_addr #0 { +; P8-LABEL: loadd: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: .cfi_def_cfa_offset 32 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: lhz r3, 2(r3) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: loadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi r3, r3, 2 +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i16, i16* %a, i64 1 + %0 = load i16, i16* %arrayidx, align 2 + %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) + ret double %1 +} + +; Function Attrs: nounwind readnone willreturn +declare double @llvm.convert.from.fp16.f64(i16) #1 + +; Function Attrs: nounwind readonly +define dso_local float @loadf(i16* nocapture readonly %a) local_unnamed_addr #0 { +; P8-LABEL: loadf: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: .cfi_def_cfa_offset 32 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: lhz r3, 2(r3) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: loadf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi r3, r3, 2 +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i16, i16* %a, i64 1 + %0 = load i16, i16* %arrayidx, align 2 + %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) + ret float %1 +} + +; Function Attrs: nounwind readnone willreturn +declare float @llvm.convert.from.fp16.f32(i16) #1 + +; Function Attrs: nofree nounwind 
writeonly +define dso_local void @stored(i16* nocapture %a, double %b) local_unnamed_addr #2 { +; P8-LABEL: stored: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: .cfi_def_cfa_offset 48 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: .cfi_offset r30, -16 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: blr +; +; CHECK-LABEL: stored: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r3 +; CHECK-NEXT: blr +entry: + %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b) + store i16 %0, i16* %a, align 2 + ret void +} + +; Function Attrs: nounwind readnone willreturn +declare i16 @llvm.convert.to.fp16.f64(double) #1 + +; Function Attrs: nofree nounwind writeonly +define dso_local void @storef(i16* nocapture %a, float %b) local_unnamed_addr #2 { +; P8-LABEL: storef: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: .cfi_def_cfa_offset 48 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: .cfi_offset r30, -16 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: blr +; +; CHECK-LABEL: storef: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r3 +; CHECK-NEXT: blr +entry: + %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b) + store i16 %0, i16* %a, align 2 + ret void +} + +; Function Attrs: nounwind readnone willreturn +declare i16 @llvm.convert.to.fp16.f32(float) #1