# TargetSelectionDAG.td: adds two pattern fragments keyed on an f16 memory
# type, placed next to the existing f32 ones:
#   extloadf16    - extload with IsLoad = 1 and MemoryVT = f16
#   truncstoref16 - truncstore with IsStore = 1 and MemoryVT = f16
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -945,6 +945,10 @@
   let IsLoad = 1;
   let MemoryVT = i32;
 }
+def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = f16;
+}
 def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
   let IsLoad = 1;
   let MemoryVT = f32;
@@ -1080,6 +1084,11 @@
   let IsStore = 1;
   let MemoryVT = i32;
 }
+def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let MemoryVT = f16;
+}
 def truncstoref32 : PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr)> {
   let IsStore = 1;
# PPCISelLowering.cpp: when Subtarget.isISA3_0() is true, EXTLOAD from f16 to
# f32/f64 and truncating stores from f32/f64 to f16 are marked Legal.
# Otherwise those same loads and stores, plus FP16_TO_FP and FP_TO_FP16 for
# f32 and f64, are marked Expand.
# NOTE(review): the ISA 3.0 branch sets no action for FP16_TO_FP/FP_TO_FP16,
# and the test added by this patch only exercises load+convert and
# convert+store sequences; confirm that a conversion which is not folded into
# a load or store is handled on that path.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -167,6 +167,23 @@
     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
   }
 
+  if (Subtarget.isISA3_0()) {
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
+    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
+    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
+    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
+  } else {
+    // No extending loads from f16 or HW conversions back and forth.
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+  }
+
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 
   // PowerPC has pre-inc load and store's.
# PPCInstrVSX.td: adds four selection patterns for f16 memory accesses.
#   Loads:  extloadf16 -> LXSIHZX feeding XSCVHPDP; the f32 form wraps the
#           result in COPY_TO_REGCLASS ..., VSSRC.
#   Stores: truncstoref16 -> XSCVDPHP feeding STXSIHX; the f32 form first
#           applies COPY_TO_REGCLASS $src, VSFRC.
# NOTE(review): the predicate block enclosing these patterns lies outside this
# hunk; confirm it is satisfied whenever Subtarget.isISA3_0() marks the
# corresponding load/store actions Legal in PPCISelLowering.cpp.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3343,6 +3343,16 @@
   def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
             (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
 
+  // Load/convert and convert/store patterns for f16.
+  def : Pat<(f64 (extloadf16 xoaddr:$src)),
+            (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
+  def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
+            (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
+  def : Pat<(f32 (extloadf16 xoaddr:$src)),
+            (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
+  def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
+            (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
+
   let Predicates = [IsBigEndian, HasP9Vector] in {
   // Scalar stores of i8
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
# New test handle-f16-storage-type.ll: four functions (loadd, loadf, stored,
# storef) that pair an i16 load or store with llvm.convert.from.fp16 /
# llvm.convert.to.fp16 for double and float.
# Three RUN lines: -mcpu=pwr8 (prefix P8), -mcpu=pwr9 (default CHECK prefix),
# and -mcpu=pwr9 -mattr=-hard-float (prefix SOFT).
# The P8 and SOFT expectations call library routines (__gnu_h2f_ieee,
# __gnu_f2h_ieee, __truncdfhf2, __extendsfdf2); the pwr9 expectations use
# lxsihzx/xscvhpdp for loads and xscvdphp/stxsihx for stores.
diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
@@ -0,0 +1,200 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=P8
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=SOFT
+
+define dso_local double @loadd(i16* nocapture readonly %a) local_unnamed_addr #0 {
+; P8-LABEL: loadd:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r0, 16(r1)
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    lhz r3, 2(r3)
+; P8-NEXT:    bl __gnu_h2f_ieee
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: loadd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi r3, r3, 2
+; CHECK-NEXT:    lxsihzx f0, 0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: loadd:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r0, 16(r1)
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    lhz r3, 2(r3)
+; SOFT-NEXT:    bl __gnu_h2f_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __extendsfdf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i16, i16* %a, i64 1
+  %0 = load i16, i16* %arrayidx, align 2
+  %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0)
+  ret double %1
+}
+
+declare double @llvm.convert.from.fp16.f64(i16)
+
+define dso_local float @loadf(i16* nocapture readonly %a) local_unnamed_addr #0 {
+; P8-LABEL: loadf:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r0, 16(r1)
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    lhz r3, 2(r3)
+; P8-NEXT:    bl __gnu_h2f_ieee
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: loadf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi r3, r3, 2
+; CHECK-NEXT:    lxsihzx f0, 0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: loadf:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r0, 16(r1)
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    lhz r3, 2(r3)
+; SOFT-NEXT:    bl __gnu_h2f_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i16, i16* %a, i64 1
+  %0 = load i16, i16* %arrayidx, align 2
+  %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
+  ret float %1
+}
+
+declare float @llvm.convert.from.fp16.f32(i16)
+
+define dso_local void @stored(i16* nocapture %a, double %b) local_unnamed_addr #0 {
+; P8-LABEL: stored:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT:    std r0, 16(r1)
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    mr r30, r3
+; P8-NEXT:    bl __truncdfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 0(r30)
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: stored:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    stxsihx f0, 0, r3
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: stored:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r0, 16(r1)
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    mr r30, r3
+; SOFT-NEXT:    mr r3, r4
+; SOFT-NEXT:    bl __truncdfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __gnu_h2f_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __gnu_f2h_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+entry:
+  %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b)
+  store i16 %0, i16* %a, align 2
+  ret void
+}
+
+declare i16 @llvm.convert.to.fp16.f64(double)
+
+define dso_local void @storef(i16* nocapture %a, float %b) local_unnamed_addr #0 {
+; P8-LABEL: storef:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT:    std r0, 16(r1)
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    mr r30, r3
+; P8-NEXT:    bl __gnu_f2h_ieee
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 0(r30)
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: storef:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    stxsihx f0, 0, r3
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: storef:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r0, 16(r1)
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    mr r30, r3
+; SOFT-NEXT:    clrldi r3, r4, 32
+; SOFT-NEXT:    bl __gnu_f2h_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __gnu_h2f_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __gnu_f2h_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+entry:
+  %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b)
+  store i16 %0, i16* %a, align 2
+  ret void
+}
+
+declare i16 @llvm.convert.to.fp16.f32(float)
+attributes #0 = { nounwind }