Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -418,6 +418,15 @@ SoftenFloatResult(Op.getNode(), 0); } + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { + Op = GetPromotedFloat(Op); + // If the promotion did the FP_EXTEND to the destination type for us, + // there's nothing left to do here. + if (Op.getValueType() == N->getValueType(0)) { + return BitConvertToInteger(Op); + } + } + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) Op = GetSoftenedFloat(Op); Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -198,6 +198,14 @@ if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && + ValueVT.bitsLT(PartEVT)) { + // For an FP value in an integer part, we need to truncate to the right + // width first. + PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); + } + if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to @@ -384,6 +392,12 @@ assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { + if (ValueVT.isFloatingPoint()) { + // FP values need to be bitcast, then extended if they are being put + // into a larger container. 
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); Index: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp @@ -1277,20 +1277,14 @@ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } + // Decide how to handle f16. If the target does not have native f16 support, + // promote it to f32, because there are no f16 library calls (except for + // conversions). if (!isTypeLegal(MVT::f16)) { - // If the target has native f32 support, promote f16 operations to f32. If - // f32 is not supported, generate soft float library calls. - if (isTypeLegal(MVT::f32)) { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; - TransformToType[MVT::f16] = MVT::f32; - ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); - } else { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; - TransformToType[MVT::f16] = MVT::i16; - ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); - } + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); } // Loop over all of the vector value types to see which need transformations. 
Index: llvm/trunk/test/CodeGen/ARM/fp16-promote.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/fp16-promote.ll +++ llvm/trunk/test/CodeGen/ARM/fp16-promote.ll @@ -1,18 +1,18 @@ -; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL -; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL +; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL +; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP +; RUN: llc -asm-verbose=false < %s -mattr=-vfp2 | FileCheck %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" target triple = "armv7---eabihf" -; CHECK-FP16-LABEL: test_fadd: +; CHECK-ALL-LABEL: test_fadd: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vadd.f32 -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_fadd: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vadd.f32 +; CHECK-VFP: vadd.f32 +; CHECK-NOVFP: bl __aeabi_fadd +; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fadd(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -22,15 +22,14 @@ ret void } -; CHECK-FP16-LABEL: test_fsub: +; CHECK-ALL-LABEL: test_fsub: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vsub.f32 -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_fsub: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vsub.f32 +; CHECK-VFP: vsub.f32 +; CHECK-NOVFP: bl __aeabi_fsub +; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl 
__aeabi_f2h define void @test_fsub(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -40,15 +39,14 @@ ret void } -; CHECK-FP16-LABEL: test_fmul: +; CHECK-ALL-LABEL: test_fmul: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vmul.f32 -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_fmul ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vmul.f32 +; CHECK-VFP: vmul.f32 +; CHECK-NOVFP: bl __aeabi_fmul +; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fmul(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -58,15 +56,14 @@ ret void } -; CHECK-FP16-LABEL: test_fdiv: +; CHECK-ALL-LABEL: test_fdiv: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vdiv.f32 -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_fdiv ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vdiv.f32 +; CHECK-VFP: vdiv.f32 +; CHECK-NOVFP: bl __aeabi_fdiv +; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fdiv(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -76,15 +73,13 @@ ret void } -; CHECK-FP16-LABEL: test_frem: +; CHECK-ALL-LABEL: test_frem: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: bl fmodf -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_frem ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl fmodf +; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_frem(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -96,9 +91,8 @@ ; CHECK-ALL-LABEL: test_load_store: ; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: ldrh r0, [r0] -; CHECK-ALL-NEXT: strh r0, [r1] -; CHECK-ALL-NEXT: bx lr +; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}] +; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}] define void @test_load_store(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 store half %a, half* %q @@ 
-125,9 +119,12 @@ ; CHECK-ALL-NEXT: .fnstart ; CHECK-ALL-NEXT: .save {r11, lr} ; CHECK-ALL-NEXT: push {r11, lr} -; CHECK-ALL-NEXT: vmov.f32 s2, s0 -; CHECK-ALL-NEXT: vmov.f32 s0, s1 -; CHECK-ALL-NEXT: vmov.f32 s1, s2 +; CHECK-VFP-NEXT: vmov.f32 s2, s0 +; CHECK-VFP-NEXT: vmov.f32 s0, s1 +; CHECK-VFP-NEXT: vmov.f32 s1, s2 +; CHECK-NOVFP-NEXT: mov r2, r0 +; CHECK-NOVFP-NEXT: mov r0, r1 +; CHECK-NOVFP-NEXT: mov r1, r2 ; CHECK-ALL-NEXT: bl test_callee ; CHECK-ALL-NEXT: pop {r11, pc} define half @test_call_flipped(half %a, half %b) #0 { @@ -137,9 +134,12 @@ ; CHECK-ALL-LABEL: test_tailcall_flipped: ; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: vmov.f32 s2, s0 -; CHECK-ALL-NEXT: vmov.f32 s0, s1 -; CHECK-ALL-NEXT: vmov.f32 s1, s2 +; CHECK-VFP-NEXT: vmov.f32 s2, s0 +; CHECK-VFP-NEXT: vmov.f32 s0, s1 +; CHECK-VFP-NEXT: vmov.f32 s1, s2 +; CHECK-NOVFP-NEXT: mov r2, r0 +; CHECK-NOVFP-NEXT: mov r0, r1 +; CHECK-NOVFP-NEXT: mov r1, r2 ; CHECK-ALL-NEXT: b test_callee define half @test_tailcall_flipped(half %a, half %b) #0 { %r = tail call half @test_callee(half %b, half %a) @@ -149,12 +149,10 @@ ; Optimizer picks %p or %q based on %c and only loads that value ; No conversion is needed ; CHECK-ALL-LABEL: test_select: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: cmp r2, #0 -; CHECK-ALL-NEXT: movne r1, r0 -; CHECK-ALL-NEXT: ldrh r1, [r1] -; CHECK-ALL-NEXT: strh r1, [r0] -; CHECK-ALL-NEXT: bx lr +; CHECK-ALL: cmp {{r[0-9]+}}, #0 +; CHECK-ALL: movne {{r[0-9]+}}, {{r[0-9]+}} +; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}] +; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}] define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 { %a = load half, half* %p, align 2 %b = load half, half* %q, align 2 @@ -165,17 +163,15 @@ ; Test only two variants of fcmp. These get translated to f32 vcmpe ; instructions anyway. 
-; CHECK-FP16-LABEL: test_fcmp_une: +; CHECK-ALL-LABEL: test_fcmp_une: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcmpe.f32 -; CHECK-FP16: vmrs APSR_nzcv, fpscr -; CHECK-FP16: movwne -; CHECK-LIBCALL-LABEL: test_fcmp_une: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcmpe.f32 -; CHECK-LIBCALL: movwne +; CHECK-VFP: vcmpe.f32 +; CHECK-NOVFP: bl __aeabi_fcmpeq +; CHECK-FP16: vmrs APSR_nzcv, fpscr +; CHECK-ALL: movw{{ne|eq}} define i1 @test_fcmp_une(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 %b = load half, half* %q, align 2 @@ -183,18 +179,15 @@ ret i1 %r } -; CHECK-FP16-LABEL: test_fcmp_ueq: +; CHECK-ALL-LABEL: test_fcmp_ueq: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcmpe.f32 -; CHECK-FP16: vmrs APSR_nzcv, fpscr -; CHECK-FP16: movweq -; CHECK-FP16: movwvs -; CHECK-LIBCALL-LABEL: test_fcmp_ueq: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcmpe.f32 -; CHECK-LIBCALL: movweq +; CHECK-VFP: vcmpe.f32 +; CHECK-NOVFP: bl __aeabi_fcmpeq +; CHECK-FP16: vmrs APSR_nzcv, fpscr +; CHECK-LIBCALL: movw{{ne|eq}} define i1 @test_fcmp_ueq(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 %b = load half, half* %q, align 2 @@ -202,19 +195,18 @@ ret i1 %r } -; CHECK-FP16-LABEL: test_br_cc: +; CHECK-ALL-LABEL: test_br_cc: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcmpe.f32 -; CHECK-FP16: vmrs APSR_nzcv, fpscr -; CHECK-FP16: strmi -; CHECK-FP16: strpl -; CHECK-LIBCALL-LABEL: test_br_cc: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcmpe.f32 -; CHECK-LIBCALL: strmi -; CHECK-LIBCALL: strpl +; CHECK-VFP: vcmpe.f32 +; CHECK-NOVFP: bl __aeabi_fcmplt +; CHECK-FP16: vmrs APSR_nzcv, fpscr +; CHECK-VFP: strmi +; CHECK-VFP: strpl +; CHECK-NOVFP: strne +; CHECK-NOVFP: streq define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 { %a = load half, half* %p, align 2 %b = 
load half, half* %q, align 2 @@ -229,20 +221,19 @@ } declare i1 @test_dummy(half* %p) #0 -; CHECK-FP16-LABEL: test_phi: +; CHECK-ALL-LABEL: test_phi: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: [[LOOP:.LBB[1-9_]+]]: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: bl test_dummy ; CHECK-FP16: bne [[LOOP]] ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_phi: -; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL-VFP: bl __aeabi_h2f ; CHECK-LIBCALL: [[LOOP:.LBB[1-9_]+]]: -; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL-VFP: bl __aeabi_h2f ; CHECK-LIBCALL: bl test_dummy ; CHECK-LIBCALL: bne [[LOOP]] -; CHECK-LIBCALL: bl __aeabi_f2h +; CHECK-LIBCALL-VFP: bl __aeabi_f2h define void @test_phi(half* %p) #0 { entry: %a = load half, half* %p @@ -257,59 +248,52 @@ ret void } -; CHECK-FP16-LABEL: test_fptosi_i32: +; CHECK-ALL-LABEL: test_fptosi_i32: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcvt.s32.f32 -; CHECK-LIBCALL-LABEL: test_fptosi_i32: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcvt.s32.f32 +; CHECK-VFP: vcvt.s32.f32 +; CHECK-NOVFP: bl __aeabi_f2iz define i32 @test_fptosi_i32(half* %p) #0 { %a = load half, half* %p, align 2 %r = fptosi half %a to i32 ret i32 %r } -; CHECK-FP16-LABEL: test_fptosi_i64: +; CHECK-ALL-LABEL: test_fptosi_i64: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: bl __aeabi_f2lz -; CHECK-LIBCALL-LABEL: test_fptosi_i64: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: bl __aeabi_f2lz +; CHECK-ALL: bl __aeabi_f2lz define i64 @test_fptosi_i64(half* %p) #0 { %a = load half, half* %p, align 2 %r = fptosi half %a to i64 ret i64 %r } -; CHECK-FP16-LABEL: test_fptoui_i32: +; CHECK-ALL-LABEL: test_fptoui_i32: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcvt.u32.f32 -; CHECK-LIBCALL-LABEL: test_fptoui_i32: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcvt.u32.f32 +; CHECK-VFP: vcvt.u32.f32 +; CHECK-NOVFP: bl __aeabi_f2uiz define i32 @test_fptoui_i32(half* %p) #0 { %a = load half, half* %p, align 2 %r = fptoui half %a to i32 ret i32 %r } -; 
CHECK-FP16-LABEL: test_fptoui_i64: +; CHECK-ALL-LABEL: test_fptoui_i64: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: bl __aeabi_f2ulz -; CHECK-LIBCALL-LABEL: test_fptoui_i64: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: bl __aeabi_f2ulz +; CHECK-ALL: bl __aeabi_f2ulz define i64 @test_fptoui_i64(half* %p) #0 { %a = load half, half* %p, align 2 %r = fptoui half %a to i64 ret i64 %r } -; CHECK-FP16-LABEL: test_sitofp_i32: -; CHECK-FP16: vcvt.f32.s32 +; CHECK-ALL-LABEL: test_sitofp_i32: +; CHECK-VFP: vcvt.f32.s32 +; CHECK-NOVFP: bl __aeabi_i2f ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_sitofp_i32: -; CHECK-LIBCALL: vcvt.f32.s32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_sitofp_i32(i32 %a, half* %p) #0 { %r = sitofp i32 %a to half @@ -317,11 +301,10 @@ ret void } -; CHECK-FP16-LABEL: test_uitofp_i32: -; CHECK-FP16: vcvt.f32.u32 +; CHECK-ALL-LABEL: test_uitofp_i32: +; CHECK-VFP: vcvt.f32.u32 +; CHECK-NOVFP: bl __aeabi_ui2f ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_uitofp_i32: -; CHECK-LIBCALL: vcvt.f32.u32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_uitofp_i32(i32 %a, half* %p) #0 { %r = uitofp i32 %a to half @@ -329,11 +312,9 @@ ret void } -; CHECK-FP16-LABEL: test_sitofp_i64: -; CHECK-FP16: bl __aeabi_l2f +; CHECK-ALL-LABEL: test_sitofp_i64: +; CHECK-ALL: bl __aeabi_l2f ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_sitofp_i64: -; CHECK-LIBCALL: bl __aeabi_l2f ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_sitofp_i64(i64 %a, half* %p) #0 { %r = sitofp i64 %a to half @@ -341,11 +322,9 @@ ret void } -; CHECK-FP16-LABEL: test_uitofp_i64: -; CHECK-FP16: bl __aeabi_ul2f +; CHECK-ALL-LABEL: test_uitofp_i64: +; CHECK-ALL: bl __aeabi_ul2f ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_uitofp_i64: -; CHECK-LIBCALL: bl __aeabi_ul2f ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_uitofp_i64(i64 %a, half* %p) #0 { %r = uitofp i64 %a to half @@ -385,10 +364,10 @@ ; CHECK-FP16-LABEL: test_fpextend_double: ; 
CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcvt.f64.f32 ; CHECK-LIBCALL-LABEL: test_fpextend_double: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcvt.f64.f32 +; CHECK-VFP: vcvt.f64.f32 +; CHECK-NOVFP: bl __aeabi_f2d define double @test_fpextend_double(half* %p) { %a = load half, half* %p, align 2 %r = fpext half %a to double @@ -438,13 +417,13 @@ declare half @llvm.round.f16(half %a) #0 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0 -; CHECK-FP16-LABEL: test_sqrt: +; CHECK-ALL-LABEL: test_sqrt: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vsqrt.f32 ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_sqrt: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vsqrt.f32 +; CHECK-LIBCALL-VFP: vsqrt.f32 +; CHECK-NOVFP: bl sqrtf ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_sqrt(half* %p) #0 { %a = load half, half* %p, align 2 @@ -671,7 +650,10 @@ ; CHECK-LIBCALL-LABEL: test_copysign: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vbsl +; CHECK-LIBCALL-VFP: vbsl +; CHECK-NOVFP: bfc +; CHECK-NOVFP: and +; CHECK-NOVFP: orr ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_copysign(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -781,7 +763,8 @@ ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vmla.f32 +; CHECK-LIBCALL-VFP: vmla.f32 +; CHECK-NOVFP: bl __aeabi_fmul ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fmuladd(half* %p, half* %q, half* %r) #0 { %a = load half, half* %p, align 2 @@ -797,31 +780,28 @@ ; and extractelement have these extra loads and stores. 
; CHECK-ALL-LABEL: test_insertelement: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: .pad #8 -; CHECK-ALL-NEXT: sub sp, sp, #8 -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: mov -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: add -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: add sp, sp, #8 -; CHECK-ALL-NEXT: bx lr +; CHECK-ALL: sub sp, sp, #8 +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: mov +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: add +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: add sp, sp, #8 define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 { %a = load half, half* %p, align 2 %b = load <4 x half>, <4 x half>* %q, align 8 @@ -831,23 +811,30 @@ } ; CHECK-ALL-LABEL: test_extractelement: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: .pad #8 -; CHECK-ALL-NEXT: sub sp, sp, #8 -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: orr -; CHECK-ALL-NEXT: str -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: orr -; CHECK-ALL-NEXT: str -; CHECK-ALL-NEXT: mov -; CHECK-ALL-NEXT: add -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: add sp, sp, #8 -; CHECK-ALL-NEXT: bx lr +; CHECK-VFP: sub sp, sp, #8 +; CHECK-VFP: ldrh +; CHECK-VFP: ldrh +; CHECK-VFP: orr +; CHECK-VFP: str +; CHECK-VFP: ldrh +; CHECK-VFP: ldrh +; CHECK-VFP: orr +; CHECK-VFP: str +; CHECK-VFP: mov +; CHECK-VFP: add +; CHECK-VFP: ldrh +; CHECK-VFP: 
strh +; CHECK-VFP: add sp, sp, #8 +; CHECK-VFP: bx lr +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: strh +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: strh +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: strh +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: strh +; CHECK-NOVFP: ldrh define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 { %a = load <4 x half>, <4 x half>* %q, align 8 %b = extractelement <4 x half> %a, i32 %i @@ -860,12 +847,10 @@ %struct.dummy = type { i32, half } ; CHECK-ALL-LABEL: test_insertvalue: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: ldr -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: str -; CHECK-ALL-NEXT: bx lr +; CHECK-ALL-DAG: ldr +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: str define void @test_insertvalue(%struct.dummy* %p, half* %q) { %a = load %struct.dummy, %struct.dummy* %p %b = load half, half* %q @@ -875,10 +860,9 @@ } ; CHECK-ALL-LABEL: test_extractvalue: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: bx lr +; CHECK-ALL: .fnstart +; CHECK-ALL: ldrh +; CHECK-ALL: strh define void @test_extractvalue(%struct.dummy* %p, half* %q) { %a = load %struct.dummy, %struct.dummy* %p %b = extractvalue %struct.dummy %a, 1 @@ -886,10 +870,11 @@ ret void } -; CHECK-FP16-LABEL: test_struct_return: +; CHECK-ALL-LABEL: test_struct_return: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-LIBCALL-LABEL: test_struct_return: -; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL-VFP: bl __aeabi_h2f +; CHECK-NOVFP-DAG: ldr +; CHECK-NOVFP-DAG: ldrh define %struct.dummy @test_struct_return(%struct.dummy* %p) { %a = load %struct.dummy, %struct.dummy* %p ret %struct.dummy %a @@ -897,6 +882,7 @@ ; CHECK-ALL-LABEL: test_struct_arg: ; CHECK-ALL-NEXT: .fnstart +; CHECK-NOVFP-NEXT: mov r0, r1 ; CHECK-ALL-NEXT: bx lr define half @test_struct_arg(%struct.dummy %p) { %a = extractvalue %struct.dummy %p, 1