Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -1828,6 +1828,12 @@ return IsSigned; } + /// Returns true if arguments should be extended in lib calls. + virtual bool shouldExtendTypeInLibCall(EVT Type, bool IsExtended, + bool IsCastFromFloat) const { + return IsExtended; + } + /// Returns how the given (atomic) load should be expanded by the /// IR-level AtomicExpand pass. virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { @@ -3022,8 +3028,9 @@ /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. std::pair<SDValue, SDValue> makeLibCall( SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, - bool isSigned, const SDLoc &dl, bool doesNotReturn = false, - bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const; + bool isSigned, const SDLoc &dl, bool isCastFromFloat = false, + bool doesNotReturn = false, bool isReturnValueUsed = true, + bool isPostTypeLegalization = false) const; /// Check whether parameters to a call that are passed in callee saved /// registers are the same as from the calling function. 
Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -232,7 +232,7 @@ RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N), true).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -419,7 +419,7 @@ RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N), true).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -616,7 +616,7 @@ RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N), true).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2602,7 +2602,7 @@ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - None, false, getCurSDLoc(), false, false).second; + None, false, getCurSDLoc(), false, false, false).second; // On PS4, the "return address" must still be within the calling function, // even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. 
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -122,8 +122,8 @@ std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, bool isSigned, - const SDLoc &dl, bool doesNotReturn, - bool isReturnValueUsed, + const SDLoc &dl, bool isCastFromFloat, + bool doesNotReturn, bool isReturnValueUsed, bool isPostTypeLegalization) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); @@ -134,6 +134,11 @@ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + + Entry.IsSExt = shouldExtendTypeInLibCall(Op.getValueType(), Entry.IsSExt, + isCastFromFloat); + Entry.IsZExt = shouldExtendTypeInLibCall(Op.getValueType(), Entry.IsZExt, + isCastFromFloat); Args.push_back(Entry); } @@ -145,6 +150,11 @@ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); + bool zeroExtend = !signExtend; + + signExtend = shouldExtendTypeInLibCall(RetVT, signExtend, isCastFromFloat); + zeroExtend = shouldExtendTypeInLibCall(RetVT, zeroExtend, isCastFromFloat); + CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) @@ -152,7 +162,7 @@ .setDiscardResult(!isReturnValueUsed) .setIsPostTypeLegalization(isPostTypeLegalization) .setSExtResult(signExtend) - .setZExtResult(!signExtend); + .setZExtResult(zeroExtend); return LowerCallTo(CLI); } @@ -6576,12 +6586,14 @@ // the legalizer. 
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, - /* doesNotReturn */ false, /* isReturnValueUsed */ true, + /* isCastFromFloat */ false, /* doesNotReturn */ false, + /* isReturnValueUsed */ true, /* isPostTypeLegalization */ true).first; } else { SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, - /* doesNotReturn */ false, /* isReturnValueUsed */ true, + /* isCastFromFloat */ false, /* doesNotReturn */ false, + /* isReturnValueUsed */ true, /* isPostTypeLegalization */ true).first; } assert(Ret.getOpcode() == ISD::MERGE_VALUES && Index: lib/Target/RISCV/RISCVISelLowering.h =================================================================== --- lib/Target/RISCV/RISCVISelLowering.h +++ lib/Target/RISCV/RISCVISelLowering.h @@ -141,6 +141,9 @@ unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + bool shouldExtendTypeInLibCall(EVT Type, bool IsExtended, + bool IsCastFromFloat) const override; + private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl<ISD::InputArg> &Ins, Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -2643,3 +2643,13 @@ const Constant *PersonalityFn) const { return RISCV::X11; } + +bool +RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type, bool IsExtended, + bool IsCastFromFloat) const { + RISCVABI::ABI ABI = Subtarget.getTargetABI(); + if ((ABI == RISCVABI::ABI_LP64) && (Type == MVT::i32) && IsCastFromFloat) + return false; + + return IsExtended; +} Index: test/CodeGen/RISCV/rv32i-rv64i-float-double.ll =================================================================== --- test/CodeGen/RISCV/rv32i-rv64i-float-double.ll +++ test/CodeGen/RISCV/rv32i-rv64i-float-double.ll @@ -31,13 +31,12 @@ ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 
8(sp) ; RV64IF-NEXT: sd s0, 0(sp) +; RV64IF-NEXT: mv s0, a1 +; RV64IF-NEXT: call __addsf3 ; RV64IF-NEXT: slli a0, a0, 32 ; RV64IF-NEXT: srli a0, a0, 32 -; RV64IF-NEXT: slli a1, a1, 32 -; RV64IF-NEXT: srli s0, a1, 32 -; RV64IF-NEXT: mv a1, s0 -; RV64IF-NEXT: call __addsf3 -; RV64IF-NEXT: mv a1, s0 +; RV64IF-NEXT: slli a1, s0, 32 +; RV64IF-NEXT: srli a1, a1, 32 ; RV64IF-NEXT: call __divsf3 ; RV64IF-NEXT: ld s0, 0(sp) ; RV64IF-NEXT: ld ra, 8(sp) Index: test/CodeGen/RISCV/rv64-complex-float.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/rv64-complex-float.ll @@ -0,0 +1,208 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s + +define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind { +; CHECK-LABEL: complex_float_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) +; CHECK-NEXT: sd s0, 16(sp) +; CHECK-NEXT: sd s1, 8(sp) +; CHECK-NEXT: sd s2, 0(sp) +; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: call __addsf3 +; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: srli a0, s1, 32 +; CHECK-NEXT: srli a1, s0, 32 +; CHECK-NEXT: call __addsf3 +; CHECK-NEXT: slli a1, s2, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ld s2, 0(sp) +; CHECK-NEXT: ld s1, 8(sp) +; CHECK-NEXT: ld s0, 16(sp) +; CHECK-NEXT: ld ra, 24(sp) +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +entry: + %a.sroa.0.0.extract.trunc = trunc i64 %a.coerce to i32 + %0 = bitcast i32 %a.sroa.0.0.extract.trunc to float + %a.sroa.2.0.extract.shift = lshr i64 %a.coerce, 32 + %a.sroa.2.0.extract.trunc = trunc i64 %a.sroa.2.0.extract.shift to i32 + %1 = bitcast i32 %a.sroa.2.0.extract.trunc to float + %b.sroa.0.0.extract.trunc = trunc i64 %b.coerce to i32 + %2 = bitcast i32 %b.sroa.0.0.extract.trunc to float + %b.sroa.2.0.extract.shift = lshr 
i64 %b.coerce, 32 + %b.sroa.2.0.extract.trunc = trunc i64 %b.sroa.2.0.extract.shift to i32 + %3 = bitcast i32 %b.sroa.2.0.extract.trunc to float + %add.r = fadd float %0, %2 + %add.i = fadd float %1, %3 + %4 = bitcast float %add.r to i32 + %5 = bitcast float %add.i to i32 + %retval.sroa.2.0.insert.ext = zext i32 %5 to i64 + %retval.sroa.2.0.insert.shift = shl nuw i64 %retval.sroa.2.0.insert.ext, 32 + %retval.sroa.0.0.insert.ext = zext i32 %4 to i64 + %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.2.0.insert.shift, %retval.sroa.0.0.insert.ext + ret i64 %retval.sroa.0.0.insert.insert +} + +define i64 @complex_float_sub(i64 %a.coerce, i64 %b.coerce) nounwind { +; CHECK-LABEL: complex_float_sub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) +; CHECK-NEXT: sd s0, 16(sp) +; CHECK-NEXT: sd s1, 8(sp) +; CHECK-NEXT: sd s2, 0(sp) +; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: call __subsf3 +; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: srli a0, s1, 32 +; CHECK-NEXT: srli a1, s0, 32 +; CHECK-NEXT: call __subsf3 +; CHECK-NEXT: slli a1, s2, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ld s2, 0(sp) +; CHECK-NEXT: ld s1, 8(sp) +; CHECK-NEXT: ld s0, 16(sp) +; CHECK-NEXT: ld ra, 24(sp) +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +entry: + %a.sroa.0.0.extract.trunc = trunc i64 %a.coerce to i32 + %0 = bitcast i32 %a.sroa.0.0.extract.trunc to float + %a.sroa.2.0.extract.shift = lshr i64 %a.coerce, 32 + %a.sroa.2.0.extract.trunc = trunc i64 %a.sroa.2.0.extract.shift to i32 + %1 = bitcast i32 %a.sroa.2.0.extract.trunc to float + %b.sroa.0.0.extract.trunc = trunc i64 %b.coerce to i32 + %2 = bitcast i32 %b.sroa.0.0.extract.trunc to float + %b.sroa.2.0.extract.shift = lshr i64 %b.coerce, 32 + %b.sroa.2.0.extract.trunc = trunc i64 %b.sroa.2.0.extract.shift to i32 + %3 = bitcast i32 %b.sroa.2.0.extract.trunc to float + %sub.r = fsub float %0, %2 + %sub.i = fsub 
float %1, %3 + %4 = bitcast float %sub.r to i32 + %5 = bitcast float %sub.i to i32 + %retval.sroa.2.0.insert.ext = zext i32 %5 to i64 + %retval.sroa.2.0.insert.shift = shl nuw i64 %retval.sroa.2.0.insert.ext, 32 + %retval.sroa.0.0.insert.ext = zext i32 %4 to i64 + %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.2.0.insert.shift, %retval.sroa.0.0.insert.ext + ret i64 %retval.sroa.0.0.insert.insert +} + +define i64 @complex_float_mul(i64 %a.coerce, i64 %b.coerce) nounwind { +; CHECK-LABEL: complex_float_mul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sd ra, 56(sp) +; CHECK-NEXT: sd s0, 48(sp) +; CHECK-NEXT: sd s1, 40(sp) +; CHECK-NEXT: sd s2, 32(sp) +; CHECK-NEXT: sd s3, 24(sp) +; CHECK-NEXT: sd s4, 16(sp) +; CHECK-NEXT: sd s5, 8(sp) +; CHECK-NEXT: mv s4, a1 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: call __mulsf3 +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: srli s3, s1, 32 +; CHECK-NEXT: srli s2, s4, 32 +; CHECK-NEXT: mv a0, s3 +; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: call __mulsf3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: call __subsf3 +; CHECK-NEXT: mv s5, a0 +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: call __mulsf3 +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv a0, s3 +; CHECK-NEXT: mv a1, s4 +; CHECK-NEXT: call __mulsf3 +; CHECK-NEXT: mv a1, s0 +; CHECK-NEXT: call __addsf3 +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: slli a0, s5, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: call __unordsf2 +; CHECK-NEXT: beqz a0, .LBB2_3 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: slli a0, s0, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: call __unordsf2 +; CHECK-NEXT: beqz a0, .LBB2_3 +; CHECK-NEXT: # %bb.2: # %complex_mul_libcall +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a1, s3 +; CHECK-NEXT: mv a2, s4 +; CHECK-NEXT: mv a3, s2 +; CHECK-NEXT: call __mulsc3 +; CHECK-NEXT: mv s5, a0 +; CHECK-NEXT: srli s0, a0, 32 +; CHECK-NEXT: .LBB2_3: # %complex_mul_cont 
+; CHECK-NEXT: slli a0, s0, 32 +; CHECK-NEXT: slli a1, s5, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ld s5, 8(sp) +; CHECK-NEXT: ld s4, 16(sp) +; CHECK-NEXT: ld s3, 24(sp) +; CHECK-NEXT: ld s2, 32(sp) +; CHECK-NEXT: ld s1, 40(sp) +; CHECK-NEXT: ld s0, 48(sp) +; CHECK-NEXT: ld ra, 56(sp) +; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: ret +entry: + %a.sroa.0.0.extract.trunc = trunc i64 %a.coerce to i32 + %0 = bitcast i32 %a.sroa.0.0.extract.trunc to float + %a.sroa.2.0.extract.shift = lshr i64 %a.coerce, 32 + %a.sroa.2.0.extract.trunc = trunc i64 %a.sroa.2.0.extract.shift to i32 + %1 = bitcast i32 %a.sroa.2.0.extract.trunc to float + %b.sroa.0.0.extract.trunc = trunc i64 %b.coerce to i32 + %2 = bitcast i32 %b.sroa.0.0.extract.trunc to float + %b.sroa.2.0.extract.shift = lshr i64 %b.coerce, 32 + %b.sroa.2.0.extract.trunc = trunc i64 %b.sroa.2.0.extract.shift to i32 + %3 = bitcast i32 %b.sroa.2.0.extract.trunc to float + %mul_ac = fmul float %0, %2 + %mul_bd = fmul float %1, %3 + %mul_ad = fmul float %0, %3 + %mul_bc = fmul float %1, %2 + %mul_r = fsub float %mul_ac, %mul_bd + %mul_i = fadd float %mul_bc, %mul_ad + %isnan_cmp = fcmp uno float %mul_r, 0.000000e+00 + %isnan_cmp1 = fcmp uno float %mul_i, 0.000000e+00 + %or.cond = and i1 %isnan_cmp, %isnan_cmp1 + br i1 %or.cond, label %complex_mul_libcall, label %complex_mul_cont + +complex_mul_libcall: ; preds = %entry + %call = tail call i64 @__mulsc3(float %0, float %1, float %2, float %3) #2 + %coerce.sroa.0.0.extract.trunc = trunc i64 %call to i32 + %4 = bitcast i32 %coerce.sroa.0.0.extract.trunc to float + %coerce.sroa.2.0.extract.shift = lshr i64 %call, 32 + %coerce.sroa.2.0.extract.trunc = trunc i64 %coerce.sroa.2.0.extract.shift to i32 + %5 = bitcast i32 %coerce.sroa.2.0.extract.trunc to float + br label %complex_mul_cont + +complex_mul_cont: ; preds = %complex_mul_libcall, %entry + %real_mul_phi = phi float [ %mul_r, %entry ], [ %4, %complex_mul_libcall ] + %imag_mul_phi = phi 
float [ %mul_i, %entry ], [ %5, %complex_mul_libcall ] + %6 = bitcast float %real_mul_phi to i32 + %7 = bitcast float %imag_mul_phi to i32 + %retval.sroa.2.0.insert.ext = zext i32 %7 to i64 + %retval.sroa.2.0.insert.shift = shl nuw i64 %retval.sroa.2.0.insert.ext, 32 + %retval.sroa.0.0.insert.ext = zext i32 %6 to i64 + %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.2.0.insert.shift, %retval.sroa.0.0.insert.ext + ret i64 %retval.sroa.0.0.insert.insert +} + +declare i64 @__mulsc3(float, float, float, float) local_unnamed_addr