diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -44,6 +44,7 @@ FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, ()) +FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, ()) FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ()) FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -494,6 +494,9 @@ // Expands large div/rem instructions. FunctionPass *createExpandLargeDivRemPass(); + // Expands large fp convert instructions. + FunctionPass *createExpandLargeFpConvertPass(); + // This pass expands memcmp() to load/stores. FunctionPass *createExpandMemCmpPass(); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1955,6 +1955,12 @@ return MaxDivRemBitWidthSupported; } + /// Returns the size in bits of the largest fp convert the backend + /// supports. Larger operations will be expanded by ExpandLargeFPConvert. + unsigned getMaxLargeFPConvertBitWidthSupported() const { + return MaxLargeFPConvertBitWidthSupported; + } + /// Returns the size of the smallest cmpxchg or ll/sc instruction /// the backend supports. Any smaller operations are widened in /// AtomicExpandPass. @@ -2526,6 +2532,12 @@ MaxDivRemBitWidthSupported = SizeInBits; } + /// Set the size in bits of the maximum fp convert the backend supports. 
+ /// Larger operations will be expanded by ExpandLargeFPConvert. + void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) { + MaxLargeFPConvertBitWidthSupported = SizeInBits; + } + /// Sets the minimum cmpxchg or ll/sc size supported by the backend. void setMinCmpXchgSizeInBits(unsigned SizeInBits) { MinCmpXchgSizeInBits = SizeInBits; @@ -3245,6 +3257,10 @@ /// Larger operations will be expanded by ExpandLargeDivRem. unsigned MaxDivRemBitWidthSupported; + /// Size in bits of the largest fp convert the backend + /// supports. Larger operations will be expanded by ExpandLargeFPConvert. + unsigned MaxLargeFPConvertBitWidthSupported; + /// Size in bits of the minimum cmpxchg or ll/sc operation the /// backend supports. unsigned MinCmpXchgSizeInBits; diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -134,6 +134,7 @@ void initializeEdgeBundlesPass(PassRegistry&); void initializeEHContGuardCatchretPass(PassRegistry &); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&); +void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&); void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&); void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -54,6 +54,7 @@ EHContGuardCatchret.cpp ExecutionDomainFix.cpp ExpandLargeDivRem.cpp + ExpandLargeFpConvert.cpp ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -37,6 +37,7 @@ initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicatePass(Registry); 
initializeExpandLargeDivRemLegacyPassPass(Registry); + initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); diff --git a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp @@ -0,0 +1,683 @@ +//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, +// ‘sitofp .. to’ instructions with a bitwidth above a threshold into +// auto-generated code. This is useful for targets like x86_64 that cannot +// lower fp conversions with more than 128 bits. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +#include + +using namespace llvm; + +static cl::opt + ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, + cl::init(llvm::IntegerType::MAX_INT_BITS), + cl::desc("fp convert instructions on integers with " + "more than bits are expanded.")); + +/// Generate code to convert a fp number to integer, replacing FPToS(U)I with +/// the generated code. This currently generates code similarly to compiler-rt's +/// implementations. 
+/// +/// An example IR generated from compiler-rt/fixsfdi.c looks like below: +/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 { +/// entry: +/// %0 = bitcast float %a to i32 +/// %conv.i = zext i32 %0 to i64 +/// %tobool.not = icmp sgt i32 %0, -1 +/// %conv = select i1 %tobool.not, i64 1, i64 -1 +/// %and = lshr i64 %conv.i, 23 +/// %shr = and i64 %and, 255 +/// %and2 = and i64 %conv.i, 8388607 +/// %or = or i64 %and2, 8388608 +/// %cmp = icmp ult i64 %shr, 127 +/// br i1 %cmp, label %cleanup, label %if.end +/// +/// if.end: ; preds = %entry +/// %sub = add nuw nsw i64 %shr, 4294967169 +/// %conv5 = and i64 %sub, 4294967232 +/// %cmp6.not = icmp eq i64 %conv5, 0 +/// br i1 %cmp6.not, label %if.end12, label %if.then8 +/// +/// if.then8: ; preds = %if.end +/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808 +/// br label %cleanup +/// +/// if.end12: ; preds = %if.end +/// %cmp13 = icmp ult i64 %shr, 150 +/// br i1 %cmp13, label %if.then15, label %if.else +/// +/// if.then15: ; preds = %if.end12 +/// %sub16 = sub nuw nsw i64 150, %shr +/// %shr17 = lshr i64 %or, %sub16 +/// %mul = mul nsw i64 %shr17, %conv +/// br label %cleanup +/// +/// if.else: ; preds = %if.end12 +/// %sub18 = add nsw i64 %shr, -150 +/// %shl = shl i64 %or, %sub18 +/// %mul19 = mul nsw i64 %shl, %conv +/// br label %cleanup +/// +/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8 +/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ] +/// ret i64 %retval.0 +/// } +/// +/// Replace fp to integer with generated code. 
+static bool expandFPToI(Instruction *FPToI) { + IRBuilder<> Builder(FPToI); + auto *FloatVal = FPToI->getOperand(0); + IntegerType *IntTy = cast(FPToI->getType()); + + unsigned BitWidth = FPToI->getType()->getIntegerBitWidth(); + unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1; + + // FIXME: As there are no related builtins added in compiler-rt, + // this currently utilizes the fp32 <-> fp16 lib calls to implement it. + Value *a1 = nullptr; + if (FPMantissaWidth == 10) { + if (FPToI->getOpcode() == Instruction::FPToUI) { + Value *a0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32)); + a1 = Builder.CreateZExt(a0, IntTy); + } else { + Value *a0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32)); + a1 = Builder.CreateSExt(a0, IntTy); + } + FPToI->replaceAllUsesWith(a1); + FPToI->dropAllReferences(); + FPToI->eraseFromParent(); + return true; + } + + // Deal with "x86_fp80" situation. + FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth; + unsigned FloatWidth = pow(2, int(log2(FPMantissaWidth)) + 1); + unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1; + Value *implicitBit = Builder.CreateShl( + Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth)); + Value *significandMask = + Builder.CreateSub(implicitBit, Builder.getIntN(BitWidth, 1)); + + // BasicBlock *IBB = Builder.GetInsertBlock(); + BasicBlock *Entry = Builder.GetInsertBlock(); + Function *F = Entry->getParent(); + Entry->setName(Twine(Entry->getName(), "fp-to-i-entry")); + BasicBlock *End = + Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup"); + BasicBlock *IfEnd = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End); + BasicBlock *IfThen5 = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End); + BasicBlock *IfEnd9 = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End); + BasicBlock *IfThen12 = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End); + 
BasicBlock *IfElse = + BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End); + + Entry->getTerminator()->eraseFromParent(); + + // entry: + Builder.SetInsertPoint(Entry); + Value *FloatVal0 = FloatVal; + if (FloatVal->getType()->getFPMantissaWidth() == 64) + FloatVal0 = + Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext())); + Value *aRep0 = + Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth)); + Value *aRep = Builder.CreateZExt(aRep0, FPToI->getType()); + Value *tobool_not = Builder.CreateICmpSGT( + aRep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1)); + Value *sign = + Builder.CreateSelect(tobool_not, ConstantInt::getSigned(IntTy, 1), + ConstantInt::getSigned(IntTy, -1)); + Value *andf = + Builder.CreateLShr(aRep, Builder.getIntN(BitWidth, FPMantissaWidth)); + Value *exponent_with_bias = Builder.CreateAnd( + andf, Builder.getIntN(BitWidth, (1u << ExponentWidth) - 1)); + Value *aAbs = Builder.CreateAnd(aRep, significandMask); + Value *significand = Builder.CreateOr(aAbs, implicitBit); + Value *cmp = Builder.CreateICmpULT( + exponent_with_bias, + Builder.getIntN(BitWidth, (1u << (ExponentWidth - 1)) - 1)); + Builder.CreateCondBr(cmp, End, IfEnd); + + // if.end: + Builder.SetInsertPoint(IfEnd); + Value *add1 = Builder.CreateAdd( + exponent_with_bias, + ConstantInt::getSigned( + IntTy, -int64_t((1u << (ExponentWidth - 1)) + BitWidth - 1))); + Value *cmp3 = + Builder.CreateICmpULT(add1, ConstantInt::getSigned(IntTy, -BitWidth)); + Builder.CreateCondBr(cmp3, IfThen5, IfEnd9); + + // if.then5: + Builder.SetInsertPoint(IfThen5); + Value *neg_one = Builder.CreateSExt( + ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy); + Value *neg_inf = + Builder.CreateShl(ConstantInt::getSigned(IntTy, 1), + ConstantInt::getSigned(IntTy, BitWidth - 1)); + Value *pos_inf = Builder.CreateXor(neg_one, neg_inf, "inf_temp3"); + Value *cond8 = Builder.CreateSelect(tobool_not, pos_inf, neg_inf, "cond8"); + Builder.CreateBr(End); 
+ + // if.end9: + Builder.SetInsertPoint(IfEnd9); + Value *cmp10 = Builder.CreateICmpULT( + exponent_with_bias, + Builder.getIntN(BitWidth, + (1u << (ExponentWidth - 1)) + FPMantissaWidth - 1)); + Builder.CreateCondBr(cmp10, IfThen12, IfElse); + + // if.then12: + Builder.SetInsertPoint(IfThen12); + Value *sub13 = + Builder.CreateSub(Builder.getIntN(BitWidth, (1u << (ExponentWidth - 1)) + + FPMantissaWidth - 1), + exponent_with_bias); + Value *shr14 = Builder.CreateLShr(significand, sub13); + Value *mul = Builder.CreateMul(shr14, sign); + Builder.CreateBr(End); + + // if.else: + Builder.SetInsertPoint(IfElse); + Value *sub15 = Builder.CreateAdd( + exponent_with_bias, + ConstantInt::getSigned( + IntTy, -int64_t((1u << (ExponentWidth - 1)) + FPMantissaWidth - 1))); + Value *shl = Builder.CreateShl(significand, sub15); + Value *mul16 = Builder.CreateMul(shl, sign); + Builder.CreateBr(End); + + // cleanup: + Builder.SetInsertPoint(End, End->begin()); + PHINode *retval_0 = Builder.CreatePHI(FPToI->getType(), 4); + + retval_0->addIncoming(cond8, IfThen5); + retval_0->addIncoming(mul, IfThen12); + retval_0->addIncoming(mul16, IfElse); + retval_0->addIncoming(Builder.getIntN(BitWidth, 0), Entry); + + FPToI->replaceAllUsesWith(retval_0); + FPToI->dropAllReferences(); + FPToI->eraseFromParent(); + return true; +} + +/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with +/// the generated code. This currently generates code similarly to compiler-rt's +/// implementations. This implementation implicitly assumes that the integer +/// width is larger than the fp width. 
+/// +/// An example IR generated from compiler-rt/floatdisf.c looks like below: +/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 { +/// entry: +/// %cmp = icmp eq i64 %a, 0 +/// br i1 %cmp, label %return, label %if.end +/// +/// if.end: ; preds = %entry +/// %shr = ashr i64 %a, 63 +/// %xor = xor i64 %shr, %a +/// %sub = sub nsw i64 %xor, %shr +/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5 +/// %cast = trunc i64 %0 to i32 +/// %sub1 = sub nuw nsw i32 64, %cast +/// %sub2 = xor i32 %cast, 63 +/// %cmp3 = icmp ult i32 %cast, 40 +/// br i1 %cmp3, label %if.then4, label %if.else +/// +/// if.then4: ; preds = %if.end +/// switch i32 %sub1, label %sw.default [ +/// i32 25, label %sw.bb +/// i32 26, label %sw.epilog +/// ] +/// +/// sw.bb: ; preds = %if.then4 +/// %shl = shl i64 %sub, 1 +/// br label %sw.epilog +/// +/// sw.default: ; preds = %if.then4 +/// %sub5 = sub nsw i64 38, %0 +/// %sh_prom = and i64 %sub5, 4294967295 +/// %shr6 = lshr i64 %sub, %sh_prom +/// %shr9 = lshr i64 274877906943, %0 +/// %and = and i64 %shr9, %sub +/// %cmp10 = icmp ne i64 %and, 0 +/// %conv11 = zext i1 %cmp10 to i64 +/// %or = or i64 %shr6, %conv11 +/// br label %sw.epilog +/// +/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb +/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ] +/// %1 = lshr i64 %a.addr.0, 2 +/// %2 = and i64 %1, 1 +/// %or16 = or i64 %2, %a.addr.0 +/// %inc = add nsw i64 %or16, 1 +/// %3 = and i64 %inc, 67108864 +/// %tobool.not = icmp eq i64 %3, 0 +/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3 +/// %spec.select = ashr i64 %inc, %spec.select.v +/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1 +/// br label %if.end26 +/// +/// if.else: ; preds = %if.end +/// %sub23 = add nuw nsw i64 %0, 4294967256 +/// %sh_prom24 = and i64 %sub23, 4294967295 +/// %shl25 = shl i64 %sub, %sh_prom24 +/// br label %if.end26 +/// +/// if.end26: ; preds = %sw.epilog, 
%if.else +/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ] +/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ] +/// %conv27 = trunc i64 %shr to i32 +/// %and28 = and i32 %conv27, -2147483648 +/// %add = shl nuw nsw i32 %e.0, 23 +/// %shl29 = add nuw nsw i32 %add, 1065353216 +/// %conv31 = trunc i64 %a.addr.1 to i32 +/// %and32 = and i32 %conv31, 8388607 +/// %or30 = or i32 %and32, %and28 +/// %or33 = or i32 %or30, %shl29 +/// %4 = bitcast i32 %or33 to float +/// br label %return +/// +/// return: ; preds = %entry, %if.end26 +/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ] +/// ret float %retval.0 +/// } +/// +/// Replace integer to fp with generated code. +static bool expandIToFP(Instruction *IToFP) { + IRBuilder<> Builder(IToFP); + auto *IntVal = IToFP->getOperand(0); + IntegerType *IntTy = cast(IntVal->getType()); + + unsigned BitWidth = IntVal->getType()->getIntegerBitWidth(); + unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1; + FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth; + FPMantissaWidth = FPMantissaWidth == 10 ? 
23 : FPMantissaWidth; + unsigned FloatWidth = pow(2, int(log2(FPMantissaWidth)) + 1); + bool isSigned = IToFP->getOpcode() == Instruction::SIToFP; + + BasicBlock *Entry = Builder.GetInsertBlock(); + Function *F = Entry->getParent(); + Entry->setName(Twine(Entry->getName(), "itofp-entry")); + BasicBlock *End = + Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return"); + BasicBlock *IfEnd = + BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End); + BasicBlock *IfThen4 = + BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End); + BasicBlock *SwBB = + BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End); + BasicBlock *SwDefault = + BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End); + BasicBlock *SwEpilog = + BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End); + BasicBlock *IfThen20 = + BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End); + BasicBlock *IfElse = + BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End); + BasicBlock *IfEnd26 = + BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End); + + Entry->getTerminator()->eraseFromParent(); + + Function *CTLZ = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy); + ConstantInt *True = Builder.getTrue(); + + // entry: + Builder.SetInsertPoint(Entry); + Value *cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0)); + Builder.CreateCondBr(cmp, End, IfEnd); + + // if.end: + Builder.SetInsertPoint(IfEnd); + Value *shr = + Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1)); + Value *xorr = Builder.CreateXor(shr, IntVal); + Value *sub = Builder.CreateSub(xorr, shr); + Value *call = Builder.CreateCall(CTLZ, {isSigned ? sub : IntVal, True}); + Value *cast = Builder.CreateTrunc(call, Builder.getInt32Ty()); + int BitWidthNew = FloatWidth == 128 ? BitWidth : 32; + Value *sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth), + FloatWidth == 128 ? 
call : cast); + Value *sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1), + FloatWidth == 128 ? call : cast); + Value *cmp3 = Builder.CreateICmpSGT( + sub2, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1)); + Builder.CreateCondBr(cmp3, IfThen4, IfElse); + + // if.then4: + Builder.SetInsertPoint(IfThen4); + llvm::SwitchInst *SI = Builder.CreateSwitch(sub1, SwDefault); + SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB); + SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog); + + // sw.bb: + Builder.SetInsertPoint(SwBB); + Value *shl = + Builder.CreateShl(isSigned ? sub : IntVal, Builder.getIntN(BitWidth, 1)); + Builder.CreateBr(SwEpilog); + + // sw.default: + Builder.SetInsertPoint(SwDefault); + Value *sub5 = Builder.CreateSub( + Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3), + FloatWidth == 128 ? call : cast); + Value *sh_prom = Builder.CreateZExt(sub5, IntTy); + Value *shr6 = Builder.CreateLShr(isSigned ? sub : IntVal, + FloatWidth == 128 ? sub5 : sh_prom); + Value *sub8 = + Builder.CreateAdd(FloatWidth == 128 ? call : cast, + Builder.getIntN(BitWidthNew, FPMantissaWidth + 3)); + Value *sh_prom9 = Builder.CreateZExt(sub8, IntTy); + Value *shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1), + FloatWidth == 128 ? sub8 : sh_prom9); + Value *andd = Builder.CreateAnd(shr9, isSigned ? sub : IntVal); + Value *cmp10 = Builder.CreateICmpNE(andd, Builder.getIntN(BitWidth, 0)); + Value *conv11 = Builder.CreateZExt(cmp10, IntTy); + Value *orr = Builder.CreateOr(shr6, conv11); + Builder.CreateBr(SwEpilog); + + // sw.epilog: + Builder.SetInsertPoint(SwEpilog); + PHINode *a_addr_0 = Builder.CreatePHI(IntTy, 3); + a_addr_0->addIncoming(orr, SwDefault); + a_addr_0->addIncoming(isSigned ? 
sub : IntVal, IfThen4); + a_addr_0->addIncoming(shl, SwBB); + Value *a0 = Builder.CreateTrunc(a_addr_0, Builder.getInt32Ty()); + Value *a1 = Builder.CreateLShr(a0, Builder.getIntN(32, 2)); + Value *a2 = Builder.CreateAnd(a1, Builder.getIntN(32, 1)); + Value *conv16 = Builder.CreateZExt(a2, IntTy); + Value *or17 = Builder.CreateOr(a_addr_0, conv16); + Value *inc = Builder.CreateAdd(or17, Builder.getIntN(BitWidth, 1)); + Value *shr18 = nullptr; + if (isSigned) + shr18 = Builder.CreateAShr(inc, Builder.getIntN(BitWidth, 2)); + else + shr18 = Builder.CreateLShr(inc, Builder.getIntN(BitWidth, 2)); + Value *temp_1 = + Builder.CreateShl(Builder.getIntN(BitWidth, 1), + Builder.getIntN(BitWidth, FPMantissaWidth + 3)); + Value *a3 = Builder.CreateAnd(inc, temp_1, "a3"); + Value *tobool_not = Builder.CreateICmpEQ(a3, Builder.getIntN(BitWidth, 0)); + Value *extract_t60 = + Builder.CreateTrunc(shr18, Builder.getIntNTy(FloatWidth)); + Value *extract63 = Builder.CreateLShr(shr18, Builder.getIntN(BitWidth, 32)); + Value *extract_t64 = nullptr; + if (FloatWidth > 80) + extract_t64 = Builder.CreateTrunc(sub2, Builder.getInt64Ty()); + else { + extract_t64 = Builder.CreateTrunc(extract63, Builder.getInt32Ty()); + } + Builder.CreateCondBr(tobool_not, IfEnd26, IfThen20); + + // if.then20 + Builder.SetInsertPoint(IfThen20); + Value *shr21 = nullptr; + if (isSigned) + shr21 = Builder.CreateAShr(inc, Builder.getIntN(BitWidth, 3)); + else + shr21 = Builder.CreateLShr(inc, Builder.getIntN(BitWidth, 3)); + Value *extract_t = Builder.CreateTrunc(shr21, Builder.getIntNTy(FloatWidth)); + Value *extract = Builder.CreateLShr(shr21, Builder.getIntN(BitWidth, 32)); + Value *extract_t62 = nullptr; + if (FloatWidth > 80) + extract_t62 = Builder.CreateTrunc(sub1, Builder.getIntNTy(64)); + else { + extract_t62 = Builder.CreateTrunc(extract, Builder.getIntNTy(32)); + } + Builder.CreateBr(IfEnd26); + + // if.else: + Builder.SetInsertPoint(IfElse); + Value *sub24 = Builder.CreateAdd( + FloatWidth == 128 ? 
call : cast, + ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew), + -(BitWidth - FPMantissaWidth - 1))); + Value *sh_prom25 = Builder.CreateZExt(sub24, IntTy); + Value *shl26 = Builder.CreateShl(isSigned ? sub : IntVal, + FloatWidth == 128 ? sub24 : sh_prom25); + Value *extract_t61 = + Builder.CreateTrunc(shl26, Builder.getIntNTy(FloatWidth)); + Value *extract65 = Builder.CreateLShr(shl26, Builder.getIntN(BitWidth, 32)); + Value *extract_t66 = nullptr; + if (FloatWidth > 80) + extract_t66 = Builder.CreateTrunc(sub2, Builder.getIntNTy(64)); + else { + extract_t66 = Builder.CreateTrunc(extract65, Builder.getInt32Ty()); + } + Builder.CreateBr(IfEnd26); + + // if.end26: + Builder.SetInsertPoint(IfEnd26); + PHINode *a_addr_1_off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3); + a_addr_1_off0->addIncoming(extract_t, IfThen20); + a_addr_1_off0->addIncoming(extract_t60, SwEpilog); + a_addr_1_off0->addIncoming(extract_t61, IfElse); + PHINode *a_addr_1_off32 = nullptr; + if (FloatWidth > 80) + a_addr_1_off32 = Builder.CreatePHI(Builder.getIntNTy(64), 3); + else + a_addr_1_off32 = Builder.CreatePHI(Builder.getIntNTy(32), 3); + a_addr_1_off32->addIncoming(extract_t62, IfThen20); + a_addr_1_off32->addIncoming(extract_t64, SwEpilog); + a_addr_1_off32->addIncoming(extract_t66, IfElse); + PHINode *e_0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3); + e_0->addIncoming(sub1, IfThen20); + e_0->addIncoming(sub2, SwEpilog); + e_0->addIncoming(sub2, IfElse); + Value *and29 = nullptr; + if (FloatWidth > 80) { + Value *temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1), + Builder.getIntN(BitWidth, 63)); + and29 = Builder.CreateAnd(shr, temp2, "and29"); + } else { + Value *conv28 = Builder.CreateTrunc(shr, Builder.getIntNTy(32)); + and29 = Builder.CreateAnd( + conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000)); + } + unsigned int temp_mod = FPMantissaWidth % 32; + Value *and34 = nullptr; + Value *shl30 = nullptr; + if (FloatWidth == 128) { + temp_mod 
+= 32; + Value *add = + Builder.CreateShl(a_addr_1_off32, Builder.getIntN(64, temp_mod)); + shl30 = Builder.CreateAdd( + add, + Builder.getIntN(64, ((1ull << (62ull - temp_mod)) - 1ull) << temp_mod)); + and34 = Builder.CreateZExt(shl30, Builder.getIntNTy(128)); + } else { + Value *add = Builder.CreateShl(e_0, Builder.getIntN(32, temp_mod)); + shl30 = Builder.CreateAdd( + add, Builder.getIntN(32, ((1 << (30 - temp_mod)) - 1) << temp_mod)); + if (FloatWidth > 32) + and34 = Builder.CreateAnd(a_addr_1_off32, + Builder.getIntN(32, (1 << temp_mod) - 1)); + else + and34 = Builder.CreateAnd(a_addr_1_off0, + Builder.getIntN(32, (1 << temp_mod) - 1)); + } + Value *or35 = nullptr; + if (FloatWidth == 128) { + Value *or31 = nullptr; + if (BitWidth > 128) { + Value *and29_trunc = Builder.CreateTrunc(and29, Builder.getIntNTy(128)); + or31 = Builder.CreateOr(and29_trunc, and34); + } else + or31 = Builder.CreateOr(and29, and34); + Value *or3462 = Builder.CreateShl(or31, Builder.getIntN(128, 64)); + Value *temp3 = Builder.CreateShl(Builder.getIntN(128, 1), + Builder.getIntN(128, FPMantissaWidth)); + Value *temp4 = Builder.CreateSub(temp3, Builder.getIntN(128, 1)); + Value *a6 = Builder.CreateAnd(a_addr_1_off0, temp4); + or35 = Builder.CreateOr(or3462, a6); + } else { + Value *or31 = Builder.CreateOr(and34, and29); + if (isSigned) + or35 = Builder.CreateOr(or31, shl30); + else + or35 = Builder.CreateOr(and34, shl30); + } + + Value *a4 = nullptr; + if (FloatWidth == 64) { + Value *fb_sroa_0_4_insert_ext = + Builder.CreateZExt(or35, Builder.getIntNTy(FloatWidth)); + Value *fb_sroa_0_4_insert_shift = Builder.CreateShl( + fb_sroa_0_4_insert_ext, Builder.getIntN(FloatWidth, 32)); + Value *fb_sroa_0_0_insert_ext = Builder.CreateAnd( + a_addr_1_off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF)); + Value *fb_sroa_0_0_insert_insert = + Builder.CreateOr(fb_sroa_0_4_insert_shift, fb_sroa_0_0_insert_ext); + a4 = Builder.CreateBitCast(fb_sroa_0_0_insert_insert, IToFP->getType()); + } else { + if 
(IToFP->getType()->getFPMantissaWidth() == 64) { + Value *a4_0 = + Builder.CreateBitCast(or35, Type::getFP128Ty(Builder.getContext())); + a4 = Builder.CreateFPTrunc(a4_0, IToFP->getType()); + } + // Deal with "half" situation. This is a workaround since we don't have + // floattihf.c currently. + else if (IToFP->getType()->getFPMantissaWidth() == 11) { + Value *a4_0 = + Builder.CreateBitCast(or35, Type::getFloatTy(Builder.getContext())); + a4 = Builder.CreateFPTrunc(a4_0, IToFP->getType()); + } else + a4 = Builder.CreateBitCast(or35, IToFP->getType()); + } + Builder.CreateBr(End); + + // return: + Builder.SetInsertPoint(End, End->begin()); + PHINode *retval_0 = Builder.CreatePHI(IToFP->getType(), 2); + retval_0->addIncoming(a4, IfEnd26); + retval_0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry); + IToFP->replaceAllUsesWith(retval_0); + IToFP->dropAllReferences(); + IToFP->eraseFromParent(); + return true; +} + +static bool runImpl(Function &F, const TargetLowering &TLI) { + SmallVector Replace; + bool Modified = false; + + unsigned MaxLegalFpConvertBitWidth = + TLI.getMaxLargeFPConvertBitWidthSupported(); + if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS) + MaxLegalFpConvertBitWidth = ExpandFpConvertBits; + + if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS) + return false; + + for (auto &I : instructions(F)) { + switch (I.getOpcode()) { + case Instruction::FPToUI: + case Instruction::FPToSI: { + // TODO: This pass doesn't handle vectors. + if (I.getOperand(0)->getType()->isVectorTy()) + continue; + + auto *IntTy = dyn_cast(I.getType()); + if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) + continue; + + Replace.push_back(&I); + Modified = true; + break; + } + case Instruction::UIToFP: + case Instruction::SIToFP: { + // TODO: This pass doesn't handle vectors. 
+ if (I.getOperand(0)->getType()->isVectorTy()) + continue; + + auto *IntTy = dyn_cast(I.getOperand(0)->getType()); + if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) + continue; + + Replace.push_back(&I); + Modified = true; + break; + } + default: + break; + } + } + + if (Replace.empty()) + return false; + + while (!Replace.empty()) { + Instruction *I = Replace.pop_back_val(); + if (I->getOpcode() == Instruction::FPToUI || + I->getOpcode() == Instruction::FPToSI) { + expandFPToI(I); + } else { + expandIToFP(I); + } + } + + return Modified; +} + +class ExpandLargeFpConvertLegacyPass : public FunctionPass { +public: + static char ID; + + ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) { + initializeExpandLargeFpConvertLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *TM = &getAnalysis().getTM(); + auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + return runImpl(F, *TLI); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } +}; + +char ExpandLargeFpConvertLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert", + "Expand large fp convert", false, false) +INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert", + "Expand large fp convert", false, false) + +FunctionPass *llvm::createExpandLargeFpConvertPass() { + return new ExpandLargeFpConvertLegacyPass(); +} diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -726,6 +726,8 @@ MaxDivRemBitWidthSupported = llvm::IntegerType::MAX_INT_BITS; + MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS; + MinCmpXchgSizeInBits = 0; SupportsUnalignedAtomics = false; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp 
b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1114,6 +1114,7 @@ addPass(createPreISelIntrinsicLoweringPass()); PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); addPass(createExpandLargeDivRemPass()); + addPass(createExpandLargeFpConvertPass()); addIRPasses(); addCodeGenPrepare(); addPassesToHandleExceptions(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -172,6 +172,8 @@ setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64); + setMaxLargeFPConvertBitWidthSupported(128); + // Set up the register classes. addRegisterClass(MVT::i8, &X86::GR8RegClass); addRegisterClass(MVT::i16, &X86::GR16RegClass); diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -16,6 +16,7 @@ ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem +; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Lower Garbage Collection Instructions diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -20,6 +20,7 @@ ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem +; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: SVE intrinsics optimizations ; CHECK-NEXT: FunctionPass Manager diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -28,6 +28,7 @@ ; GCN-O0-NEXT: Pre-ISel Intrinsic Lowering ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: Expand large div/rem +; GCN-O0-NEXT: Expand large fp convert ; GCN-O0-NEXT: AMDGPU Printf lowering ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: Dominator Tree Construction @@ -169,6 +170,7 @@ ; GCN-O1-NEXT: Pre-ISel Intrinsic Lowering ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Expand large div/rem +; GCN-O1-NEXT: Expand large fp convert ; GCN-O1-NEXT: AMDGPU Printf lowering ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Dominator Tree Construction @@ -432,6 +434,7 @@ ; GCN-O1-OPTS-NEXT: Pre-ISel Intrinsic Lowering ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Expand large div/rem +; GCN-O1-OPTS-NEXT: Expand large fp convert ; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Dominator Tree Construction @@ -727,6 +730,7 @@ ; GCN-O2-NEXT: Pre-ISel Intrinsic Lowering ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Expand large div/rem +; GCN-O2-NEXT: Expand large fp convert ; GCN-O2-NEXT: AMDGPU Printf lowering ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Dominator Tree Construction @@ -1025,6 +1029,7 @@ ; GCN-O3-NEXT: Pre-ISel Intrinsic Lowering ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Expand large div/rem +; GCN-O3-NEXT: Expand large fp convert ; GCN-O3-NEXT: AMDGPU Printf lowering ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Dominator Tree Construction diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -6,6 +6,7 @@ ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem +; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction diff 
--git a/llvm/test/CodeGen/M68k/pipeline.ll b/llvm/test/CodeGen/M68k/pipeline.ll --- a/llvm/test/CodeGen/M68k/pipeline.ll +++ b/llvm/test/CodeGen/M68k/pipeline.ll @@ -3,6 +3,7 @@ ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem +; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -20,6 +20,7 @@ ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem +; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Convert i1 constants to i32/i64 if they are returned ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: PPC Lower MASS Entries diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -20,6 +20,7 @@ ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem +; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Lower Garbage Collection Instructions diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -24,6 +24,7 @@ ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem +; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll --- 
a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -18,6 +18,7 @@ ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem +; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Lower AMX intrinsics ; CHECK-NEXT: Lower AMX type for load/store diff --git a/llvm/test/CodeGen/X86/expand-large-fp-convert-fptosi129.ll b/llvm/test/CodeGen/X86/expand-large-fp-convert-fptosi129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-large-fp-convert-fptosi129.ll @@ -0,0 +1,177 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=x86_64-- -expand-large-fp-convert < %s | FileCheck %s + +define i129 @halftosi129(half %a) { +; CHECK-LABEL: @halftosi129( +; CHECK-NEXT: [[TMP1:%.*]] = fptosi half [[A:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i129 +; CHECK-NEXT: ret i129 [[TMP2]] +; + %conv = fptosi half %a to i129 + ret i129 %conv +} + +define i129 @floattosi129(float %a) { +; CHECK-LABEL: @floattosi129( +; CHECK-NEXT: fp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP1]], 23 +; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP4]], 255 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP1]], 8388607 +; CHECK-NEXT: [[TMP7:%.*]] = or i129 [[TMP6]], 8388608 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 127 +; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -256 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], 4294967167 +; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; 
CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[COND8:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP5]], 150 +; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP12:%.*]] = sub i129 150, [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 [[TMP7]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i129 [[TMP13]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP15:%.*]] = add i129 [[TMP5]], -150 +; CHECK-NEXT: [[TMP16:%.*]] = shl i129 [[TMP7]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i129 [[TMP16]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[COND8]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP14]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP17]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: ret i129 [[TMP18]] +; + %conv = fptosi float %a to i129 + ret i129 %conv +} + +define i129 @doubletosi129(double %a) { +; CHECK-LABEL: @doubletosi129( +; CHECK-NEXT: fp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[A:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[TMP0]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[TMP0]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP1]], 52 +; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP4]], 2047 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP1]], 4503599627370495 +; CHECK-NEXT: [[TMP7:%.*]] = or i129 [[TMP6]], 4503599627370496 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 1023 +; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -1152 
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], 4294967167 +; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[COND8:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP5]], 1075 +; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP12:%.*]] = sub i129 1075, [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 [[TMP7]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i129 [[TMP13]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP15:%.*]] = add i129 [[TMP5]], -1075 +; CHECK-NEXT: [[TMP16:%.*]] = shl i129 [[TMP7]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i129 [[TMP16]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[COND8]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP14]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP17]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: ret i129 [[TMP18]] +; + %conv = fptosi double %a to i129 + ret i129 %conv +} + +define i129 @x86_fp80tosi129(x86_fp80 %a) { +; CHECK-LABEL: @x86_fp80tosi129( +; CHECK-NEXT: fp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = fpext x86_fp80 [[A:%.*]] to fp128 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast fp128 [[TMP0]] to i128 +; CHECK-NEXT: [[TMP2:%.*]] = zext i128 [[TMP1]] to i129 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i128 [[TMP1]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i129 [[TMP2]], 112 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP5]], 32767 +; CHECK-NEXT: [[TMP7:%.*]] = and i129 [[TMP2]], 5192296858534827628530496329220095 +; CHECK-NEXT: [[TMP8:%.*]] = or i129 
[[TMP7]], 5192296858534827628530496329220096 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i129 [[TMP6]], 16383 +; CHECK-NEXT: br i1 [[TMP9]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -16512 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], 4294967167 +; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[COND8:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i129 [[TMP6]], 16495 +; CHECK-NEXT: br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP13:%.*]] = sub i129 16495, [[TMP6]] +; CHECK-NEXT: [[TMP14:%.*]] = lshr i129 [[TMP8]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = mul i129 [[TMP14]], [[TMP4]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP16:%.*]] = add i129 [[TMP6]], -16495 +; CHECK-NEXT: [[TMP17:%.*]] = shl i129 [[TMP8]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = mul i129 [[TMP17]], [[TMP4]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP19:%.*]] = phi i129 [ [[COND8]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP15]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP18]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: ret i129 [[TMP19]] +; + %conv = fptosi x86_fp80 %a to i129 + ret i129 %conv +} + +define i129 @fp128tosi129(fp128 %a) { +; CHECK-LABEL: @fp128tosi129( +; CHECK-NEXT: fp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast fp128 [[A:%.*]] to i128 +; CHECK-NEXT: [[TMP1:%.*]] = zext i128 [[TMP0]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i128 [[TMP0]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1 +; CHECK-NEXT: 
[[TMP4:%.*]] = lshr i129 [[TMP1]], 112 +; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP4]], 32767 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP1]], 5192296858534827628530496329220095 +; CHECK-NEXT: [[TMP7:%.*]] = or i129 [[TMP6]], 5192296858534827628530496329220096 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 16383 +; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -16512 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], 4294967167 +; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[COND8:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP5]], 16495 +; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP12:%.*]] = sub i129 16495, [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 [[TMP7]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i129 [[TMP13]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP15:%.*]] = add i129 [[TMP5]], -16495 +; CHECK-NEXT: [[TMP16:%.*]] = shl i129 [[TMP7]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i129 [[TMP16]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[COND8]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP14]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP17]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: ret i129 [[TMP18]] +; + %conv = fptosi fp128 %a to i129 + ret i129 %conv +} diff --git a/llvm/test/CodeGen/X86/expand-large-fp-convert-fptoui129.ll b/llvm/test/CodeGen/X86/expand-large-fp-convert-fptoui129.ll new file mode 
100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-large-fp-convert-fptoui129.ll @@ -0,0 +1,177 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=x86_64-- -expand-large-fp-convert < %s | FileCheck %s + +define i129 @halftoui129(half %a) { +; CHECK-LABEL: @halftoui129( +; CHECK-NEXT: [[TMP1:%.*]] = fptosi half [[A:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i129 +; CHECK-NEXT: ret i129 [[TMP2]] +; + %conv = fptosi half %a to i129 + ret i129 %conv +} + +define i129 @floattoui129(float %a) { +; CHECK-LABEL: @floattoui129( +; CHECK-NEXT: fp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP1]], 23 +; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP4]], 255 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP1]], 8388607 +; CHECK-NEXT: [[TMP7:%.*]] = or i129 [[TMP6]], 8388608 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 127 +; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -256 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], 4294967167 +; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[COND8:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP5]], 150 +; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP12:%.*]] = sub i129 150, [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 
[[TMP7]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i129 [[TMP13]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP15:%.*]] = add i129 [[TMP5]], -150 +; CHECK-NEXT: [[TMP16:%.*]] = shl i129 [[TMP7]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i129 [[TMP16]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[COND8]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP14]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP17]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: ret i129 [[TMP18]] +; + %conv = fptosi float %a to i129 + ret i129 %conv +} + +define i129 @doubletoui129(double %a) { +; CHECK-LABEL: @doubletoui129( +; CHECK-NEXT: fp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[A:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[TMP0]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[TMP0]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP1]], 52 +; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP4]], 2047 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP1]], 4503599627370495 +; CHECK-NEXT: [[TMP7:%.*]] = or i129 [[TMP6]], 4503599627370496 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 1023 +; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -1152 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], 4294967167 +; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[COND8:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP5]], 1075 +; CHECK-NEXT: br i1 [[TMP11]], label 
[[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP12:%.*]] = sub i129 1075, [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 [[TMP7]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i129 [[TMP13]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP15:%.*]] = add i129 [[TMP5]], -1075 +; CHECK-NEXT: [[TMP16:%.*]] = shl i129 [[TMP7]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i129 [[TMP16]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[COND8]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP14]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP17]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: ret i129 [[TMP18]] +; + %conv = fptosi double %a to i129 + ret i129 %conv +} + +define i129 @x86_fp80toui129(x86_fp80 %a) { +; CHECK-LABEL: @x86_fp80toui129( +; CHECK-NEXT: fp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = fpext x86_fp80 [[A:%.*]] to fp128 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast fp128 [[TMP0]] to i128 +; CHECK-NEXT: [[TMP2:%.*]] = zext i128 [[TMP1]] to i129 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i128 [[TMP1]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i129 [[TMP2]], 112 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP5]], 32767 +; CHECK-NEXT: [[TMP7:%.*]] = and i129 [[TMP2]], 5192296858534827628530496329220095 +; CHECK-NEXT: [[TMP8:%.*]] = or i129 [[TMP7]], 5192296858534827628530496329220096 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i129 [[TMP6]], 16383 +; CHECK-NEXT: br i1 [[TMP9]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -16512 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], 4294967167 +; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[COND8:%.*]] = 
select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i129 [[TMP6]], 16495 +; CHECK-NEXT: br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP13:%.*]] = sub i129 16495, [[TMP6]] +; CHECK-NEXT: [[TMP14:%.*]] = lshr i129 [[TMP8]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = mul i129 [[TMP14]], [[TMP4]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP16:%.*]] = add i129 [[TMP6]], -16495 +; CHECK-NEXT: [[TMP17:%.*]] = shl i129 [[TMP8]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = mul i129 [[TMP17]], [[TMP4]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP19:%.*]] = phi i129 [ [[COND8]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP15]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP18]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: ret i129 [[TMP19]] +; + %conv = fptosi x86_fp80 %a to i129 + ret i129 %conv +} + +define i129 @fp128toui129(fp128 %a) { +; CHECK-LABEL: @fp128toui129( +; CHECK-NEXT: fp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast fp128 [[A:%.*]] to i128 +; CHECK-NEXT: [[TMP1:%.*]] = zext i128 [[TMP0]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i128 [[TMP0]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP1]], 112 +; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP4]], 32767 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP1]], 5192296858534827628530496329220095 +; CHECK-NEXT: [[TMP7:%.*]] = or i129 [[TMP6]], 5192296858534827628530496329220096 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 16383 +; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -16512 +; 
CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], 4294967167 +; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[COND8:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP5]], 16495 +; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP12:%.*]] = sub i129 16495, [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 [[TMP7]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i129 [[TMP13]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP15:%.*]] = add i129 [[TMP5]], -16495 +; CHECK-NEXT: [[TMP16:%.*]] = shl i129 [[TMP7]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i129 [[TMP16]], [[TMP3]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[COND8]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP14]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP17]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: ret i129 [[TMP18]] +; + %conv = fptosi fp128 %a to i129 + ret i129 %conv +} diff --git a/llvm/test/CodeGen/X86/expand-large-fp-convert-si129tofp.ll b/llvm/test/CodeGen/X86/expand-large-fp-convert-si129tofp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-large-fp-convert-si129tofp.ll @@ -0,0 +1,431 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=x86_64-- -expand-large-fp-convert < %s | FileCheck %s + +define half @ui129tohalf(i129 %a) { +; CHECK-LABEL: @ui129tohalf( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; 
CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP3]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 24 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 103, [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i129 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i129 [[TMP3]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP5]], 26 +; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i129 +; CHECK-NEXT: [[TMP15:%.*]] = lshr i129 -1, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = and i129 [[TMP15]], [[TMP3]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i129 [[TMP16]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = zext i1 [[TMP17]] to i129 +; CHECK-NEXT: [[TMP19:%.*]] = or i129 [[TMP12]], [[TMP18]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP19]], [[ITOFP_SW_DEFAULT]] ], [ [[TMP3]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP21:%.*]] = trunc i129 [[TMP20]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP21]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i129 +; CHECK-NEXT: [[TMP25:%.*]] = or i129 [[TMP20]], [[TMP24]] +; 
CHECK-NEXT: [[TMP26:%.*]] = add i129 [[TMP25]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = ashr i129 [[TMP26]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP26]], 67108864 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32 +; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32 +; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP32:%.*]] = ashr i129 [[TMP26]], 3 +; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = lshr i129 [[TMP32]], 32 +; CHECK-NEXT: [[TMP35:%.*]] = trunc i129 [[TMP34]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP5]], -105 +; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i129 +; CHECK-NEXT: [[TMP38:%.*]] = shl i129 [[TMP3]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = trunc i129 [[TMP38]] to i32 +; CHECK-NEXT: [[TMP40:%.*]] = lshr i129 [[TMP38]], 32 +; CHECK-NEXT: [[TMP41:%.*]] = trunc i129 [[TMP40]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP42:%.*]] = phi i32 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], [[ITOFP_SW_EPILOG]] ], [ [[TMP39]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[TMP35]], [[ITOFP_IF_THEN20]] ], [ [[TMP31]], [[ITOFP_SW_EPILOG]] ], [ [[TMP41]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], -2147483648 +; CHECK-NEXT: [[TMP47:%.*]] = shl i32 [[TMP44]], 23 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 1065353216 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP42]], 8388607 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[TMP46]] +; 
CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP50]], [[TMP48]] +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32 [[TMP51]] to float +; CHECK-NEXT: [[TMP53:%.*]] = fptrunc float [[TMP52]] to half +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP54:%.*]] = phi half [ [[TMP53]], [[ITOFP_IF_END26]] ], [ 0xH0000, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret half [[TMP54]] +; + %conv = sitofp i129 %a to half + ret half %conv +} + +define float @ui129tofloat(i129 %a) { +; CHECK-LABEL: @ui129tofloat( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP3]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 24 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 103, [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i129 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i129 [[TMP3]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP5]], 26 +; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i129 +; CHECK-NEXT: [[TMP15:%.*]] = lshr i129 -1, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = and i129 
[[TMP15]], [[TMP3]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i129 [[TMP16]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = zext i1 [[TMP17]] to i129 +; CHECK-NEXT: [[TMP19:%.*]] = or i129 [[TMP12]], [[TMP18]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP19]], [[ITOFP_SW_DEFAULT]] ], [ [[TMP3]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP21:%.*]] = trunc i129 [[TMP20]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP21]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i129 +; CHECK-NEXT: [[TMP25:%.*]] = or i129 [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = add i129 [[TMP25]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = ashr i129 [[TMP26]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP26]], 67108864 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32 +; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32 +; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP32:%.*]] = ashr i129 [[TMP26]], 3 +; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = lshr i129 [[TMP32]], 32 +; CHECK-NEXT: [[TMP35:%.*]] = trunc i129 [[TMP34]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP5]], -105 +; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i129 +; CHECK-NEXT: [[TMP38:%.*]] = shl i129 [[TMP3]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = trunc i129 [[TMP38]] to i32 +; CHECK-NEXT: [[TMP40:%.*]] = lshr i129 [[TMP38]], 32 +; CHECK-NEXT: [[TMP41:%.*]] = trunc i129 [[TMP40]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP42:%.*]] = phi i32 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], 
[[ITOFP_SW_EPILOG]] ], [ [[TMP39]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[TMP35]], [[ITOFP_IF_THEN20]] ], [ [[TMP31]], [[ITOFP_SW_EPILOG]] ], [ [[TMP41]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], -2147483648 +; CHECK-NEXT: [[TMP47:%.*]] = shl i32 [[TMP44]], 23 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 1065353216 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP42]], 8388607 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[TMP46]] +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP50]], [[TMP48]] +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32 [[TMP51]] to float +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP53:%.*]] = phi float [ [[TMP52]], [[ITOFP_IF_END26]] ], [ 0.000000e+00, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[TMP53]] +; + %conv = sitofp i129 %a to float + ret float %conv +} + +define double @ui129todouble(i129 %a) { +; CHECK-LABEL: @ui129todouble( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP3]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 53 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ 
+; CHECK-NEXT: i32 54, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i32 55, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 74, [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i129 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i129 [[TMP3]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP5]], 55 +; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i129 +; CHECK-NEXT: [[TMP15:%.*]] = lshr i129 -1, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = and i129 [[TMP15]], [[TMP3]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i129 [[TMP16]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = zext i1 [[TMP17]] to i129 +; CHECK-NEXT: [[TMP19:%.*]] = or i129 [[TMP12]], [[TMP18]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP19]], [[ITOFP_SW_DEFAULT]] ], [ [[TMP3]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP21:%.*]] = trunc i129 [[TMP20]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP21]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i129 +; CHECK-NEXT: [[TMP25:%.*]] = or i129 [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = add i129 [[TMP25]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = ashr i129 [[TMP26]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP26]], 36028797018963968 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32 +; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP32:%.*]] = ashr i129 [[TMP26]], 3 +; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = lshr 
i129 [[TMP32]], 32 +; CHECK-NEXT: [[TMP35:%.*]] = trunc i129 [[TMP34]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP5]], -76 +; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i129 +; CHECK-NEXT: [[TMP38:%.*]] = shl i129 [[TMP3]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = trunc i129 [[TMP38]] to i64 +; CHECK-NEXT: [[TMP40:%.*]] = lshr i129 [[TMP38]], 32 +; CHECK-NEXT: [[TMP41:%.*]] = trunc i129 [[TMP40]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP42:%.*]] = phi i64 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], [[ITOFP_SW_EPILOG]] ], [ [[TMP39]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[TMP35]], [[ITOFP_IF_THEN20]] ], [ [[TMP31]], [[ITOFP_SW_EPILOG]] ], [ [[TMP41]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], -2147483648 +; CHECK-NEXT: [[TMP47:%.*]] = shl i32 [[TMP44]], 20 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 1072693248 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP43]], 1048575 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[TMP46]] +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP50]], [[TMP48]] +; CHECK-NEXT: [[TMP52:%.*]] = zext i32 [[TMP51]] to i64 +; CHECK-NEXT: [[TMP53:%.*]] = shl i64 [[TMP52]], 32 +; CHECK-NEXT: [[TMP54:%.*]] = and i64 [[TMP42]], 4294967295 +; CHECK-NEXT: [[TMP55:%.*]] = or i64 [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = bitcast i64 [[TMP55]] to double +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP57:%.*]] = phi double [ [[TMP56]], [[ITOFP_IF_END26]] ], [ 0.000000e+00, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret double [[TMP57]] +; + %conv = sitofp i129 %a to double + ret double %conv +} + +define x86_fp80 
@si129tox86_fp80(i129 %a) { +; CHECK-LABEL: @si129tox86_fp80( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP3]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i129 129, [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i129 128, [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i129 [[TMP7]], 113 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i129 114, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i129 115, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i129 14, [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = lshr i129 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = add i129 [[TMP4]], 115 +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 -1, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = and i129 [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i129 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i129 +; CHECK-NEXT: [[TMP17:%.*]] = or i129 [[TMP11]], [[TMP16]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[TMP17]], [[ITOFP_SW_DEFAULT]] ], [ [[TMP3]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i129 [[TMP18]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP19]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = and i32
[[TMP20]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i129 +; CHECK-NEXT: [[TMP23:%.*]] = or i129 [[TMP18]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = add i129 [[TMP23]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = ashr i129 [[TMP24]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP24]], 41538374868278621028243970633760768 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128 +; CHECK-NEXT: [[TMP28:%.*]] = lshr i129 [[TMP25]], 32 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64 +; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP30:%.*]] = ashr i129 [[TMP24]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128 +; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP30]], 32 +; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP6]] to i64 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP34:%.*]] = add i129 [[TMP4]], 4294967280 +; CHECK-NEXT: [[TMP35:%.*]] = shl i129 [[TMP3]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = trunc i129 [[TMP35]] to i128 +; CHECK-NEXT: [[TMP37:%.*]] = lshr i129 [[TMP35]], 32 +; CHECK-NEXT: [[TMP38:%.*]] = trunc i129 [[TMP7]] to i64 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP39:%.*]] = phi i128 [ [[TMP31]], [[ITOFP_IF_THEN20]] ], [ [[TMP27]], [[ITOFP_SW_EPILOG]] ], [ [[TMP36]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi i64 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], [[ITOFP_SW_EPILOG]] ], [ [[TMP38]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP41:%.*]] = phi i129 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[AND29:%.*]] = and i129 [[TMP1]], 9223372036854775808 +; CHECK-NEXT: [[TMP42:%.*]] = shl i64 [[TMP40]], 48 +; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], 4611404543450677248 +; CHECK-NEXT: [[TMP44:%.*]] = zext i64 [[TMP43]] 
to i128 +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[AND29]] to i128 +; CHECK-NEXT: [[TMP46:%.*]] = or i128 [[TMP45]], [[TMP44]] +; CHECK-NEXT: [[TMP47:%.*]] = shl i128 [[TMP46]], 64 +; CHECK-NEXT: [[TMP48:%.*]] = and i128 [[TMP39]], 5192296858534827628530496329220095 +; CHECK-NEXT: [[TMP49:%.*]] = or i128 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = bitcast i128 [[TMP49]] to fp128 +; CHECK-NEXT: [[TMP51:%.*]] = fptrunc fp128 [[TMP50]] to x86_fp80 +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP52:%.*]] = phi x86_fp80 [ [[TMP51]], [[ITOFP_IF_END26]] ], [ 0xK00000000000000000000, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret x86_fp80 [[TMP52]] +; + %conv = sitofp i129 %a to x86_fp80 + ret x86_fp80 %conv +} + +define fp128 @si129tofp128(i129 %a) { +; CHECK-LABEL: @si129tofp128( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP3]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i129 129, [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i129 128, [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i129 [[TMP7]], 113 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i129 114, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i129 115, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i129 14, [[TMP4]] +;
CHECK-NEXT: [[TMP11:%.*]] = lshr i129 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = add i129 [[TMP4]], 115 +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 -1, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = and i129 [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i129 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i129 +; CHECK-NEXT: [[TMP17:%.*]] = or i129 [[TMP11]], [[TMP16]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[TMP17]], [[ITOFP_SW_DEFAULT]] ], [ [[TMP3]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i129 [[TMP18]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP19]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i129 +; CHECK-NEXT: [[TMP23:%.*]] = or i129 [[TMP18]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = add i129 [[TMP23]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = ashr i129 [[TMP24]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP24]], 41538374868278621028243970633760768 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128 +; CHECK-NEXT: [[TMP28:%.*]] = lshr i129 [[TMP25]], 32 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64 +; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP30:%.*]] = ashr i129 [[TMP24]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128 +; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP30]], 32 +; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP6]] to i64 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP34:%.*]] = add i129 [[TMP4]], 4294967280 +; CHECK-NEXT: [[TMP35:%.*]] = shl i129 [[TMP3]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = trunc i129 [[TMP35]] to i128 +; CHECK-NEXT: [[TMP37:%.*]] = lshr i129 [[TMP35]], 32 +; CHECK-NEXT: [[TMP38:%.*]] = trunc 
i129 [[TMP7]] to i64 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP39:%.*]] = phi i128 [ [[TMP31]], [[ITOFP_IF_THEN20]] ], [ [[TMP27]], [[ITOFP_SW_EPILOG]] ], [ [[TMP36]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi i64 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], [[ITOFP_SW_EPILOG]] ], [ [[TMP38]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP41:%.*]] = phi i129 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[AND29:%.*]] = and i129 [[TMP1]], 9223372036854775808 +; CHECK-NEXT: [[TMP42:%.*]] = shl i64 [[TMP40]], 48 +; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], 4611404543450677248 +; CHECK-NEXT: [[TMP44:%.*]] = zext i64 [[TMP43]] to i128 +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[AND29]] to i128 +; CHECK-NEXT: [[TMP46:%.*]] = or i128 [[TMP45]], [[TMP44]] +; CHECK-NEXT: [[TMP47:%.*]] = shl i128 [[TMP46]], 64 +; CHECK-NEXT: [[TMP48:%.*]] = and i128 [[TMP39]], 5192296858534827628530496329220095 +; CHECK-NEXT: [[TMP49:%.*]] = or i128 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = bitcast i128 [[TMP49]] to fp128 +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP51:%.*]] = phi fp128 [ [[TMP50]], [[ITOFP_IF_END26]] ], [ 0xL00000000000000000000000000000000, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret fp128 [[TMP51]] +; + %conv = sitofp i129 %a to fp128 + ret fp128 %conv +} diff --git a/llvm/test/CodeGen/X86/expand-large-fp-convert-ui129tofp.ll b/llvm/test/CodeGen/X86/expand-large-fp-convert-ui129tofp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-large-fp-convert-ui129tofp.ll @@ -0,0 +1,431 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=x86_64-- -expand-large-fp-convert < %s | FileCheck %s + +define half @ui129tohalf(i129 %a) { +; CHECK-LABEL: @ui129tohalf( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 
[[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 24 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 103, [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i129 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i129 [[A]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP5]], 26 +; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i129 +; CHECK-NEXT: [[TMP15:%.*]] = lshr i129 -1, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = and i129 [[TMP15]], [[A]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i129 [[TMP16]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = zext i1 [[TMP17]] to i129 +; CHECK-NEXT: [[TMP19:%.*]] = or i129 [[TMP12]], [[TMP18]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP19]], [[ITOFP_SW_DEFAULT]] ], [ [[A]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP21:%.*]] = trunc i129 [[TMP20]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP21]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = 
zext i32 [[TMP23]] to i129 +; CHECK-NEXT: [[TMP25:%.*]] = or i129 [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = add i129 [[TMP25]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = lshr i129 [[TMP26]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP26]], 67108864 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32 +; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32 +; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP26]], 3 +; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = lshr i129 [[TMP32]], 32 +; CHECK-NEXT: [[TMP35:%.*]] = trunc i129 [[TMP34]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP5]], -105 +; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i129 +; CHECK-NEXT: [[TMP38:%.*]] = shl i129 [[A]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = trunc i129 [[TMP38]] to i32 +; CHECK-NEXT: [[TMP40:%.*]] = lshr i129 [[TMP38]], 32 +; CHECK-NEXT: [[TMP41:%.*]] = trunc i129 [[TMP40]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP42:%.*]] = phi i32 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], [[ITOFP_SW_EPILOG]] ], [ [[TMP39]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[TMP35]], [[ITOFP_IF_THEN20]] ], [ [[TMP31]], [[ITOFP_SW_EPILOG]] ], [ [[TMP41]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], -2147483648 +; CHECK-NEXT: [[TMP47:%.*]] = shl i32 [[TMP44]], 23 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 1065353216 +; CHECK-NEXT: [[TMP49:%.*]] = 
and i32 [[TMP42]], 8388607 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[TMP46]] +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP49]], [[TMP48]] +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32 [[TMP51]] to float +; CHECK-NEXT: [[TMP53:%.*]] = fptrunc float [[TMP52]] to half +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP54:%.*]] = phi half [ [[TMP53]], [[ITOFP_IF_END26]] ], [ 0xH0000, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret half [[TMP54]] +; + %conv = uitofp i129 %a to half + ret half %conv +} + +define float @ui129tofloat(i129 %a) { +; CHECK-LABEL: @ui129tofloat( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 24 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 103, [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i129 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i129 [[A]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP5]], 26 +; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i129 +; CHECK-NEXT: 
[[TMP15:%.*]] = lshr i129 -1, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = and i129 [[TMP15]], [[A]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i129 [[TMP16]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = zext i1 [[TMP17]] to i129 +; CHECK-NEXT: [[TMP19:%.*]] = or i129 [[TMP12]], [[TMP18]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP19]], [[ITOFP_SW_DEFAULT]] ], [ [[A]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP21:%.*]] = trunc i129 [[TMP20]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP21]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i129 +; CHECK-NEXT: [[TMP25:%.*]] = or i129 [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = add i129 [[TMP25]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = lshr i129 [[TMP26]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP26]], 67108864 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32 +; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32 +; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP26]], 3 +; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = lshr i129 [[TMP32]], 32 +; CHECK-NEXT: [[TMP35:%.*]] = trunc i129 [[TMP34]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP5]], -105 +; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i129 +; CHECK-NEXT: [[TMP38:%.*]] = shl i129 [[A]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = trunc i129 [[TMP38]] to i32 +; CHECK-NEXT: [[TMP40:%.*]] = lshr i129 [[TMP38]], 32 +; CHECK-NEXT: [[TMP41:%.*]] = trunc i129 [[TMP40]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: 
[[TMP42:%.*]] = phi i32 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], [[ITOFP_SW_EPILOG]] ], [ [[TMP39]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[TMP35]], [[ITOFP_IF_THEN20]] ], [ [[TMP31]], [[ITOFP_SW_EPILOG]] ], [ [[TMP41]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], -2147483648 +; CHECK-NEXT: [[TMP47:%.*]] = shl i32 [[TMP44]], 23 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 1065353216 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP42]], 8388607 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[TMP46]] +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP49]], [[TMP48]] +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32 [[TMP51]] to float +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP53:%.*]] = phi float [ [[TMP52]], [[ITOFP_IF_END26]] ], [ 0.000000e+00, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[TMP53]] +; + %conv = uitofp i129 %a to float + ret float %conv +} + +define double @ui129todouble(i129 %a) { +; CHECK-LABEL: @ui129todouble( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], 53 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: 
+; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i32 54, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i32 55, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 74, [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i129 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i129 [[A]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP5]], 55 +; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i129 +; CHECK-NEXT: [[TMP15:%.*]] = lshr i129 -1, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = and i129 [[TMP15]], [[A]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i129 [[TMP16]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = zext i1 [[TMP17]] to i129 +; CHECK-NEXT: [[TMP19:%.*]] = or i129 [[TMP12]], [[TMP18]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP19]], [[ITOFP_SW_DEFAULT]] ], [ [[A]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP21:%.*]] = trunc i129 [[TMP20]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP21]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i129 +; CHECK-NEXT: [[TMP25:%.*]] = or i129 [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = add i129 [[TMP25]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = lshr i129 [[TMP26]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP26]], 36028797018963968 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32 +; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP26]], 3 +; CHECK-NEXT: [[TMP33:%.*]] = trunc 
i129 [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = lshr i129 [[TMP32]], 32 +; CHECK-NEXT: [[TMP35:%.*]] = trunc i129 [[TMP34]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP5]], -76 +; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i129 +; CHECK-NEXT: [[TMP38:%.*]] = shl i129 [[A]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = trunc i129 [[TMP38]] to i64 +; CHECK-NEXT: [[TMP40:%.*]] = lshr i129 [[TMP38]], 32 +; CHECK-NEXT: [[TMP41:%.*]] = trunc i129 [[TMP40]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP42:%.*]] = phi i64 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], [[ITOFP_SW_EPILOG]] ], [ [[TMP39]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[TMP35]], [[ITOFP_IF_THEN20]] ], [ [[TMP31]], [[ITOFP_SW_EPILOG]] ], [ [[TMP41]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], -2147483648 +; CHECK-NEXT: [[TMP47:%.*]] = shl i32 [[TMP44]], 20 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 1072693248 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP43]], 1048575 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[TMP46]] +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP49]], [[TMP48]] +; CHECK-NEXT: [[TMP52:%.*]] = zext i32 [[TMP51]] to i64 +; CHECK-NEXT: [[TMP53:%.*]] = shl i64 [[TMP52]], 32 +; CHECK-NEXT: [[TMP54:%.*]] = and i64 [[TMP42]], 4294967295 +; CHECK-NEXT: [[TMP55:%.*]] = or i64 [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = bitcast i64 [[TMP55]] to double +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP57:%.*]] = phi double [ [[TMP56]], [[ITOFP_IF_END26]] ], [ 0.000000e+00, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret double [[TMP57]] +; + %conv = uitofp i129 %a to double 
+ ret double %conv +} + +define x86_fp80 @ui129tox86_fp80(i129 %a) { +; CHECK-LABEL: @ui129tox86_fp80( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i129 129, [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i129 128, [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i129 [[TMP7]], 113 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i129 114, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i129 115, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i129 14, [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = lshr i129 [[A]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = add i129 [[TMP4]], 115 +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 -1, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = and i129 [[TMP13]], [[A]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i129 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i129 +; CHECK-NEXT: [[TMP17:%.*]] = or i129 [[TMP11]], [[TMP16]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[TMP17]], [[ITOFP_SW_DEFAULT]] ], [ [[A]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i129 [[TMP18]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP19]], 2 +; CHECK-NEXT: 
[[TMP21:%.*]] = and i32 [[TMP20]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i129 +; CHECK-NEXT: [[TMP23:%.*]] = or i129 [[TMP18]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = add i129 [[TMP23]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = lshr i129 [[TMP24]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP24]], 41538374868278621028243970633760768 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128 +; CHECK-NEXT: [[TMP28:%.*]] = lshr i129 [[TMP25]], 32 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64 +; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP24]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128 +; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP30]], 32 +; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP6]] to i64 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP34:%.*]] = add i129 [[TMP4]], 4294967280 +; CHECK-NEXT: [[TMP35:%.*]] = shl i129 [[A]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = trunc i129 [[TMP35]] to i128 +; CHECK-NEXT: [[TMP37:%.*]] = lshr i129 [[TMP35]], 32 +; CHECK-NEXT: [[TMP38:%.*]] = trunc i129 [[TMP7]] to i64 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP39:%.*]] = phi i128 [ [[TMP31]], [[ITOFP_IF_THEN20]] ], [ [[TMP27]], [[ITOFP_SW_EPILOG]] ], [ [[TMP36]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi i64 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], [[ITOFP_SW_EPILOG]] ], [ [[TMP38]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP41:%.*]] = phi i129 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[AND29:%.*]] = and i129 [[TMP1]], 9223372036854775808 +; CHECK-NEXT: [[TMP42:%.*]] = shl i64 [[TMP40]], 48 +; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], 4611404543450677248 +; CHECK-NEXT: [[TMP44:%.*]] 
= zext i64 [[TMP43]] to i128 +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[AND29]] to i128 +; CHECK-NEXT: [[TMP46:%.*]] = or i128 [[TMP45]], [[TMP44]] +; CHECK-NEXT: [[TMP47:%.*]] = shl i128 [[TMP46]], 64 +; CHECK-NEXT: [[TMP48:%.*]] = and i128 [[TMP39]], 5192296858534827628530496329220095 +; CHECK-NEXT: [[TMP49:%.*]] = or i128 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = bitcast i128 [[TMP49]] to fp128 +; CHECK-NEXT: [[TMP51:%.*]] = fptrunc fp128 [[TMP50]] to x86_fp80 +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP52:%.*]] = phi x86_fp80 [ [[TMP51]], [[ITOFP_IF_END26]] ], [ 0xK00000000000000000000, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret x86_fp80 [[TMP52]] +; + %conv = uitofp i129 %a to x86_fp80 + ret x86_fp80 %conv +} + +define fp128 @ui129tofp128(i129 %a) { +; CHECK-LABEL: @ui129tofp128( +; CHECK-NEXT: itofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = trunc i129 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i129 129, [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i129 128, [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i129 [[TMP7]], 113 +; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i129 114, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i129 115, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP10:%.*]] = sub i129 14, 
[[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = lshr i129 [[A]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = add i129 [[TMP4]], 115 +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 -1, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = and i129 [[TMP13]], [[A]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i129 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i129 +; CHECK-NEXT: [[TMP17:%.*]] = or i129 [[TMP11]], [[TMP16]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[TMP17]], [[ITOFP_SW_DEFAULT]] ], [ [[A]], [[ITOFP_IF_THEN4]] ], [ [[TMP9]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i129 [[TMP18]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP19]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i129 +; CHECK-NEXT: [[TMP23:%.*]] = or i129 [[TMP18]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = add i129 [[TMP23]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = lshr i129 [[TMP24]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP24]], 41538374868278621028243970633760768 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128 +; CHECK-NEXT: [[TMP28:%.*]] = lshr i129 [[TMP25]], 32 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64 +; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP24]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128 +; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP30]], 32 +; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP6]] to i64 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP34:%.*]] = add i129 [[TMP4]], 4294967280 +; CHECK-NEXT: [[TMP35:%.*]] = shl i129 [[A]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = trunc i129 [[TMP35]] to i128 +; CHECK-NEXT: [[TMP37:%.*]] = lshr i129 [[TMP35]], 32 +; CHECK-NEXT: [[TMP38:%.*]] = trunc 
i129 [[TMP7]] to i64 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP39:%.*]] = phi i128 [ [[TMP31]], [[ITOFP_IF_THEN20]] ], [ [[TMP27]], [[ITOFP_SW_EPILOG]] ], [ [[TMP36]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi i64 [ [[TMP33]], [[ITOFP_IF_THEN20]] ], [ [[TMP29]], [[ITOFP_SW_EPILOG]] ], [ [[TMP38]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP41:%.*]] = phi i129 [ [[TMP6]], [[ITOFP_IF_THEN20]] ], [ [[TMP7]], [[ITOFP_SW_EPILOG]] ], [ [[TMP7]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[AND29:%.*]] = and i129 [[TMP1]], 9223372036854775808 +; CHECK-NEXT: [[TMP42:%.*]] = shl i64 [[TMP40]], 48 +; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], 4611404543450677248 +; CHECK-NEXT: [[TMP44:%.*]] = zext i64 [[TMP43]] to i128 +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[AND29]] to i128 +; CHECK-NEXT: [[TMP46:%.*]] = or i128 [[TMP45]], [[TMP44]] +; CHECK-NEXT: [[TMP47:%.*]] = shl i128 [[TMP46]], 64 +; CHECK-NEXT: [[TMP48:%.*]] = and i128 [[TMP39]], 5192296858534827628530496329220095 +; CHECK-NEXT: [[TMP49:%.*]] = or i128 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = bitcast i128 [[TMP49]] to fp128 +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP51:%.*]] = phi fp128 [ [[TMP50]], [[ITOFP_IF_END26]] ], [ 0xL00000000000000000000000000000000, [[ITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: ret fp128 [[TMP51]] +; + %conv = uitofp i129 %a to fp128 + ret fp128 %conv +} diff --git a/llvm/test/CodeGen/X86/fp-i129.ll b/llvm/test/CodeGen/X86/fp-i129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-i129.ll @@ -0,0 +1,130 @@ +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X64 + +define i129 @fptosi_float(float %a) nounwind { +; CHECK-LABEL: fptosi_float: +; CHECK-NOT: call + %res = fptosi float %a to i129 + ret i129 %res +} + +define i129 @fptosi_double(double %a) 
nounwind { +; CHECK-LABEL: fptosi_double: +; CHECK-NOT: call + %res = fptosi double %a to i129 + ret i129 %res +} + +define i129 @fptosi_fp128(fp128 %a) nounwind { +; CHECK-LABEL: fptosi_fp128: +; CHECK-NOT: call + %res = fptosi fp128 %a to i129 + ret i129 %res +} + +define i129 @fptoui_float(float %a) nounwind { +; CHECK-LABEL: fptoui_float: +; CHECK-NOT: call + %res = fptoui float %a to i129 + ret i129 %res +} + +define i129 @fptoui_double(double %a) nounwind { +; CHECK-LABEL: fptoui_double: +; CHECK-NOT: call + %res = fptoui double %a to i129 + ret i129 %res +} + +define i129 @fptoui_fp128(fp128 %a) nounwind { +; CHECK-LABEL: fptoui_fp128: +; CHECK-NOT: call + %res = fptoui fp128 %a to i129 + ret i129 %res +} + +define float @sitofp_float(i129 %a) nounwind { +; CHECK-LABEL: sitofp_float: +; CHECK-NOT: call + %res = sitofp i129 %a to float + ret float %res +} + +define double @sitofp_double(i129 %a) nounwind { +; CHECK-LABEL: sitofp_double: +; CHECK-NOT: call + %res = sitofp i129 %a to double + ret double %res +} + +define fp128 @sitofp_fp128(i129 %a) nounwind { +; CHECK-LABEL: sitofp_fp128: +; CHECK-NOT: call + %res = sitofp i129 %a to fp128 + ret fp128 %res +} + +define float @uitofp_float(i129 %a) nounwind { +; CHECK-LABEL: uitofp_float: +; CHECK-NOT: call + %res = uitofp i129 %a to float + ret float %res +} + +define double @uitofp_double(i129 %a) nounwind { +; CHECK-LABEL: uitofp_double: +; CHECK-NOT: call + %res = uitofp i129 %a to double + ret double %res +} + +define fp128 @uitofp_fp128(i129 %a) nounwind { +; CHECK-LABEL: uitofp_fp128: +; CHECK-NOT: call + %res = uitofp i129 %a to fp128 + ret fp128 %res +} + +; higher sizes +define i257 @fptosi257_double(double %a) nounwind { +; CHECK-LABEL: fptosi257_double: +; CHECK-NOT: call + %res = fptosi double %a to i257 + ret i257 %res +} + +; half tests +define i257 @fptosi_half(half %a) nounwind { +; X86-LABEL: fptosi_half: +; X86: __gnu_h2f_ieee +; +; X64-LABEL: fptosi_half: +; X64: __extendhfsf2 + %res = 
fptosi half %a to i257 + ret i257 %res +} + +define half @uitofp_half(i257 %a) nounwind { +; X86-LABEL: uitofp_half: +; X86: __gnu_f2h_ieee +; +; X64-LABEL: uitofp_half: +; X64: __truncsfhf2 + %res = uitofp i257 %a to half + ret half %res +} + +; x86_fp80 tests +define i257 @fptoui_x86_fp80(x86_fp80 %a) nounwind { +; CHECK-LABEL: fptoui_x86_fp80: +; CHECK: __extendxftf2 + %res = fptoui x86_fp80 %a to i257 + ret i257 %res +} + +define x86_fp80 @sitofp_x86_fp80(i257 %a) nounwind { +; CHECK-LABEL: sitofp_x86_fp80: +; CHECK: __trunctfxf2 + %res = sitofp i257 %a to x86_fp80 + ret x86_fp80 %res +} diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -28,6 +28,7 @@ ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem +; CHECK-NEXT: Expand large fp convert ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Lower AMX intrinsics ; CHECK-NEXT: Lower AMX type for load/store diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -422,7 +422,8 @@ "dot-regions", "dot-regions-only", "view-regions", "view-regions-only", "select-optimize", "expand-large-div-rem", - "structurizecfg", "fix-irreducible"}; + "structurizecfg", "fix-irreducible", + "expand-large-fp-convert"}; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) return true; @@ -469,6 +470,7 @@ // For codegen passes, only passes that do IR to IR transformation are // supported. initializeExpandLargeDivRemLegacyPassPass(Registry); + initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSelectOptimizePass(Registry);