diff --git a/crach.c b/crach.c new file mode 100644 --- /dev/null +++ b/crach.c @@ -0,0 +1,17 @@ +#include +static __inline long long ToInt0(_BitInt(256) x) { + const union { + long long f[4]; + _BitInt(256) i; + } rep = {.i = x}; + return rep.f[0]; +} + +int main() { + float b =3.14; + _BitInt(256) a = b; + printf("(int)3.14 = %lld\n", ToInt0(a)); + _BitInt(256) c = 1078523331; + float d = c; + printf("(float) 1078523331 = %f\n", d); +} diff --git a/fix.c b/fix.c new file mode 100644 --- /dev/null +++ b/fix.c @@ -0,0 +1,29 @@ +// clang -Xclang -fexperimental-max-bitint-width=256 fix.c +// Changes into _BitInt(129) also works. +#include +// static __inline float fromRep(unsigned int x) { +// const union { +// float f; +// int i; +// } rep = {.i = x}; +// return rep.f; +// } + +// typedef union { +// _BitInt(129) x; +// unsigned int y[5]; +// } bitint; + // bitint a; + // a.x = b; + // printf("%d\n", a.y[0]); + // printf("%d\n", a.y[1]); + // printf("%d\n", a.y[2]); + // printf("%d\n", a.y[3]); + // printf("%d\n", a.y[4]); + +int main() { + float b = 3433.14123f; + _BitInt(32) c = b; + int* d = (int*)&c; + printf("%d\n", d[0]); +} diff --git a/fixsfsi_single_source.c b/fixsfsi_single_source.c new file mode 100644 --- /dev/null +++ b/fixsfsi_single_source.c @@ -0,0 +1,71 @@ +//===-- fixsfsi.c - Implement __fixsfsi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +#define significandBits 23 +#define signBit 0x80000000 // (REP_C(1) << (significandBits + exponentBits)) +#define exponentBias 127 //(maxExponent >> 1) +#define implicitBit 0x800000 // (REP_C(1) << significandBits) + +#define absMask 0x7FFFFFFF //(signBit - 1U) +#define significandMask (implicitBit - 1U) + +static __inline unsigned int toRep(float x) { + const union { + float f; + unsigned int i; + } rep = {.f = x}; + return rep.i; +} + +static __inline float fromRep(unsigned int x) { + const union { + float f; + int i; + } rep = {.i = x}; + return rep.f; +} + +int foo(float a) { + const int fixint_max = (int)((~(unsigned int)0) / 2); + const int fixint_min = -fixint_max - 1; + // Break a into sign, exponent, significand parts. + const unsigned int aRep = toRep(a); + const unsigned int aAbs = aRep & absMask; + const int sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const unsigned int significand = (aAbs & significandMask) | implicitBit; + + // If exponent is negative, the result is zero. + if (exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(int) * 8) + return sign == 1 ? fixint_max : fixint_min; // negative overflow saturates to the most negative int + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. 
+ if (exponent < significandBits) + return sign * (significand >> (significandBits - exponent)); + else + return sign * ((int)significand << (exponent - significandBits)); +} + +int main() { + // fesetround(FE_UPWARD); + // fesetround(FE_TONEAREST); + // fesetround(FE_TOWARDZERO); + // fesetround(FE_UPWARD); + float a = fromRep(0x44ff7334); + // printf("intput a float:\n"); + // scanf("%f", &a); + int b = foo(a); + printf("conversion result = %d\n", b); +} \ No newline at end of file diff --git a/fixsfsi_single_source_readable.ll b/fixsfsi_single_source_readable.ll new file mode 100644 --- /dev/null +++ b/fixsfsi_single_source_readable.ll @@ -0,0 +1,58 @@ +; ModuleID = 'fixsfsi_single_source.c' +source_filename = "fixsfsi_single_source.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn uwtable +define dso_local i32 @foo(float noundef %a) local_unnamed_addr #0 { +entry: + %aRep = bitcast float %a to i32 + %tobool.not = icmp sgt i32 %aRep, -1 + %sign = select i1 %tobool.not, i32 1, i32 -1 + %and = lshr i32 %aRep, 23 + %exponent_with_bias = and i32 %and, 255 + %aAbs = and i32 %aRep, 8388607 + %significand = or i32 %aAbs, 8388608 + %cmp = icmp ult i32 %exponent_with_bias, 127 + br i1 %cmp, label %cleanup, label %if.end + +if.end: ; preds = %entry + %add1 = add nsw i32 %exponent_with_bias, -159 + %cmp3 = icmp ult i32 %add1, -32 + br i1 %cmp3, label %if.then5, label %if.end9 + +if.then5: ; preds = %if.end + %cond8 = select i1 %tobool.not, i32 2147483647, i32 -2147483648 + br label %cleanup + +if.end9: ; preds = %if.end + %cmp10 = icmp ult i32 %exponent_with_bias, 150 + br i1 %cmp10, label %if.then12, label %if.else + +if.then12: ; preds = %if.end9 + %sub13 = sub nuw nsw i32 150, %exponent_with_bias + %shr14 = lshr i32 %significand, %sub13 + %mul = mul nsw i32 %shr14, %sign + br label %cleanup + 
+if.else: ; preds = %if.end9 + %sub15 = add nsw i32 %exponent_with_bias, -150 + %shl = shl nuw i32 %significand, %sub15 + %mul16 = mul nsw i32 %shl, %sign + br label %cleanup + +cleanup: ; preds = %entry, %if.else, %if.then12, %if.then5 + %retval.0 = phi i32 [ %cond8, %if.then5 ], [ %mul, %if.then12 ], [ %mul16, %if.else ], [ 0, %entry ] + ret i32 %retval.0 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 6c1dd0b7329f6ed2e402468c74355f09447052e3)"} diff --git a/float.c b/float.c new file mode 100644 --- /dev/null +++ b/float.c @@ -0,0 +1,6 @@ +#include +int main() { + _BitInt(64) a = 123412312; + double b = a; + printf("%lf\n", b); +} diff --git a/floatdidf_single_source.c b/floatdidf_single_source.c new file mode 100644 --- /dev/null +++ b/floatdidf_single_source.c @@ -0,0 +1,75 @@ +//===-- lib/floatdidf.c - integer -> double-precision conversion --*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to double-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. 
+// +//===----------------------------------------------------------------------===// + +#include +#include + +#define significandBits 52 +#define typeWidth 64 //(sizeof(unsigned long long) * __CHAR_BIT__) +#define exponentBits 11 // (typeWidth - significandBits - 1) + +#define exponentBias 1023 //(maxExponent >> 1) + +#define implicitBit 0x10000000000000 // (REP_C(1) << significandBits) + +#define signBit 0x8000000000000000 // (REP_C(1) << (significandBits + exponentBits)) + +static __inline double fromRep(unsigned long long x) { + const union { + double f; + long long i; + } rep = {.i = x}; + return rep.f; +} + +double foo(long long a) { + const long long aWidth = sizeof a * __CHAR_BIT__; + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + // All other cases begin by extracting the sign and absolute value of a + unsigned long long sign = 0; + if (a < 0) { + sign = signBit; + a = (long long)(0ULL - (unsigned long long)a); // negate in unsigned arithmetic: plain -a overflows (UB) when a is LLONG_MIN + } + // Exponent of (fp_t)a is the width of abs(a). + const long long exponent = (aWidth - 1) - __builtin_clzll(a); + unsigned long long result; + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const long long shift = significandBits - exponent; + result = (unsigned long long)a << shift ^ implicitBit; + } else { + const long long shift = exponent - significandBits; + result = (unsigned long long)a >> shift ^ implicitBit; + unsigned long long round = (unsigned long long)a << (typeWidth - shift); + if (round > signBit) + result++; + if (round == signBit) + result += result & 1; + } + // Insert the exponent + result += (unsigned long long)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + +int main() { + long long a; + printf("intput a integer:\n"); + scanf("%lld", &a); + double b = foo(a); + printf("conversion result = %f\n", b); +} \ No newline at end of file diff --git a/floatdidf_single_source_readable.ll 
b/floatdidf_single_source_readable.ll new file mode 100644 --- /dev/null +++ b/floatdidf_single_source_readable.ll @@ -0,0 +1,47 @@ +define dso_local double @foo(i64 noundef %a) local_unnamed_addr #0 { +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %cleanup, label %if.end + +if.end: ; preds = %entry + %0 = and i64 %a, -9223372036854775808 + %1 = tail call i64 @llvm.abs.i64(i64 %a, i1 true) + %2 = tail call i64 @llvm.ctlz.i64(i64 %1, i1 true), !range !5 + %sub4 = xor i64 %2, 63 + %cmp5 = icmp ult i64 %sub4, 53 + br i1 %cmp5, label %if.then7, label %if.else + +if.then7: ; preds = %if.end + %sub8 = sub nuw nsw i64 52, %sub4 + %shl = shl i64 %1, %sub8 + %xor = xor i64 %shl, 4503599627370496 + br label %if.end22 + +if.else: ; preds = %if.end + %sub10 = sub nsw i64 11, %2 + %shr = lshr i64 %1, %sub10 + %xor11 = xor i64 %shr, 4503599627370496 + %sub12 = add nuw nsw i64 %2, 53 + %shl13 = shl i64 %1, %sub12 + %cmp14 = icmp ugt i64 %shl13, -9223372036854775808 + %inc = zext i1 %cmp14 to i64 + %spec.select43 = add nuw i64 %xor11, %inc + %cmp18 = icmp eq i64 %shl13, -9223372036854775808 + %and = and i64 %spec.select43, 1 + %add = select i1 %cmp18, i64 %and, i64 0 + %result.1 = add nuw i64 %add, %spec.select43 + br label %if.end22 + +if.end22: ; preds = %if.else, %if.then7 + %result.2 = phi i64 [ %xor, %if.then7 ], [ %result.1, %if.else ] + %3 = shl nuw nsw i64 %2, 52 + %shl24 = sub nuw nsw i64 4890909195324358656, %3 + %add25 = add i64 %shl24, %result.2 + %or = or i64 %add25, %0 + %4 = bitcast i64 %or to double + br label %cleanup + +cleanup: ; preds = %entry, %if.end22 + %retval.0 = phi double [ %4, %if.end22 ], [ 0.000000e+00, %entry ] + ret double %retval.0 +} diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -44,6 +44,7 @@ FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) 
 FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, ()) +FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, ()) FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ()) FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -490,6 +490,9 @@ // Expands large div/rem instructions. FunctionPass *createExpandLargeDivRemPass(); + // Expands large fptoui/fptosi/uitofp/sitofp instructions. + FunctionPass *createExpandLargeFpConvertPass(); + // This pass expands memcmp() to load/stores. FunctionPass *createExpandMemCmpPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -138,6 +138,7 @@ void initializeEdgeBundlesPass(PassRegistry&); void initializeEHContGuardCatchretPass(PassRegistry &); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&); +void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&); void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&); void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -53,6 +53,7 @@ EHContGuardCatchret.cpp ExecutionDomainFix.cpp ExpandLargeDivRem.cpp + ExpandLargeFpConvert.cpp ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -37,6 +37,7 @@ initializeEarlyMachineLICMPass(Registry); 
 initializeEarlyTailDuplicatePass(Registry); initializeExpandLargeDivRemLegacyPassPass(Registry); + initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); diff --git a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp @@ -0,0 +1,335 @@ +//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, +// ‘sitofp .. to’ instructions with a bitwidth above a threshold into +// auto-generated inline code. This is useful for targets like x86_64 that +// cannot lower fp conversions with more than 128 bits. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +// #include "llvm/Transforms/Utils/IntegerDivision.h" + +using namespace llvm; + +static cl::opt<unsigned> + ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, + cl::init(llvm::IntegerType::MAX_INT_BITS), + cl::desc("fp convert instructions on integers with " + "more than <N> bits are expanded.")); + +static bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) { // NOTE(review): not referenced anywhere in this file + auto *C = dyn_cast<ConstantInt>(V); + if (!C) + return false; + + APInt Val = C->getValue(); + if (SignedOp && Val.isNegative()) + Val = -Val; + return Val.isPowerOf2(); +} + +static bool isSigned(unsigned int Opcode) { // NOTE(review): not referenced anywhere in this file + return Opcode == Instruction::FPToSI || Opcode == Instruction::SIToFP; +} + +/// Generate code to convert a fp number to integer, replacing FPToS(U)I with +/// the generated code. This currently generates code similarly to compiler-rt's +/// implementations, but future work includes generating more specialized code +/// when more information about the operands is known. +/// +/// Replace fp to integer with generated code. 
+static bool expandFPToI(Instruction *FPToI) { + IRBuilder<> Builder(FPToI); + auto* FloatVal = FPToI->getOperand(0); + IntegerType *IntTy = cast<IntegerType>(FPToI->getType()); + + unsigned BitWidth = FPToI->getType()->getIntegerBitWidth(); + unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1; // fraction bits actually stored (implicit bit excluded) + unsigned FloatWidth = FloatVal->getType()->getScalarSizeInBits(); // NOTE(review): the field layout below assumes an IEEE interchange format and BitWidth >= FloatWidth; x86_fp80/ppc_fp128 need separate handling + unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1; + unsigned ExponentBias = (1u << (ExponentWidth - 1)) - 1; + APInt implicitBit = APInt::getOneBitSet(BitWidth, FPMantissaWidth); // APInt, because the bit index is 52 for double and 112 for fp128 + APInt significandMask = APInt::getLowBitsSet(BitWidth, FPMantissaWidth); + + BasicBlock *IBB = Builder.GetInsertBlock(); + Function *F = IBB->getParent(); + + BasicBlock *Entry = Builder.GetInsertBlock(); + Entry->setName(Twine(Entry->getName(), "_entry")); + BasicBlock *End = IBB->splitBasicBlock(Builder.GetInsertPoint(), + "cleanup"); + BasicBlock *IfEnd = BasicBlock::Create(Builder.getContext(), + "if.end", F, End); + BasicBlock *IfThen5 = BasicBlock::Create(Builder.getContext(), + "if.then5", F, End); + BasicBlock *IfEnd9 = BasicBlock::Create(Builder.getContext(), + "if.end9", F, End); + BasicBlock *IfThen12 = BasicBlock::Create(Builder.getContext(), + "if.then12", F, End); + BasicBlock *IfElse = BasicBlock::Create(Builder.getContext(), + "if.else", F, End); + + Entry->getTerminator()->eraseFromParent(); + + //entry: + Builder.SetInsertPoint(Entry); + Value *aRep0 = Builder.CreateBitCast(FloatVal, Builder.getIntNTy(FloatWidth), "aRep0"); + Value *aRep = Builder.CreateZExt(aRep0, FPToI->getType(), "aRep"); + Value *tobool_not = Builder.CreateICmpSGT(aRep0, ConstantInt::getSigned(aRep0->getType(), -1), "tobool.not"); // test the sign on the float-width bits: the zero-extended aRep is never negative + Value *sign = Builder.CreateSelect(tobool_not, ConstantInt::getSigned(IntTy, 1), ConstantInt::getSigned(IntTy, -1), "sign"); + Value *andf = Builder.CreateLShr(aRep, Builder.getIntN(BitWidth, FPMantissaWidth), "and"); + Value *exponent_with_bias = Builder.CreateAnd(andf, Builder.getIntN(BitWidth, (1u << ExponentWidth) - 1), "exponent_with_bias"); + Value *aAbs = 
Builder.CreateAnd(aRep, Builder.getInt(significandMask), "aAbs"); + Value *significand = Builder.CreateOr(aAbs, Builder.getInt(implicitBit), "significand"); + Value *cmp = Builder.CreateICmpULT(exponent_with_bias, Builder.getIntN(BitWidth, ExponentBias), "cmp"); // |value| < 1 converts to 0 + Builder.CreateCondBr(cmp, End, IfEnd); + + //if.end: + Builder.SetInsertPoint(IfEnd); + Value *add1 = Builder.CreateAdd(exponent_with_bias, ConstantInt::getSigned(IntTy, -int64_t(ExponentBias + BitWidth)), "add1"); // unbiased exponent minus the result width + Value *cmp3 = Builder.CreateICmpULT(add1, ConstantInt::getSigned(IntTy, -int64_t(BitWidth)), "cmp3"); // true iff the unbiased exponent is >= BitWidth + Builder.CreateCondBr(cmp3, IfThen5, IfEnd9); + + //if.then5: + Builder.SetInsertPoint(IfThen5); + Value *cond8 = Builder.CreateSelect(tobool_not, Builder.getInt(APInt::getSignedMaxValue(BitWidth)), Builder.getInt(APInt::getSignedMinValue(BitWidth)), "cond8"); // saturate to the limits of the result type + Builder.CreateBr(End); + + //if.end9: + Builder.SetInsertPoint(IfEnd9); + Value *cmp10 = Builder.CreateICmpULT(exponent_with_bias, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), "cmp10"); + Builder.CreateCondBr(cmp10, IfThen12, IfElse); + + //if.then12: + Builder.SetInsertPoint(IfThen12); + Value *sub13 = Builder.CreateSub(Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), exponent_with_bias, "sub13"); + Value *shr14 = Builder.CreateLShr(significand, sub13, "shr14"); + Value *mul = Builder.CreateMul(shr14, sign, "mul"); + Builder.CreateBr(End); + + //if.else: + Builder.SetInsertPoint(IfElse); + Value *sub15 = Builder.CreateAdd(exponent_with_bias, ConstantInt::getSigned(IntTy, -int64_t(ExponentBias + FPMantissaWidth)), "sub15"); + Value *shl = Builder.CreateShl(significand, sub15, "shl"); + Value *mul16 = Builder.CreateMul(shl, sign, "mul16"); + Builder.CreateBr(End); + + //cleanup: + Builder.SetInsertPoint(End, End->begin()); + PHINode *retval_0 = 
Builder.CreatePHI(FPToI->getType(), 4); + + retval_0->addIncoming(cond8, IfThen5); + retval_0->addIncoming(mul, IfThen12); + retval_0->addIncoming(mul16, IfElse); + retval_0->addIncoming(Builder.getIntN(BitWidth, 0), Entry); + + FPToI->replaceAllUsesWith(retval_0); + FPToI->dropAllReferences(); + FPToI->eraseFromParent(); + return true; +} + +/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with +/// the generated code. This currently generates code similarly to compiler-rt's +/// implementations, but future work includes generating more specialized code +/// when more information about the operands is known. +/// +/// Replace integer to fp with generated code. +static bool expandIToFP(Instruction* IToFP) { + IRBuilder<> Builder(IToFP); + auto* IntVal = IToFP->getOperand(0); + IntegerType *IntTy = cast<IntegerType>(IntVal->getType()); + unsigned BitWidth = IntVal->getType()->getIntegerBitWidth(); + unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1; + unsigned FloatWidth = IToFP->getType()->getScalarSizeInBits(); + + BasicBlock *IBB = Builder.GetInsertBlock(); + Function *F = IBB->getParent(); + Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy); + Function *ABS = Intrinsic::getDeclaration(F->getParent(), Intrinsic::abs, IntTy); + // NOTE(review): assumes an IEEE interchange float layout and BitWidth >= FloatWidth. + unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1; + bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP; + APInt SignMask = APInt::getSignMask(BitWidth); + APInt ImplicitBit = APInt::getOneBitSet(BitWidth, FPMantissaWidth); + APInt InfBits = APInt::getLowBitsSet(BitWidth, ExponentWidth).shl(FPMantissaWidth); + + BasicBlock *Entry = Builder.GetInsertBlock(); + Entry->setName(Twine(Entry->getName(), "_entry")); + BasicBlock *End = IBB->splitBasicBlock(Builder.GetInsertPoint(), "cleanup"); + BasicBlock *IfEnd = BasicBlock::Create(Builder.getContext(), "if.end", F, End); + BasicBlock *IfThen7 = BasicBlock::Create(Builder.getContext(), "if.then7", F, End); + BasicBlock *IfElse = BasicBlock::Create(Builder.getContext(), "if.else", F, End); + BasicBlock *IfEnd22 = BasicBlock::Create(Builder.getContext(), "if.end22", F, End); + + Entry->getTerminator()->eraseFromParent(); + + //entry: + Builder.SetInsertPoint(Entry); + Value *cmp = 
Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0), "cmp"); + Builder.CreateCondBr(cmp, End, IfEnd); + + //if.end: + Builder.SetInsertPoint(IfEnd); + Value *a0 = IsSigned ? Builder.CreateAnd(IntVal, Builder.getInt(SignMask), "a0") : Builder.getIntN(BitWidth, 0); // sign bit of the source; an unsigned source has none + Value *a1 = IsSigned ? Builder.CreateCall(ABS, {IntVal, Builder.getFalse()}, "a1") : IntVal; // int-min-is-poison must be false: converting INT_MIN is well defined + Value *a2 = Builder.CreateCall(CTLZ, {a1, Builder.getTrue()}, "a2"); // zero was branched away in entry, so zero-is-poison is safe + Value *sub4 = Builder.CreateSub(Builder.getIntN(BitWidth, BitWidth - 1), a2, "sub4"); // unbiased exponent = index of the highest set bit + Value *cmp5 = Builder.CreateICmpULT(sub4, Builder.getIntN(BitWidth, FPMantissaWidth + 1), "cmp5"); + Builder.CreateCondBr(cmp5, IfThen7, IfElse); + + //if.then7: + Builder.SetInsertPoint(IfThen7); + Value *sub8 = Builder.CreateSub(Builder.getIntN(BitWidth, FPMantissaWidth), sub4, "sub8"); + Value *shl = Builder.CreateShl(a1, sub8, "shl"); + Value *xorf = Builder.CreateXor(shl, Builder.getInt(ImplicitBit), "xor"); + Builder.CreateBr(IfEnd22); + + //if.else: + Builder.SetInsertPoint(IfElse); + Value *sub10 = Builder.CreateSub(Builder.getIntN(BitWidth, BitWidth - FPMantissaWidth - 1), a2, "sub10"); // right-shift amount = exponent - FPMantissaWidth + Value *shr = Builder.CreateLShr(a1, sub10, "shr"); + Value *xor11 = Builder.CreateXor(shr, Builder.getInt(ImplicitBit), "xor11"); + Value *sub12 = Builder.CreateAdd(a2, Builder.getIntN(BitWidth, FPMantissaWidth + 1), "sub12"); + Value *shl13 = Builder.CreateShl(a1, sub12, "shl13"); // the discarded bits, left-aligned for the rounding decision + Value *cmp14 = Builder.CreateICmpUGT(shl13, Builder.getInt(SignMask), "cmp14"); + Value *inc = Builder.CreateZExt(cmp14, IntTy, "inc"); + Value *spec_select43 = Builder.CreateAdd(xor11, inc, "spec.select43"); + Value *cmp18 = Builder.CreateICmpEQ(shl13, Builder.getInt(SignMask), "cmp18"); // exact tie: round to even + Value *andf = Builder.CreateAnd(spec_select43, Builder.getIntN(BitWidth, 1), "and"); + Value *add = Builder.CreateSelect(cmp18, andf, Builder.getIntN(BitWidth, 0), "add"); + Value *result_1 = Builder.CreateAdd(add, spec_select43, 
"result.1"); + Builder.CreateBr(IfEnd22); + + //if.end22: + Builder.SetInsertPoint(IfEnd22); + PHINode *result_2 = Builder.CreatePHI(IntTy, 2, "result.2"); + result_2->addIncoming(xorf, IfThen7); + result_2->addIncoming(result_1, IfElse); + Value *a3 = Builder.CreateShl(a2, Builder.getIntN(BitWidth, FPMantissaWidth), "a3"); + Value *shl24 = Builder.CreateSub(Builder.getInt(APInt(BitWidth, BitWidth - 1 + (1u << (ExponentWidth - 1)) - 1).shl(FPMantissaWidth)), a3, "shl24"); // (exponent + bias) << FPMantissaWidth + Value *add25 = Builder.CreateAdd(shl24, result_2, "add25"); + // Magnitudes beyond the largest finite value must become infinity, not a NaN or a wrapped bit pattern. + Value *sat = Builder.CreateSelect(Builder.CreateICmpUGT(add25, Builder.getInt(InfBits)), Builder.getInt(InfBits), add25, "sat"); + Value *orf = Builder.CreateOr(sat, Builder.CreateLShr(a0, Builder.getIntN(BitWidth, BitWidth - FloatWidth)), "or"); // move the sign down to bit FloatWidth - 1 + Value *a4 = Builder.CreateBitCast(Builder.CreateTrunc(orf, Builder.getIntNTy(FloatWidth)), IToFP->getType(), "a4"); + Builder.CreateBr(End); + + //cleanup: + Builder.SetInsertPoint(End, End->begin()); + PHINode *retval_0 = Builder.CreatePHI(IToFP->getType(), 2, "retval.0"); + retval_0->addIncoming(a4, IfEnd22); + retval_0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry); + + IToFP->replaceAllUsesWith(retval_0); + IToFP->dropAllReferences(); + IToFP->eraseFromParent(); + return true; +} + +static bool runImpl(Function &F, const TargetLowering &TLI) { + SmallVector<Instruction *, 4> Replace; + bool Modified = false; + + unsigned MaxLegalFpConvertBitWidth = TLI.getMaxDivRemBitWidthSupported(); + if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS) + MaxLegalFpConvertBitWidth = ExpandFpConvertBits; + + if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS) + return false; + + for (auto &I : instructions(F)) { + switch (I.getOpcode()) { + case Instruction::FPToUI: + case Instruction::FPToSI: { + // TODO: This doesn't handle vectors. 
+ auto *IntTy = dyn_cast<IntegerType>(I.getType()); + if (!IntTy || IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) // non-integer (e.g. vector) results are skipped instead of dereferencing a null IntTy + continue; + + Replace.push_back(&I); + Modified = true; + break; + } + case Instruction::UIToFP: + case Instruction::SIToFP: { + auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType()); + if (!IntTy || IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) // same guard for non-integer (e.g. vector) operands + continue; + + Replace.push_back(&I); + Modified = true; + break; + } + default: + break; + } + } + + if (Replace.empty()) + return false; + + while (!Replace.empty()) { + Instruction *I = Replace.pop_back_val(); + if (I->getOpcode() == Instruction::FPToUI || + I->getOpcode() == Instruction::FPToSI) { + expandFPToI(I); + } else { + expandIToFP(I); + } + } + + return Modified; +} + +class ExpandLargeFpConvertLegacyPass : public FunctionPass { +public: + static char ID; + + ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) { + initializeExpandLargeFpConvertLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); + auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + return runImpl(F, *TLI); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; + +char ExpandLargeFpConvertLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert", + "Expand large fp convert", false, false) +INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert", + "Expand large fp convert", false, false) + +FunctionPass *llvm::createExpandLargeFpConvertPass() { + return new ExpandLargeFpConvertLegacyPass(); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1114,6 +1114,7 @@ addPass(createPreISelIntrinsicLoweringPass()); 
PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); addPass(createExpandLargeDivRemPass()); + addPass(createExpandLargeFpConvertPass()); addIRPasses(); addCodeGenPrepare(); addPassesToHandleExceptions(); diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/fptosi129.ll b/llvm/test/Transforms/ExpandLargeFpConvert/fptosi129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeFpConvert/fptosi129.ll @@ -0,0 +1,4 @@ +define i129 @foo(float %a) { + %conv = fptosi float %a to i129 + ret i129 %conv +} diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/si129tofp.ll b/llvm/test/Transforms/ExpandLargeFpConvert/si129tofp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeFpConvert/si129tofp.ll @@ -0,0 +1,4 @@ +define double @foo(i64 %a) { + %conv = sitofp i64 %a to double + ret double %conv +} diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -456,7 +456,8 @@ "dot-regions", "dot-regions-only", "view-regions", "view-regions-only", "select-optimize", "expand-large-div-rem", - "structurizecfg", "fix-irreducible"}; + "structurizecfg", "fix-irreducible", + "expand-large-fp-convert"}; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) return true; @@ -504,6 +505,7 @@ // For codegen passes, only passes that do IR to IR transformation are // supported. initializeExpandLargeDivRemLegacyPassPass(Registry); + initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSelectOptimizePass(Registry);