Index: llvm/trunk/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h +++ llvm/trunk/include/llvm/Target/TargetLowering.h @@ -319,6 +319,14 @@ return false; } + /// Return true if it is safe to transform an integer-domain bitwise operation + /// into the equivalent floating-point operation. This should be set to true + /// if the target has IEEE-754-compliant fabs/fneg operations for the input + /// type. + virtual bool hasBitPreservingFPLogic(EVT VT) const { + return false; + } + /// \brief Return if the target supports combining a /// chain like: /// \code Index: llvm/trunk/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp +++ llvm/trunk/lib/Analysis/ValueTracking.cpp @@ -991,8 +991,7 @@ } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() || - SrcTy->isFloatingPointTy()) && + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { @@ -1316,12 +1315,6 @@ // of bits which might be set provided by popcnt KnownOne2. 
break; } - case Intrinsic::fabs: { - Type *Ty = II->getType(); - APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits()); - KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit); - break; - } case Intrinsic::x86_sse42_crc32_64_64: KnownZero |= APInt::getHighBitsSet(64, 32); break; @@ -1381,9 +1374,8 @@ unsigned BitWidth = KnownZero.getBitWidth(); assert((V->getType()->isIntOrIntVectorTy() || - V->getType()->isFPOrFPVectorTy() || V->getType()->getScalarType()->isPointerTy()) && - "Not integer, floating point, or pointer type!"); + "Not integer or pointer type!"); assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7351,6 +7351,49 @@ return DAG.getDataLayout().isBigEndian() ? 1 : 0; } +static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + // If this is not a bitcast to an FP type or if the target doesn't have + // IEEE754-compliant FP logic, we're done. + EVT VT = N->getValueType(0); + if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) + return SDValue(); + + // TODO: Use splat values for the constant-checking below and remove this + // restriction. + SDValue N0 = N->getOperand(0); + EVT SourceVT = N0.getValueType(); + if (SourceVT.isVector()) + return SDValue(); + + unsigned FPOpcode; + APInt SignMask; + switch (N0.getOpcode()) { + case ISD::AND: + FPOpcode = ISD::FABS; + SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits()); + break; + case ISD::XOR: + FPOpcode = ISD::FNEG; + SignMask = APInt::getSignBit(SourceVT.getSizeInBits()); + break; + // TODO: ISD::OR --> ISD::FNABS? 
+ default: + return SDValue(); + } + + // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X + // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X + SDValue LogicOp0 = N0.getOperand(0); + ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && + LogicOp0.getOpcode() == ISD::BITCAST && + LogicOp0->getOperand(0).getValueType() == VT) + return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0)); + + return SDValue(); +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7415,6 +7458,9 @@ } } + if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI)) + return V; + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h @@ -395,6 +395,12 @@ bool isCheapToSpeculateCtlz() const override { return true; } + + bool hasBitPreservingFPLogic(EVT VT) const override { + // FIXME: Is this always true? It should be true for vectors at least. 
+ return VT == MVT::f32 || VT == MVT::f64; + } + bool supportSplitCSR(MachineFunction *MF) const override { return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1594,24 +1594,6 @@ if (Instruction *CastedAnd = foldCastedBitwiseLogic(I)) return CastedAnd; - if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { - Value *Op0COp = Op0C->getOperand(0); - Type *SrcTy = Op0COp->getType(); - - // If we are masking off the sign bit of a floating-point value, convert - // this to the canonical fabs intrinsic call and cast back to integer. - // The backend should know how to optimize fabs(). - // TODO: This transform should also apply to vectors. - ConstantInt *CI; - if (isa<BitCastInst>(Op0C) && SrcTy->isFloatingPointTy() && - match(Op1, m_ConstantInt(CI)) && CI->isMaxValue(true)) { - Module *M = I.getModule(); - Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, SrcTy); - Value *Call = Builder->CreateCall(Fabs, Op0COp, "fabs"); - return CastInst::CreateBitOrPointerCast(Call, I.getType()); - } - } - if (Instruction *Select = foldBoolSextMaskToSelect(I)) return Select; Index: llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll +++ llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll @@ -153,9 +153,7 @@ define double @bitcast_fabs(double %x) { ; CHECK-LABEL: bitcast_fabs: ; CHECK: ; BB#0: -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fabs d0, d0 ; CHECK-NEXT: ret ; %bc1 = bitcast double %x to i64 @@ -167,9 +165,7 @@ define float @bitcast_fneg(float %x) { ; CHECK-LABEL: bitcast_fneg: ; 
CHECK: ; BB#0: -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: eor w8, w8, #0x80000000 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fneg s0, s0 ; CHECK-NEXT: ret ; %bc1 = bitcast float %x to i32 Index: llvm/trunk/test/Transforms/InstCombine/and2.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/and2.ll +++ llvm/trunk/test/Transforms/InstCombine/and2.ll @@ -103,45 +103,3 @@ ret i64 %add } -define i64 @fabs_double(double %x) { -; CHECK-LABEL: @fabs_double( -; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) -; CHECK-NEXT: %and = bitcast double %fabs to i64 -; CHECK-NEXT: ret i64 %and - %bc = bitcast double %x to i64 - %and = and i64 %bc, 9223372036854775807 - ret i64 %and -} - -define i64 @fabs_double_swap(double %x) { -; CHECK-LABEL: @fabs_double_swap( -; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) -; CHECK-NEXT: %and = bitcast double %fabs to i64 -; CHECK-NEXT: ret i64 %and - %bc = bitcast double %x to i64 - %and = and i64 9223372036854775807, %bc - ret i64 %and -} - -define i32 @fabs_float(float %x) { -; CHECK-LABEL: @fabs_float( -; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x) -; CHECK-NEXT: %and = bitcast float %fabs to i32 -; CHECK-NEXT: ret i32 %and - %bc = bitcast float %x to i32 - %and = and i32 %bc, 2147483647 - ret i32 %and -} - -; Make sure that only a bitcast is transformed. 
- -define i64 @fabs_double_not_bitcast(double %x) { -; CHECK-LABEL: @fabs_double_not_bitcast( -; CHECK-NEXT: %bc = fptoui double %x to i64 -; CHECK-NEXT: %and = and i64 %bc, 9223372036854775807 -; CHECK-NEXT: ret i64 %and - %bc = fptoui double %x to i64 - %and = and i64 %bc, 9223372036854775807 - ret i64 %and -} - Index: llvm/trunk/test/Transforms/InstCombine/fabs.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/fabs.ll +++ llvm/trunk/test/Transforms/InstCombine/fabs.ll @@ -41,7 +41,6 @@ declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) declare fp128 @llvm.fabs.f128(fp128) -declare <4 x float> @llvm.fabs.v4f32(<4 x float>) define float @square_fabs_intrinsic_f32(float %x) { %mul = fmul float %x, %x @@ -99,27 +98,3 @@ ; CHECK-NEXT: ret float %sq } -; A scalar fabs op makes the sign bit zero, so masking off all of the other bits means we can return zero. - -define i32 @fabs_value_tracking_f32(float %x) { - %call = call float @llvm.fabs.f32(float %x) - %bc = bitcast float %call to i32 - %and = and i32 %bc, 2147483648 - ret i32 %and - -; CHECK-LABEL: fabs_value_tracking_f32( -; CHECK: ret i32 0 -} - -; TODO: A vector fabs op makes the sign bits zero, so masking off all of the other bits means we can return zero. - -define <4 x i32> @fabs_value_tracking_v4f32(<4 x float> %x) { - %call = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) - %bc = bitcast <4 x float> %call to <4 x i32> - %and = and <4 x i32> %bc, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648> - ret <4 x i32> %and - -; CHECK-LABEL: fabs_value_tracking_v4f32( -; CHECK: ret <4 x i32> %and -} -