Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -84,6 +84,7 @@ STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); STATISTIC(SlicedLoads, "Number of load sliced"); +STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops."); static cl::opt<bool> CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, @@ -9139,39 +9140,61 @@ if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) return SDValue(); - // TODO: Use splat values for the constant-checking below and remove this - // restriction. SDValue N0 = N->getOperand(0); EVT SourceVT = N0.getValueType(); - if (SourceVT.isVector()) - return SDValue(); + bool IsVector = SourceVT.isVector(); unsigned FPOpcode; APInt SignMask; + bool Negate = false; switch (N0.getOpcode()) { case ISD::AND: FPOpcode = ISD::FABS; - SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits()); break; case ISD::XOR: FPOpcode = ISD::FNEG; - SignMask = APInt::getSignMask(SourceVT.getSizeInBits()); + SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); + break; + case ISD::OR: + FPOpcode = ISD::FABS; + SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); + Negate = true; break; - // TODO: ISD::OR --> ISD::FNABS? default: return SDValue(); } - // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X - // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X + // If the input to the logic op is not a bitcast or if it's a bitcast from + // the wrong type, bail out. 
SDValue LogicOp0 = N0.getOperand(0); - ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && - LogicOp0.getOpcode() == ISD::BITCAST && - LogicOp0->getOperand(0).getValueType() == VT) - return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0)); + if (LogicOp0.getOpcode() != ISD::BITCAST || + LogicOp0->getOperand(0).getValueType() != VT) + return SDValue(); - return SDValue(); + // For vector nodes, the input must be a constant splat build vector and the + // splat value must be equal to SignMask. + if (IsVector) { + APInt ConstBVSplat; + if (!ISD::isConstantSplatVector(N0.getOperand(1).getNode(), ConstBVSplat) || + ConstBVSplat != SignMask) + return SDValue(); + } else { + // For scalar nodes, the input must be a constant equal to SignMask. + ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!LogicOp1 || LogicOp1->getAPIntValue() != SignMask) + return SDValue(); + } + + // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X + // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X + // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) -> + // fneg (fabs X) + NumFPLogicOpsConv++; + SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0)); + if (Negate) + FPOp = DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp); + return FPOp; } SDValue DAGCombiner::visitBITCAST(SDNode *N) { Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -1108,6 +1108,7 @@ // tail call. This will cause the optimizers to attempt to move, or // duplicate return instructions to help enable tail call optimizations. 
bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + bool hasBitPreservingFPLogic(EVT VT) const override; bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; }; // end class PPCTargetLowering Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -14009,6 +14009,15 @@ return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee); } +bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const { + if (!Subtarget.hasVSX()) + return false; + if (Subtarget.hasP9Vector() && VT == MVT::f128) + return true; + return VT == MVT::f32 || VT == MVT::f64 || + VT == MVT::v4f32 || VT == MVT::v2f64; + } + bool PPCTargetLowering:: isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { const Value *Mask = AndI.getOperand(1); Index: test/CodeGen/PowerPC/float-logic-ops.ll =================================================================== --- test/CodeGen/PowerPC/float-logic-ops.ll +++ test/CodeGen/PowerPC/float-logic-ops.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s + +define float @absf(float %a) { +; CHECK-LABEL: absf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fabs 1, 1 +; CHECK-NEXT: blr +entry: + %conv = bitcast float %a to i32 + %and = and i32 %conv, 2147483647 + %conv1 = bitcast i32 %and to float + ret float %conv1 +} + +define double @absd(double %a) { +; CHECK-LABEL: absd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsabsdp 1, 1 +; CHECK-NEXT: blr +entry: + %conv = bitcast double %a to i64 + %and = and i64 %conv, 9223372036854775807 + %conv1 = bitcast i64 %and to double + ret double %conv1 +} + +define <4 x float> @absv4f32(<4 x float> %a) { +; CHECK-LABEL: absv4f32: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: xvabssp 34, 34 +; CHECK-NEXT: blr +entry: + %conv = bitcast <4 x float> %a to <4 x i32> + %and = and <4 x i32> %conv, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> + %conv1 = bitcast <4 x i32> %and to <4 x float> + ret <4 x float> %conv1 +} + +define <4 x float> @absv4f32_wundef(<4 x float> %a) { +; CHECK-LABEL: absv4f32_wundef: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabssp 34, 34 +; CHECK-NEXT: blr +entry: + %conv = bitcast <4 x float> %a to <4 x i32> + %and = and <4 x i32> %conv, <i32 2147483647, i32 undef, i32 2147483647, i32 2147483647> + %conv1 = bitcast <4 x i32> %and to <4 x float> + ret <4 x float> %conv1 +} + +define <4 x float> @absv4f32_invalid(<4 x float> %a) { +; CHECK-LABEL: absv4f32_invalid: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI4_0@toc@l +; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxland 34, 34, 35 +; CHECK-NEXT: blr +entry: + %conv = bitcast <4 x float> %a to <4 x i32> + %and = and <4 x i32> %conv, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483646> + %conv1 = bitcast <4 x i32> %and to <4 x float> + ret <4 x float> %conv1 +} + +define <2 x double> @absv2f64(<2 x double> %a) { +; CHECK-LABEL: absv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsdp 34, 34 +; CHECK-NEXT: blr +entry: + %conv = bitcast <2 x double> %a to <2 x i64> + %and = and <2 x i64> %conv, <i64 9223372036854775807, i64 9223372036854775807> + %conv1 = bitcast <2 x i64> %and to <2 x double> + ret <2 x double> %conv1 +} + +define float @negf(float %a) { +; CHECK-LABEL: negf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fneg 1, 1 +; CHECK-NEXT: blr +entry: + %conv = bitcast float %a to i32 + %and = xor i32 %conv, -2147483648 + %conv1 = bitcast i32 %and to float + ret float %conv1 +} + +define double @negd(double %a) { +; CHECK-LABEL: negd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsnegdp 1, 1 +; CHECK-NEXT: blr +entry: + %conv = bitcast double %a to i64 + %and = xor i64 %conv, -9223372036854775808 + %conv1 = bitcast i64 %and to double + ret double %conv1 +} + +define <4 x float> @negv4f32(<4 x float> %a) { +; CHECK-LABEL: negv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnegsp 34, 34 +; CHECK-NEXT: blr 
+entry: + %conv = bitcast <4 x float> %a to <4 x i32> + %and = xor <4 x i32> %conv, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> + %conv1 = bitcast <4 x i32> %and to <4 x float> + ret <4 x float> %conv1 +} + +define <2 x double> @negv2d64(<2 x double> %a) { +; CHECK-LABEL: negv2d64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnegdp 34, 34 +; CHECK-NEXT: blr +entry: + %conv = bitcast <2 x double> %a to <2 x i64> + %and = xor <2 x i64> %conv, <i64 -9223372036854775808, i64 -9223372036854775808> + %conv1 = bitcast <2 x i64> %and to <2 x double> + ret <2 x double> %conv1 +} + +define float @nabsf(float %a) { +; CHECK-LABEL: nabsf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fnabs 1, 1 +; CHECK-NEXT: blr +entry: + %conv = bitcast float %a to i32 + %and = or i32 %conv, -2147483648 + %conv1 = bitcast i32 %and to float + ret float %conv1 +} + +define double @nabsd(double %a) { +; CHECK-LABEL: nabsd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsnabsdp 1, 1 +; CHECK-NEXT: blr +entry: + %conv = bitcast double %a to i64 + %and = or i64 %conv, -9223372036854775808 + %conv1 = bitcast i64 %and to double + ret double %conv1 +} + +define <4 x float> @nabsv4f32(<4 x float> %a) { +; CHECK-LABEL: nabsv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnabssp 34, 34 +; CHECK-NEXT: blr +entry: + %conv = bitcast <4 x float> %a to <4 x i32> + %and = or <4 x i32> %conv, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> + %conv1 = bitcast <4 x i32> %and to <4 x float> + ret <4 x float> %conv1 +} + +define <2 x double> @nabsv2d64(<2 x double> %a) { +; CHECK-LABEL: nabsv2d64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnabsdp 34, 34 +; CHECK-NEXT: blr +entry: + %conv = bitcast <2 x double> %a to <2 x i64> + %and = or <2 x i64> %conv, <i64 -9223372036854775808, i64 -9223372036854775808> + %conv1 = bitcast <2 x i64> %and to <2 x double> + ret <2 x double> %conv1 +} +