Index: llvm/include/llvm/Analysis/ConstantFolding.h
===================================================================
--- llvm/include/llvm/Analysis/ConstantFolding.h
+++ llvm/include/llvm/Analysis/ConstantFolding.h
@@ -71,6 +71,12 @@
                                            Constant *RHS, const DataLayout &DL,
                                            const TargetLibraryInfo *TLI = nullptr);
 
+/// Attempt to constant fold a unary operation with the specified
+/// operand. If it fails, it returns a constant expression of the specified
+/// operand.
+Constant *ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
+                                     const DataLayout &DL);
+
 /// Attempt to constant fold a binary operation with the specified
 /// operands. If it fails, it returns a constant expression of the specified
 /// operands.
Index: llvm/include/llvm/Analysis/InstructionSimplify.h
===================================================================
--- llvm/include/llvm/Analysis/InstructionSimplify.h
+++ llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -125,6 +125,10 @@
 Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
                        const SimplifyQuery &Q);
 
+/// Given operand for an FNeg, fold the result or return null.
+Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+                        const SimplifyQuery &Q);
+
 /// Given operands for an FAdd, fold the result or return null.
 Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
                         const SimplifyQuery &Q);
@@ -227,6 +231,15 @@
 Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                       const SimplifyQuery &Q);
 
+/// Given operand for a UnaryOperator, fold the result or return null.
+Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q);
+
+/// Given operand for an FP UnaryOperator, fold the result or return null.
+/// In contrast to SimplifyUnOp, try to use FastMathFlags when folding the
+/// result. In case we don't need FastMathFlags, simply fall back to SimplifyUnOp.
+Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+                      const SimplifyQuery &Q);
+
 /// Given operands for a BinaryOperator, fold the result or return null.
 Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                      const SimplifyQuery &Q);
Index: llvm/include/llvm/IR/PatternMatch.h
===================================================================
--- llvm/include/llvm/IR/PatternMatch.h
+++ llvm/include/llvm/IR/PatternMatch.h
@@ -667,18 +667,26 @@
   FNeg_match(const Op_t &Op) : X(Op) {}
   template <typename OpTy> bool match(OpTy *V) {
     auto *FPMO = dyn_cast<FPMathOperator>(V);
-    if (!FPMO || FPMO->getOpcode() != Instruction::FSub)
-      return false;
-    if (FPMO->hasNoSignedZeros()) {
-      // With 'nsz', any zero goes.
-      if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
-        return false;
-    } else {
-      // Without 'nsz', we need fsub -0.0, X exactly.
-      if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
-        return false;
+    if (!FPMO) return false;
+
+    if (FPMO->getOpcode() == Instruction::FNeg)
+      return X.match(FPMO->getOperand(0));
+
+    if (FPMO->getOpcode() == Instruction::FSub) {
+      if (FPMO->hasNoSignedZeros()) {
+        // With 'nsz', any zero goes.
+        if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
+          return false;
+      } else {
+        // Without 'nsz', we need fsub -0.0, X exactly.
+        if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
+          return false;
+      }
+
+      return X.match(FPMO->getOperand(1));
     }
-    return X.match(FPMO->getOperand(1));
+
+    return false;
   }
 };
Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -1262,6 +1262,13 @@
   return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
 }
 
+Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
+                                           const DataLayout &DL) {
+  assert(Instruction::isUnaryOp(Opcode));
+
+  return ConstantExpr::get(Opcode, Op);
+}
+
 Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
                                              Constant *RHS,
                                              const DataLayout &DL) {
Index: llvm/lib/Analysis/InstructionSimplify.cpp
===================================================================
--- llvm/lib/Analysis/InstructionSimplify.cpp
+++ llvm/lib/Analysis/InstructionSimplify.cpp
@@ -51,6 +51,9 @@
 STATISTIC(NumReassoc, "Number of reassociations");
 
 static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *SimplifyUnOp(unsigned, Value *, const SimplifyQuery &, unsigned);
+static Value *SimplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
+                             const SimplifyQuery &, unsigned);
 static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
                             unsigned);
 static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
@@ -4245,6 +4248,37 @@
   return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
 }
 
+static Constant *foldConstant(Instruction::UnaryOps Opcode,
+                              Value *&Op, const SimplifyQuery &Q) {
+  if (auto *C = dyn_cast<Constant>(Op))
+    return ConstantFoldUnaryOpOperand(Opcode, C, Q.DL);
+  return nullptr;
+}
+
+/// Given the operand for an FNeg, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+                               const SimplifyQuery &Q, unsigned MaxRecurse) {
+  if (Constant *C = foldConstant(Instruction::FNeg, Op, Q))
+    return C;
+
+  // fneg undef ==> undef
+  if (isa<UndefValue>(Op))
+    return Op;
+
+  Value *X;
+  // fneg (fneg X) ==> X
+  if (match(Op, m_FNeg(m_Value(X))))
+    return X;
+
+  return nullptr;
+}
+
+Value *llvm::SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+                              const SimplifyQuery &Q) {
+  return ::SimplifyFNegInst(Op, FMF, Q, RecursionLimit);
+}
+
 static Constant *propagateNaN(Constant *In) {
   // If the input is a vector with undef elements, just return a default NaN.
   if (!In->isNaN())
@@ -4472,6 +4506,37 @@
 
 //=== Helper functions for higher up the class hierarchy.
 
+/// Given the operand for a UnaryOperator, see if we can fold the result.
+/// If not, this returns null.
+/// TODO: Stubbed out for future non-FP UnaryOperators.
+static Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q,
+                           unsigned MaxRecurse) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unexpected opcode");
+  }
+}
+
+/// Given the operand for an FP UnaryOperator, see if we can fold the result.
+/// If not, this returns null.
+/// In contrast to SimplifyUnOp, try to use FastMathFlags when folding the
+/// result. In case we don't need FastMathFlags, simply fall back to SimplifyUnOp.
+static Value *SimplifyFPUnOp(unsigned Opcode, Value *Op,
+                             const FastMathFlags &FMF,
+                             const SimplifyQuery &Q, unsigned MaxRecurse) {
+  switch (Opcode) {
+  case Instruction::FNeg:
+    return SimplifyFNegInst(Op, FMF, Q, MaxRecurse);
+  default:
+    return SimplifyUnOp(Opcode, Op, Q, MaxRecurse);
+  }
+}
+
+Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+                            const SimplifyQuery &Q) {
+  return ::SimplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
+}
+
 /// Given operands for a BinaryOperator, see if we can fold the result.
 /// If not, this returns null.
 static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
@@ -4959,6 +5024,9 @@
   default:
     Result = ConstantFoldInstruction(I, Q.DL, Q.TLI);
     break;
+  case Instruction::FNeg:
+    Result = SimplifyFNegInst(I->getOperand(0), I->getFastMathFlags(), Q);
+    break;
   case Instruction::FAdd:
     Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
                               I->getFastMathFlags(), Q);
Index: llvm/lib/IR/ConstantFold.h
===================================================================
--- llvm/lib/IR/ConstantFold.h
+++ llvm/lib/IR/ConstantFold.h
@@ -43,6 +43,7 @@
                                               ArrayRef<unsigned> Idxs);
   Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
                                                ArrayRef<unsigned> Idxs);
+  Constant *ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V);
   Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1,
                                           Constant *V2);
   Constant *ConstantFoldCompareInstruction(unsigned short predicate,
Index: llvm/lib/IR/ConstantFold.cpp
===================================================================
--- llvm/lib/IR/ConstantFold.cpp
+++ llvm/lib/IR/ConstantFold.cpp
@@ -918,6 +918,52 @@
   return ConstantVector::get(Result);
 }
 
+Constant *llvm::ConstantFoldUnaryInstruction(unsigned Opcode, Constant *C) {
+  assert(Instruction::isUnaryOp(Opcode) && "Non-unary instruction detected");
+
+  // Handle scalar UndefValue. Vectors are always evaluated per element.
+  bool HasScalarUndef = !C->getType()->isVectorTy() && isa<UndefValue>(C);
+
+  if (HasScalarUndef) {
+    switch (static_cast<Instruction::UnaryOps>(Opcode)) {
+    case Instruction::FNeg:
+      return C; // -undef -> undef
+    case Instruction::UnaryOpsEnd:
+      llvm_unreachable("Invalid UnaryOp");
+    }
+  }
+
+  // Constant should not be UndefValue, unless these are vector constants.
+  assert(!HasScalarUndef && "Unexpected UndefValue");
+  // We only have FP UnaryOps right now.
+  assert(!isa<ConstantInt>(C) && "Unexpected Integer UnaryOp");
+
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+    const APFloat &CV = CFP->getValueAPF();
+    switch (Opcode) {
+    default:
+      break;
+    case Instruction::FNeg:
+      return ConstantFP::get(C->getContext(), neg(CV));
+    }
+  } else if (VectorType *VTy = dyn_cast<VectorType>(C->getType())) {
+    // Fold each element and create a vector constant from those constants.
+    SmallVector<Constant *, 16> Result;
+    Type *Ty = IntegerType::get(VTy->getContext(), 32);
+    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+      Constant *ExtractIdx = ConstantInt::get(Ty, i);
+      Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx);
+
+      Result.push_back(ConstantExpr::get(Opcode, Elt));
+    }
+
+    return ConstantVector::get(Result);
+  }
+
+  // We don't know how to fold this.
+  return nullptr;
+}
+
 Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
                                               Constant *C2) {
   assert(Instruction::isBinaryOp(Opcode) && "Non-binary instruction detected");
Index: llvm/lib/IR/Constants.cpp
===================================================================
--- llvm/lib/IR/Constants.cpp
+++ llvm/lib/IR/Constants.cpp
@@ -1830,7 +1830,8 @@
   }
 #endif
 
-  // TODO: Try to constant fold operation.
+  if (Constant *FC = ConstantFoldUnaryInstruction(Opcode, C))
+    return FC; // Fold a few common cases.
 
   if (OnlyIfReducedTy == C->getType())
     return nullptr;
@@ -2235,7 +2236,7 @@
 Constant *ConstantExpr::getFNeg(Constant *C) {
   assert(C->getType()->isFPOrFPVectorTy() &&
          "Cannot FNEG a non-floating-point value!");
-  return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
+  return get(Instruction::FNeg, C);
 }
 
 Constant *ConstantExpr::getNot(Constant *C) {
@@ -3025,6 +3026,8 @@
     return CmpInst::Create((Instruction::OtherOps)getOpcode(),
                            (CmpInst::Predicate)getPredicate(), Ops[0], Ops[1]);
+  case Instruction::FNeg:
+    return UnaryOperator::Create((Instruction::UnaryOps)getOpcode(), Ops[0]);
   default:
     assert(getNumOperands() == 2 && "Must be binary operator?");
     BinaryOperator *BO =
Index: llvm/test/Analysis/ConstantFolding/fneg.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/ConstantFolding/fneg.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; fneg constant folding
+
+@c = constant i32 0
+
+define float @fneg_constant() {
+; CHECK-LABEL: @fneg_constant(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %r = fneg float -0.0
+  ret float %r
+}
+
+define float @fneg_undef() {
+; CHECK-LABEL: @fneg_undef(
+; CHECK-NEXT:    ret float undef
+;
+  %r = fneg float undef
+  ret float %r
+}
+
+define float @fneg_fneg_var(float %a) {
+; CHECK-LABEL: @fneg_fneg_var(
+; CHECK-NEXT:    ret float [[A:%.*]]
+;
+  %r = fneg float %a
+  %r1 = fneg float %r
+  ret float %r1
+}
+
+define i32 @fneg_bitcast_constant() {
+; CHECK-LABEL: @fneg_bitcast_constant(
+; CHECK-NEXT:    ret i32 -2147483648
+;
+  %bc = bitcast i32* @c to float*
+  %l = load float, float* %bc
+  %neg = fneg float %l
+  %r = bitcast float %neg to i32
+  ret i32 %r
+}
+
+define <4 x float> @fneg_constant_elts_v4f32() {
+; CHECK-LABEL: @fneg_constant_elts_v4f32(
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 1.000000e+00, float -1.000000e+00>
+;
+  %r = fneg <4 x float> <float -0.0, float 0.0, float -1.0, float 1.0>
+  ret <4 x float> %r
+}
+
+define <4 x float> @fneg_constant_undef_elts_v4f32() {
+; CHECK-LABEL: @fneg_constant_undef_elts_v4f32(
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float undef, float undef, float -1.000000e+00>
+;
+  %r = fneg <4 x float> <float -0.0, float undef, float undef, float 1.0>
+  ret <4 x float> %r
+}
+
+define <4 x float> @fneg_constant_all_undef_elts_v4f32() {
+; CHECK-LABEL: @fneg_constant_all_undef_elts_v4f32(
+; CHECK-NEXT:    ret <4 x float> undef
+;
+  %r = fneg <4 x float> <float undef, float undef, float undef, float undef>
+  ret <4 x float> %r
+}
Index: llvm/test/CodeGen/X86/fp-fold.ll
===================================================================
--- llvm/test/CodeGen/X86/fp-fold.ll
+++ llvm/test/CodeGen/X86/fp-fold.ll
@@ -223,3 +223,20 @@
   %r = fmul reassoc float %mul, 4.0
   ret float %r
 }
+
+define float @fneg_undef() {
+; ANY-LABEL: fneg_undef:
+; ANY:       # %bb.0:
+; ANY-NEXT:    retq
+  %r = fneg float undef
+  ret float %r
+}
+
+define float @fneg_zero() {
+; ANY-LABEL: fneg_zero:
+; ANY:       # %bb.0:
+; ANY-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT:    retq
+  %r = fneg float 0.0
+  ret float %r
+}
Index: llvm/test/CodeGen/X86/vec_fneg.ll
===================================================================
--- llvm/test/CodeGen/X86/vec_fneg.ll
+++ llvm/test/CodeGen/X86/vec_fneg.ll
@@ -141,4 +141,56 @@
   ret <4 x float> %tmp
 }
 
+; fneg constant folding
+define <4 x float> @fneg_constant_elts_v4f32() {
+; X32-SSE-LABEL: fneg_constant_elts_v4f32:
+; X32-SSE:       # %bb.0:
+; X32-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,1.0E+0,-1.0E+0]
+; X32-SSE-NEXT:    retl
+;
+; X64-SSE-LABEL: fneg_constant_elts_v4f32:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,1.0E+0,-1.0E+0]
+; X64-SSE-NEXT:    retq
+  %r = fneg <4 x float> <float -0.0, float 0.0, float -1.0, float 1.0>
+  ret <4 x float> %r
+}
+
+define <4 x float> @fneg_constant_undef_elts_v4f32() {
+; X32-SSE-LABEL: fneg_constant_undef_elts_v4f32:
+; X32-SSE:       # %bb.0:
+; X32-SSE-NEXT:    movaps {{.*#+}} xmm0 = <-0.0E+0,u,u,1.0E+0>
+; X32-SSE-NEXT:    xorps {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT:    retl
+;
+; X64-SSE-LABEL: fneg_constant_undef_elts_v4f32:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = <-0.0E+0,u,u,1.0E+0>
+; X64-SSE-NEXT:    xorps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT:    retq
+  %r = fneg <4 x float> <float -0.0, float undef, float undef, float 1.0>
+  ret <4 x float> %r
+}
+
+define <4 x float> @fneg_constant_all_undef_elts_v4f32() {
+; X32-SSE1-LABEL: fneg_constant_all_undef_elts_v4f32:
+; X32-SSE1:       # %bb.0:
+; X32-SSE1-NEXT:    xorps {{\.LCPI.*}}, %xmm0
+; X32-SSE1-NEXT:    retl
+;
+; X32-SSE2-LABEL: fneg_constant_all_undef_elts_v4f32:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    retl
+;
+; X64-SSE1-LABEL: fneg_constant_all_undef_elts_v4f32:
+; X64-SSE1:       # %bb.0:
+; X64-SSE1-NEXT:    xorps {{.*}}(%rip), %xmm0
+; X64-SSE1-NEXT:    retq
+;
+; X64-SSE2-LABEL: fneg_constant_all_undef_elts_v4f32:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    retq
+  %r = fneg <4 x float> <float undef, float undef, float undef, float undef>
+  ret <4 x float> %r
+}
Index: llvm/test/Transforms/InstCombine/fneg.ll
===================================================================
--- llvm/test/Transforms/InstCombine/fneg.ll
+++ llvm/test/Transforms/InstCombine/fneg.ll
@@ -46,7 +46,7 @@
 
 define <4 x double> @fmul_fneg_vec(<4 x double> %x) {
 ; CHECK-LABEL: @fmul_fneg_vec(
-; CHECK-NEXT:    [[R:%.*]] = fmul <4 x double> [[X:%.*]],
+; CHECK-NEXT:    [[R:%.*]] = fmul <4 x double> [[X:%.*]],
 ; CHECK-NEXT:    ret <4 x double> [[R]]
 ;
   %m = fmul <4 x double> %x,
@@ -97,7 +97,7 @@
 
 define <4 x double> @fdiv_op1_constant_fneg_vec(<4 x double> %x) {
 ; CHECK-LABEL: @fdiv_op1_constant_fneg_vec(
-; CHECK-NEXT:    [[R:%.*]] = fdiv <4 x double> [[X:%.*]],
+; CHECK-NEXT:    [[R:%.*]] = fdiv <4 x double> [[X:%.*]],
 ; CHECK-NEXT:    ret <4 x double> [[R]]
 ;
   %d = fdiv <4 x double> %x,
@@ -148,7 +148,7 @@
 
 define <4 x double> @fdiv_op0_constant_fneg_vec(<4 x double> %x) {
 ; CHECK-LABEL: @fdiv_op0_constant_fneg_vec(
-; CHECK-NEXT:    [[R:%.*]] = fdiv <4 x double> , [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = fdiv <4 x double> , [[X:%.*]]
 ; CHECK-NEXT:    ret <4 x double> [[R]]
 ;
   %d = fdiv <4 x double> , %x
Index: llvm/test/Transforms/InstCombine/fsub.ll
===================================================================
--- llvm/test/Transforms/InstCombine/fsub.ll
+++ llvm/test/Transforms/InstCombine/fsub.ll
@@ -64,8 +64,8 @@
 
 define float @sub_add_neg_x(float %x, float %y) {
 ; CHECK-LABEL: @sub_add_neg_x(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], -5.000000e+00
-; CHECK-NEXT:    ret float [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = fmul reassoc nsz float [[X:%.*]], -5.000000e+00
+; CHECK-NEXT:    ret float [[R]]
 ;
   %mul = fmul float %x, 5.000000e+00
   %add = fadd float %mul, %y
@@ -121,7 +121,7 @@
 
 define <2 x float> @constant_op1_vec_undef(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @constant_op1_vec_undef(
-; CHECK-NEXT:    [[R:%.*]] = fadd <2 x float> [[X:%.*]],
+; CHECK-NEXT:    [[R:%.*]] = fadd <2 x float> [[X:%.*]],
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %r = fsub <2 x float> %x,
Index: llvm/test/Transforms/InstCombine/inselt-binop.ll
===================================================================
--- llvm/test/Transforms/InstCombine/inselt-binop.ll
+++ llvm/test/Transforms/InstCombine/inselt-binop.ll
@@ -504,7 +504,7 @@
 define <2 x float> @fsub_constant_op1(float %x) {
 ; CHECK-LABEL: @fsub_constant_op1(
 ; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 1
-; CHECK-NEXT:    [[BO:%.*]] = fadd <2 x float> [[INS]],
+; CHECK-NEXT:    [[BO:%.*]] = fadd <2 x float> [[INS]],
 ; CHECK-NEXT:    ret <2 x float> [[BO]]
 ;
   %ins = insertelement <2 x float> undef, float %x, i32 1
Index: llvm/test/Transforms/Reassociate/crash2.ll
===================================================================
--- llvm/test/Transforms/Reassociate/crash2.ll
+++ llvm/test/Transforms/Reassociate/crash2.ll
@@ -7,7 +7,7 @@
 
 define float @undef1() {
 ; CHECK-LABEL: @undef1(
-; CHECK-NEXT:    ret float fadd (float bitcast (i32 ptrtoint (i32* @g to i32) to float), float fadd (float bitcast (i32 ptrtoint (i32* @g to i32) to float), float fadd (float fsub (float -0.000000e+00, float bitcast (i32 ptrtoint (i32* @g to i32) to float)), float fsub (float -0.000000e+00, float bitcast (i32 ptrtoint (i32* @g to i32) to float)))))
+; CHECK-NEXT:    ret float fadd (float bitcast (i32 ptrtoint (i32* @g to i32) to float), float fadd (float bitcast (i32 ptrtoint (i32* @g to i32) to float), float fadd (float fneg (float bitcast (i32 ptrtoint (i32* @g to i32) to float)), float fneg (float bitcast (i32 ptrtoint (i32* @g to i32) to float)))))
 ;
   %t0 = fadd fast float bitcast (i32 ptrtoint (i32* @g to i32) to float), bitcast (i32 ptrtoint (i32* @g to i32) to float)
   %t1 = fsub fast float bitcast (i32 ptrtoint (i32* @g to i32) to float), %t0
Index: llvm/unittests/IR/ConstantsTest.cpp
===================================================================
--- llvm/unittests/IR/ConstantsTest.cpp
+++ llvm/unittests/IR/ConstantsTest.cpp
@@ -229,7 +229,7 @@
 #define P6STR "bitcast (i32 ptrtoint (i32** @dummy2 to i32) to <2 x i16>)"
 
   CHECK(ConstantExpr::getNeg(P0), "sub i32 0, " P0STR);
-  CHECK(ConstantExpr::getFNeg(P1), "fsub float -0.000000e+00, " P1STR);
+  CHECK(ConstantExpr::getFNeg(P1), "fneg float " P1STR);
   CHECK(ConstantExpr::getNot(P0), "xor i32 " P0STR ", -1");
   CHECK(ConstantExpr::getAdd(P0, P0), "add i32 " P0STR ", " P0STR);
   CHECK(ConstantExpr::getAdd(P0, P0, false, true), "add nsw i32 " P0STR ", "