Diff 202255

llvm/trunk/include/llvm/IR/IRBuilder.h

Show First 20 Lines • Show All 1,377 Lines • ▼ Show 20 Lines	Value *CreateUnOp(Instruction::UnaryOps Opc,
if (auto *VC = dyn_cast<Constant>(V))		if (auto *VC = dyn_cast<Constant>(V))
return Insert(Folder.CreateUnOp(Opc, VC), Name);		return Insert(Folder.CreateUnOp(Opc, VC), Name);
Instruction *UnOp = UnaryOperator::Create(Opc, V);		Instruction *UnOp = UnaryOperator::Create(Opc, V);
if (isa<FPMathOperator>(UnOp))		if (isa<FPMathOperator>(UnOp))
UnOp = setFPAttrs(UnOp, FPMathTag, FMF);		UnOp = setFPAttrs(UnOp, FPMathTag, FMF);
return Insert(UnOp, Name);		return Insert(UnOp, Name);
}		}

		/// Create either a UnaryOperator or BinaryOperator depending on \p Opc.
		///
		/// \p Ops must hold exactly as many operands as the opcode requires: two
		/// when Instruction::isBinaryOp(Opc) holds, one when
		/// Instruction::isUnaryOp(Opc) holds. \p Name and \p FPMathTag are
		/// forwarded unchanged to CreateBinOp / CreateUnOp, whose result is
		/// returned. Any other opcode reaches llvm_unreachable.
		Value *CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
		                    const Twine &Name = "",
		                    MDNode *FPMathTag = nullptr) {
		  if (Instruction::isBinaryOp(Opc)) {
		    assert(Ops.size() == 2 && "Invalid number of operands!");
		    return CreateBinOp(static_cast<Instruction::BinaryOps>(Opc),
		                       Ops[0], Ops[1], Name, FPMathTag);
		  }
		  if (Instruction::isUnaryOp(Opc)) {
		    assert(Ops.size() == 1 && "Invalid number of operands!");
		    return CreateUnOp(static_cast<Instruction::UnaryOps>(Opc),
		                      Ops[0], Name, FPMathTag);
		  }
		  llvm_unreachable("Unexpected opcode!");
		}

//===--------------------------------------------------------------------===//		//===--------------------------------------------------------------------===//
// Instruction creation methods: Memory Instructions		// Instruction creation methods: Memory Instructions
//===--------------------------------------------------------------------===//		//===--------------------------------------------------------------------===//

AllocaInst *CreateAlloca(Type *Ty, unsigned AddrSpace,		AllocaInst *CreateAlloca(Type *Ty, unsigned AddrSpace,
Value *ArraySize = nullptr, const Twine &Name = "") {		Value *ArraySize = nullptr, const Twine &Name = "") {
return Insert(new AllocaInst(Ty, AddrSpace, ArraySize), Name);		return Insert(new AllocaInst(Ty, AddrSpace, ArraySize), Name);
}		}
▲ Show 20 Lines • Show All 985 Lines • Show Last 20 Lines

llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,963 Lines • ▼ Show 20 Lines	void InnerLoopVectorizer::widenInstruction(Instruction &I) {
case Instruction::UDiv:		case Instruction::UDiv:
case Instruction::SDiv:		case Instruction::SDiv:
case Instruction::SRem:		case Instruction::SRem:
case Instruction::URem:		case Instruction::URem:
case Instruction::Add:		case Instruction::Add:
case Instruction::FAdd:		case Instruction::FAdd:
case Instruction::Sub:		case Instruction::Sub:
case Instruction::FSub:		case Instruction::FSub:
		case Instruction::FNeg:
case Instruction::Mul:		case Instruction::Mul:
case Instruction::FMul:		case Instruction::FMul:
case Instruction::FDiv:		case Instruction::FDiv:
case Instruction::FRem:		case Instruction::FRem:
case Instruction::Shl:		case Instruction::Shl:
case Instruction::LShr:		case Instruction::LShr:
case Instruction::AShr:		case Instruction::AShr:
case Instruction::And:		case Instruction::And:
case Instruction::Or:		case Instruction::Or:
case Instruction::Xor: {		case Instruction::Xor: {
// Just widen binops.		// Just widen unops and binops.
auto *BinOp = cast<BinaryOperator>(&I);		setDebugLocFromInst(Builder, &I);
setDebugLocFromInst(Builder, BinOp);

for (unsigned Part = 0; Part < UF; ++Part) {		for (unsigned Part = 0; Part < UF; ++Part) {
Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);		SmallVector<Value *, 2> Ops;
Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);		for (Value *Op : I.operands())
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);		Ops.push_back(getOrCreateVectorValue(Op, Part));

		Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);

if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))		if (auto *VecOp = dyn_cast<Instruction>(V))
VecOp->copyIRFlags(BinOp);		VecOp->copyIRFlags(&I);

// Use this vector value for all users of the original instruction.		// Use this vector value for all users of the original instruction.
VectorLoopValueMap.setVectorValue(&I, Part, V);		VectorLoopValueMap.setVectorValue(&I, Part, V);
addMetadata(V, BinOp);		addMetadata(V, &I);
}		}

break;		break;
}		}
case Instruction::Select: {		case Instruction::Select: {
// Widen selects.		// Widen selects.
// If the selector is loop invariant we can create a select		// If the selector is loop invariant we can create a select
// instruction with a scalar condition. Otherwise, use vector-select.		// instruction with a scalar condition. Otherwise, use vector-select.
▲ Show 20 Lines • Show All 1,950 Lines • ▼ Show 20 Lines	if (Op2VK == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
Op2VK = TargetTransformInfo::OK_UniformValue;		Op2VK = TargetTransformInfo::OK_UniformValue;

SmallVector<const Value *, 4> Operands(I->operand_values());		SmallVector<const Value *, 4> Operands(I->operand_values());
unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;		unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
return N * TTI.getArithmeticInstrCost(		return N * TTI.getArithmeticInstrCost(
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,		I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);		Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
}		}
		case Instruction::FNeg: {
		unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
		return N * TTI.getArithmeticInstrCost(
		I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
		TargetTransformInfo::OK_AnyValue,
		TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
		I->getOperand(0));
		}
case Instruction::Select: {		case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);		SelectInst *SI = cast<SelectInst>(I);
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());		const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));		bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();		Type *CondTy = SI->getCondition()->getType();
if (!ScalarCond)		if (!ScalarCond)
CondTy = VectorType::get(CondTy, VF);		CondTy = VectorType::get(CondTy, VF);

▲ Show 20 Lines • Show All 613 Lines • ▼ Show 20 Lines	auto IsVectorizableOpcode = [](unsigned Opcode) {
case Instruction::AShr:		case Instruction::AShr:
case Instruction::BitCast:		case Instruction::BitCast:
case Instruction::Br:		case Instruction::Br:
case Instruction::Call:		case Instruction::Call:
case Instruction::FAdd:		case Instruction::FAdd:
case Instruction::FCmp:		case Instruction::FCmp:
case Instruction::FDiv:		case Instruction::FDiv:
case Instruction::FMul:		case Instruction::FMul:
		case Instruction::FNeg:
case Instruction::FPExt:		case Instruction::FPExt:
case Instruction::FPToSI:		case Instruction::FPToSI:
case Instruction::FPToUI:		case Instruction::FPToUI:
case Instruction::FPTrunc:		case Instruction::FPTrunc:
case Instruction::FRem:		case Instruction::FRem:
case Instruction::FSub:		case Instruction::FSub:
case Instruction::GetElementPtr:		case Instruction::GetElementPtr:
case Instruction::ICmp:		case Instruction::ICmp:
▲ Show 20 Lines • Show All 1,055 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/LoopVectorize/X86/fneg-cost.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt %s -loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s			; RUN: opt %s -loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
	; REQUIRES: asserts			; REQUIRES: asserts

	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"			target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
	target triple = "x86_64-apple-macosx10.8.0"			target triple = "x86_64-apple-macosx10.8.0"

	; CHECK: Found an estimated cost of 2 for VF 1 For instruction: %neg = fneg float %{{.*}}			; CHECK: Found an estimated cost of 4 for VF 1 For instruction: %neg = fneg float %{{.*}}
	; CHECK: Found an estimated cost of 6 for VF 2 For instruction: %neg = fneg float %{{.*}}			; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %neg = fneg float %{{.*}}
	; CHECK: Found an estimated cost of 14 for VF 4 For instruction: %neg = fneg float %{{.*}}			; CHECK: Found an estimated cost of 4 for VF 4 For instruction: %neg = fneg float %{{.*}}
	define void @fneg_cost(float* %a, i64 %n) {			define void @fneg_cost(float* %a, i64 %n) {
	entry:			entry:
	br label %for.body			br label %for.body
	for.body: ; preds = %for.body.preheader, %for.body			for.body: ; preds = %for.body.preheader, %for.body
	%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]			%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
	%arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv			%arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
	%0 = load float, float* %arrayidx, align 4			%0 = load float, float* %arrayidx, align 4
	%neg = fneg float %0			%neg = fneg float %0
	store float %neg, float* %arrayidx, align 4			store float %neg, float* %arrayidx, align 4
	%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1			%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
	%cmp = icmp eq i64 %indvars.iv.next, %n			%cmp = icmp eq i64 %indvars.iv.next, %n
	br i1 %cmp, label %for.end, label %for.body			br i1 %cmp, label %for.end, label %for.body

	for.end:			for.end:
	ret void			ret void
	}			}

llvm/trunk/test/Transforms/LoopVectorize/fneg.ll

	; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s			; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s

	define void @foo(float* %a, i64 %n) {			define void @foo(float* %a, i64 %n) {
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4			; CHECK: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4
	; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0			; CHECK-NEXT: [[TMP4:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
	; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]			; CHECK: store <4 x float> [[TMP4]], <4 x float>* {{.*}}, align 4
	; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1
	; CHECK-NEXT: [[TMP7:%.*]] = fneg float [[TMP6]]
	; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2
	; CHECK-NEXT: [[TMP9:%.*]] = fneg float [[TMP8]]
	; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
	; CHECK-NEXT: [[TMP11:%.*]] = fneg float [[TMP10]]
	; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
	; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i32 1
	; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i32 2
	; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i32 3
	; CHECK: store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 4
	;			;
	entry:			entry:
	br label %for.body			br label %for.body

	for.body:			for.body:
	%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]			%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
	%arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv			%arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
	%0 = load float, float* %arrayidx, align 4			%0 = load float, float* %arrayidx, align 4
	Show All 9 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[LoopVectorize] Add FNeg instruction support
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 202255

llvm/trunk/include/llvm/IR/IRBuilder.h

llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/trunk/test/Transforms/LoopVectorize/X86/fneg-cost.ll

llvm/trunk/test/Transforms/LoopVectorize/fneg.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LoopVectorize] Add FNeg instruction support — Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 202255

llvm/trunk/include/llvm/IR/IRBuilder.h

llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/trunk/test/Transforms/LoopVectorize/X86/fneg-cost.ll

llvm/trunk/test/Transforms/LoopVectorize/fneg.ll

[LoopVectorize] Add FNeg instruction support
ClosedPublic