Diff 201612

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Context not available.

	break;	break;
	}	}
		case Instruction::FNeg: {
		fhahn (inline comment, unsubmitted, not done): The duplication with the binary ops seems a little unfortunate to me. The widening is basically the same, except for the number of arguments. What would it take to make it generic, so that it supports both binary and unary ops? If we had a builder function CreateNAryOp(Opcode, ArrayRef<Value>), the generic version would be quite compact, I think. The casts to UnaryOperator (and BinaryOperator) should not be necessary, right? For the copyIRFlags case, we only care whether VecOp is an instruction?
		// Just widen unary ops.
		auto *UnOp = cast<UnaryOperator>(&I);
		setDebugLocFromInst(Builder, UnOp);

		for (unsigned Part = 0; Part < UF; ++Part) {
		Value *A = getOrCreateVectorValue(UnOp->getOperand(0), Part);
		Value *V = Builder.CreateUnOp(UnOp->getOpcode(), A);

		if (UnaryOperator *VecOp = dyn_cast<UnaryOperator>(V))
		VecOp->copyIRFlags(UnOp);
		cameron.mcinally (inline comment, unsubmitted, not done): Just posted D62521: Add CreateFNegFMF(...) to the IRBuilder. That should allow you to skip the explicit FMF copy.
		cameron.mcinally (inline comment, unsubmitted, not done): Oh, I now see that the BinaryOperator code above is the generic form too. I don't think there are any other unary operators planned, but it's probably best to keep the UnaryOperator code in line with the BinaryOperator code anyway.

		// Use this vector value for all users of the original instruction.
		VectorLoopValueMap.setVectorValue(&I, Part, V);
		addMetadata(V, UnOp);
		}

		break;
		}
	case Instruction::Select: {	case Instruction::Select: {
	// Widen selects.	// Widen selects.
	// If the selector is loop invariant we can create a select	// If the selector is loop invariant we can create a select
Context not available.
	I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,	I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
	Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);	Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
	}	}
		case Instruction::FNeg: {
		SmallVector<const Value *, 4> Operands(I->operand_values());
		unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
		return N * TTI.getArithmeticInstrCost(
		I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
		TargetTransformInfo::OK_AnyValue,
		TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
		Operands);
		}
	case Instruction::Select: {	case Instruction::Select: {
	SelectInst *SI = cast<SelectInst>(I);	SelectInst *SI = cast<SelectInst>(I);
	const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());	const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
Context not available.
	case Instruction::FCmp:	case Instruction::FCmp:
	case Instruction::FDiv:	case Instruction::FDiv:
	case Instruction::FMul:	case Instruction::FMul:
		case Instruction::FNeg:
	case Instruction::FPExt:	case Instruction::FPExt:
	case Instruction::FPToSI:	case Instruction::FPToSI:
	case Instruction::FPToUI:	case Instruction::FPToUI:
Context not available.

llvm/test/Transforms/LoopVectorize/X86/fneg.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
				; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -S | FileCheck %s
				fhahn (inline comment, unsubmitted, not done): Can you make the test independent of X86? I think you can just pass `-force-vector-width=4`, as we only want to test the widening, not anything related to cost modeling.

				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.14.0"

				; Test function: for i in [0, %n), load %a[i], negate it with the unary
				; fneg instruction, and store the result back to %a[i]. The entry block
				; skips the loop entirely when %n is 0. The assertions below expect the
				; loop to be widened to <4 x float> with a scalar remainder loop.
				define void @foo(float* %a, i64 %n) {
				; CHECK-LABEL: @foo(
				; CHECK-NEXT: entry:
				; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i64 [[N:%.*]], 0
				; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
				; CHECK: for.body.preheader:
				; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
				; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
				; CHECK: vector.ph:
				; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
				; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
				; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
				; CHECK: vector.body:
				; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
				; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
				; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
				; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
				; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]]
				; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0
				; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
				; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
				; CHECK-NEXT: [[TMP4:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
				; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
				; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4
				; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
				; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
				; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
				; CHECK: middle.block:
				; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
				; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
				; CHECK: scalar.ph:
				; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
				; CHECK-NEXT: br label [[FOR_BODY:%.*]]
				; CHECK: for.cond.cleanup.loopexit:
				; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
				; CHECK: for.cond.cleanup:
				; CHECK-NEXT: ret void
				; CHECK: for.body:
				; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
				; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
				; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4
				; CHECK-NEXT: [[SUB:%.*]] = fneg float [[TMP7]]
				; CHECK-NEXT: store float [[SUB]], float* [[ARRAYIDX]], align 4
				; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
				; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
				; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !2
				;
				entry:
				%cmp8 = icmp eq i64 %n, 0
				br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader

				for.body.preheader: ; preds = %entry
				br label %for.body

				for.cond.cleanup.loopexit: ; preds = %for.body
				br label %for.cond.cleanup

				for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
				ret void

				; Loop body under test: a[i] = -a[i], expressed with the unary fneg
				; instruction; the induction variable counts from 0 up to %n.
				for.body: ; preds = %for.body.preheader, %for.body
				%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
				%arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
				%0 = load float, float* %arrayidx, align 4
				%sub = fneg float %0
				store float %sub, float* %arrayidx, align 4
				%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
				%cmp = icmp eq i64 %indvars.iv.next, %n
				br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
				}

This is an archive of the discontinued LLVM Phabricator instance.

[LoopVectorize] Add FNeg instruction support
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 201612

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/X86/fneg.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LoopVectorize] Add FNeg instruction support (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 201612

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/X86/fneg.ll

[LoopVectorize] Add FNeg instruction support
ClosedPublic