This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Prevent PerformVCVTCombine from combining a vmul/vcvt with 8 lanes
ClosedPublic

Authored by bsmith on Dec 15 2014, 6:57 AM.

Download Raw Diff

Details

Reviewers

Summary

This patch addresses the crash raised in PR21572. The problem occurs when PerformVCVTCombine tries to combine a vmul and a vcvt operating on 8-lane vectors. This results in the use of v8i32 types, which the intrinsic used does not support. This patch prevents the crash by bailing out when more than 4 lanes are used.

Regards,
Bradley Smith

Diff Detail

Repository: rL LLVM

Event Timeline

bsmith updated this revision to Diff 17281.Dec 15 2014, 6:57 AM

bsmith retitled this revision from to [ARM] Prevent PerformVCVTCombine from combining a vmul/vcvt with 8 lanes.

bsmith updated this object.

bsmith edited the test plan for this revision. (Show Details)

bsmith set the repository for this revision to rL LLVM.

bsmith added a subscriber: Unknown Object (MLST).

Herald added a subscriber: aemerson. · View Herald TranscriptDec 15 2014, 6:57 AM

Hi Bradley,

It's a simple and clear fix. LGTM.

Thanks,
-Hao

• HaoLiu accepted this revision.Dec 15 2014, 5:35 PM

• HaoLiu added a reviewer: • HaoLiu.

This revision is now accepted and ready to land.Dec 15 2014, 5:35 PM

Hi Bradley,

The AArch64 backend also has such instructions (it even supports the double type), but it doesn't have a combine function to perform this optimization. I think we could also port this function to the AArch64 backend.

Thanks,
-Hao

bsmith closed this revision.Dec 16 2014, 3:00 AM

Revision Contents

Path

Size

lib/

Target/

ARM/

ARMISelLowering.cpp

8 lines

test/

CodeGen/

ARM/

isel-v8i32-crash.ll

26 lines

Diff 17281

lib/Target/ARM/ARMISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,347 Lines • ▼ Show 20 Lines	static SDValue PerformVCVTCombine(SDNode *N,
bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;		bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;

if (ConstVec.getOpcode() != ISD::BUILD_VECTOR \|\|		if (ConstVec.getOpcode() != ISD::BUILD_VECTOR \|\|
!isConstVecPow2(ConstVec, isSigned, C))		!isConstVecPow2(ConstVec, isSigned, C))
return SDValue();		return SDValue();

MVT FloatTy = Op.getSimpleValueType().getVectorElementType();		MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();		MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
if (FloatTy.getSizeInBits() != 32 \|\| IntTy.getSizeInBits() > 32) {		unsigned NumLanes = Op.getValueType().getVectorNumElements();
		if (FloatTy.getSizeInBits() != 32 \|\| IntTy.getSizeInBits() > 32 \|\|
		NumLanes > 4) {
// These instructions only exist converting from f32 to i32. We can handle		// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would		// smaller integers by generating an extra truncate, but larger ones would
// be lossy.		// be lossy. We also can't handle more than 4 lanes, since these instructions
		// only support v2i32/v4i32 types.
return SDValue();		return SDValue();
}		}

unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :		unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;		Intrinsic::arm_neon_vcvtfp2fxu;
unsigned NumLanes = Op.getValueType().getVectorNumElements();
SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),		SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,		NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,		DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
DAG.getConstant(Log2_64(C), MVT::i32));		DAG.getConstant(Log2_64(C), MVT::i32));

if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())		if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv);		FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv);

▲ Show 20 Lines • Show All 1,954 Lines • Show Last 20 Lines

test/CodeGen/ARM/isel-v8i32-crash.ll

This file was added.

				; RUN: llc < %s -mtriple=armv7-linux-gnu \| FileCheck %s

				; Check we don't crash when trying to combine:
				; (d1 = <float 8.000000e+00, float 8.000000e+00, ...>) (power of 2)
				; vmul.f32 d0, d1, d0
				; vcvt.s32.f32 d0, d0
				; into:
				; vcvt.s32.f32 d0, d0, #3
				; when we have a vector length of 8, due to use of v8i32 types.

				target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

				; CHECK: func:
				; CHECK: vcvt.s32.f32 q[[R:[0-9]]], q[[R]], #3
				define void @func(i16* nocapture %pb, float* nocapture readonly %pf) #0 {
				entry:
				%0 = bitcast float* %pf to <8 x float>*
				%1 = load <8 x float>* %0, align 4
				%2 = fmul <8 x float> %1, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
				%3 = fptosi <8 x float> %2 to <8 x i16>
				%4 = bitcast i16* %pb to <8 x i16>*
				store <8 x i16> %3, <8 x i16>* %4, align 2
				ret void
				}

				attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }