This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Explicitly widen non-power-of-2 vector HFA base types.
ClosedPublic

Authored by ab on Apr 11 2016, 5:11 PM.

Download Raw Diff

Details

Reviewers

t.p.northover
rjmccall
uweigand

Commits

rG40a34c2e2af8: [CodeGen] Widen non-power-of-2 vector HFA base types.
rC266784: [CodeGen] Widen non-power-of-2 vector HFA base types.
rL266784: [CodeGen] Widen non-power-of-2 vector HFA base types.

Summary

Currently, for the ppc64--gnu and aarch64 ABIs, we recognize:

typedef __attribute__((__ext_vector_type__(3))) float v3f32;
typedef __attribute__((__ext_vector_type__(16))) char v16i8;

struct HFA {
  v3f32 a;
  v16i8 b;
};

as an HFA. Since the base type is the first type encountered, we pass the HFA as:

[2 x <3 x float>]

Which leads to incorrect IR (relying on padding values) when the second field is used.

This patch teaches isHomogeneousAggregate to explicitly use the widened vector (after size rounding) as the base type.

I don't know if this approach is either sufficient or ideal; alternatives I considered:

- pass as [2 x <3 x float>], but bitcast the struct pointer itself to [2 x <4 x float>]*; this still relies on accessing padding.
- return an llvm::Type (instead of QualType) in isHomogeneousAggregate: there are a couple of callers that rely on size/alignment info which should come from the clang::Type.
- reject non-power-of-2 vectors as HFA base types: this doesn't make sense, as we treat them everywhere else as the next-power-of-2 type.
- fix up the base type in the various isHomogeneousAggregate callers: repetitive changes all over the place.

Diff Detail

Repository: rL LLVM

Event Timeline

ab updated this revision to Diff 53336.Apr 11 2016, 5:11 PM

ab retitled this revision from to [CodeGen] Explicitly widen non-power-of-2 vector HFA base types..

ab updated this object.

ab added reviewers: rjmccall, uweigand, t.p.northover.

ab added a subscriber: cfe-commits.

Herald added a subscriber: aemerson. · View Herald TranscriptApr 11 2016, 5:11 PM

Makes sense to me.

Thanks Ulrich. Tim?

Sorry, I thought I'd already commented here. It looked good to me too.

Tim.

This revision is now accepted and ready to land.Apr 19 2016, 10:39 AM

Closed by commit rL266784: [CodeGen] Widen non-power-of-2 vector HFA base types. (authored by ab). · Explain WhyApr 19 2016, 11:00 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

cfe/

trunk/

lib/

CodeGen/

TargetInfo.cpp

13 lines

test/

CodeGen/

aarch64-arguments-hfa-v3.c

20 lines

arm64-arguments.c

31 lines

ppc64le-aggregates.c

58 lines

Diff 54224

cfe/trunk/lib/CodeGen/TargetInfo.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,114 Lines • ▼ Show 20 Lines	if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
// Most ABIs only support float, double, and some vector type widths.		// Most ABIs only support float, double, and some vector type widths.
if (!isHomogeneousAggregateBaseType(Ty))		if (!isHomogeneousAggregateBaseType(Ty))
return false;		return false;

// The base type must be the same for all members. Types that		// The base type must be the same for all members. Types that
// agree in both total size and mode (float vs. vector) are		// agree in both total size and mode (float vs. vector) are
// treated as being equivalent here.		// treated as being equivalent here.
const Type *TyPtr = Ty.getTypePtr();		const Type *TyPtr = Ty.getTypePtr();
if (!Base)		if (!Base) {
Base = TyPtr;		Base = TyPtr;
		// If it's a non-power-of-2 vector, its size is already a power-of-2,
		// so make sure to widen it explicitly.
		if (const VectorType *VT = Base->getAs<VectorType>()) {
		QualType EltTy = VT->getElementType();
		unsigned NumElements =
		getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy);
		Base = getContext()
		.getVectorType(EltTy, NumElements, VT->getVectorKind())
		.getTypePtr();
		}
		}

if (Base->isVectorType() != TyPtr->isVectorType() \|\|		if (Base->isVectorType() != TyPtr->isVectorType() \|\|
getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))		getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
return false;		return false;
}		}
return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);		return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
}		}

▲ Show 20 Lines • Show All 3,769 Lines • Show Last 20 Lines

cfe/trunk/test/CodeGen/aarch64-arguments-hfa-v3.c

				// RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon -target-abi darwinpcs -fallow-half-arguments-and-returns -emit-llvm -o - %s \| FileCheck %s

				typedef __attribute__((__ext_vector_type__(16))) signed char int8x16_t;
				typedef __attribute__((__ext_vector_type__(3))) float float32x3_t;

				// CHECK: %struct.HFAv3 = type { [4 x <3 x float>] }
				typedef struct { float32x3_t arr[4]; } HFAv3;

				// CHECK: %struct.MixedHFAv3 = type { [3 x <3 x float>], <16 x i8> }
				typedef struct { float32x3_t arr[3]; int8x16_t b; } MixedHFAv3;

				// CHECK: define %struct.HFAv3 @test([4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}})
				HFAv3 test(HFAv3 a0, HFAv3 a1, HFAv3 a2) {
				return a2;
				}

				// CHECK: define %struct.MixedHFAv3 @test_mixed([4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}})
				MixedHFAv3 test_mixed(MixedHFAv3 a0, MixedHFAv3 a1, MixedHFAv3 a2) {
				return a2;
				}

cfe/trunk/test/CodeGen/arm64-arguments.c

	Show First 20 Lines • Show All 708 Lines • ▼ Show 20 Lines

	// CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHVA**			// CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHVA**
	// CHECK: [[HVAPTR:%.*]] = load %struct.TooBigHVA*, %struct.TooBigHVA** [[HVAPTRPTR]]			// CHECK: [[HVAPTR:%.*]] = load %struct.TooBigHVA*, %struct.TooBigHVA** [[HVAPTRPTR]]
	__builtin_va_list thelist;			__builtin_va_list thelist;
	__builtin_va_start(thelist, n);			__builtin_va_start(thelist, n);
	struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA);			struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA);
	return h.d;			return h.d;
	}			}

				typedef __attribute__((__ext_vector_type__(3))) float float32x3_t;
				typedef struct { float32x3_t arr[4]; } HFAv3;

				float32x3_t test_hva_v3(int n, ...) {
				// CHECK-LABEL: define <3 x float> @test_hva_v3(i32 %n, ...)
				// CHECK: [[THELIST:%.*]] = alloca i8*
				// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]

				// HVA is not indirect, so it occupies its full 64 bytes on the stack, but it
				// must be properly (16-byte) aligned.
				// CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
				// CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
				// CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
				// CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*

				// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 64
				// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]

				// CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HFAv3*
				__builtin_va_list l;
				__builtin_va_start(l, n);
				HFAv3 r = __builtin_va_arg(l, HFAv3);
				return r.arr[2];
				}

				float32x3_t test_hva_v3_call(HFAv3 *a) {
				// CHECK-LABEL: define <3 x float> @test_hva_v3_call(%struct.HFAv3* %a)
				// CHECK: call <3 x float> (i32, ...) @test_hva_v3(i32 1, [4 x <4 x float>] {{.*}})
				return test_hva_v3(1, *a);
				}

cfe/trunk/test/CodeGen/ppc64le-aggregates.c

	Show First 20 Lines • Show All 249 Lines • ▼ Show 20 Lines
	struct v3f6 { float3 v[6]; };			struct v3f6 { float3 v[6]; };
	struct v3f7 { float3 v[7]; };			struct v3f7 { float3 v[7]; };
	struct v3f8 { float3 v[8]; };			struct v3f8 { float3 v[8]; };
	struct v3f9 { float3 v[9]; };			struct v3f9 { float3 v[9]; };

	struct v3fab { float3 a; float3 b; };			struct v3fab { float3 a; float3 b; };
	struct v3fabc { float3 a; float3 b; float3 c; };			struct v3fabc { float3 a; float3 b; float3 c; };

	// CHECK: define [1 x <3 x float>] @func_v3f1(<3 x float> inreg %x.coerce)			// CHECK: define [1 x <4 x float>] @func_v3f1(<3 x float> inreg %x.coerce)
	struct v3f1 func_v3f1(struct v3f1 x) { return x; }			struct v3f1 func_v3f1(struct v3f1 x) { return x; }

	// CHECK: define [2 x <3 x float>] @func_v3f2([2 x <3 x float>] %x.coerce)			// CHECK: define [2 x <4 x float>] @func_v3f2([2 x <4 x float>] %x.coerce)
	struct v3f2 func_v3f2(struct v3f2 x) { return x; }			struct v3f2 func_v3f2(struct v3f2 x) { return x; }

	// CHECK: define [3 x <3 x float>] @func_v3f3([3 x <3 x float>] %x.coerce)			// CHECK: define [3 x <4 x float>] @func_v3f3([3 x <4 x float>] %x.coerce)
	struct v3f3 func_v3f3(struct v3f3 x) { return x; }			struct v3f3 func_v3f3(struct v3f3 x) { return x; }

	// CHECK: define [4 x <3 x float>] @func_v3f4([4 x <3 x float>] %x.coerce)			// CHECK: define [4 x <4 x float>] @func_v3f4([4 x <4 x float>] %x.coerce)
	struct v3f4 func_v3f4(struct v3f4 x) { return x; }			struct v3f4 func_v3f4(struct v3f4 x) { return x; }

	// CHECK: define [5 x <3 x float>] @func_v3f5([5 x <3 x float>] %x.coerce)			// CHECK: define [5 x <4 x float>] @func_v3f5([5 x <4 x float>] %x.coerce)
	struct v3f5 func_v3f5(struct v3f5 x) { return x; }			struct v3f5 func_v3f5(struct v3f5 x) { return x; }

	// CHECK: define [6 x <3 x float>] @func_v3f6([6 x <3 x float>] %x.coerce)			// CHECK: define [6 x <4 x float>] @func_v3f6([6 x <4 x float>] %x.coerce)
	struct v3f6 func_v3f6(struct v3f6 x) { return x; }			struct v3f6 func_v3f6(struct v3f6 x) { return x; }

	// CHECK: define [7 x <3 x float>] @func_v3f7([7 x <3 x float>] %x.coerce)			// CHECK: define [7 x <4 x float>] @func_v3f7([7 x <4 x float>] %x.coerce)
	struct v3f7 func_v3f7(struct v3f7 x) { return x; }			struct v3f7 func_v3f7(struct v3f7 x) { return x; }

	// CHECK: define [8 x <3 x float>] @func_v3f8([8 x <3 x float>] %x.coerce)			// CHECK: define [8 x <4 x float>] @func_v3f8([8 x <4 x float>] %x.coerce)
	struct v3f8 func_v3f8(struct v3f8 x) { return x; }			struct v3f8 func_v3f8(struct v3f8 x) { return x; }

	// CHECK: define void @func_v3f9(%struct.v3f9* noalias sret %agg.result, %struct.v3f9* byval align 16 %x)			// CHECK: define void @func_v3f9(%struct.v3f9* noalias sret %agg.result, %struct.v3f9* byval align 16 %x)
	struct v3f9 func_v3f9(struct v3f9 x) { return x; }			struct v3f9 func_v3f9(struct v3f9 x) { return x; }

	// CHECK: define [2 x <3 x float>] @func_v3fab([2 x <3 x float>] %x.coerce)			// CHECK: define [2 x <4 x float>] @func_v3fab([2 x <4 x float>] %x.coerce)
	struct v3fab func_v3fab(struct v3fab x) { return x; }			struct v3fab func_v3fab(struct v3fab x) { return x; }

	// CHECK: define [3 x <3 x float>] @func_v3fabc([3 x <3 x float>] %x.coerce)			// CHECK: define [3 x <4 x float>] @func_v3fabc([3 x <4 x float>] %x.coerce)
	struct v3fabc func_v3fabc(struct v3fabc x) { return x; }			struct v3fabc func_v3fabc(struct v3fabc x) { return x; }

	// CHECK-LABEL: @call_v3f1			// CHECK-LABEL: @call_v3f1
	// CHECK: %[[TMP:[^ ]+]] = load <3 x float>, <3 x float>* getelementptr inbounds (%struct.v3f1, %struct.v3f1* @global_v3f1, i32 0, i32 0, i32 0), align 1			// CHECK: %[[TMP:[^ ]+]] = load <3 x float>, <3 x float>* getelementptr inbounds (%struct.v3f1, %struct.v3f1* @global_v3f1, i32 0, i32 0, i32 0), align 1
	// CHECK: call [1 x <3 x float>] @func_v3f1(<3 x float> inreg %[[TMP]])			// CHECK: call [1 x <4 x float>] @func_v3f1(<3 x float> inreg %[[TMP]])
	struct v3f1 global_v3f1;			struct v3f1 global_v3f1;
	void call_v3f1(void) { global_v3f1 = func_v3f1(global_v3f1); }			void call_v3f1(void) { global_v3f1 = func_v3f1(global_v3f1); }

	// CHECK-LABEL: @call_v3f2			// CHECK-LABEL: @call_v3f2
	// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>], [2 x <3 x float>]* getelementptr inbounds (%struct.v3f2, %struct.v3f2* @global_v3f2, i32 0, i32 0), align 1			// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x float>], [2 x <4 x float>]* bitcast (%struct.v3f2* @global_v3f2 to [2 x <4 x float>]*), align 16
	// CHECK: call [2 x <3 x float>] @func_v3f2([2 x <3 x float>] %[[TMP]])			// CHECK: call [2 x <4 x float>] @func_v3f2([2 x <4 x float>] %[[TMP]])
	struct v3f2 global_v3f2;			struct v3f2 global_v3f2;
	void call_v3f2(void) { global_v3f2 = func_v3f2(global_v3f2); }			void call_v3f2(void) { global_v3f2 = func_v3f2(global_v3f2); }

	// CHECK-LABEL: @call_v3f3			// CHECK-LABEL: @call_v3f3
	// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>], [3 x <3 x float>]* getelementptr inbounds (%struct.v3f3, %struct.v3f3* @global_v3f3, i32 0, i32 0), align 1			// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x float>], [3 x <4 x float>]* bitcast (%struct.v3f3* @global_v3f3 to [3 x <4 x float>]*), align 16
	// CHECK: call [3 x <3 x float>] @func_v3f3([3 x <3 x float>] %[[TMP]])			// CHECK: call [3 x <4 x float>] @func_v3f3([3 x <4 x float>] %[[TMP]])
	struct v3f3 global_v3f3;			struct v3f3 global_v3f3;
	void call_v3f3(void) { global_v3f3 = func_v3f3(global_v3f3); }			void call_v3f3(void) { global_v3f3 = func_v3f3(global_v3f3); }

	// CHECK-LABEL: @call_v3f4			// CHECK-LABEL: @call_v3f4
	// CHECK: %[[TMP:[^ ]+]] = load [4 x <3 x float>], [4 x <3 x float>]* getelementptr inbounds (%struct.v3f4, %struct.v3f4* @global_v3f4, i32 0, i32 0), align 1			// CHECK: %[[TMP:[^ ]+]] = load [4 x <4 x float>], [4 x <4 x float>]* bitcast (%struct.v3f4* @global_v3f4 to [4 x <4 x float>]*), align 16
	// CHECK: call [4 x <3 x float>] @func_v3f4([4 x <3 x float>] %[[TMP]])			// CHECK: call [4 x <4 x float>] @func_v3f4([4 x <4 x float>] %[[TMP]])
	struct v3f4 global_v3f4;			struct v3f4 global_v3f4;
	void call_v3f4(void) { global_v3f4 = func_v3f4(global_v3f4); }			void call_v3f4(void) { global_v3f4 = func_v3f4(global_v3f4); }

	// CHECK-LABEL: @call_v3f5			// CHECK-LABEL: @call_v3f5
	// CHECK: %[[TMP:[^ ]+]] = load [5 x <3 x float>], [5 x <3 x float>]* getelementptr inbounds (%struct.v3f5, %struct.v3f5* @global_v3f5, i32 0, i32 0), align 1			// CHECK: %[[TMP:[^ ]+]] = load [5 x <4 x float>], [5 x <4 x float>]* bitcast (%struct.v3f5* @global_v3f5 to [5 x <4 x float>]*), align 16
	// CHECK: call [5 x <3 x float>] @func_v3f5([5 x <3 x float>] %[[TMP]])			// CHECK: call [5 x <4 x float>] @func_v3f5([5 x <4 x float>] %[[TMP]])
	struct v3f5 global_v3f5;			struct v3f5 global_v3f5;
	void call_v3f5(void) { global_v3f5 = func_v3f5(global_v3f5); }			void call_v3f5(void) { global_v3f5 = func_v3f5(global_v3f5); }

	// CHECK-LABEL: @call_v3f6			// CHECK-LABEL: @call_v3f6
	// CHECK: %[[TMP:[^ ]+]] = load [6 x <3 x float>], [6 x <3 x float>]* getelementptr inbounds (%struct.v3f6, %struct.v3f6* @global_v3f6, i32 0, i32 0), align 1			// CHECK: %[[TMP:[^ ]+]] = load [6 x <4 x float>], [6 x <4 x float>]* bitcast (%struct.v3f6* @global_v3f6 to [6 x <4 x float>]*), align 16
	// CHECK: call [6 x <3 x float>] @func_v3f6([6 x <3 x float>] %[[TMP]])			// CHECK: call [6 x <4 x float>] @func_v3f6([6 x <4 x float>] %[[TMP]])
	struct v3f6 global_v3f6;			struct v3f6 global_v3f6;
	void call_v3f6(void) { global_v3f6 = func_v3f6(global_v3f6); }			void call_v3f6(void) { global_v3f6 = func_v3f6(global_v3f6); }

	// CHECK-LABEL: @call_v3f7			// CHECK-LABEL: @call_v3f7
	// CHECK: %[[TMP:[^ ]+]] = load [7 x <3 x float>], [7 x <3 x float>]* getelementptr inbounds (%struct.v3f7, %struct.v3f7* @global_v3f7, i32 0, i32 0), align 1			// CHECK: %[[TMP:[^ ]+]] = load [7 x <4 x float>], [7 x <4 x float>]* bitcast (%struct.v3f7* @global_v3f7 to [7 x <4 x float>]*), align 16
	// CHECK: call [7 x <3 x float>] @func_v3f7([7 x <3 x float>] %[[TMP]])			// CHECK: call [7 x <4 x float>] @func_v3f7([7 x <4 x float>] %[[TMP]])
	struct v3f7 global_v3f7;			struct v3f7 global_v3f7;
	void call_v3f7(void) { global_v3f7 = func_v3f7(global_v3f7); }			void call_v3f7(void) { global_v3f7 = func_v3f7(global_v3f7); }

	// CHECK-LABEL: @call_v3f8			// CHECK-LABEL: @call_v3f8
	// CHECK: %[[TMP:[^ ]+]] = load [8 x <3 x float>], [8 x <3 x float>]* getelementptr inbounds (%struct.v3f8, %struct.v3f8* @global_v3f8, i32 0, i32 0), align 1			// CHECK: %[[TMP:[^ ]+]] = load [8 x <4 x float>], [8 x <4 x float>]* bitcast (%struct.v3f8* @global_v3f8 to [8 x <4 x float>]*), align 16
	// CHECK: call [8 x <3 x float>] @func_v3f8([8 x <3 x float>] %[[TMP]])			// CHECK: call [8 x <4 x float>] @func_v3f8([8 x <4 x float>] %[[TMP]])
	struct v3f8 global_v3f8;			struct v3f8 global_v3f8;
	void call_v3f8(void) { global_v3f8 = func_v3f8(global_v3f8); }			void call_v3f8(void) { global_v3f8 = func_v3f8(global_v3f8); }

	// CHECK-LABEL: @call_v3f9			// CHECK-LABEL: @call_v3f9
	// CHECK: call void @func_v3f9(%struct.v3f9* sret %{{[^ ]+}}, %struct.v3f9* byval align 16 @global_v3f9)			// CHECK: call void @func_v3f9(%struct.v3f9* sret %{{[^ ]+}}, %struct.v3f9* byval align 16 @global_v3f9)
	struct v3f9 global_v3f9;			struct v3f9 global_v3f9;
	void call_v3f9(void) { global_v3f9 = func_v3f9(global_v3f9); }			void call_v3f9(void) { global_v3f9 = func_v3f9(global_v3f9); }

	// CHECK-LABEL: @call_v3fab			// CHECK-LABEL: @call_v3fab
	// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>], [2 x <3 x float>]* bitcast (%struct.v3fab* @global_v3fab to [2 x <3 x float>]*)			// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x float>], [2 x <4 x float>]* bitcast (%struct.v3fab* @global_v3fab to [2 x <4 x float>]*), align 16
	// CHECK: call [2 x <3 x float>] @func_v3fab([2 x <3 x float>] %[[TMP]])			// CHECK: call [2 x <4 x float>] @func_v3fab([2 x <4 x float>] %[[TMP]])
	struct v3fab global_v3fab;			struct v3fab global_v3fab;
	void call_v3fab(void) { global_v3fab = func_v3fab(global_v3fab); }			void call_v3fab(void) { global_v3fab = func_v3fab(global_v3fab); }

	// CHECK-LABEL: @call_v3fabc			// CHECK-LABEL: @call_v3fabc
	// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>], [3 x <3 x float>]* bitcast (%struct.v3fabc* @global_v3fabc to [3 x <3 x float>]*)			// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x float>], [3 x <4 x float>]* bitcast (%struct.v3fabc* @global_v3fabc to [3 x <4 x float>]*), align 16
	// CHECK: call [3 x <3 x float>] @func_v3fabc([3 x <3 x float>] %[[TMP]])			// CHECK: call [3 x <4 x float>] @func_v3fabc([3 x <4 x float>] %[[TMP]])
	struct v3fabc global_v3fabc;			struct v3fabc global_v3fabc;
	void call_v3fabc(void) { global_v3fabc = func_v3fabc(global_v3fabc); }			void call_v3fabc(void) { global_v3fabc = func_v3fabc(global_v3fabc); }


	// Test returning small aggregates.			// Test returning small aggregates.

	struct s1 { char c[1]; };			struct s1 { char c[1]; };
	struct s2 { char c[2]; };			struct s2 { char c[2]; };
	▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines