diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1265,6 +1265,21 @@
     return CGF.Builder.CreateLoad(Src);
   }
 
+  // If coercing a fixed vector to a scalable vector for ABI compatibility, and
+  // the types match, use the llvm.experimental.vector.insert intrinsic to
+  // perform the conversion.
+  if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
+    if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
+        auto *Load = CGF.Builder.CreateLoad(Src);
+        auto *UndefVec = llvm::UndefValue::get(ScalableDst);
+        auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+        return CGF.Builder.CreateInsertVector(ScalableDst, UndefVec, Load, Zero,
+                                              "castScalableSve");
+      }
+    }
+  }
+
   // Otherwise do coercion through memory. This is stupid, but simple.
   Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment(),
                                             Src.getName());
diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
@@ -48,14 +48,11 @@
 // CHECK-SAME:    [[#VBITS]]
 // CHECK-SAME:    EES_(<vscale x 4 x i32> %x.coerce, <vscale x 4 x i32> %y.coerce)
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[X:%.*]] = call <[[#div(VBITS, 32)]] x i32> @llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[Y:%.*]] = call <[[#div(VBITS, 32)]] x i32> @llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE1:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <[[#div(VBITS, 32)]] x i32> [[Y]], [[X]]
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <[[#div(VBITS, 32)]] x i32>*
-// CHECK-NEXT:    store <[[#div(VBITS, 32)]] x i32> [[ADD]], <[[#div(VBITS, 32)]] x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v[[#div(VBITS, 32)]]i32(<vscale x 4 x i32> undef, <[[#div(VBITS, 32)]] x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 typedef svint32_t vec __attribute__((arm_sve_vector_bits(N)));
 auto f(vec x, vec y) { return x + y; } // Returns a vec.
 #endif
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -24,14 +24,11 @@
 
 // CHECK-LABEL: @fixed_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[X:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[X]], i64 0)
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[CASTSCALABLESVE]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT:    [[CASTSCALABLESVE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE1]]
 //
 fixed_int32_t fixed_caller(fixed_int32_t x) {
   return sizeless_callee(x);
@@ -39,12 +36,9 @@
 
 // CHECK-LABEL: @fixed_callee(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[X:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:    store <16 x i32> [[X]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[X]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t fixed_callee(fixed_int32_t x) {
   return x;
@@ -52,18 +46,15 @@
 
 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[COERCE_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[COERCE1:%.*]] = alloca <16 x i32>, align 16
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X:%.*]], i64 0)
-// CHECK-NEXT:    [[COERCE_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[COERCE_COERCE]] to <16 x i32>*
-// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[COERCE_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[COERCE_COERCE]], align 16
-// CHECK-NEXT:    [[CALL:%.*]] = call <vscale x 4 x i32> @fixed_callee(<vscale x 4 x i32> [[TMP0]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i32>* [[COERCE1]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    store <vscale x 4 x i32> [[CALL]], <vscale x 4 x i32>* [[TMP1]], align 16
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, [[TBAA6:!tbaa !.*]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    [[CALL:%.*]] = call <vscale x 4 x i32> @fixed_callee(<vscale x 4 x i32> [[CASTSCALABLESVE]])
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i32>* [[COERCE1]] to <vscale x 4 x i32>*
+// CHECK-NEXT:    store <vscale x 4 x i32> [[CALL]], <vscale x 4 x i32>* [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE2]]
 //
 svint32_t sizeless_caller(svint32_t x) {
   return fixed_callee(x);
@@ -75,18 +66,15 @@
 
 // CHECK-LABEL: @call_int32_ff(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
-// CHECK-NEXT:    [[OP1:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[OP2:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE1:%.*]], i64 0)
+// CHECK-NEXT:    [[OP1:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OP2:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[OP2_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP1]], i64 0)
-// CHECK-NEXT:    [[CASTSCALABLESVE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP2]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP2]], i64 0)
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[CASTSCALABLESVE3]])
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[CASTSCALABLESVE2]])
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[CASTSCALABLESVE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE3]]
 //
 fixed_int32_t call_int32_ff(svbool_t pg, fixed_int32_t op1, fixed_int32_t op2) {
   return svsel(pg, op1, op2);
@@ -94,18 +82,15 @@
 
 // CHECK-LABEL: @call_float64_ff(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x double>, align 16
-// CHECK-NEXT:    [[OP1:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[OP2:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X_COERCE1:%.*]], i64 0)
+// CHECK-NEXT:    [[OP1:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[OP1_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OP2:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[OP2_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[OP1]], i64 0)
-// CHECK-NEXT:    [[CASTSCALABLESVE3:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[OP2]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[OP2]], i64 0)
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[CASTSCALABLESVE]], <vscale x 2 x double> [[CASTSCALABLESVE3]])
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[CASTSCALABLESVE]], <vscale x 2 x double> [[CASTSCALABLESVE2]])
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 2 x double>* [[RETVAL_COERCE]] to <8 x double>*
-// CHECK-NEXT:    store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+// CHECK-NEXT:    [[CASTSCALABLESVE3:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE3]]
 //
 fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_t op2) {
   return svsel(pg, op1, op2);
@@ -150,16 +135,13 @@
 
 // CHECK-LABEL: @call_int32_fs(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
-// CHECK-NEXT:    [[OP1:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OP1:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP1]], i64 0)
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[OP2:%.*]])
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[CASTSCALABLESVE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE1]]
 //
 fixed_int32_t call_int32_fs(svbool_t pg, fixed_int32_t op1, svint32_t op2) {
   return svsel(pg, op1, op2);
@@ -167,16 +149,13 @@
 
 // CHECK-LABEL: @call_float64_fs(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x double>, align 16
-// CHECK-NEXT:    [[OP1:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OP1:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[OP1_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[OP1]], i64 0)
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[CASTSCALABLESVE]], <vscale x 2 x double> [[OP2:%.*]])
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 2 x double>* [[RETVAL_COERCE]] to <8 x double>*
-// CHECK-NEXT:    store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+// CHECK-NEXT:    [[CASTSCALABLESVE1:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE1]]
 //
 fixed_float64_t call_float64_fs(svbool_t pg, fixed_float64_t op1, svfloat64_t op2) {
   return svsel(pg, op1, op2);
@@ -213,14 +192,11 @@
 
 // CHECK-LABEL: @call_int32_ss(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t call_int32_ss(svbool_t pg, svint32_t op1, svint32_t op2) {
   return svsel(pg, op1, op2);
@@ -228,14 +204,11 @@
 
 // CHECK-LABEL: @call_float64_ss(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x double>, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 2 x double>* [[RETVAL_COERCE]] to <8 x double>*
-// CHECK-NEXT:    store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t call_float64_ss(svbool_t pg, svfloat64_t op1, svfloat64_t op2) {
   return svsel(pg, op1, op2);
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
@@ -13,7 +13,7 @@
 
 // CHECK-LABEL: @to_svint32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[TYPE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TYPE]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
@@ -23,12 +23,9 @@
 
 // CHECK-LABEL: @from_svint32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t from_svint32_t(svint32_t type) {
   return type;
@@ -36,7 +33,7 @@
 
 // CHECK-LABEL: @to_svfloat64_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[TYPE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[TYPE_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[TYPE]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
@@ -46,12 +43,9 @@
 
 // CHECK-LABEL: @from_svfloat64_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x double>, align 16
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[TYPE:%.*]], i64 0)
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 2 x double>* [[RETVAL_COERCE]] to <8 x double>*
-// CHECK-NEXT:    store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t from_svfloat64_t(svfloat64_t type) {
   return type;
@@ -111,12 +105,9 @@
 
 // CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0:%.*]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:    store <16 x i32> [[TYPE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TYPE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
   return type;
@@ -124,7 +115,7 @@
 
 // CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[TYPE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    store <16 x i32> [[TYPE]], <16 x i32>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
@@ -17,7 +17,6 @@
 // CHECK-NEXT:    [[PRED_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
 // CHECK-NEXT:    [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    store <vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1>* [[PRED_ADDR]], align 2
 // CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[VEC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PRED_ADDR]], align 2
@@ -35,11 +34,9 @@
 // CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP8]])
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP10]], i64 0)
 // CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP11:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to i8*
-// CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i32>* [[RETVAL]] to i8*
-// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 64, i1 false)
-// CHECK-NEXT:    [[TMP13:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP13]]
+// CHECK-NEXT:    [[TMP11:%.*]] = load <16 x i32>, <16 x i32>* [[RETVAL]], align 16
+// CHECK-NEXT:    [[CASTSCALABLESVE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP11]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE1]]
 //
 fixed_int32_t foo(svbool_t pred, svint32_t vec) {
   svbool_t pg = svand_z(pred, global_pred, global_pred);
@@ -50,16 +47,13 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i32>, align 16
 // CHECK-NEXT:    [[GLOBAL_VEC_PTR:%.*]] = alloca <16 x i32>*, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    store <16 x i32>* @global_vec, <16 x i32>** [[GLOBAL_VEC_PTR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>*, <16 x i32>** [[GLOBAL_VEC_PTR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16
 // CHECK-NEXT:    store <16 x i32> [[TMP1]], <16 x i32>* [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to i8*
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32>* [[RETVAL]] to i8*
-// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false)
-// CHECK-NEXT:    [[TMP4:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* [[RETVAL]], align 16
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t test_ptr_to_global() {
   fixed_int32_t *global_vec_ptr;
@@ -73,17 +67,14 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i32>, align 16
 // CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca <16 x i32>*, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    store <16 x i32>* [[ARR:%.*]], <16 x i32>** [[ARR_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>*, <16 x i32>** [[ARR_ADDR]], align 8
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[TMP0]], i64 0
 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[ARRAYIDX]], align 16
 // CHECK-NEXT:    store <16 x i32> [[TMP1]], <16 x i32>* [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to i8*
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32>* [[RETVAL]] to i8*
-// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false)
-// CHECK-NEXT:    [[TMP4:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* [[RETVAL]], align 16
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t array_arg(fixed_int32_t arr[]) {
   return arr[0];
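
Note (reviewer sketch, not part of the patch): a minimal standalone example of the code path touched in CGCall.cpp, assuming an SVE toolchain invoked with -march=armv8-a+sve -msve-vector-bits=512 so that the fixed-length type below lowers to <16 x i32>; the file layout and the function name identity are illustrative only, mirroring fixed_callee in attr-arm-sve-vector-bits-call.c.

  #include <arm_sve.h>

  /* A 512-bit fixed-length (VLS) SVE vector type; requires -msve-vector-bits=512. */
  typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

  /* Returning the VLS value is coerced to the scalable ABI type. With this patch
   * the coercion should be emitted as
   *   %cast = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> %x, i64 0)
   *   ret <vscale x 4 x i32> %cast
   * instead of a store/load round-trip through a <vscale x 4 x i32> alloca, matching
   * the updated CHECK lines above. */
  fixed_int32_t identity(fixed_int32_t x) { return x; }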