diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1271,12 +1271,26 @@
   // perform the conversion.
   if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
     if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+      // vector, use a vector insert and bitcast the result.
+      bool NeedsBitcast = false;
+      auto PredType =
+          llvm::ScalableVectorType::get(CGF.Builder.getInt1Ty(), 16);
+      llvm::Type *OrigType = Ty;
+      if (ScalableDst == PredType &&
+          FixedSrc->getElementType() == CGF.Builder.getInt8Ty()) {
+        ScalableDst = llvm::ScalableVectorType::get(CGF.Builder.getInt8Ty(), 2);
+        NeedsBitcast = true;
+      }
       if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
         auto *Load = CGF.Builder.CreateLoad(Src);
         auto *UndefVec = llvm::UndefValue::get(ScalableDst);
         auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
-        return CGF.Builder.CreateInsertVector(ScalableDst, UndefVec, Load, Zero,
-                                              "castScalableSve");
+        llvm::Value *Result = CGF.Builder.CreateInsertVector(
+            ScalableDst, UndefVec, Load, Zero, "castScalableSve");
+        if (NeedsBitcast)
+          Result = CGF.Builder.CreateBitCast(Result, OrigType);
+        return Result;
       }
     }
   }
@@ -2857,9 +2871,18 @@
         // llvm.experimental.vector.extract to convert back to the original
         // VLST.
         if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) {
-          auto *Coerced = Fn->getArg(FirstIRArg);
+          llvm::Value *Coerced = Fn->getArg(FirstIRArg);
           if (auto *VecTyFrom =
                   dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
+            // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+            // vector, bitcast the source and use a vector extract.
+            auto PredType =
+                llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+            if (VecTyFrom == PredType &&
+                VecTyTo->getElementType() == Builder.getInt8Ty()) {
+              VecTyFrom = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+              Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
+            }
             if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
               llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2063,11 +2063,25 @@
     // perform the bitcast.
     if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
       if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
+        // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+        // vector, use a vector insert and bitcast the result.
+        bool NeedsBitCast = false;
+        auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+        llvm::Type *OrigType = DstTy;
+        if (ScalableDst == PredType &&
+            FixedSrc->getElementType() == Builder.getInt8Ty()) {
+          DstTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+          ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy);
+          NeedsBitCast = true;
+        }
         if (FixedSrc->getElementType() == ScalableDst->getElementType()) {
           llvm::Value *UndefVec = llvm::UndefValue::get(DstTy);
           llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
-          return Builder.CreateInsertVector(DstTy, UndefVec, Src, Zero,
-                                            "castScalableSve");
+          llvm::Value *Result = Builder.CreateInsertVector(
+              DstTy, UndefVec, Src, Zero, "castScalableSve");
+          if (NeedsBitCast)
+            Result = Builder.CreateBitCast(Result, OrigType);
+          return Result;
         }
       }
     }
@@ -2077,6 +2091,15 @@
     // perform the bitcast.
     if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
       if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+        // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+        // vector, bitcast the source and use a vector extract.
+        auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+        if (ScalableSrc == PredType &&
+            FixedDst->getElementType() == Builder.getInt8Ty()) {
+          SrcTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+          ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy);
+          Src = Builder.CreateBitCast(Src, SrcTy);
+        }
         if (ScalableSrc->getElementType() == FixedDst->getElementType()) {
           llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
           return Builder.CreateExtractVector(DstTy, Src, Zero, "castFixedSve");
@@ -2087,10 +2110,9 @@
     // Perform VLAT <-> VLST bitcast through memory.
     // TODO: since the llvm.experimental.vector.{insert,extract} intrinsics
     //       require the element types of the vectors to be the same, we
-    //       need to keep this around for casting between predicates, or more
-    //       generally for bitcasts between VLAT <-> VLST where the element
-    //       types of the vectors are not the same, until we figure out a better
-    //       way of doing these casts.
+    //       need to keep this around for bitcasts between VLAT <-> VLST where
+    //       the element types of the vectors are not the same, until we figure
+    //       out a better way of doing these casts.
     if ((isa(SrcTy) &&
          isa(DstTy)) ||
         (isa(SrcTy) &&
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
@@ -191,32 +191,26 @@
 // CHECK-128-LABEL: @read_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <2 x i8>, align 2
 // CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
 // CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:    store <2 x i8> [[TMP0]], <2 x i8>* [[SAVED_VALUE]], align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <2 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
 // CHECK-128-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CHECK-256-LABEL: @read_bool(
 // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[SAVED_VALUE:%.*]] = alloca <4 x i8>, align 4
 // CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
 // CHECK-256-NEXT:    [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
-// CHECK-256-NEXT:    store <4 x i8> [[TMP0]], <4 x i8>* [[SAVED_VALUE]], align 4, !tbaa [[TBAA6]]
-// CHECK-256-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <4 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-256-NEXT:    [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 4, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> [[TMP0]], i64 0)
+// CHECK-256-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
 // CHECK-256-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CHECK-512-LABEL: @read_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
 // CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
 // CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:    store <8 x i8> [[TMP0]], <8 x i8>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-512-NEXT:    [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-512-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
 // CHECK-512-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 svbool_t read_bool(struct struct_bool *s) {
@@ -225,32 +219,26 @@
 // CHECK-128-LABEL: @write_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 2
-// CHECK-128-NEXT:    store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 2, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <2 x i8>*
-// CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[CASTFIXEDSVE]], align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1> %x to <vscale x 2 x i8>
+// CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = call <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
 // CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:    store <2 x i8> [[TMP0]], <2 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    store <2 x i8> [[CASTFIXEDSVE]], <2 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
 // CHECK-128-NEXT:    ret void
 //
 // CHECK-256-LABEL: @write_bool(
 // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 4
-// CHECK-256-NEXT:    store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 4, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-256-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <4 x i8>*
-// CHECK-256-NEXT:    [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* [[CASTFIXEDSVE]], align 4, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1> %x to <vscale x 2 x i8>
+// CHECK-256-NEXT:    [[CASTFIXEDSVE:%.*]] = call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
 // CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:    store <4 x i8> [[TMP0]], <4 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    store <4 x i8> [[CASTFIXEDSVE]], <4 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
 // CHECK-256-NEXT:    ret void
 //
 // CHECK-512-LABEL: @write_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-512-NEXT:    store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
-// CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1> %x to <vscale x 2 x i8>
+// CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
 // CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:    store <8 x i8> [[TMP0]], <8 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    store <8 x i8> [[CASTFIXEDSVE]], <8 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
 // CHECK-512-NEXT:    ret void
 //
 void write_bool(struct struct_bool *s, svbool_t x) {
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -77,32 +77,8 @@
 // CHECK-LABEL: @call_bool_ff(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[OP1:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[OP2:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[SAVED_VALUE3:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[SAVED_VALUE5:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 8
-// CHECK-NEXT:    [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* [[OP2]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    store <vscale x 16 x i1> [[OP2_COERCE:%.*]], <vscale x 16 x i1>* [[TMP1]], align 8
-// CHECK-NEXT:    [[OP22:%.*]] = load <8 x i8>, <8 x i8>* [[OP2]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    store <8 x i8> [[OP11]], <8 x i8>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    store <8 x i8> [[OP22]], <8 x i8>* [[SAVED_VALUE3]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[CASTFIXEDSVE4:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE3]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE4]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]])
-// CHECK-NEXT:    store <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1>* [[SAVED_VALUE5]], align 8, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-NEXT:    [[CASTFIXEDSVE6:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE5]] to <8 x i8>*
-// CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE6]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
-// CHECK-NEXT:    store <8 x i8> [[TMP5]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 8
-// CHECK-NEXT:    [[TMP6:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP6]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1> [[OP2_COERCE:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) {
   return svsel(pg, op1, op2);
@@ -134,24 +110,8 @@
 // CHECK-LABEL: @call_bool_fs(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[OP1:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 8
-// CHECK-NEXT:    [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    store <8 x i8> [[OP11]], <8 x i8>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP1]], <vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT:    store <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1>* [[SAVED_VALUE2]], align 8, !tbaa [[TBAA9]]
-// CHECK-NEXT:    [[CASTFIXEDSVE3:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE2]] to <8 x i8>*
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE3]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
-// CHECK-NEXT:    store <8 x i8> [[TMP3]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 8
-// CHECK-NEXT:    [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP4]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1> [[OP2_COERCE:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) {
   return svsel(pg, op1, op2);
@@ -183,16 +143,8 @@
 // CHECK-LABEL: @call_bool_ss(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT:    store <vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA9]]
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
-// CHECK-NEXT:    store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1> [[OP2_COERCE:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 fixed_bool_t call_bool_ss(svbool_t pg, svbool_t op1, svbool_t op2) {
   return svsel(pg, op1, op2);
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
@@ -45,15 +45,7 @@
 // CHECK-LABEL: @to_svbool_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8>* [[TYPE]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    store <vscale x 16 x i1> [[TYPE_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 8
-// CHECK-NEXT:    [[TYPE1:%.*]] = load <8 x i8>, <8 x i8>* [[TYPE]], align 8, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-NEXT:    store <8 x i8> [[TYPE1]], <8 x i8>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TYPE:%.*]]
 //
 svbool_t to_svbool_t(fixed_bool_t type) {
   return type;
@@ -61,23 +53,28 @@
 // CHECK-LABEL: @from_svbool_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-NEXT:    store <vscale x 16 x i1> [[TYPE:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
-// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
-// CHECK-NEXT:    store <8 x i8> [[TMP0]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TYPE:%.*]]
 //
 fixed_bool_t from_svbool_t(svbool_t type) {
   return type;
 }
 
+// CHECK-LABEL: @lax_cast(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = alloca <16 x i32>, align 64
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[TMP0:%.*]], align 64, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i32>* [[TMP0]] to <vscale x 2 x i64>*
+// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], align 64, !tbaa [[TBAA6]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+//
+svint64_t lax_cast(fixed_int32_t type) {
+  return type;
+}
+
 // CHECK-LABEL: @to_svint32_t__from_gnu_int32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0:%.*]], align 16, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TYPE]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
@@ -18,19 +18,15 @@
 // CHECK-NEXT:    [[PRED_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
 // CHECK-NEXT:    [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT:    [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[SAVED_VALUE1:%.*]] = alloca <8 x i8>, align 8
 // CHECK-NEXT:    store <vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1>* [[PRED_ADDR]], align 2
 // CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[VEC_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PRED_ADDR]], align 2
 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
-// CHECK-NEXT:    store <8 x i8> [[TMP1]], <8 x i8>* [[SAVED_VALUE]], align 8
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
-// CHECK-NEXT:    store <8 x i8> [[TMP3]], <8 x i8>* [[SAVED_VALUE1]], align 8
-// CHECK-NEXT:    [[CASTFIXEDSVE2:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE1]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE2]], align 8
+// CHECK-NEXT:    [[CASTFIXEDSVE2:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE2]] to <vscale x 16 x i1>
 // CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP4]])
 // CHECK-NEXT:    store <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i1>* [[PG]], align 2
 // CHECK-NEXT:    [[TMP6:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PG]], align 2
@@ -92,17 +88,15 @@
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca <8 x i8>, align 2
 // CHECK-NEXT:    [[ARR:%.*]] = alloca [3 x <8 x i8>], align 2
 // CHECK-NEXT:    [[PARR:%.*]] = alloca <8 x i8>*, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 2
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[ARR]], i64 0, i64 0
 // CHECK-NEXT:    store <8 x i8>* [[ARRAYIDX]], <8 x i8>** [[PARR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>*, <8 x i8>** [[PARR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 2
 // CHECK-NEXT:    store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL]], align 2
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i8*
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8>* [[RETVAL]] to i8*
-// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP2]], i8* align 2 [[TMP3]], i64 8, i1 false)
-// CHECK-NEXT:    [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 2
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP4]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[RETVAL]], align 2
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP3]]
 //
 fixed_bool_t address_of_array_idx() {
   fixed_bool_t arr[3];
@@ -119,23 +113,19 @@
 // CHECK-NEXT:    [[XX:%.*]] = alloca <8 x i8>, align 8
 // CHECK-NEXT:    [[YY:%.*]] = alloca <8 x i8>, align 8
 // CHECK-NEXT:    [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:    [[SAVED_VALUE1:%.*]] = alloca <8 x i8>, align 8
 // CHECK-NEXT:    store <vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1>* [[PRED_ADDR]], align 2
 // CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[VEC_ADDR]], align 16
 // CHECK-NEXT:    store <8 x i8> , <8 x i8>* [[XX]], align 8
 // CHECK-NEXT:    store <8 x i8> , <8 x i8>* [[YY]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PRED_ADDR]], align 2
 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
-// CHECK-NEXT:    store <8 x i8> [[TMP1]], <8 x i8>* [[SAVED_VALUE]], align 8
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[XX]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[YY]], align 8
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
-// CHECK-NEXT:    store <8 x i8> [[ADD]], <8 x i8>* [[SAVED_VALUE1]], align 8
-// CHECK-NEXT:    [[CASTFIXEDSVE2:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE1]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE2]], align 8
+// CHECK-NEXT:    [[CASTFIXEDSVE2:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE2]] to <vscale x 16 x i1>
 // CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP5]])
 // CHECK-NEXT:    store <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1>* [[PG]], align 2
 // CHECK-NEXT:    [[TMP7:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PG]], align 2
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
@@ -49,20 +49,16 @@
 // CHECK-128-LABEL: @write_global_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 2
-// CHECK-128-NEXT:    store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 2, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <2 x i8>*
-// CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[CASTFIXEDSVE]], align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:    store <2 x i8> [[TMP0]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V:%.*]] to <vscale x 2 x i8>
+// CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = call <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT:    store <2 x i8> [[CASTFIXEDSVE]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-128-NEXT:    ret void
 //
 // CHECK-512-LABEL: @write_global_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 8
-// CHECK-512-NEXT:    store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
-// CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:    store <8 x i8> [[TMP0]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[V:%.*]] to <vscale x 2 x i8>
+// CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
+// CHECK-512-NEXT:    store <8 x i8> [[CASTFIXEDSVE]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
 // CHECK-512-NEXT:    ret void
 //
 void write_global_bool(svbool_t v) { global_bool = v; }
@@ -101,20 +97,16 @@
 // CHECK-128-LABEL: @read_global_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <2 x i8>, align 2
 // CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:    store <2 x i8> [[TMP0]], <2 x i8>* [[SAVED_VALUE]], align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <2 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
 // CHECK-128-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CHECK-512-LABEL: @read_global_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
 // CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:    store <8 x i8> [[TMP0]], <8 x i8>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
-// CHECK-512-NEXT:    [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-512-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
 // CHECK-512-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 svbool_t read_global_bool() { return global_bool; }
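
For context, a minimal source-level sketch of the conversions these tests exercise; this is an illustration only, not part of the patch, and assumes a translation unit compiled with SVE enabled and -msve-vector-bits=512 (function names are made up for the example):

// Hypothetical illustration, not part of the patch. With this change, the two
// returns below are emitted as llvm.experimental.vector.insert/extract on
// <vscale x 2 x i8> plus a bitcast to/from the <vscale x 16 x i1> predicate
// type, instead of a round trip through a stack slot.
#include <arm_sve.h>

typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

svbool_t to_scalable(fixed_bool_t p) { return p; }   // vector.insert + bitcast
fixed_bool_t to_fixed(svbool_t p) { return p; }      // bitcast + vector.extract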