diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2111,7 +2111,13 @@
         return EmitLoadOfLValue(DestLV, CE->getExprLoc());
       }
 
-      Address Addr = EmitLValue(E).getAddress(CGF);
+      Address Addr = Address::invalid();
+      if (E->isPRValue() && !isa(E)) {
+        Addr = CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-prvalue");
+        LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
+        CGF.EmitStoreOfScalar(Src, LV);
+      } else
+        Addr = EmitLValue(E).getAddress(CGF);
       Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy));
       LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy);
       DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo());
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
@@ -103,3 +103,46 @@
   parr = &arr[0];
   return *parr;
 }
+
+// CHECK-LABEL: @test_cast(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i32>, align 16
+// CHECK-NEXT:    [[PRED_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
+// CHECK-NEXT:    [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
+// CHECK-NEXT:    [[XX:%.*]] = alloca <16 x i32>, align 16
+// CHECK-NEXT:    [[YY:%.*]] = alloca <16 x i32>, align 16
+// CHECK-NEXT:    [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
+// CHECK-NEXT:    [[SAVED_PRVALUE:%.*]] = alloca <16 x i32>, align 64
+// CHECK-NEXT:    store <vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1>* [[PRED_ADDR]], align 2
+// CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[VEC_ADDR]], align 16
+// CHECK-NEXT:    store <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <16 x i32>* [[XX]], align 16
+// CHECK-NEXT:    store <16 x i32> <i32 2, i32 5, i32 4, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <16 x i32>* [[YY]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PRED_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
+// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* bitcast (<8 x i8>* @global_pred to <vscale x 16 x i1>*), align 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, <16 x i32>* [[XX]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* [[YY]], align 16
+// CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[TMP3]], [[TMP4]]
+// CHECK-NEXT:    store <16 x i32> [[ADD]], <16 x i32>* [[SAVED_PRVALUE]], align 64
+// CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32>* [[SAVED_PRVALUE]] to <vscale x 16 x i1>*
+// CHECK-NEXT:    [[TMP6:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP5]], align 64
+// CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP6]])
+// CHECK-NEXT:    store <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1>* [[PG]], align 2
+// CHECK-NEXT:    [[TMP8:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PG]], align 2
+// CHECK-NEXT:    [[TMP9:%.*]] = load <16 x i32>, <16 x i32>* @global_vec, align 16
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP10:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[VEC_ADDR]], align 16
+// CHECK-NEXT:    [[TMP11:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP8]])
+// CHECK-NEXT:    [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP11]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP10]])
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP12]], i64 0)
+// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP13:%.*]] = load <16 x i32>, <16 x i32>* [[RETVAL]], align 16
+// CHECK-NEXT:    [[CASTSCALABLESVE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP13]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE1]]
+//
+fixed_int32_t test_cast(svbool_t pred, svint32_t vec) {
+  fixed_int32_t xx = {1, 2, 3, 4};
+  fixed_int32_t yy = {2, 5, 4, 6};
+  svbool_t pg = svand_z(pred, global_pred, xx + yy);
+  return svadd_m(pg, global_vec, vec);
+}
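
Note (illustrative sketch, not part of the patch): the new branch fires when the operand of a VLAT <-> VLST bitcast that has to go through memory is a prvalue, as in the test above where `xx + yy` feeds `svand_z`. A minimal standalone trigger might look like the function below, assuming the `fixed_int32_t` typedef and the `global_pred` declaration already present in this test file; the function name is made up for illustration.

svbool_t bitcast_prvalue_sketch(svbool_t pred, fixed_int32_t a, fixed_int32_t b) {
  // 'a + b' is a prvalue fixed-length vector whose element type differs from
  // svbool_t, so the cast goes through memory; with this change it is spilled
  // to the "saved-prvalue" temporary instead of calling EmitLValue() on it.
  return svand_z(pred, global_pred, a + b);
}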