diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1390,6 +1390,19 @@
     return;
   }
 
+  // If coercing a fixed vector from a scalable vector for ABI compatibility,
+  // and the types match, use the llvm.vector.extract intrinsic to perform the
+  // conversion.
+  if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+    if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+      if (FixedDst->getElementType() == ScalableSrc->getElementType()) {
+        auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+        Src = CGF.Builder.CreateExtractVector(DstTy, Src, Zero, "cast.fixed");
+        CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+        return;
+      }
+    }
+  }
   llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
 
   // If store is legal, just bitcast the src pointer.
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -41,11 +41,7 @@
 
 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[COERCE1:%.*]] = alloca <16 x i32>, align 16
-// CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[COERCE1]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[COERCE1]], align 16, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE2]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[X:%.*]]
 //
 svint32_t sizeless_caller(svint32_t x) {
   return fixed_callee(x);
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
@@ -38,11 +38,7 @@
 
 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[COERCE1:%.*]] = alloca <8 x i32>, align 8
-// CHECK-NEXT:    store <vscale x 2 x i32> [[X:%.*]], ptr [[COERCE1]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[COERCE1]], align 8, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT:    [[CASTSCALABLESVE2:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP0]], i64 0)
-// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE2]]
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[X:%.*]]
 //
 vint32m1_t sizeless_caller(vint32m1_t x) {
   return fixed_callee(x);