diff --git a/clang/include/clang/Basic/RISCVVTypes.def b/clang/include/clang/Basic/RISCVVTypes.def --- a/clang/include/clang/Basic/RISCVVTypes.def +++ b/clang/include/clang/Basic/RISCVVTypes.def @@ -144,6 +144,10 @@ RVV_PREDICATE_TYPE("__rvv_bool32_t", RvvBool32, RvvBool32Ty, 2) RVV_PREDICATE_TYPE("__rvv_bool64_t", RvvBool64, RvvBool64Ty, 1) +//===- Tuple vector types -------------------------------------------------===// + +RVV_VECTOR_TYPE_INT("__rvv_int32m1x2_t", RvvInt32m1x2, RvvInt32m1x2Ty, 2, 32, 2, true) + #undef RVV_VECTOR_TYPE_FLOAT #undef RVV_VECTOR_TYPE_INT #undef RVV_VECTOR_TYPE diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -3132,30 +3132,51 @@ llvm::StructType *STy = dyn_cast(ArgI.getCoerceToType()); if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy && STy->getNumElements() > 1) { - uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy); - llvm::Type *DstTy = Ptr.getElementType(); - uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy); + llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy); + llvm::TypeSize PtrElementSize = + CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType()); + if (StructSize.isScalable()) { + assert(STy->containsHomogeneousScalableVectorTypes() && + "ABI only supports structure with homogeneous scalable vector " + "type"); + assert(StructSize == PtrElementSize && + "Only allow non-fractional movement of structure with" + "homogeneous scalable vector type"); + assert(STy->getNumElements() == NumIRArgs); + + llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + auto *AI = Fn->getArg(FirstIRArg + i); + AI->setName(Arg->getName() + ".coerce" + Twine(i)); + LoadedStructValue = + Builder.CreateInsertValue(LoadedStructValue, AI, i); + } - Address AddrToStoreInto = Address::invalid(); - if (SrcSize <= DstSize) { - 
AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy); + Builder.CreateStore(LoadedStructValue, Ptr); } else { - AddrToStoreInto = - CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); - } + uint64_t SrcSize = StructSize.getFixedValue(); + uint64_t DstSize = PtrElementSize.getFixedValue(); + + Address AddrToStoreInto = Address::invalid(); + if (SrcSize <= DstSize) { + AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy); + } else { + AddrToStoreInto = + CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); + } - assert(STy->getNumElements() == NumIRArgs); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto AI = Fn->getArg(FirstIRArg + i); - AI->setName(Arg->getName() + ".coerce" + Twine(i)); - Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); - Builder.CreateStore(AI, EltPtr); - } + assert(STy->getNumElements() == NumIRArgs); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + auto AI = Fn->getArg(FirstIRArg + i); + AI->setName(Arg->getName() + ".coerce" + Twine(i)); + Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); + Builder.CreateStore(AI, EltPtr); + } - if (SrcSize > DstSize) { - Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize); + if (SrcSize > DstSize) { + Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize); + } } - } else { // Simple case, just do a coerced store of the argument into the alloca. 
assert(NumIRArgs == 1); diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -630,13 +630,22 @@ #include "clang/Basic/PPCTypes.def" #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/RISCVVTypes.def" - { - ASTContext::BuiltinVectorTypeInfo Info = - Context.getBuiltinVectorTypeInfo(cast(Ty)); - return llvm::ScalableVectorType::get(ConvertType(Info.ElementType), - Info.EC.getKnownMinValue() * - Info.NumVectors); - } + { + ASTContext::BuiltinVectorTypeInfo Info = + Context.getBuiltinVectorTypeInfo(cast(Ty)); + // Tuple types are expressed as aggregate types of the same scalable + // vector type (e.g. vint32m1x2_t is two vint32m1_t, which is {<vscale x 2 x i32>, <vscale x 2 x i32>}). + if (Info.NumVectors != 1) { + llvm::Type *EltTy = llvm::ScalableVectorType::get( + ConvertType(Info.ElementType), Info.EC.getKnownMinValue()); + llvm::SmallVector EltTys(Info.NumVectors, EltTy); + return llvm::StructType::get(getLLVMContext(), EltTys); + } + return llvm::ScalableVectorType::get(ConvertType(Info.ElementType), + Info.EC.getKnownMinValue() * + Info.NumVectors); + } #define WASM_REF_TYPE(Name, MangledName, Id, SingletonId, AS) \ case BuiltinType::Id: { \ if (BuiltinType::Id == BuiltinType::WasmExternRef) \ diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c @@ -0,0 +1,90 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -O0 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=O0 +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s 
--check-prefix=AFTER_MEM2REG + + +#include + +// Declare local variable +// O0-LABEL: define dso_local void @foo +// O0-SAME: () #[[ATTR0:[0-9]+]] { +// O0-NEXT: entry: +// O0-NEXT: [[V_TUPLE:%.*]] = alloca { , }, align 4 +// O0-NEXT: ret void +// +// AFTER_MEM2REG-LABEL: define dso_local void @foo +// AFTER_MEM2REG-SAME: () #[[ATTR0:[0-9]+]] { +// AFTER_MEM2REG-NEXT: entry: +// AFTER_MEM2REG-NEXT: ret void +// +void foo() { + __rvv_int32m1x2_t v_tuple; +} + +// Declare local variable and return +// O0-LABEL: define dso_local { , } @bar +// O0-SAME: () #[[ATTR0]] { +// O0-NEXT: entry: +// O0-NEXT: [[V_TUPLE:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[TMP0:%.*]] = load { , }, ptr [[V_TUPLE]], align 4 +// O0-NEXT: ret { , } [[TMP0]] +// +// AFTER_MEM2REG-LABEL: define dso_local { , } @bar +// AFTER_MEM2REG-SAME: () #[[ATTR0]] { +// AFTER_MEM2REG-NEXT: entry: +// AFTER_MEM2REG-NEXT: ret { , } undef +// +__rvv_int32m1x2_t bar() { + __rvv_int32m1x2_t v_tuple; + return v_tuple; +} + +// Pass as function parameter +// O0-LABEL: define dso_local void @baz +// O0-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// O0-NEXT: entry: +// O0-NEXT: [[V_TUPLE:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[V_TUPLE_ADDR:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// O0-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// O0-NEXT: store { , } [[TMP1]], ptr [[V_TUPLE]], align 4 +// O0-NEXT: [[V_TUPLE1:%.*]] = load { , }, ptr [[V_TUPLE]], align 4 +// O0-NEXT: store { , } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4 +// O0-NEXT: ret void +// +// AFTER_MEM2REG-LABEL: define dso_local void @baz +// AFTER_MEM2REG-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// AFTER_MEM2REG-NEXT: entry: +// AFTER_MEM2REG-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// AFTER_MEM2REG-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], 
[[V_TUPLE_COERCE1]], 1 +// AFTER_MEM2REG-NEXT: ret void +// +void baz(__rvv_int32m1x2_t v_tuple) { +} + +// Pass as function parameter and return +// O0-LABEL: define dso_local { , } @qux +// O0-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// O0-NEXT: entry: +// O0-NEXT: [[V_TUPLE:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[V_TUPLE_ADDR:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// O0-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// O0-NEXT: store { , } [[TMP1]], ptr [[V_TUPLE]], align 4 +// O0-NEXT: [[V_TUPLE1:%.*]] = load { , }, ptr [[V_TUPLE]], align 4 +// O0-NEXT: store { , } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4 +// O0-NEXT: [[TMP2:%.*]] = load { , }, ptr [[V_TUPLE_ADDR]], align 4 +// O0-NEXT: ret { , } [[TMP2]] +// +// AFTER_MEM2REG-LABEL: define dso_local { , } @qux +// AFTER_MEM2REG-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// AFTER_MEM2REG-NEXT: entry: +// AFTER_MEM2REG-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// AFTER_MEM2REG-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// AFTER_MEM2REG-NEXT: ret { , } [[TMP1]] +// +__rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) { + return v_tuple; +} diff --git a/clang/test/Sema/riscv-types.c b/clang/test/Sema/riscv-types.c --- a/clang/test/Sema/riscv-types.c +++ b/clang/test/Sema/riscv-types.c @@ -133,6 +133,9 @@ // CHECK: __rvv_int8mf2_t x43; __rvv_int8mf2_t x43; + + // CHECK: __rvv_int32m1x2_t x44; + __rvv_int32m1x2_t x44; } typedef __rvv_bool4_t vbool4_t;