Index: clang/docs/LanguageExtensions.rst =================================================================== --- clang/docs/LanguageExtensions.rst +++ clang/docs/LanguageExtensions.rst @@ -804,6 +804,7 @@ * 32-bit ARM * 64-bit ARM (AArch64) * X86 (see below) +* RISC-V On X86 targets, ``__bf16`` is supported as long as SSE2 is available, which includes all 64-bit and all recent 32-bit processors. Index: clang/lib/Basic/Targets/RISCV.h =================================================================== --- clang/lib/Basic/Targets/RISCV.h +++ clang/lib/Basic/Targets/RISCV.h @@ -32,6 +32,9 @@ public: RISCVTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { + BFloat16Width = 16; + BFloat16Align = 16; + BFloat16Format = &llvm::APFloat::BFloat(); LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); @@ -101,6 +104,8 @@ bool hasBitIntType() const override { return true; } + bool hasBFloat16Type() const override; + bool useFP16ConversionIntrinsics() const override { return false; } @@ -109,6 +114,8 @@ void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override; bool isValidTuneCPUName(StringRef Name) const override; void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const override; + + const char *getBFloat16Mangling() const override { return "DF16b"; } }; class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo { public: Index: clang/lib/Basic/Targets/RISCV.cpp =================================================================== --- clang/lib/Basic/Targets/RISCV.cpp +++ clang/lib/Basic/Targets/RISCV.cpp @@ -300,6 +300,11 @@ return false; } +bool RISCVTargetInfo::hasBFloat16Type() const { + // __bf16 is available only when a BF16 extension has set HasBFloat16. + return HasBFloat16; +} + /// Perform initialization based on the user configured set of features. 
bool RISCVTargetInfo::handleTargetFeatures(std::vector<std::string> &Features, DiagnosticsEngine &Diags) { @@ -323,6 +328,12 @@ if (ISAInfo->hasExtension("zfh") || ISAInfo->hasExtension("zhinx")) HasLegalHalfType = true; + // Any of the experimental BF16 extensions enables the __bf16 type. + if (ISAInfo->hasExtension("experimental-zfbfmin") || + ISAInfo->hasExtension("experimental-zvfbfmin") || + ISAInfo->hasExtension("experimental-zvfbfwma")) + HasBFloat16 = true; + return true; } Index: clang/test/CodeGen/RISCV/bfloat-abi.c =================================================================== --- /dev/null +++ clang/test/CodeGen/RISCV/bfloat-abi.c @@ -0,0 +1,811 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zfbfmin -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zvfbfmin -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-V +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zvfbfwma -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-MA + +struct bfloat1 { + __bf16 a; +}; + +// CHECK-LABEL: define dso_local i64 @h1 +// CHECK-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 4, i1 false) +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[RETVAL_COERCE]], align 8 +// CHECK-NEXT: ret i64 [[TMP1]] +// +// CHECK-V-LABEL: define dso_local 
i64 @h1 +// CHECK-V-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[RETVAL_COERCE:%.*]] = alloca i64, align 8 +// CHECK-V-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-V-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-V-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 4, i1 false) +// CHECK-V-NEXT: [[TMP1:%.*]] = load i64, ptr [[RETVAL_COERCE]], align 8 +// CHECK-V-NEXT: ret i64 [[TMP1]] +// +// CHECK-MA-LABEL: define dso_local i64 @h1 +// CHECK-MA-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[RETVAL_COERCE:%.*]] = alloca i64, align 8 +// CHECK-MA-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-MA-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-MA-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 4, i1 false) +// CHECK-MA-NEXT: [[TMP1:%.*]] = load i64, ptr [[RETVAL_COERCE]], align 8 +// CHECK-MA-NEXT: ret i64 [[TMP1]] +// +struct bfloat1 h1(__bf16 a) { + struct bfloat1 x; + x.a = a; + return x; +} + +struct bfloat2 { + __bf16 a; + __bf16 b; +}; + +// CHECK-LABEL: define dso_local i64 @h2 +// CHECK-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret i64 [[TMP2]] +// +// CHECK-V-LABEL: define dso_local i64 @h2 +// CHECK-V-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-V-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-V-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-V-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-V-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 2 +// CHECK-V-NEXT: ret i64 [[TMP2]] +// +// CHECK-MA-LABEL: define dso_local i64 @h2 +// CHECK-MA-SAME: (bfloat noundef [[A:%.*]], bfloat noundef 
[[B:%.*]]) #[[ATTR0]] { +// CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-MA-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-MA-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-MA-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-MA-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 2 +// CHECK-MA-NEXT: ret i64 [[TMP2]] +// +struct bfloat2 h2(__bf16 a, __bf16 b) { + struct bfloat2 x; + x.a = a; + x.b = b; + return x; +} + +struct bfloat3 { + __bf16 a; + __bf16 b; + __bf16 c; +}; + +// CHECK-LABEL: define dso_local [2 x i64] @h3 +// CHECK-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: store bfloat [[TMP0]], ptr [[A1]], 
align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 4 +// CHECK-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 12, i1 false) +// CHECK-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8 +// CHECK-NEXT: ret [2 x i64] [[TMP3]] +// +// CHECK-V-LABEL: define dso_local [2 x i64] @h3 +// CHECK-V-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8 +// CHECK-V-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-V-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-V-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-V-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-V-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-V-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-V-NEXT: [[C3:%.*]] = getelementptr inbounds 
[[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 4 +// CHECK-V-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-V-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 12, i1 false) +// CHECK-V-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8 +// CHECK-V-NEXT: ret [2 x i64] [[TMP3]] +// +// CHECK-MA-LABEL: define dso_local [2 x i64] @h3 +// CHECK-MA-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8 +// CHECK-MA-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-MA-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-MA-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-MA-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-MA-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 4 +// CHECK-MA-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-MA-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 12, i1 false) +// CHECK-MA-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8 +// CHECK-MA-NEXT: ret [2 
x i64] [[TMP3]] +// +struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) { + struct bfloat3 x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct bfloat4 { + __bf16 a; + __bf16 b; + __bf16 c; + __bf16 d; +}; + +// CHECK-LABEL: define dso_local [2 x i64] @h4 +// CHECK-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 4 +// CHECK-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 6 +// CHECK-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2 +// CHECK-NEXT: [[TMP4:%.*]] = load [2 x i64], ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret [2 x i64] [[TMP4]] +// +// CHECK-V-LABEL: 
define dso_local [2 x i64] @h4 +// CHECK-V-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-V-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-V-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-V-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-V-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-V-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-V-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 4 +// CHECK-V-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-V-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-V-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 6 +// CHECK-V-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2 +// CHECK-V-NEXT: [[TMP4:%.*]] = load [2 x i64], ptr [[RETVAL]], align 2 +// CHECK-V-NEXT: ret [2 x i64] [[TMP4]] +// +// CHECK-MA-LABEL: define dso_local [2 x i64] @h4 +// CHECK-MA-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// 
CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-MA-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-MA-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-MA-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-MA-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-MA-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 4 +// CHECK-MA-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-MA-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-MA-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 6 +// CHECK-MA-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2 +// CHECK-MA-NEXT: [[TMP4:%.*]] = load [2 x i64], ptr [[RETVAL]], align 2 +// CHECK-MA-NEXT: ret [2 x i64] [[TMP4]] +// +struct bfloat4 h4(__bf16 a, __bf16 b, __bf16 c, __bf16 d) { + struct bfloat4 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + return x; +} + +struct floatbfloat { + float a; + __bf16 b; +}; + +// CHECK-LABEL: define dso_local i64 @fh +// CHECK-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT:%.*]], align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i64 [[TMP2]] +// +// CHECK-V-LABEL: define dso_local i64 @fh +// CHECK-V-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT:%.*]], align 4 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-V-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-V-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-V-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-V-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-V-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4 +// CHECK-V-NEXT: ret i64 [[TMP2]] +// +// CHECK-MA-LABEL: define dso_local i64 @fh +// CHECK-MA-SAME: (float noundef 
[[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT:%.*]], align 4 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-MA-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-MA-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-MA-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-MA-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-MA-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4 +// CHECK-MA-NEXT: ret i64 [[TMP2]] +// +struct floatbfloat fh(float a, __bf16 b) { + struct floatbfloat x; + x.a = a; + x.b = b; + return x; +} + +struct floatbfloat2 { + float a; + __bf16 b; + __bf16 c; +}; + +// CHECK-LABEL: define dso_local [2 x i64] @fh2 +// CHECK-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT2:%.*]], align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8 +// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 
+// CHECK-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 12, i1 false) +// CHECK-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8 +// CHECK-NEXT: ret [2 x i64] [[TMP3]] +// +// CHECK-V-LABEL: define dso_local [2 x i64] @fh2 +// CHECK-V-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT2:%.*]], align 4 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8 +// CHECK-V-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-V-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-V-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-V-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-V-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-V-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-V-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], 
align 2 +// CHECK-V-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-V-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 4 +// CHECK-V-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 12, i1 false) +// CHECK-V-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8 +// CHECK-V-NEXT: ret [2 x i64] [[TMP3]] +// +// CHECK-MA-LABEL: define dso_local [2 x i64] @fh2 +// CHECK-MA-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT2:%.*]], align 4 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-MA-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8 +// CHECK-MA-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-MA-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-MA-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-MA-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-MA-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-MA-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 4 +// CHECK-MA-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 12, i1 false) +// CHECK-MA-NEXT: 
[[TMP3:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8 +// CHECK-MA-NEXT: ret [2 x i64] [[TMP3]] +// +struct floatbfloat2 fh2(float a, __bf16 b, __bf16 c) { + struct floatbfloat2 x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct bfloatfloat { + __bf16 a; + float b; +}; + +// CHECK-LABEL: define dso_local i64 @hf +// CHECK-SAME: (bfloat noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOATFLOAT:%.*]], align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-NEXT: store float [[TMP1]], ptr [[B2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i64 [[TMP2]] +// +// CHECK-V-LABEL: define dso_local i64 @hf +// CHECK-V-SAME: (bfloat noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOATFLOAT:%.*]], align 4 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[B_ADDR:%.*]] = alloca float, align 4 +// CHECK-V-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: store float [[B]], ptr [[B_ADDR]], align 4 +// CHECK-V-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-V-NEXT: store bfloat [[TMP0]], ptr 
[[A1]], align 4 +// CHECK-V-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4 +// CHECK-V-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-V-NEXT: store float [[TMP1]], ptr [[B2]], align 4 +// CHECK-V-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4 +// CHECK-V-NEXT: ret i64 [[TMP2]] +// +// CHECK-MA-LABEL: define dso_local i64 @hf +// CHECK-MA-SAME: (bfloat noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOATFLOAT:%.*]], align 4 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[B_ADDR:%.*]] = alloca float, align 4 +// CHECK-MA-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: store float [[B]], ptr [[B_ADDR]], align 4 +// CHECK-MA-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-MA-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 4 +// CHECK-MA-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4 +// CHECK-MA-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-MA-NEXT: store float [[TMP1]], ptr [[B2]], align 4 +// CHECK-MA-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4 +// CHECK-MA-NEXT: ret i64 [[TMP2]] +// +struct bfloatfloat hf(__bf16 a, float b) { + struct bfloatfloat x; + x.a = a; + x.b = b; + return x; +} + +struct bfloat2float { + __bf16 a; + __bf16 b; + float c; +}; + +// CHECK-LABEL: define dso_local [2 x i64] @h2f +// CHECK-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2FLOAT:%.*]], align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[C_ADDR:%.*]] = 
alloca float, align 4 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NEXT: store float [[C]], ptr [[C_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR]], align 4 +// CHECK-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-NEXT: store float [[TMP2]], ptr [[C3]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 12, i1 false) +// CHECK-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8 +// CHECK-NEXT: ret [2 x i64] [[TMP3]] +// +// CHECK-V-LABEL: define dso_local [2 x i64] @h2f +// CHECK-V-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2FLOAT:%.*]], align 4 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[C_ADDR:%.*]] = alloca float, align 4 +// CHECK-V-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8 +// CHECK-V-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: store float [[C]], ptr [[C_ADDR]], align 4 +// CHECK-V-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: [[A1:%.*]] = 
getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-V-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 4 +// CHECK-V-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-V-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-V-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR]], align 4 +// CHECK-V-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-V-NEXT: store float [[TMP2]], ptr [[C3]], align 4 +// CHECK-V-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 12, i1 false) +// CHECK-V-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8 +// CHECK-V-NEXT: ret [2 x i64] [[TMP3]] +// +// CHECK-MA-LABEL: define dso_local [2 x i64] @h2f +// CHECK-MA-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2FLOAT:%.*]], align 4 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[C_ADDR:%.*]] = alloca float, align 4 +// CHECK-MA-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8 +// CHECK-MA-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: store float [[C]], ptr [[C_ADDR]], align 4 +// CHECK-MA-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-MA-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 4 +// CHECK-MA-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 2 +// 
CHECK-MA-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-MA-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR]], align 4 +// CHECK-MA-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-MA-NEXT: store float [[TMP2]], ptr [[C3]], align 4 +// CHECK-MA-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 12, i1 false) +// CHECK-MA-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8 +// CHECK-MA-NEXT: ret [2 x i64] [[TMP3]] +// +struct bfloat2float h2f(__bf16 a, __bf16 b, float c) { + struct bfloat2float x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct floatbfloat3 { + float a; + __bf16 b; + __bf16 c; + __bf16 d; +}; + +// CHECK-LABEL: define dso_local [2 x i64] @fh3 +// CHECK-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT3:%.*]], align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = 
load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 5 +// CHECK-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load [2 x i64], ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret [2 x i64] [[TMP4]] +// +// CHECK-V-LABEL: define dso_local [2 x i64] @fh3 +// CHECK-V-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT3:%.*]], align 4 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-V-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-V-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-V-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-V-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-V-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-V-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-V-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-V-NEXT: store 
bfloat [[TMP2]], ptr [[C3]], align 4 +// CHECK-V-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-V-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 5 +// CHECK-V-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 4 +// CHECK-V-NEXT: [[TMP4:%.*]] = load [2 x i64], ptr [[RETVAL]], align 4 +// CHECK-V-NEXT: ret [2 x i64] [[TMP4]] +// +// CHECK-MA-LABEL: define dso_local [2 x i64] @fh3 +// CHECK-MA-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT3:%.*]], align 4 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-MA-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-MA-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-MA-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-MA-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-MA-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-MA-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-MA-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 4 +// CHECK-MA-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-MA-NEXT: [[D4:%.*]] 
= getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 5 +// CHECK-MA-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 4 +// CHECK-MA-NEXT: [[TMP4:%.*]] = load [2 x i64], ptr [[RETVAL]], align 4 +// CHECK-MA-NEXT: ret [2 x i64] [[TMP4]] +// +struct floatbfloat3 fh3(float a, __bf16 b, __bf16 c, __bf16 d) { + struct floatbfloat3 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + return x; +} + +struct bfloat5 { + __bf16 a; + __bf16 b; + __bf16 c; + __bf16 d; + __bf16 e; +}; + +// CHECK-LABEL: define dso_local void @h5 +// CHECK-SAME: (ptr noalias sret([[STRUCT_BFLOAT5:%.*]]) align 2 [[AGG_RESULT:%.*]], bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]], bfloat noundef [[E:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[E_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[E]], ptr [[E_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 0 +// CHECK-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 2 +// CHECK-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load 
bfloat, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 4 +// CHECK-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 6 +// CHECK-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2 +// CHECK-NEXT: [[TMP4:%.*]] = load bfloat, ptr [[E_ADDR]], align 2 +// CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 8 +// CHECK-NEXT: store bfloat [[TMP4]], ptr [[E5]], align 2 +// CHECK-NEXT: ret void +// +// CHECK-V-LABEL: define dso_local void @h5 +// CHECK-V-SAME: (ptr noalias sret([[STRUCT_BFLOAT5:%.*]]) align 2 [[AGG_RESULT:%.*]], bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]], bfloat noundef [[E:%.*]]) #[[ATTR0]] { +// CHECK-V-NEXT: entry: +// CHECK-V-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// CHECK-V-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: [[E_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-V-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// CHECK-V-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-V-NEXT: store bfloat [[E]], ptr [[E_ADDR]], align 2 +// CHECK-V-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-V-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 0 +// CHECK-V-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-V-NEXT: [[TMP1:%.*]] = load 
bfloat, ptr [[B_ADDR]], align 2 +// CHECK-V-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 2 +// CHECK-V-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-V-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-V-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 4 +// CHECK-V-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-V-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-V-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 6 +// CHECK-V-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2 +// CHECK-V-NEXT: [[TMP4:%.*]] = load bfloat, ptr [[E_ADDR]], align 2 +// CHECK-V-NEXT: [[E5:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 8 +// CHECK-V-NEXT: store bfloat [[TMP4]], ptr [[E5]], align 2 +// CHECK-V-NEXT: ret void +// +// CHECK-MA-LABEL: define dso_local void @h5 +// CHECK-MA-SAME: (ptr noalias sret([[STRUCT_BFLOAT5:%.*]]) align 2 [[AGG_RESULT:%.*]], bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]], bfloat noundef [[E:%.*]]) #[[ATTR0]] { +// CHECK-MA-NEXT: entry: +// CHECK-MA-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// CHECK-MA-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: [[E_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-MA-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// CHECK-MA-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-MA-NEXT: store bfloat [[E]], ptr [[E_ADDR]], align 2 +// 
CHECK-MA-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-MA-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 0 +// CHECK-MA-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-MA-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-MA-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 2 +// CHECK-MA-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-MA-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-MA-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 4 +// CHECK-MA-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-MA-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-MA-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 6 +// CHECK-MA-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2 +// CHECK-MA-NEXT: [[TMP4:%.*]] = load bfloat, ptr [[E_ADDR]], align 2 +// CHECK-MA-NEXT: [[E5:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 8 +// CHECK-MA-NEXT: store bfloat [[TMP4]], ptr [[E5]], align 2 +// CHECK-MA-NEXT: ret void +// +struct bfloat5 h5(__bf16 a, __bf16 b, __bf16 c, __bf16 d, __bf16 e) { + struct bfloat5 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + x.e = e; + return x; +} Index: clang/test/CodeGen/RISCV/bfloat-mangle.cpp =================================================================== --- /dev/null +++ clang/test/CodeGen/RISCV/bfloat-mangle.cpp @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zfbfmin -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-RV64 +// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zfbfmin -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-RV32 +// RUN: %clang_cc1 -triple riscv64 
-target-feature +experimental-zvfbfmin -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-RV64-V +// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zvfbfmin -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-RV32-V +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zvfbfwma -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-RV64-MA +// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zvfbfwma -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-RV32-MA + +// CHECK-RV64-LABEL: define dso_local void @_Z3fooDF16b +// CHECK-RV64-SAME: (bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-RV64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-RV64-NEXT: ret void +// +// CHECK-RV32-LABEL: define dso_local void @_Z3fooDF16b +// CHECK-RV32-SAME: (bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-RV32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-RV32-NEXT: ret void +// +// CHECK-RV64-V-LABEL: define dso_local void @_Z3fooDF16b +// CHECK-RV64-V-SAME: (bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-V-NEXT: entry: +// CHECK-RV64-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-RV64-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-RV64-V-NEXT: ret void +// +// CHECK-RV32-V-LABEL: define dso_local void @_Z3fooDF16b +// CHECK-RV32-V-SAME: (bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV32-V-NEXT: entry: +// CHECK-RV32-V-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-RV32-V-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-RV32-V-NEXT: ret void +// +// CHECK-RV64-MA-LABEL: define dso_local void @_Z3fooDF16b +// CHECK-RV64-MA-SAME: (bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-MA-NEXT: entry: +// CHECK-RV64-MA-NEXT: 
[[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-RV64-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-RV64-MA-NEXT: ret void +// +// CHECK-RV32-MA-LABEL: define dso_local void @_Z3fooDF16b +// CHECK-RV32-MA-SAME: (bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV32-MA-NEXT: entry: +// CHECK-RV32-MA-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-RV32-MA-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-RV32-MA-NEXT: ret void +// +void foo(__bf16 b) {} Index: clang/test/Sema/vector-decl-crash.c =================================================================== --- clang/test/Sema/vector-decl-crash.c +++ /dev/null @@ -1,7 +0,0 @@ -// RUN: %clang_cc1 %s -fsyntax-only -verify -triple riscv64-unknown-unknown - -// GH50171 -// This would previously crash when __bf16 was not a supported type. -__bf16 v64bf __attribute__((vector_size(128))); // expected-error {{__bf16 is not supported on this target}} \ - expected-error {{vector size not an integral multiple of component size}} - Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -431,6 +431,9 @@ if (Subtarget.hasStdExtZfa()) setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + + if (Subtarget.hasStdExtZfbfmin()) + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); } if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())