diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -3124,20 +3124,6 @@
 return R;
 }
-// Emit code to clear the padding bits when returning or passing as an argument
-// a 16-bit floating-point value.
-llvm::Value *CodeGenFunction::EmitCMSEClearFP16(llvm::Value *Src) {
- llvm::Type *RetTy = Src->getType();
- assert(RetTy->isFloatTy() ||
- (RetTy->isIntegerTy() && RetTy->getIntegerBitWidth() == 32));
- if (RetTy->isFloatTy()) {
- llvm::Value *T0 = Builder.CreateBitCast(Src, Builder.getIntNTy(32));
- llvm::Value *T1 = Builder.CreateAnd(T0, 0xffff, "cmse.clear");
- return Builder.CreateBitCast(T1, RetTy);
- }
- return Builder.CreateAnd(Src, 0xffff, "cmse.clear");
-}
-
 void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
 bool EmitRetDbgLoc,
 SourceLocation EndLoc) {
@@ -3307,17 +3293,10 @@
 if (CurFuncDecl && CurFuncDecl->hasAttr<CmseNSEntryAttr>()) {
 // For certain return types, clear padding bits, as they may reveal
 // sensitive information.
- const Type *RTy = RetTy.getCanonicalType().getTypePtr();
- if (RTy->isFloat16Type() || RTy->isHalfType()) {
- // 16-bit floating-point types are passed in a 32-bit integer or float,
- // with unspecified upper bits.
- RV = EmitCMSEClearFP16(RV);
- } else {
- // Small struct/union types are passed as integers.
- auto *ITy = dyn_cast<llvm::IntegerType>(RV->getType());
- if (ITy != nullptr && isa<RecordType>(RetTy.getCanonicalType()))
- RV = EmitCMSEClearRecord(RV, ITy, RetTy);
- }
+ // Small struct/union types are passed as integers.
+ auto *ITy = dyn_cast<llvm::IntegerType>(RV->getType());
+ if (ITy != nullptr && isa<RecordType>(RetTy.getCanonicalType()))
+ RV = EmitCMSEClearRecord(RV, ITy, RetTy);
 }
 EmitReturnValueCheck(RV);
 Ret = Builder.CreateRet(RV);
@@ -4620,17 +4599,10 @@
 if (CallInfo.isCmseNSCall()) {
 // For certain parameter types, clear padding bits, as they may reveal
 // sensitive information.
- const Type *PTy = I->Ty.getCanonicalType().getTypePtr();
- // 16-bit floating-point types are passed in a 32-bit integer or
- // float, with unspecified upper bits.
- if (PTy->isFloat16Type() || PTy->isHalfType()) {
- Load = EmitCMSEClearFP16(Load);
- } else {
- // Small struct/union types are passed as integer arrays.
- auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType());
- if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
- Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
- }
+ // Small struct/union types are passed as integer arrays.
+ auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType());
+ if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
+ Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
 }
 IRCallArgs[FirstIRArg] = Load;
 }
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3923,7 +3923,6 @@
 QualType RTy);
 llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::ArrayType *ATy,
 QualType RTy);
- llvm::Value *EmitCMSEClearFP16(llvm::Value *V);
 llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
 unsigned LLVMIntrinsic,
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -6265,17 +6265,6 @@
 if (isIllegalVectorType(Ty))
 return coerceIllegalVector(Ty);
- // _Float16 and __fp16 get passed as if it were an int or float, but
- // with the top 16 bits unspecified. This is not done for OpenCL as it handles
- // the half type natively, and does not need to interwork with AAPCS code.
- if ((Ty->isFloat16Type() || Ty->isHalfType()) && - !getContext().getLangOpts().NativeHalfArgsAndReturns) { - llvm::Type *ResType = IsAAPCS_VFP ? - llvm::Type::getFloatTy(getVMContext()) : - llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - // __bf16 gets passed using the bfloat IR type, or using i32 but // with the top 16 bits unspecified. if (Ty->isBFloat16Type() && IsFloatABISoftFP) { @@ -6486,17 +6475,6 @@ return coerceIllegalVector(RetTy); } - // _Float16 and __fp16 get returned as if it were an int or float, but with - // the top 16 bits unspecified. This is not done for OpenCL as it handles the - // half type natively, and does not need to interwork with AAPCS code. - if ((RetTy->isFloat16Type() || RetTy->isHalfType()) && - !getContext().getLangOpts().NativeHalfArgsAndReturns) { - llvm::Type *ResType = IsAAPCS_VFP ? - llvm::Type::getFloatTy(getVMContext()) : - llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - // if we're using the softfp float abi, __bf16 get returned as if it were an // int but with the top 16 bits unspecified. if (RetTy->isBFloat16Type()) { diff --git a/clang/test/CodeGen/arm-fp16-arguments.c b/clang/test/CodeGen/arm-fp16-arguments.c --- a/clang/test/CodeGen/arm-fp16-arguments.c +++ b/clang/test/CodeGen/arm-fp16-arguments.c @@ -1,51 +1,33 @@ // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=NATIVE +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=NATIVE __fp16 g; void t1(__fp16 a) { g = a; } -// SOFT: define void @t1(i32 [[PARAM:%.*]]) -// SOFT: [[TRUNC:%.*]] = trunc i32 [[PARAM]] to i16 -// HARD: define arm_aapcs_vfpcc void @t1(float [[PARAM:%.*]]) -// HARD: [[BITCAST:%.*]] = bitcast float [[PARAM]] to i32 -// HARD: [[TRUNC:%.*]] = trunc i32 [[BITCAST]] to i16 -// CHECK: store i16 [[TRUNC]], i16* bitcast (half* @g to i16*) +// SOFT: define void @t1(half [[PARAM:%.*]]) +// HARD: define arm_aapcs_vfpcc void @t1(half [[PARAM:%.*]]) // NATIVE: define void @t1(half [[PARAM:%.*]]) -// NATIVE: store half [[PARAM]], half* @g +// CHECK: store half [[PARAM]], half* @g __fp16 t2() { return g; } -// SOFT: define i32 @t2() -// HARD: define arm_aapcs_vfpcc float @t2() +// SOFT: define half @t2() +// HARD: define arm_aapcs_vfpcc half @t2() // NATIVE: define half @t2() -// CHECK: [[LOAD:%.*]] = load i16, i16* bitcast (half* @g to i16*) -// CHECK: [[ZEXT:%.*]] = zext i16 [[LOAD]] to i32 -// SOFT: ret i32 [[ZEXT]] -// HARD: [[BITCAST:%.*]] = bitcast i32 [[ZEXT]] to float -// HARD: ret float [[BITCAST]] -// NATIVE: [[LOAD:%.*]] = load half, half* @g -// NATIVE: ret half [[LOAD]] +// CHECK: [[LOAD:%.*]] = load half, half* @g +// CHECK: ret half [[LOAD]] _Float16 h; void t3(_Float16 a) { h = a; } -// SOFT: define void @t3(i32 [[PARAM:%.*]]) -// SOFT: [[TRUNC:%.*]] = trunc i32 [[PARAM]] to i16 -// HARD: define arm_aapcs_vfpcc void @t3(float [[PARAM:%.*]]) -// HARD: 
[[BITCAST:%.*]] = bitcast float [[PARAM]] to i32 -// HARD: [[TRUNC:%.*]] = trunc i32 [[BITCAST]] to i16 -// CHECK: store i16 [[TRUNC]], i16* bitcast (half* @h to i16*) +// SOFT: define void @t3(half [[PARAM:%.*]]) +// HARD: define arm_aapcs_vfpcc void @t3(half [[PARAM:%.*]]) // NATIVE: define void @t3(half [[PARAM:%.*]]) -// NATIVE: store half [[PARAM]], half* @h +// CHECK: store half [[PARAM]], half* @h _Float16 t4() { return h; } -// SOFT: define i32 @t4() -// HARD: define arm_aapcs_vfpcc float @t4() +// SOFT: define half @t4() +// HARD: define arm_aapcs_vfpcc half @t4() // NATIVE: define half @t4() -// CHECK: [[LOAD:%.*]] = load i16, i16* bitcast (half* @h to i16*) -// CHECK: [[ZEXT:%.*]] = zext i16 [[LOAD]] to i32 -// SOFT: ret i32 [[ZEXT]] -// HARD: [[BITCAST:%.*]] = bitcast i32 [[ZEXT]] to float -// HARD: ret float [[BITCAST]] -// NATIVE: [[LOAD:%.*]] = load half, half* @h -// NATIVE: ret half [[LOAD]] +// CHECK: [[LOAD:%.*]] = load half, half* @h +// CHECK: ret half [[LOAD]] diff --git a/clang/test/CodeGen/arm-mve-intrinsics/compare.c b/clang/test/CodeGen/arm-mve-intrinsics/compare.c --- a/clang/test/CodeGen/arm-mve-intrinsics/compare.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/compare.c @@ -134,15 +134,12 @@ // CHECK-LABEL: @test_vcmpeqq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 -// CHECK-NEXT: ret i16 [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] // mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b) { @@ -433,18 +430,15 @@ // CHECK-LABEL: @test_vcmpeqq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]]) -// CHECK-NEXT: [[TMP7:%.*]] = trunc 
i32 [[TMP6]] to i16 -// CHECK-NEXT: ret i16 [[TMP7]] +// CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] // mve_pred16_t test_vcmpeqq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) { @@ -732,15 +726,12 @@ // CHECK-LABEL: @test_vcmpneq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 -// CHECK-NEXT: ret i16 [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] // mve_pred16_t test_vcmpneq_n_f16(float16x8_t a, float16_t b) { @@ -1031,18 +1022,15 @@ // CHECK-LABEL: @test_vcmpneq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP4:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]]) -// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 -// CHECK-NEXT: ret i16 [[TMP7]] +// CHECK-NEXT: [[TMP2:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] // mve_pred16_t test_vcmpneq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) { @@ -1330,15 +1318,12 @@ // CHECK-LABEL: @test_vcmpgeq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// 
CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 -// CHECK-NEXT: ret i16 [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] // mve_pred16_t test_vcmpgeq_n_f16(float16x8_t a, float16_t b) { @@ -1629,18 +1614,15 @@ // CHECK-LABEL: @test_vcmpgeq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP4:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]]) -// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 -// CHECK-NEXT: ret i16 [[TMP7]] +// CHECK-NEXT: [[TMP2:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] // mve_pred16_t test_vcmpgeq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) { @@ -1928,15 +1910,12 @@ // CHECK-LABEL: @test_vcmpgtq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 -// CHECK-NEXT: ret i16 [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc 
i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] // mve_pred16_t test_vcmpgtq_n_f16(float16x8_t a, float16_t b) { @@ -2227,18 +2206,15 @@ // CHECK-LABEL: @test_vcmpgtq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]]) -// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 -// CHECK-NEXT: ret i16 [[TMP7]] +// CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] // mve_pred16_t test_vcmpgtq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) { @@ -2478,15 +2454,12 @@ // CHECK-LABEL: @test_vcmpleq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 -// CHECK-NEXT: ret i16 [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] // mve_pred16_t test_vcmpleq_n_f16(float16x8_t a, float16_t b) { @@ -2666,18 +2639,15 @@ // CHECK-LABEL: @test_vcmpleq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP4:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]]) -// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 -// CHECK-NEXT: ret i16 [[TMP7]] +// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] // mve_pred16_t test_vcmpleq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) { @@ -2854,15 +2824,12 @@ // CHECK-LABEL: @test_vcmpltq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 -// CHECK-NEXT: ret i16 [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] // mve_pred16_t test_vcmpltq_n_f16(float16x8_t a, float16_t b) { @@ -3042,18 +3009,15 @@ // CHECK-LABEL: @test_vcmpltq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP5]]) -// CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 -// CHECK-NEXT: ret i16 [[TMP7]] +// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] // mve_pred16_t test_vcmpltq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp --- a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp +++ b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp @@ -78,19 +78,12 @@ // CHECK-LABEL: @_Z18test_vcmpeqq_n_f1619__simd128_float16_tDh( // CHECK-NEXT: entry: -// CHECK-NEXT: [[B:%.*]] = alloca half, align 2 -// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4 -// CHECK-NEXT: store float [[B_COERCE:%.*]], float* [[TMP]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast half* [[B]] to i8* -// CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP0]], i8* align 4 [[TMP1]], i32 2, i1 false) -// CHECK-NEXT: [[B1:%.*]] = load half, half* [[B]], align 2 -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 -// CHECK-NEXT: ret i16 [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] // mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b) { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/dup.c b/clang/test/CodeGen/arm-mve-intrinsics/dup.c --- a/clang/test/CodeGen/arm-mve-intrinsics/dup.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/dup.c @@ -6,10 +6,7 @@ // CHECK-LABEL: @test_vdupq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer // CHECK-NEXT: ret <8 x half> [[DOTSPLAT]] // @@ -97,15 +94,12 @@ // CHECK-LABEL: @test_vdupq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0 // CHECK-NEXT: 
[[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x half> [[DOTSPLAT]], <8 x half> [[INACTIVE:%.*]] -// CHECK-NEXT: ret <8 x half> [[TMP4]] +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> [[INACTIVE:%.*]] +// CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vdupq_m_n_f16(float16x8_t inactive, float16_t a, mve_pred16_t p) { @@ -244,12 +238,9 @@ // CHECK-LABEL: @test_vdupq_x_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer // CHECK-NEXT: ret <8 x half> [[DOTSPLAT]] // diff --git a/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c b/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c --- a/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c @@ -7,10 +7,7 @@ // CHECK-LABEL: @test_vgetq_lane_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x half> [[A:%.*]], i32 2 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[TMP0]] to i16 -// CHECK-NEXT: [[TMP_0_INSERT_EXT:%.*]] = zext i16 [[TMP1]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP_0_INSERT_EXT]] to float -// CHECK-NEXT: ret float [[TMP2]] +// CHECK-NEXT: ret half [[TMP0]] // float16_t test_vgetq_lane_f16(float16x8_t a) { @@ -149,11 +146,8 @@ // CHECK-LABEL: @test_vsetq_lane_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x half> [[B:%.*]], half [[TMP1]], i32 4 -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x half> [[B:%.*]], half [[A:%.*]], i32 4 +// CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vsetq_lane_f16(float16_t a, float16x8_t b) { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c --- a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c @@ -32,13 +32,10 @@ // CHECK-LABEL: @test_vfmaq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[C_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x 
half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { #ifdef POLYMORPHIC @@ -65,13 +62,10 @@ // CHECK-LABEL: @test_vfmasq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[C_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vfmasq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { #ifdef POLYMORPHIC @@ -319,7 +313,7 @@ // CHECK-LABEL: @test_vqdmlahq_n_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]]) // CHECK-NEXT: ret <16 x i8> [[TMP1]] // int8x16_t test_vqdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) { @@ -333,7 +327,7 @@ // CHECK-LABEL: @test_vqdmlahq_n_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]]) // CHECK-NEXT: ret <8 x i16> [[TMP1]] // int16x8_t test_vqdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { @@ -346,7 +340,7 @@ // CHECK-LABEL: @test_vqdmlahq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]]) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { @@ -401,7 +395,7 @@ // CHECK-LABEL: @test_vqrdmlahq_n_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]]) // CHECK-NEXT: ret <16 x i8> [[TMP1]] // int8x16_t test_vqrdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) { @@ -415,7 +409,7 @@ // CHECK-LABEL: @test_vqrdmlahq_n_s16( // 
CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]]) // CHECK-NEXT: ret <8 x i16> [[TMP1]] // int16x8_t test_vqrdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { @@ -428,7 +422,7 @@ // CHECK-LABEL: @test_vqrdmlahq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]]) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vqrdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { @@ -512,15 +506,12 @@ // CHECK-LABEL: @test_vfmaq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[C_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]], <8 x i1> [[TMP3]]) -// CHECK-NEXT: ret <8 x half> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vfmaq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) { #ifdef POLYMORPHIC @@ -549,15 +540,12 @@ // CHECK-LABEL: @test_vfmasq_m_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[C_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[C:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP3]]) -// CHECK-NEXT: ret <8 x half> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x 
half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vfmasq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) { #ifdef POLYMORPHIC @@ -621,7 +609,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) // CHECK-NEXT: ret <16 x i8> [[TMP3]] // int8x16_t test_vmlaq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) { @@ -637,7 +625,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) // CHECK-NEXT: ret <8 x i16> [[TMP3]] // int16x8_t test_vmlaq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) { @@ -652,7 +640,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vmlaq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) { @@ -668,7 +656,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) // CHECK-NEXT: ret <16 x i8> [[TMP3]] // uint8x16_t test_vmlaq_m_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c, mve_pred16_t p) { @@ -684,7 +672,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) // CHECK-NEXT: ret <8 x i16> [[TMP3]] // uint16x8_t test_vmlaq_m_n_u16(uint16x8_t a, uint16x8_t b, uint16_t 
c, mve_pred16_t p) { @@ -699,7 +687,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmlaq_m_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c, mve_pred16_t p) { @@ -809,7 +797,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) // CHECK-NEXT: ret <16 x i8> [[TMP3]] // int8x16_t test_vqdmlahq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) { @@ -825,7 +813,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) // CHECK-NEXT: ret <8 x i16> [[TMP3]] // int16x8_t test_vqdmlahq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) { @@ -840,7 +828,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vqdmlahq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) { @@ -903,7 +891,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) // CHECK-NEXT: ret <16 x i8> [[TMP3]] // int8x16_t test_vqrdmlahq_m_n_s8(int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) { @@ -919,7 +907,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) // CHECK-NEXT: ret <8 x i16> [[TMP3]] // int16x8_t test_vqrdmlahq_m_n_s16(int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) { @@ -934,7 +922,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[C:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vqrdmlahq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c @@ -114,13 +114,10 @@ // CHECK-LABEL: @test_vaddq_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b) { @@ -187,15 +184,12 @@ // CHECK-LABEL: @test_vaddq_x_n_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP3]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t 
test_vaddq_x_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c @@ -264,17 +264,8 @@ // CHECK-LABEL: @test_vminnmvq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmv.f16.v8f16(half [[TMP1]], <8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16 -// CHECK-NEXT: [[TMP4:%.*]] = bitcast float undef to i32 -// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP3]] to i32 -// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP4]], -65536 -// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]] -// CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float -// CHECK-NEXT: ret float [[TMP5]] +// CHECK-NEXT: [[TMP0:%.*]] = call half @llvm.arm.mve.minnmv.f16.v8f16(half [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: ret half [[TMP0]] // float16_t test_vminnmvq_f16(float16_t a, float16x8_t b) { #ifdef POLYMORPHIC @@ -299,17 +290,8 @@ // CHECK-LABEL: @test_vminnmavq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmav.f16.v8f16(half [[TMP1]], <8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16 -// CHECK-NEXT: [[TMP4:%.*]] = bitcast float undef to i32 -// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP3]] to i32 -// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP4]], -65536 -// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]] -// CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float -// CHECK-NEXT: ret float [[TMP5]] +// CHECK-NEXT: [[TMP0:%.*]] = call half @llvm.arm.mve.minnmav.f16.v8f16(half [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: ret half [[TMP0]] // float16_t test_vminnmavq_f16(float16_t a, float16x8_t b) { #ifdef POLYMORPHIC @@ -334,17 +316,8 @@ // CHECK-LABEL: @test_vmaxnmvq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32 -// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half -// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmv.f16.v8f16(half [[TMP1]], <8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16 -// CHECK-NEXT: [[TMP4:%.*]] = bitcast float undef to i32 -// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP3]] to i32 -// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP4]], -65536 -// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]] -// CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float -// CHECK-NEXT: ret float [[TMP5]] +// CHECK-NEXT: [[TMP0:%.*]] = call half @llvm.arm.mve.maxnmv.f16.v8f16(half [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: ret half [[TMP0]] // float16_t test_vmaxnmvq_f16(float16_t a, float16x8_t b) 
 {
 #ifdef POLYMORPHIC
@@ -369,17 +342,8 @@
 // CHECK-LABEL: @test_vmaxnmavq_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
-// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
-// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmav.f16.v8f16(half [[TMP1]], <8 x half> [[B:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast float undef to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP3]] to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP4]], -65536
-// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
-// CHECK-NEXT: ret float [[TMP5]]
+// CHECK-NEXT: [[TMP0:%.*]] = call half @llvm.arm.mve.maxnmav.f16.v8f16(half [[A:%.*]], <8 x half> [[B:%.*]])
+// CHECK-NEXT: ret half [[TMP0]]
 //
 float16_t test_vmaxnmavq_f16(float16_t a, float16x8_t b)
 {
 #ifdef POLYMORPHIC
@@ -698,19 +662,10 @@
 // CHECK-LABEL: @test_vminnmvq_p_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
-// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
-// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half [[TMP1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast half [[TMP4]] to i16
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast float undef to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP6]], -65536
-// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
-// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
-// CHECK-NEXT: ret float [[TMP7]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret half [[TMP2]]
 //
 float16_t test_vminnmvq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p)
 {
 #ifdef POLYMORPHIC
@@ -737,19 +692,10 @@
 // CHECK-LABEL: @test_vminnmavq_p_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
-// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
-// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half [[TMP1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast half [[TMP4]] to i16
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast float undef to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP6]], -65536
-// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
-// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
-// CHECK-NEXT: ret float [[TMP7]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret half [[TMP2]]
 //
 float16_t test_vminnmavq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p)
 {
 #ifdef POLYMORPHIC
@@ -776,19 +722,10 @@
 // CHECK-LABEL: @test_vmaxnmvq_p_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
-// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
-// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half [[TMP1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast half [[TMP4]] to i16
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast float undef to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP6]], -65536
-// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
-// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
-// CHECK-NEXT: ret float [[TMP7]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret half [[TMP2]]
 //
 float16_t test_vmaxnmvq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p)
 {
 #ifdef POLYMORPHIC
@@ -815,19 +752,10 @@
 // CHECK-LABEL: @test_vmaxnmavq_p_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
-// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
-// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half [[TMP1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast half [[TMP4]] to i16
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast float undef to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i32
-// CHECK-NEXT: [[TMP2_0_INSERT_MASK:%.*]] = and i32 [[TMP6]], -65536
-// CHECK-NEXT: [[TMP2_0_INSERT_INSERT:%.*]] = or i32 [[TMP2_0_INSERT_MASK]], [[TMP2_0_INSERT_EXT]]
-// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2_0_INSERT_INSERT]] to float
-// CHECK-NEXT: ret float [[TMP7]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
+// CHECK-NEXT: ret half [[TMP2]]
 //
 float16_t test_vmaxnmavq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p)
 {
 #ifdef POLYMORPHIC
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
@@ -308,15 +308,12 @@
 
 // CHECK-LABEL: @test_vmulq_m_n_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
-// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
-// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP3]], <8 x half> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP4]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vmulq_m_n_f16(float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p)
 {
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
--- a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
@@ -114,13 +114,10 @@
 
 // CHECK-LABEL: @test_vsubq_n_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
-// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
 //
 float16x8_t test_vsubq_n_f16(float16x8_t a, float16_t b)
 {
@@ -187,15 +184,12 @@
 
 // CHECK-LABEL: @test_vsubq_x_n_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
-// CHECK-NEXT: [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[B:%.*]], i32 0
 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
-// CHECK-NEXT: [[TMP4:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP3]], <8 x half> undef)
-// CHECK-NEXT: ret <8 x half> [[TMP4]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vsubq_x_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
 {
diff --git a/clang/test/CodeGen/cmse-clear-fp16.c b/clang/test/CodeGen/cmse-clear-fp16.c
deleted file mode 100644
--- a/clang/test/CodeGen/cmse-clear-fp16.c
+++ /dev/null
@@ -1,59 +0,0 @@
-// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -S -emit-llvm \
-// RUN: -fallow-half-arguments-and-returns %s -o - | \
-// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOPT-SOFT
-// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse -S -emit-llvm \
-// RUN: -fallow-half-arguments-and-returns %s -o - | \
-// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-OPT-SOFT
-// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -S -emit-llvm \
-// RUN: -fallow-half-arguments-and-returns -mfloat-abi hard %s -o - | \
-// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOPT-HARD
-// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse -S -emit-llvm \
-// RUN: -fallow-half-arguments-and-returns -mfloat-abi hard %s -o - | \
-// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-OPT-HARD
-
-__fp16 g0();
-__attribute__((cmse_nonsecure_entry)) __fp16 f0() {
-  return g0();
-}
-// CHECK: define {{.*}}@f0()
-
-// CHECK-NOPT-SOFT: %[[V0:.*]] = load i32
-// CHECK-NOPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-NOPT-SOFT: ret i32 %[[V1]]
-
-// CHECK-OPT-SOFT: %[[V0:.*]] = tail call {{.*}} @g0
-// CHECK-OPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-OPT-SOFT: ret i32 %[[V1]]
-
-// CHECK-NOPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32
-// CHECK-NOPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-NOPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float
-// CHECK-NOPT-HARD: ret float %[[V2]]
-
-// CHECK-OPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32
-// CHECK-OPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-OPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float
-// CHECK-OPT-HARD: ret float %[[V2]]
-
-void __attribute__((cmse_nonsecure_call)) (*g1)(__fp16);
-__fp16 x;
-void f1() {
-  g1(x);
-}
-// CHECK: define {{.*}}@f1()
-
-// CHECK-NOPT-SOFT: %[[V0:.*]] = load i32
-// CHECK-NOPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-NOPT-SOFT: call {{.*}} void {{.*}}(i32 %[[V1]])
-
-// CHECK-OPT-SOFT: %[[V1:.*]] = zext i16 {{.*}} to i32
-// CHECK-OPT-SOFT: call {{.*}} void {{.*}}(i32 %[[V1]])
-
-// CHECK-NOPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32
-// CHECK-NOPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535
-// CHECK-NOPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float
-// CHECK-NOPT-HARD: call {{.*}}(float %[[V2]])
-
-// CHECK-OPT-HARD: %[[V0:.*]] = zext i16 {{.*}} to i32
-// CHECK-OPT-HARD: %[[V1:.*]] = bitcast i32 %[[V0]] to float
-// CHECK-OPT-HARD: call {{.*}}(float %[[V1]])