diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6106,6 +6106,8 @@ NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0), NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0), NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0), + NEONMAP1(vbsl_v, aarch64_neon_bsl, 0), + NEONMAP1(vbslq_v, aarch64_neon_bsl, 0), NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), @@ -7567,7 +7569,16 @@ Function *F = CGM.getIntrinsic(Int, Tys); return EmitNeonCall(F, Ops, "vcvtfp2bf"); } - + case NEON::BI__builtin_neon_vbsl_v: + case NEON::BI__builtin_neon_vbslq_v: { + // Even though LLVM IR intrinsic defines vbsl with llvm_anyvector_ty, only + // v8i8 and v16i8 are valid candidates for codegen here. + auto *InputTy = + llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::Type *Tys[1] = {InputTy}; + Function *F = CGM.getIntrinsic(Int, Tys); + return EmitNeonCall(F, Ops, "vbsl"); + } } assert(Int && "Expected valid intrinsic number"); diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -959,308 +959,265 @@ } // CHECK-LABEL: @test_vbsl_s8( -// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, -// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 -// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK: ret <8 x i8> [[VBSL2_I]] +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) +// CHECK: ret <8 x i8> [[VBSL_I]] int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { return vbsl_s8(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_s16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> -// CHECK: ret <8 x i8> [[TMP4]] +// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <4 x i16> +// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// CHECK: ret <8 x i8> [[TMP4]] +// int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { return (int8x8_t)vbsl_s16(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_s32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <2 x i32> [[VBSL5_I]] +// CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <2 x i32> +// CHECK: ret <2 x i32> [[TMP3]] +// int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { return vbsl_s32(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_s64( -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <1 x i64> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <1 x i64> +// CHECK: ret <1 x i64> [[TMP3]] +// int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) { return vbsl_s64(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_u8( -// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, -// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 -// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK: ret <8 x i8> [[VBSL2_I]] +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) +// CHECK: ret <8 x i8> [[VBSL_I]] +// uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { return vbsl_u8(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_u16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <4 x i16> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <4 x i16> +// CHECK: ret <4 x i16> [[TMP3]] +// uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { return vbsl_u16(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_u32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <2 x i32> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// CHECK: [[TMP1:%.*]] = 
bitcast <2 x i32> %v2 to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <2 x i32> +// CHECK: ret <2 x i32> [[TMP3]] +// uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { return vbsl_u32(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_u64( -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <1 x i64> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <1 x i64> +// CHECK: ret <1 x i64> [[TMP3]] +// uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { return vbsl_u64(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_f32( -// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8> -// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]] -// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]] -// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP5]] +// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <2 x float> +// CHECK: ret <2 x float> [[TMP3]] +// float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) { return vbsl_f32(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_f64( -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8> -// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> -// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] -// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double> -// CHECK: ret <1 x double> [[TMP4]] +// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call 
<8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <1 x double> +// CHECK: ret <1 x double> [[TMP3]] +// float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) { return vbsl_f64(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_p8( -// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, -// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 -// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK: ret <8 x i8> [[VBSL2_I]] +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) +// CHECK: ret <8 x i8> [[VBSL_I]] +// poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) { return vbsl_p8(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_p16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <4 x i16> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <4 x i16> +// CHECK: ret <4 x i16> [[TMP3]] +// poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { return vbsl_p16(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_s8( -// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, -// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 -// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK: ret <16 x i8> [[VBSL2_I]] +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) +// CHECK: ret <16 x i8> [[VBSL_I]] +// int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { return vbslq_s8(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_s16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <8 x i16> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <8 x i16> +// CHECK: ret <8 x i16> [[TMP3]] +// int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { return vbslq_s16(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_s32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = 
bitcast <4 x i32> %v3 to <16 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <4 x i32> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <4 x i32> +// CHECK: ret <4 x i32> [[TMP3]] +// int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { return vbslq_s32(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_s64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <2 x i64> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <2 x i64> +// CHECK: ret <2 x i64> [[TMP3]] +// int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) { return vbslq_s64(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_u8( -// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, -// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 -// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK: ret <16 x i8> [[VBSL2_I]] +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) +// CHECK: ret <16 x i8> [[VBSL_I]] +// uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { return vbslq_u8(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_u16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <8 x i16> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <8 x i16> +// CHECK: ret <8 x i16> [[TMP3]] +// uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { return vbslq_u16(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_u32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x 
i32> %v3 to <16 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <4 x i32> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <4 x i32> +// CHECK: ret <4 x i32> [[TMP3]] +// int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { return vbslq_s32(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_u64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <2 x i64> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <2 x i64> +// CHECK: ret <2 x i64> [[TMP3]] +// uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { return vbslq_u64(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> -// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] -// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP4]] +// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <4 x float> +// CHECK: ret <4 x float> [[TMP3]] +// float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { return vbslq_f32(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_p8( -// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, -// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 -// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK: ret <16 x i8> [[VBSL2_I]] +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) +// CHECK: ret <16 x i8> [[VBSL_I]] +// poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t 
v3) { return vbslq_p8(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_p16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> -// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 -// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <8 x i16> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <8 x i16> +// CHECK: ret <8 x i16> [[TMP3]] +// poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { return vbslq_p16(v1, v2, v3); } // CHECK-LABEL: @test_vbslq_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> -// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, -// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] -// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double> -// CHECK: ret <2 x double> [[TMP4]] +// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> +// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <2 x double> +// CHECK: ret <2 x double> [[TMP3]] +// float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) { return vbslq_f64(v1, v2, v3); } diff --git a/clang/test/CodeGen/aarch64-poly64.c b/clang/test/CodeGen/aarch64-poly64.c --- a/clang/test/CodeGen/aarch64-poly64.c +++ b/clang/test/CodeGen/aarch64-poly64.c @@ -41,21 +41,23 @@ } // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbsl_p64(<1 x i64> noundef %a, <1 x i64> noundef %b, <1 x i64> noundef %c) #0 { -// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %a, %b -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, -// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c -// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] -// CHECK: ret <1 x i64> [[VBSL5_I]] +// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8> +// CHECK: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <1 x i64> +// CHECK: ret <1 x i64> [[TMP3]] poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) { return vbsl_p64(a, b, c); } // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbslq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) #0 { -// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b -// CHECK: 
[[TMP3:%.*]] = xor <2 x i64> %a,
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
-// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <2 x i64> [[VBSL5_I]]
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP3]]
 poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
   return vbslq_p64(a, b, c);
 }
diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
--- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -2011,14 +2011,9 @@
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
-// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]]
-// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]],
-// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
-// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half>
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
+// CHECK-NEXT: [[VBSL_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_I]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
 //
 float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
   return vbsl_f16(a, b, c);
@@ -2030,14 +2025,9 @@
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
-// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]]
-// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]],
-// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
-// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half>
-// CHECK-NEXT: ret <8 x half> [[TMP4]]
+// CHECK-NEXT: [[VBSL_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL_I]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP3]]
 //
 float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
   return vbslq_f16(a, b, c);
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -492,6 +492,8 @@
   def int_aarch64_neon_frint64x : AdvSIMD_1FloatArg_Intrinsic;
   def int_aarch64_neon_frint64z : AdvSIMD_1FloatArg_Intrinsic;
 
+  def int_aarch64_neon_bsl : AdvSIMD_3VectorArg_Intrinsic;
+
   // Scalar FP->Int conversions
 
   // Vector FP Inexact Narrowing
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4966,7 +4966,7 @@
 // It is expanded into BSL/BIT/BIF after register allocation.
 defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                   (and (vnot node:$LHS), node:$RHS))>>;
-defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
+defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl", int_aarch64_neon_bsl>;
 defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
 defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-vbsl.ll b/llvm/test/CodeGen/AArch64/aarch64-vbsl.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-vbsl.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o - | FileCheck %s
+
+define <8 x i8> @vbsl.v8i8(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2) {
+entry:
+; CHECK-LABEL: vbsl.v8i8
+; CHECK: bsl v0.8b, v1.8b, v2.8b
+  %vbsl.i = tail call <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2)
+  ret <8 x i8> %vbsl.i
+}
+
+define <16 x i8> @vbslq.v16i8(<16 x i8> %v0, <16 x i8> %v1, <16 x i8> %v2) {
+entry:
+; CHECK-LABEL: vbslq.v16i8
+; CHECK: bsl v0.16b, v1.16b, v2.16b
+  %vbsl.i = tail call <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8> %v0, <16 x i8> %v1, <16 x i8> %v2)
+  ret <16 x i8> %vbsl.i
+}
+
+declare <8 x i8> @llvm.aarch64.neon.bsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
+declare <16 x i8> @llvm.aarch64.neon.bsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+
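Note (not part of the patch): a quick way to see the new lowering end to end, mirroring what the updated clang tests assert. The file name and exact clang invocation below are illustrative; with this change applied, the emitted IR for vbslq_u8 should contain a call to @llvm.aarch64.neon.bsl.v16i8 instead of the previous and/xor/or expansion.

  // bsl_smoke.c -- illustrative example only, not added by this patch.
  // Inspect the IR with something like:
  //   clang --target=aarch64-none-linux-gnu -O1 -S -emit-llvm bsl_smoke.c -o -
  #include <arm_neon.h>

  // Bitwise select: take bits of a where mask is 1, bits of b where mask is 0.
  uint8x16_t bsl_smoke(uint8x16_t mask, uint8x16_t a, uint8x16_t b) {
    return vbslq_u8(mask, a, b);
  }

Dropping -emit-llvm and compiling to assembly should then show a single bsl v0.16b instruction, which is what the new llvm/test/CodeGen/AArch64/aarch64-vbsl.ll test checks at the llc level.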