Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c @@ -215,14 +215,20 @@ // CHECK-LABEL: @test_svclasta_n_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i16 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clasta.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CHECK-NEXT: ret i16 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_s16u10__SVBool_tsu11__SVInt16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i16 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clasta.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CPP-CHECK-NEXT: ret i16 [[TMP4]] // int16_t test_svclasta_n_s16(svbool_t pg, int16_t fallback, svint16_t data) { @@ -232,14 +238,20 @@ // CHECK-LABEL: @test_svclasta_n_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], 
[[DATA:%.*]]) -// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clasta.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_s32u10__SVBool_tiu11__SVInt32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i32 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clasta.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CPP-CHECK-NEXT: ret i32 [[TMP4]] // int32_t test_svclasta_n_s32(svbool_t pg, int32_t fallback, svint32_t data) { @@ -249,14 +261,20 @@ // CHECK-LABEL: @test_svclasta_n_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i64 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clasta.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CHECK-NEXT: ret i64 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_s64u10__SVBool_tlu11__SVInt64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clasta.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CPP-CHECK-NEXT: ret i64 [[TMP4]] // int64_t test_svclasta_n_s64(svbool_t pg, int64_t fallback, svint64_t data) { @@ -281,14 +299,20 @@ // CHECK-LABEL: @test_svclasta_n_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i16 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clasta.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CHECK-NEXT: ret i16 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_u16u10__SVBool_ttu12__SVUint16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i16 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clasta.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// 
CPP-CHECK-NEXT: ret i16 [[TMP4]] // uint16_t test_svclasta_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data) { @@ -298,14 +322,20 @@ // CHECK-LABEL: @test_svclasta_n_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clasta.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_u32u10__SVBool_tju12__SVUint32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i32 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clasta.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CPP-CHECK-NEXT: ret i32 [[TMP4]] // uint32_t test_svclasta_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data) { @@ -315,14 +345,20 @@ // CHECK-LABEL: @test_svclasta_n_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i64 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CHECK-NEXT: 
[[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clasta.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CHECK-NEXT: ret i64 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_u64u10__SVBool_tmu12__SVUint64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clasta.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CPP-CHECK-NEXT: ret i64 [[TMP4]] // uint64_t test_svclasta_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data) { Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c @@ -215,14 +215,20 @@ // CHECK-LABEL: @test_svclastb_n_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i16 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CHECK-NEXT: ret i16 [[TMP4]] // // CPP-CHECK-LABEL: 
@_Z19test_svclastb_n_s16u10__SVBool_tsu11__SVInt16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i16 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CPP-CHECK-NEXT: ret i16 [[TMP4]] // int16_t test_svclastb_n_s16(svbool_t pg, int16_t fallback, svint16_t data) { @@ -232,14 +238,20 @@ // CHECK-LABEL: @test_svclastb_n_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clastb.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_s32u10__SVBool_tiu11__SVInt32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i32 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call float 
@llvm.aarch64.sve.clastb.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CPP-CHECK-NEXT: ret i32 [[TMP4]] // int32_t test_svclastb_n_s32(svbool_t pg, int32_t fallback, svint32_t data) { @@ -249,14 +261,20 @@ // CHECK-LABEL: @test_svclastb_n_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i64 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CHECK-NEXT: ret i64 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_s64u10__SVBool_tlu11__SVInt64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CPP-CHECK-NEXT: ret i64 [[TMP4]] // int64_t test_svclastb_n_s64(svbool_t pg, int64_t fallback, svint64_t data) { @@ -281,14 +299,20 @@ // CHECK-LABEL: @test_svclastb_n_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( [[TMP0]], i16 
[[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i16 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CHECK-NEXT: ret i16 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_u16u10__SVBool_ttu12__SVUint16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i16 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CPP-CHECK-NEXT: ret i16 [[TMP4]] // uint16_t test_svclastb_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data) { @@ -298,14 +322,20 @@ // CHECK-LABEL: @test_svclastb_n_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clastb.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_u32u10__SVBool_tju12__SVUint32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i32 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clastb.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CPP-CHECK-NEXT: ret i32 [[TMP4]] // uint32_t test_svclastb_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data) { @@ -315,14 +345,20 @@ // CHECK-LABEL: @test_svclastb_n_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i64 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CHECK-NEXT: ret i64 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_u64u10__SVBool_tmu12__SVUint64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] 
to i64
+// CPP-CHECK-NEXT:    ret i64 [[TMP4]]
//
uint64_t test_svclastb_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data) {
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -796,6 +796,44 @@
   return IC.replaceInstUsesWith(II, Extract);
 }
 
+static Optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
+                                                      IntrinsicInst &II) {
+  // Replace scalar integer CLAST[AB] intrinsic with optimal SIMD&FP variant.
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  Value *Pg = II.getArgOperand(0);
+  Value *Fallback = II.getArgOperand(1);
+  Value *Vec = II.getArgOperand(2);
+  Type *Ty = II.getType();
+
+  if (!Ty->isIntegerTy())
+    return None;
+
+  Type *FPTy;
+  switch (cast<IntegerType>(Ty)->getBitWidth()) {
+  default:
+    return None;
+  case 16:
+    FPTy = Builder.getHalfTy();
+    break;
+  case 32:
+    FPTy = Builder.getFloatTy();
+    break;
+  case 64:
+    FPTy = Builder.getDoubleTy();
+    break;
+  }
+
+  Value *FPFallBack = Builder.CreateBitCast(Fallback, FPTy);
+  auto *FPVTy = VectorType::get(
+      FPTy, cast<VectorType>(Vec->getType())->getElementCount());
+  Value *FPVec = Builder.CreateBitCast(Vec, FPVTy);
+  auto *FPII = Builder.CreateIntrinsic(II.getIntrinsicID(), {FPVec->getType()},
+                                       {Pg, FPFallBack, FPVec});
+  Value *FPIItoInt = Builder.CreateBitCast(FPII, II.getType());
+  return IC.replaceInstUsesWith(II, FPIItoInt);
+}
+
 static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
                                                 IntrinsicInst &II) {
   LLVMContext &Ctx = II.getContext();
@@ -1294,6 +1332,9 @@
   case Intrinsic::aarch64_sve_lasta:
   case Intrinsic::aarch64_sve_lastb:
     return instCombineSVELast(IC, II);
+  case Intrinsic::aarch64_sve_clasta_n:
+  case Intrinsic::aarch64_sve_clastb_n:
+    return instCombineSVECondLast(IC, II);
   case Intrinsic::aarch64_sve_cntd:
     return instCombineSVECntElts(IC, II, 2);
   case Intrinsic::aarch64_sve_cntw:
Index: 
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-clast.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-clast.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+target triple = "aarch64"
+
+define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: @clastb_n_i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[A:%.*]] to half
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 8 x i16> [[B:%.*]] to <vscale x 8 x half>
+; CHECK-NEXT:    [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], half [[TMP1]], <vscale x 8 x half> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast half [[TMP3]] to i16
+; CHECK-NEXT:    ret i16 [[TMP4]]
+;
+  %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b)
+  ret i16 %out
+}
+
+define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @clastb_n_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[A:%.*]] to float
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 4 x i32> [[B:%.*]] to <vscale x 4 x float>
+; CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], float [[TMP1]], <vscale x 4 x float> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[TMP3]] to i32
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b)
+  ret i32 %out
+}
+
+define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: @clastb_n_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64 [[A:%.*]] to double
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i64> [[B:%.*]] to <vscale x 2 x double>
+; CHECK-NEXT:    [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], double [[TMP1]], <vscale x 2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double [[TMP3]] to i64
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b)
+  ret i64 %out
+}
+
+declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
+declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)