Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c @@ -215,14 +215,20 @@ // CHECK-LABEL: @test_svclasta_n_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i16 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clasta.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CHECK-NEXT: ret i16 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_s16u10__SVBool_tsu11__SVInt16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i16 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clasta.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CPP-CHECK-NEXT: ret i16 [[TMP4]] // int16_t test_svclasta_n_s16(svbool_t pg, int16_t fallback, svint16_t data) { @@ -232,14 +238,20 @@ // CHECK-LABEL: @test_svclasta_n_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], 
[[DATA:%.*]]) -// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clasta.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_s32u10__SVBool_tiu11__SVInt32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i32 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clasta.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CPP-CHECK-NEXT: ret i32 [[TMP4]] // int32_t test_svclasta_n_s32(svbool_t pg, int32_t fallback, svint32_t data) { @@ -249,14 +261,20 @@ // CHECK-LABEL: @test_svclasta_n_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i64 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clasta.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CHECK-NEXT: ret i64 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_s64u10__SVBool_tlu11__SVInt64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clasta.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CPP-CHECK-NEXT: ret i64 [[TMP4]] // int64_t test_svclasta_n_s64(svbool_t pg, int64_t fallback, svint64_t data) { @@ -281,14 +299,20 @@ // CHECK-LABEL: @test_svclasta_n_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i16 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clasta.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CHECK-NEXT: ret i16 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_u16u10__SVBool_ttu12__SVUint16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i16 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clasta.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// 
CPP-CHECK-NEXT: ret i16 [[TMP4]] // uint16_t test_svclasta_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data) { @@ -298,14 +322,20 @@ // CHECK-LABEL: @test_svclasta_n_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clasta.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_u32u10__SVBool_tju12__SVUint32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i32 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clasta.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CPP-CHECK-NEXT: ret i32 [[TMP4]] // uint32_t test_svclasta_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data) { @@ -315,14 +345,20 @@ // CHECK-LABEL: @test_svclasta_n_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i64 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CHECK-NEXT: 
[[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clasta.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CHECK-NEXT: ret i64 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclasta_n_u64u10__SVBool_tmu12__SVUint64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clasta.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CPP-CHECK-NEXT: ret i64 [[TMP4]] // uint64_t test_svclasta_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data) { Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c @@ -215,14 +215,20 @@ // CHECK-LABEL: @test_svclastb_n_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i16 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CHECK-NEXT: ret i16 [[TMP4]] // // CPP-CHECK-LABEL: 
@_Z19test_svclastb_n_s16u10__SVBool_tsu11__SVInt16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i16 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CPP-CHECK-NEXT: ret i16 [[TMP4]] // int16_t test_svclastb_n_s16(svbool_t pg, int16_t fallback, svint16_t data) { @@ -232,14 +238,20 @@ // CHECK-LABEL: @test_svclastb_n_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clastb.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_s32u10__SVBool_tiu11__SVInt32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i32 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call float 
@llvm.aarch64.sve.clastb.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CPP-CHECK-NEXT: ret i32 [[TMP4]] // int32_t test_svclastb_n_s32(svbool_t pg, int32_t fallback, svint32_t data) { @@ -249,14 +261,20 @@ // CHECK-LABEL: @test_svclastb_n_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i64 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CHECK-NEXT: ret i64 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_s64u10__SVBool_tlu11__SVInt64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CPP-CHECK-NEXT: ret i64 [[TMP4]] // int64_t test_svclastb_n_s64(svbool_t pg, int64_t fallback, svint64_t data) { @@ -281,14 +299,20 @@ // CHECK-LABEL: @test_svclastb_n_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( [[TMP0]], i16 
[[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i16 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CHECK-NEXT: ret i16 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_u16u10__SVBool_ttu12__SVUint16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( [[TMP0]], i16 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i16 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[FALLBACK:%.*]] to half +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16( [[TMP0]], half [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[TMP3]] to i16 +// CPP-CHECK-NEXT: ret i16 [[TMP4]] // uint16_t test_svclastb_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data) { @@ -298,14 +322,20 @@ // CHECK-LABEL: @test_svclastb_n_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clastb.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_u32u10__SVBool_tju12__SVUint32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( [[TMP0]], i32 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i32 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[FALLBACK:%.*]] to float +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.aarch64.sve.clastb.n.nxv4f32( [[TMP0]], float [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32 +// CPP-CHECK-NEXT: ret i32 [[TMP4]] // uint32_t test_svclastb_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data) { @@ -315,14 +345,20 @@ // CHECK-LABEL: @test_svclastb_n_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret i64 [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +// CHECK-NEXT: ret i64 [[TMP4]] // // CPP-CHECK-LABEL: @_Z19test_svclastb_n_u64u10__SVBool_tmu12__SVUint64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( [[TMP0]], i64 [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[FALLBACK:%.*]] to double +// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast [[DATA:%.*]] to +// CPP-CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64( [[TMP0]], double [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] 
to i64
+// CPP-CHECK-NEXT:    ret i64 [[TMP4]]
//
uint64_t test_svclastb_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data) {
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -796,6 +796,44 @@
   return IC.replaceInstUsesWith(II, Extract);
 }
 
+static Optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
+                                                      IntrinsicInst &II) {
+  // Replace scalar integer CLAST[AB] intrinsic with optimal SIMD&FP variant.
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  Value *Pg = II.getArgOperand(0);
+  Value *Fallback = II.getArgOperand(1);
+  Value *Vec = II.getArgOperand(2);
+  Type *Ty = II.getType();
+
+  if (!Ty->isIntegerTy())
+    return None;
+
+  Type *FPTy;
+  switch (cast<IntegerType>(Ty)->getBitWidth()) {
+  default:
+    return None;
+  case 16:
+    FPTy = Builder.getHalfTy();
+    break;
+  case 32:
+    FPTy = Builder.getFloatTy();
+    break;
+  case 64:
+    FPTy = Builder.getDoubleTy();
+    break;
+  }
+
+  Value *FPFallBack = Builder.CreateBitCast(Fallback, FPTy);
+  auto *FPVTy = VectorType::get(
+      FPTy, cast<VectorType>(Vec->getType())->getElementCount());
+  Value *FPVec = Builder.CreateBitCast(Vec, FPVTy);
+  auto *FPII = Builder.CreateIntrinsic(II.getIntrinsicID(), {FPVec->getType()},
+                                       {Pg, FPFallBack, FPVec});
+  Value *FPIItoInt = Builder.CreateBitCast(FPII, II.getType());
+  return IC.replaceInstUsesWith(II, FPIItoInt);
+}
+
 static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
                                                 IntrinsicInst &II) {
   LLVMContext &Ctx = II.getContext();
@@ -1294,6 +1332,9 @@
   case Intrinsic::aarch64_sve_lasta:
   case Intrinsic::aarch64_sve_lastb:
     return instCombineSVELast(IC, II);
+  case Intrinsic::aarch64_sve_clasta_n:
+  case Intrinsic::aarch64_sve_clastb_n:
+    return instCombineSVECondLast(IC, II);
   case Intrinsic::aarch64_sve_cntd:
     return instCombineSVECntElts(IC, II, 2);
   case Intrinsic::aarch64_sve_cntw:
Index: 
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-clast.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-clast.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+target triple = "aarch64"
+
+define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: @clastb_n_i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[A:%.*]] to half
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 8 x i16> [[B:%.*]] to <vscale x 8 x half>
+; CHECK-NEXT:    [[TMP3:%.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], half [[TMP1]], <vscale x 8 x half> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast half [[TMP3]] to i16
+; CHECK-NEXT:    ret i16 [[TMP4]]
+;
+  %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b)
+  ret i16 %out
+}
+
+define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @clastb_n_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[A:%.*]] to float
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 4 x i32> [[B:%.*]] to <vscale x 4 x float>
+; CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], float [[TMP1]], <vscale x 4 x float> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[TMP3]] to i32
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b)
+  ret i32 %out
+}
+
+define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: @clastb_n_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64 [[A:%.*]] to double
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i64> [[B:%.*]] to <vscale x 2 x double>
+; CHECK-NEXT:    [[TMP3:%.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], double [[TMP1]], <vscale x 2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double [[TMP3]] to i64
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b)
+  ret i64 %out
+}
+
+declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
+declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)