diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9493,12 +9493,23 @@ Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, getSVEOverloadTypes(TypeFlags, Ty, Ops)); - Value *Call = Builder.CreateCall(F, Ops); + CallInst *Call = Builder.CreateCall(F, Ops); + + // These builtins don't have a defined result for inactive lanes. + // NOTE: The intention of these builtins is to allow the compiler to better + // utilise unpredicated SVE instructions. Arguably a better implementation + // is to have dedicated intrinsics for these builtins. However, there is a + // lot of them and most have no equivalent unpredicated variant so instead + // we treat them as SVETypeFlags::MergeOp1. Metadata is applied, which is + // freely ignorable, to help identify when the predicate can be dropped. + if (TypeFlags.getMergeType() == SVETypeFlags::MergeAny) + Call->setMetadata("inactive_lanes_undefined", + MDNode::get(getLLVMContext(), {})); // Predicate results must be converted to svbool_t. if (auto PredTy = dyn_cast(Call->getType())) if (PredTy->getScalarType()->isIntegerTy(1)) - Call = EmitSVEPredicateCast(Call, cast(Ty)); + return EmitSVEPredicateCast(Call, cast(Ty)); return Call; } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c @@ -296,12 +296,12 @@ // CHECK-LABEL: @test_svadd_s8_x( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svadd_s8_xu10__SVBool_tu10__SVInt8_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP0]] // svint8_t test_svadd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) @@ -312,13 +312,13 @@ // CHECK-LABEL: @test_svadd_s16_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svadd_s16_xu10__SVBool_tu11__SVInt16_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svint16_t test_svadd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) @@ -329,13 +329,13 @@ // CHECK-LABEL: @test_svadd_s32_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svadd_s32_xu10__SVBool_tu11__SVInt32_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svint32_t test_svadd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) @@ -346,13 +346,13 @@ // CHECK-LABEL: @test_svadd_s64_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svadd_s64_xu10__SVBool_tu11__SVInt64_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svint64_t test_svadd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) @@ -362,12 +362,12 @@ // CHECK-LABEL: @test_svadd_u8_x( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svadd_u8_xu10__SVBool_tu11__SVUint8_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_svadd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) @@ -378,13 +378,13 @@ // CHECK-LABEL: @test_svadd_u16_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svadd_u16_xu10__SVBool_tu12__SVUint16_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svuint16_t test_svadd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) @@ -395,13 +395,13 @@ // CHECK-LABEL: @test_svadd_u32_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svadd_u32_xu10__SVBool_tu12__SVUint32_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svuint32_t test_svadd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) @@ -412,13 +412,13 @@ // CHECK-LABEL: @test_svadd_u64_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svadd_u64_xu10__SVBool_tu12__SVUint64_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svuint64_t test_svadd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) @@ -774,14 +774,14 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP0:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[TMP0]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svadd_n_s8_xu10__SVBool_tu10__SVInt8_ta( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP0:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[TMP0]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svint8_t test_svadd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) @@ -794,7 +794,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i16 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svadd_n_s16_xu10__SVBool_tu11__SVInt16_ts( @@ -802,7 +802,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i16 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP2]] // svint16_t test_svadd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) @@ -815,7 +815,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svadd_n_s32_xu10__SVBool_tu11__SVInt32_ti( @@ -823,7 +823,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP2]] // svint32_t test_svadd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) @@ -836,7 +836,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svadd_n_s64_xu10__SVBool_tu11__SVInt64_tl( @@ -844,7 +844,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP2]] // svint64_t test_svadd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) @@ -856,14 +856,14 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP0:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[TMP0]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svadd_n_u8_xu10__SVBool_tu11__SVUint8_th( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP0:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( [[PG:%.*]], [[OP1:%.*]], [[TMP0]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svuint8_t test_svadd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) @@ -876,7 +876,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i16 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svadd_n_u16_xu10__SVBool_tu12__SVUint16_tt( @@ -884,7 +884,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i16 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP2]] // svuint16_t test_svadd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) @@ -897,7 +897,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svadd_n_u32_xu10__SVBool_tu12__SVUint32_tj( @@ -905,7 +905,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP2]] // svuint32_t test_svadd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) @@ -918,7 +918,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svadd_n_u64_xu10__SVBool_tu12__SVUint64_tm( @@ -926,7 +926,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP2]] // svuint64_t test_svadd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) @@ -1045,13 +1045,13 @@ // CHECK-LABEL: @test_svadd_f16_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svadd_f16_xu10__SVBool_tu13__SVFloat16_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svfloat16_t test_svadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) @@ -1062,13 +1062,13 @@ // CHECK-LABEL: @test_svadd_f32_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv4f32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv4f32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svadd_f32_xu10__SVBool_tu13__SVFloat32_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv4f32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv4f32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svfloat32_t test_svadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) @@ -1079,13 +1079,13 @@ // CHECK-LABEL: @test_svadd_f64_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv2f64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv2f64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svadd_f64_xu10__SVBool_tu13__SVFloat64_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv2f64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv2f64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP1]] // svfloat64_t test_svadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) @@ -1230,7 +1230,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, half [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svadd_n_f16_xu10__SVBool_tu13__SVFloat16_tDh( @@ -1238,7 +1238,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, half [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP2]] // svfloat16_t test_svadd_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) @@ -1251,7 +1251,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv4f32( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv4f32( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svadd_n_f32_xu10__SVBool_tu13__SVFloat32_tf( @@ -1259,7 +1259,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv4f32( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv4f32( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP2]] // svfloat32_t test_svadd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) @@ -1272,7 +1272,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, double [[OP2:%.*]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv2f64( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv2f64( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svadd_n_f64_xu10__SVBool_tu13__SVFloat64_td( @@ -1280,7 +1280,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, double [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[TMP1:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv2f64( [[TMP0]], [[OP1:%.*]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv2f64( [[TMP0]], [[OP1:%.*]], [[TMP1]]), !inactive_lanes_undefined !2 // CPP-CHECK-NEXT: ret [[TMP2]] // svfloat64_t test_svadd_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1203,6 +1203,16 @@ Intrinsic::aarch64_sve_mad>( IC, II, false)) return MAD; + + // The predicate is redundant if we don't care about inactive lanes. + if (II.getIntrinsicID() == Intrinsic::aarch64_sve_add && + II.hasMetadata("inactive_lanes_undefined")) { + auto *UnpredShift = + BinaryOperator::Create(Instruction::Add, II.getArgOperand(1), + II.getArgOperand(2), II.getName(), &II); + return IC.replaceInstUsesWith(II, UnpredShift); + } + return instCombineSVEVectorBinOp(IC, II); } diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-unpredicate.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-unpredicate.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-unpredicate.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define @unpredicate_add_x( %p, %a, %b) #0 { +; CHECK-LABEL: @unpredicate_add_x( +; CHECK-NEXT: [[OP1:%.*]] = add [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret [[OP1]] +; + %op = tail call @llvm.aarch64.sve.add.nxv4i32( %p, %a, %b), !inactive_lanes_undefined !0 + ret %op +} + +declare @llvm.aarch64.sve.add.nxv4i32(, , ) + +attributes #0 = { "target-features"="+sve" } + +!0 = !{}