Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2780,6 +2780,20 @@ [llvm_nxv4f32_ty, llvm_nxv4f32_ty], [IntrNoMem]>; + class SME2_VG2_Multi_Single_Single_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + + class SME2_VG4_Multi_Single_Single_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + // // Multi-vector fused multiply-add/subtract @@ -2839,4 +2853,13 @@ // def int_aarch64_sve_fcvtn_x2 : SME2_CVT_VG2_SINGLE_Intrinsic; def int_aarch64_sve_bfcvtn_x2 : SME2_CVT_VG2_SINGLE_BF16_Intrinsic; + + // Multi-vector clamps + def int_aarch64_sve_sclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic; + def int_aarch64_sve_uclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic; + def int_aarch64_sve_fclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic; + + def int_aarch64_sve_sclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic; + def int_aarch64_sve_uclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic; + def int_aarch64_sve_fclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic; } Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -335,6 +335,10 @@ // e.g. structured loads and stores (ldN, stN). SDValue createZTuple(ArrayRef Vecs); + // Similar to above, except the register must start at a multiple of the + // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple. + SDValue createZMulTuple(ArrayRef Regs); + /// Generic helper for the createDTuple/createQTuple /// functions. Those should almost always be called instead. SDValue createTuple(ArrayRef Vecs, const unsigned RegClassIDs[], @@ -357,6 +361,7 @@ unsigned Opc_rr, unsigned Opc_ri, bool IsIntr = false); void SelectWhilePair(SDNode *N, unsigned Opc); + void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode); bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); /// SVE Reg+Imm addressing mode. @@ -1453,6 +1458,18 @@ return createTuple(Regs, RegClassIDs, SubRegs); } +SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef Regs) { + assert(Regs.size() == 2 || Regs.size() == 4); + + // The createTuple interface requires 3 RegClassIDs for each possible + // tuple type even though we only have them for ZPR2 and ZPR4. + static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0, + AArch64::ZPR4Mul4RegClassID}; + static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, + AArch64::zsub2, AArch64::zsub3}; + return createTuple(Regs, RegClassIDs, SubRegs); +} + SDValue AArch64DAGToDAGISel::createTuple(ArrayRef Regs, const unsigned RegClassIDs[], const unsigned SubRegs[]) { @@ -1691,6 +1708,8 @@ enum class SelectTypeKind { Int1 = 0, + Int = 1, + FP = 2, }; /// This function selects an opcode from a list of opcodes, which is @@ -1704,10 +1723,19 @@ EVT EltVT = VT.getVectorElementType(); switch (Kind) { + case SelectTypeKind::Int: + if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 && + EltVT != MVT::i64) + return 0; + break; case SelectTypeKind::Int1: if (EltVT != MVT::i1) return 0; break; + case SelectTypeKind::FP: + if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64) + return 0; + break; } unsigned Offset; @@ -1780,6 +1808,28 @@ CurDAG->RemoveDeadNode(N); } +void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs, + unsigned Op) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); + SDValue Zd = createZMulTuple(Regs); + SDValue Zn = N->getOperand(1 + NumVecs); + SDValue Zm = N->getOperand(2 + NumVecs); + + SDValue Ops[] = {Zd, Zn, Zm}; + + SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops); + SDValue SuperReg = SDValue(Intrinsic, 0); + for (unsigned i = 0; i < NumVecs; ++i) + ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( + AArch64::zsub0 + i, DL, VT, SuperReg)); + + CurDAG->RemoveDeadNode(N); + return; +} + void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); @@ -4732,6 +4782,48 @@ AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) SelectWhilePair(Node, Op); return; + case Intrinsic::aarch64_sve_sclamp_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H, + AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D})) + SelectClamp(Node, 2, Op); + return; + case Intrinsic::aarch64_sve_uclamp_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H, + AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D})) + SelectClamp(Node, 2, Op); + return; + case Intrinsic::aarch64_sve_fclamp_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S, + AArch64::FCLAMP_VG2_2Z2Z_D})) + SelectClamp(Node, 2, Op); + return; + case Intrinsic::aarch64_sve_sclamp_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H, + AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D})) + SelectClamp(Node, 4, Op); + return; + case Intrinsic::aarch64_sve_uclamp_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H, + AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D})) + SelectClamp(Node, 4, Op); + return; + case Intrinsic::aarch64_sve_fclamp_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S, + AArch64::FCLAMP_VG4_4Z4Z_D})) + SelectClamp(Node, 4, Op); + return; } break; } Index: llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll +++ llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll @@ -30,8 +30,91 @@ ret %res } +define { , } @test_fclamp_single_x2_f16( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_fclamp_single_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fclamp { z0.h, z1.h }, z2.h, z3.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( %a, %b, %c, %d) + ret { , } %res +} + +define { , } @test_fclamp_single_x2_f32( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_fclamp_single_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fclamp { z0.s, z1.s }, z2.s, z3.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( %a, %b, %c, %d) + ret { , } %res +} + +define { , } @test_fclamp_single_x2_f64( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_fclamp_single_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fclamp { z0.d, z1.d }, z2.d, z3.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( %a, %b, %c, %d) + ret { , } %res +} + + +define { , , , } @test_fclamp_single_x4_f16( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_fclamp_single_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fclamp { z0.h - z3.h }, z4.h, z5.h +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + +define { , , , } @test_fclamp_single_x4_f32( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_fclamp_single_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fclamp { z0.s - z3.s }, z4.s, z5.s +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + +define { , , , } @test_fclamp_single_x4_f64( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_fclamp_single_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fclamp { z0.d - z3.d }, z4.d, z5.d +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + + attributes #0 = { "target-features"="+sve2p1" } +attributes #1 = { "target-features"="+sme2" } declare @llvm.aarch64.sve.fclamp.nxv8f16(, , ) declare @llvm.aarch64.sve.fclamp.nxv4f32(, , ) declare @llvm.aarch64.sve.fclamp.nxv2f64(, , ) + +declare { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(, , , ) +declare { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(, , , ) +declare { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(, , , ) + +declare { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( %a, %b, %c, %d, %e, %f) +declare { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( %a, %b, %c, %d, %e, %f) +declare { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( %a, %b, %c, %d, %e, %f) Index: llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll +++ llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s -define @test_sclamp_i8( %a, %b, %c) { +target triple = "aarch64-linux-gnu" + +define @test_sclamp_i8( %a, %b, %c) #0 { ; CHECK-LABEL: test_sclamp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sclamp z2.b, z0.b, z1.b @@ -11,7 +13,7 @@ ret %res } -define @test_sclamp_i16( %a, %b, %c) { +define @test_sclamp_i16( %a, %b, %c) #0 { ; CHECK-LABEL: test_sclamp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sclamp z2.h, z0.h, z1.h @@ -21,7 +23,7 @@ ret %res } -define @test_sclamp_i32( %a, %b, %c) { +define @test_sclamp_i32( %a, %b, %c) #0 { ; CHECK-LABEL: test_sclamp_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sclamp z2.s, z0.s, z1.s @@ -31,7 +33,7 @@ ret %res } -define @test_sclamp_i64( %a, %b, %c) { +define @test_sclamp_i64( %a, %b, %c) #0 { ; CHECK-LABEL: test_sclamp_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: sclamp z2.d, z0.d, z1.d @@ -41,7 +43,117 @@ ret %res } +define { , } @test_sclamp_single_x2_i8( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_sclamp_single_x2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: sclamp { z0.b, z1.b }, z2.b, z3.b +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( %a, %b, %c, %d) + ret { , } %res +} + +define { , } @test_sclamp_single_x2_i16( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_sclamp_single_x2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: sclamp { z0.h, z1.h }, z2.h, z3.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( %a, %b, %c, %d) + ret { , } %res +} + +define { , } @test_sclamp_single_x2_i32( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_sclamp_single_x2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: sclamp { z0.s, z1.s }, z2.s, z3.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( %a, %b, %c, %d) + ret { , } %res +} + +define { , } @test_sclamp_single_x2_i64( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_sclamp_single_x2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: sclamp { z0.d, z1.d }, z2.d, z3.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( %a, %b, %c, %d) + ret { , } %res +} + +define { , , , } @test_sclamp_single_x4_i8( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_sclamp_single_x4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sclamp { z0.b - z3.b }, z4.b, z5.b +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + +define { , , , } @test_sclamp_single_x4_i16( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_sclamp_single_x4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sclamp { z0.h - z3.h }, z4.h, z5.h +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + +define { , , , } @test_sclamp_single_x4_i32( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_sclamp_single_x4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sclamp { z0.s - z3.s }, z4.s, z5.s +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + +define { , , , } @test_sclamp_single_x4_i64( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_sclamp_single_x4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sclamp { z0.d - z3.d }, z4.d, z5.d +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + + +attributes #0 = { "target-features"="+sve2p1" } +attributes #1 = { "target-features"="+sme2" } + declare @llvm.aarch64.sve.sclamp.nxv16i8(, , ) declare @llvm.aarch64.sve.sclamp.nxv8i16(, , ) declare @llvm.aarch64.sve.sclamp.nxv4i32(, , ) declare @llvm.aarch64.sve.sclamp.nxv2i64(, , ) + +declare { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8(, , , ) +declare { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(, , , ) +declare { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(, , , ) +declare { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(, , , ) + +declare { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) +declare { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) +declare { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) +declare { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f) Index: llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll +++ llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s -define @test_uclamp_i8( %a, %b, %c) { +target triple = "aarch64-linux-gnu" + +define @test_uclamp_i8( %a, %b, %c) #0 { ; CHECK-LABEL: test_uclamp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: uclamp z2.b, z0.b, z1.b @@ -11,7 +13,7 @@ ret %res } -define @test_uclamp_i16( %a, %b, %c) { +define @test_uclamp_i16( %a, %b, %c) #0 { ; CHECK-LABEL: test_uclamp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: uclamp z2.h, z0.h, z1.h @@ -21,7 +23,7 @@ ret %res } -define @test_uclamp_i32( %a, %b, %c) { +define @test_uclamp_i32( %a, %b, %c) #0 { ; CHECK-LABEL: test_uclamp_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: uclamp z2.s, z0.s, z1.s @@ -31,7 +33,7 @@ ret %res } -define @test_uclamp_i64( %a, %b, %c) { +define @test_uclamp_i64( %a, %b, %c) #0 { ; CHECK-LABEL: test_uclamp_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: uclamp z2.d, z0.d, z1.d @@ -41,7 +43,117 @@ ret %res } +define { , } @test_uclamp_single_x2_i8( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_uclamp_single_x2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: uclamp { z0.b, z1.b }, z2.b, z3.b +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( %a, %b, %c, %d) + ret { , } %res +} + +define { , } @test_uclamp_single_x2_i16( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_uclamp_single_x2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: uclamp { z0.h, z1.h }, z2.h, z3.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( %a, %b, %c, %d) + ret { , } %res +} + +define { , } @test_uclamp_single_x2_i32( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_uclamp_single_x2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: uclamp { z0.s, z1.s }, z2.s, z3.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( %a, %b, %c, %d) + ret { , } %res +} + +define { , } @test_uclamp_single_x2_i64( %a, %b, %c, %d) #1 { +; CHECK-LABEL: test_uclamp_single_x2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: uclamp { z0.d, z1.d }, z2.d, z3.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( %a, %b, %c, %d) + ret { , } %res +} + +define { , , , } @test_uclamp_single_x4_i8( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_uclamp_single_x4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: uclamp { z0.b - z3.b }, z4.b, z5.b +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + +define { , , , } @test_uclamp_single_x4_i16( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_uclamp_single_x4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: uclamp { z0.h - z3.h }, z4.h, z5.h +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + +define { , , , } @test_uclamp_single_x4_i32( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_uclamp_single_x4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: uclamp { z0.s - z3.s }, z4.s, z5.s +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + +define { , , , } @test_uclamp_single_x4_i64( %a, %b, %c, %d, %e, %f) #1 { +; CHECK-LABEL: test_uclamp_single_x4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: uclamp { z0.d - z3.d }, z4.d, z5.d +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f) + ret { , , , } %res +} + + +attributes #0 = { "target-features"="+sve2p1" } +attributes #1 = { "target-features"="+sme2" } + declare @llvm.aarch64.sve.uclamp.nxv16i8(, , ) declare @llvm.aarch64.sve.uclamp.nxv8i16(, , ) declare @llvm.aarch64.sve.uclamp.nxv4i32(, , ) declare @llvm.aarch64.sve.uclamp.nxv2i64(, , ) + +declare { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(, , , ) +declare { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(, , , ) +declare { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(, , , ) +declare { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(, , , ) + +declare { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) +declare { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) +declare { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) +declare { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f)