Index: llvm/lib/Target/ARM/ARMInstrFormats.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrFormats.td +++ llvm/lib/Target/ARM/ARMInstrFormats.td @@ -408,6 +408,7 @@ bit thumbArithFlagSetting = 0; bit validForTailPredication = 0; + bit retainsPreviousHalf = 0; // If this is a pseudo instruction, mark it isCodeGenOnly. let isCodeGenOnly = !eq(!cast(f), "Pseudo"); @@ -421,6 +422,7 @@ let TSFlags{18-15} = D.Value; let TSFlags{19} = thumbArithFlagSetting; let TSFlags{20} = validForTailPredication; + let TSFlags{21} = retainsPreviousHalf; let Constraints = cstr; let Itinerary = itin; Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2688,6 +2688,7 @@ let Inst{4} = 0b0; let Inst{0} = 0b1; let validForTailPredication = 1; + let retainsPreviousHalf = 1; } def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> { @@ -2730,6 +2731,7 @@ let Inst{4} = 0b0; let Inst{0} = 0b0; let validForTailPredication = 1; + let retainsPreviousHalf = 1; } def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN< @@ -2779,6 +2781,7 @@ let Inst{4} = 0b0; let Inst{0} = bit_0; let validForTailPredication = 1; + let retainsPreviousHalf = 1; } multiclass MVE_VxQRSHRN_types { @@ -4492,6 +4495,7 @@ let Inst{7} = !if(!eq(bit_17, 0), 1, 0); let Inst{0} = 0b1; let validForTailPredication = 1; + let retainsPreviousHalf = 1; } multiclass MVE_VxMOVxN_halves { Index: llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h =================================================================== --- llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -396,6 +396,10 @@ // Whether an instruction can be included in an MVE tail-predicated loop. ValidForTailPredication = 1 << 20, + // Whether an instruction writes to the top/bottom half of a vector lane + // and leaves the other half untouched. + RetainsPreviousHalf = 1 << 21, + //===------------------------------------------------------------------===// // Code domain. DomainShift = 15, Index: llvm/unittests/Target/ARM/MachineInstrTest.cpp =================================================================== --- llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -10,11 +10,110 @@ using namespace llvm; +TEST(MachineInstructionRetainsPreviousHalf, IsCorrect) { + using namespace ARM; + + auto RetainsPreviousHalf = [](unsigned Opcode) { + switch (Opcode) { + default: + break; + case MVE_VMOVNi16bh: + case MVE_VMOVNi16th: + case MVE_VMOVNi32bh: + case MVE_VMOVNi32th: + case MVE_VQMOVNs16bh: + case MVE_VQMOVNs16th: + case MVE_VQMOVNs32bh: + case MVE_VQMOVNs32th: + case MVE_VQMOVNu16bh: + case MVE_VQMOVNu16th: + case MVE_VQMOVNu32bh: + case MVE_VQMOVNu32th: + case MVE_VQMOVUNs16bh: + case MVE_VQMOVUNs16th: + case MVE_VQMOVUNs32bh: + case MVE_VQMOVUNs32th: + case MVE_VQRSHRNbhs16: + case MVE_VQRSHRNbhs32: + case MVE_VQRSHRNbhu16: + case MVE_VQRSHRNbhu32: + case MVE_VQRSHRNths16: + case MVE_VQRSHRNths32: + case MVE_VQRSHRNthu16: + case MVE_VQRSHRNthu32: + case MVE_VQRSHRUNs16bh: + case MVE_VQRSHRUNs16th: + case MVE_VQRSHRUNs32bh: + case MVE_VQRSHRUNs32th: + case MVE_VQSHRNbhs16: + case MVE_VQSHRNbhs32: + case MVE_VQSHRNbhu16: + case MVE_VQSHRNbhu32: + case MVE_VQSHRNths16: + case MVE_VQSHRNths32: + case MVE_VQSHRNthu16: + case MVE_VQSHRNthu32: + case MVE_VQSHRUNs16bh: + case MVE_VQSHRUNs16th: + case MVE_VQSHRUNs32bh: + case MVE_VQSHRUNs32th: + case MVE_VRSHRNi16bh: + case MVE_VRSHRNi16th: + case MVE_VRSHRNi32bh: + case MVE_VRSHRNi32th: + case MVE_VSHRNi16bh: + case MVE_VSHRNi16th: + case MVE_VSHRNi32bh: + case MVE_VSHRNi32th: + case MVE_VCVTf16f32bh: + case MVE_VCVTf16f32th: + case MVE_VCVTf32f16bh: + case MVE_VCVTf32f16th: + return true; + } + return false; + }; + + LLVMInitializeARMTargetInfo(); + LLVMInitializeARMTarget(); + LLVMInitializeARMTargetMC(); + + auto TT(Triple::normalize("thumbv8.1m.main-arm-none-eabi")); + std::string Error; + const Target *T = TargetRegistry::lookupTarget(TT, Error); + if (!T) { + dbgs() << Error; + return; + } + + TargetOptions Options; + auto TM = std::unique_ptr( + static_cast( + T->createTargetMachine(TT, "generic", "", Options, None, None, + CodeGenOpt::Default))); + ARMSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetFeatureString()), + *static_cast(TM.get()), false); + const ARMBaseInstrInfo *TII = ST.getInstrInfo(); + auto MII = TM->getMCInstrInfo(); + + for (unsigned i = 0; i < ARM::INSTRUCTION_LIST_END; ++i) { + const MCInstrDesc &Desc = TII->get(i); + + uint64_t Flags = Desc.TSFlags; + if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE) + continue; + + bool Valid = (Flags & ARMII::RetainsPreviousHalf) != 0; + ASSERT_EQ(RetainsPreviousHalf(i), Valid) + << MII->getName(i) + << ": mismatched expectation for tail-predicated safety\n"; + } +} // Test for instructions that aren't immediately obviously valid within a // tail-predicated loop. This should be marked up in their tablegen // descriptions. Currently we, conservatively, disallow: // - cross beat carries. -// - narrowing of results. // - complex operations. // - horizontal operations. // - byte swapping.