This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Extend fp64 top zeroing peephole to all instructions
ClosedPublic

Authored by dmgreen on May 1 2023, 2:25 PM.

Details

Summary

D147235 added a fold to remove instructions that zero the upper half of a register if the instruction that produces the value already implicitly zeroes that upper half. As far as I can tell, this applies to all instructions that define an FPR64 register in AArch64. This patch switches to a check for the register class. The full list of instructions is:

BSPv8i8
FMOVD0
ABSv1i64
ABSv2i32
ABSv4i16
ABSv8i8
ADDHNv2i64_v2i32
ADDHNv4i32_v4i16
ADDHNv8i16_v8i8
ADDPv2i32
ADDPv2i64p
ADDPv4i16
ADDPv8i8
ADDv1i64
ADDv2i32
ADDv4i16
ADDv8i8
ANDv8i8
BF16DOTlanev4bf16
BFDOTv4bf16
BICv2i32
BICv4i16
BICv8i8
BIFv8i8
BITv8i8
BSLv8i8
CLASTA_VPZ_D
CLASTB_VPZ_D
CLSv2i32
CLSv4i16
CLSv8i8
CLZv2i32
CLZv4i16
CLZv8i8
CMEQv1i64
CMEQv1i64rz
CMEQv2i32
CMEQv2i32rz
CMEQv4i16
CMEQv4i16rz
CMEQv8i8
CMEQv8i8rz
CMGEv1i64
CMGEv1i64rz
CMGEv2i32
CMGEv2i32rz
CMGEv4i16
CMGEv4i16rz
CMGEv8i8
CMGEv8i8rz
CMGTv1i64
CMGTv1i64rz
CMGTv2i32
CMGTv2i32rz
CMGTv4i16
CMGTv4i16rz
CMGTv8i8
CMGTv8i8rz
CMHIv1i64
CMHIv2i32
CMHIv4i16
CMHIv8i8
CMHSv1i64
CMHSv2i32
CMHSv4i16
CMHSv8i8
CMLEv1i64rz
CMLEv2i32rz
CMLEv4i16rz
CMLEv8i8rz
CMLTv1i64rz
CMLTv2i32rz
CMLTv4i16rz
CMLTv8i8rz
CMTSTv1i64
CMTSTv2i32
CMTSTv4i16
CMTSTv8i8
CNTv8i8
DUPi64
DUPv2i32gpr
DUPv2i32lane
DUPv4i16gpr
DUPv4i16lane
DUPv8i8gpr
DUPv8i8lane
EORv8i8
EXTv8i8
FABD64
FABDv2f32
FABDv4f16
FABSDr
FABSv2f32
FABSv4f16
FACGE64
FACGEv2f32
FACGEv4f16
FACGT64
FACGTv2f32
FACGTv4f16
FADDDrr
FADDPv2f32
FADDPv2i64p
FADDPv4f16
FADDv2f32
FADDv4f16
FCADDv2f32
FCADDv4f16
FCMEQ64
FCMEQv1i64rz
FCMEQv2f32
FCMEQv2i32rz
FCMEQv4f16
FCMEQv4i16rz
FCMGE64
FCMGEv1i64rz
FCMGEv2f32
FCMGEv2i32rz
FCMGEv4f16
FCMGEv4i16rz
FCMGT64
FCMGTv1i64rz
FCMGTv2f32
FCMGTv2i32rz
FCMGTv4f16
FCMGTv4i16rz
FCMLAv2f32
FCMLAv4f16
FCMLAv4f16_indexed
FCMLEv1i64rz
FCMLEv2i32rz
FCMLEv4i16rz
FCMLTv1i64rz
FCMLTv2i32rz
FCMLTv4i16rz
FCSELDrrr
FCVTASv1i64
FCVTASv2f32
FCVTASv4f16
FCVTAUv1i64
FCVTAUv2f32
FCVTAUv4f16
FCVTDHr
FCVTDSr
FCVTMSv1i64
FCVTMSv2f32
FCVTMSv4f16
FCVTMUv1i64
FCVTMUv2f32
FCVTMUv4f16
FCVTNSv1i64
FCVTNSv2f32
FCVTNSv4f16
FCVTNUv1i64
FCVTNUv2f32
FCVTNUv4f16
FCVTNv2i32
FCVTNv4i16
FCVTPSv1i64
FCVTPSv2f32
FCVTPSv4f16
FCVTPUv1i64
FCVTPUv2f32
FCVTPUv4f16
FCVTXNv2f32
FCVTZSd
FCVTZSv1i64
FCVTZSv2f32
FCVTZSv2i32_shift
FCVTZSv4f16
FCVTZSv4i16_shift
FCVTZUd
FCVTZUv1i64
FCVTZUv2f32
FCVTZUv2i32_shift
FCVTZUv4f16
FCVTZUv4i16_shift
FDIVDrr
FDIVv2f32
FDIVv4f16
FMADDDrrr
FMAXDrr
FMAXNMDrr
FMAXNMPv2f32
FMAXNMPv2i64p
FMAXNMPv4f16
FMAXNMv2f32
FMAXNMv4f16
FMAXPv2f32
FMAXPv2i64p
FMAXPv4f16
FMAXv2f32
FMAXv4f16
FMINDrr
FMINNMDrr
FMINNMPv2f32
FMINNMPv2i64p
FMINNMPv4f16
FMINNMv2f32
FMINNMv4f16
FMINPv2f32
FMINPv2i64p
FMINPv4f16
FMINv2f32
FMINv4f16
FMLAL2lanev4f16
FMLAL2v4f16
FMLALlanev4f16
FMLALv4f16
FMLAv1i64_indexed
FMLAv2f32
FMLAv2i32_indexed
FMLAv4f16
FMLAv4i16_indexed
FMLSL2lanev4f16
FMLSL2v4f16
FMLSLlanev4f16
FMLSLv4f16
FMLSv1i64_indexed
FMLSv2f32
FMLSv2i32_indexed
FMLSv4f16
FMLSv4i16_indexed
FMOVDi
FMOVDr
FMOVXDr
FMOVv2f32_ns
FMOVv4f16_ns
FMSUBDrrr
FMULDrr
FMULX64
FMULXv1i64_indexed
FMULXv2f32
FMULXv2i32_indexed
FMULXv4f16
FMULXv4i16_indexed
FMULv1i64_indexed
FMULv2f32
FMULv2i32_indexed
FMULv4f16
FMULv4i16_indexed
FNEGDr
FNEGv2f32
FNEGv4f16
FNMADDDrrr
FNMSUBDrrr
FNMULDrr
FRECPEv1i64
FRECPEv2f32
FRECPEv4f16
FRECPS64
FRECPSv2f32
FRECPSv4f16
FRECPXv1i64
FRINT32XDr
FRINT32Xv2f32
FRINT32ZDr
FRINT32Zv2f32
FRINT64XDr
FRINT64Xv2f32
FRINT64ZDr
FRINT64Zv2f32
FRINTADr
FRINTAv2f32
FRINTAv4f16
FRINTIDr
FRINTIv2f32
FRINTIv4f16
FRINTMDr
FRINTMv2f32
FRINTMv4f16
FRINTNDr
FRINTNv2f32
FRINTNv4f16
FRINTPDr
FRINTPv2f32
FRINTPv4f16
FRINTXDr
FRINTXv2f32
FRINTXv4f16
FRINTZDr
FRINTZv2f32
FRINTZv4f16
FRSQRTEv1i64
FRSQRTEv2f32
FRSQRTEv4f16
FRSQRTS64
FRSQRTSv2f32
FRSQRTSv4f16
FSQRTDr
FSQRTv2f32
FSQRTv4f16
FSUBDrr
FSUBv2f32
FSUBv4f16
LASTA_VPZ_D
LASTB_VPZ_D
LD1Onev1d
LD1Onev2s
LD1Onev4h
LD1Onev8b
LD1Rv1d
LD1Rv2s
LD1Rv4h
LD1Rv8b
LDAPURdi
LDNPDi
LDPDi
LDRDl
LDRDroW
LDRDroX
LDRDui
LDURDi
MLAv2i32
MLAv2i32_indexed
MLAv4i16
MLAv4i16_indexed
MLAv8i8
MLSv2i32
MLSv2i32_indexed
MLSv4i16
MLSv4i16_indexed
MLSv8i8
MOVID
MOVIv2i32
MOVIv2s_msl
MOVIv4i16
MOVIv8b_ns
MULv2i32
MULv2i32_indexed
MULv4i16
MULv4i16_indexed
MULv8i8
MVNIv2i32
MVNIv2s_msl
MVNIv4i16
NEGv1i64
NEGv2i32
NEGv4i16
NEGv8i8
NOTv8i8
ORNv8i8
ORRv2i32
ORRv4i16
ORRv8i8
PMULv8i8
RADDHNv2i64_v2i32
RADDHNv4i32_v4i16
RADDHNv8i16_v8i8
RBITv8i8
REV16v8i8
REV32v4i16
REV32v8i8
REV64v2i32
REV64v4i16
REV64v8i8
RSHRNv2i32_shift
RSHRNv4i16_shift
RSHRNv8i8_shift
RSUBHNv2i64_v2i32
RSUBHNv4i32_v4i16
RSUBHNv8i16_v8i8
SABAv2i32
SABAv4i16
SABAv8i8
SABDv2i32
SABDv4i16
SABDv8i8
SADALPv2i32_v1i64
SADALPv4i16_v2i32
SADALPv8i8_v4i16
SADDLPv2i32_v1i64
SADDLPv4i16_v2i32
SADDLPv8i8_v4i16
SADDLVv4i32v
SCVTFSWDri
SCVTFSXDri
SCVTFUWDri
SCVTFUXDri
SCVTFd
SCVTFv1i64
SCVTFv2f32
SCVTFv2i32_shift
SCVTFv4f16
SCVTFv4i16_shift
SDOTlanev8i8
SDOTv8i8
SHADDv2i32
SHADDv4i16
SHADDv8i8
SHLd
SHLv2i32_shift
SHLv4i16_shift
SHLv8i8_shift
SHRNv2i32_shift
SHRNv4i16_shift
SHRNv8i8_shift
SHSUBv2i32
SHSUBv4i16
SHSUBv8i8
SLId
SLIv2i32_shift
SLIv4i16_shift
SLIv8i8_shift
SMAXPv2i32
SMAXPv4i16
SMAXPv8i8
SMAXv2i32
SMAXv4i16
SMAXv8i8
SMINPv2i32
SMINPv4i16
SMINPv8i8
SMINv2i32
SMINv4i16
SMINv8i8
SQABSv1i64
SQABSv2i32
SQABSv4i16
SQABSv8i8
SQADDv1i64
SQADDv2i32
SQADDv4i16
SQADDv8i8
SQDMLALi32
SQDMLALv1i64_indexed
SQDMLSLi32
SQDMLSLv1i64_indexed
SQDMULHv2i32
SQDMULHv2i32_indexed
SQDMULHv4i16
SQDMULHv4i16_indexed
SQDMULLi32
SQDMULLv1i64_indexed
SQNEGv1i64
SQNEGv2i32
SQNEGv4i16
SQNEGv8i8
SQRDMLAHv2i32
SQRDMLAHv2i32_indexed
SQRDMLAHv4i16
SQRDMLAHv4i16_indexed
SQRDMLSHv2i32
SQRDMLSHv2i32_indexed
SQRDMLSHv4i16
SQRDMLSHv4i16_indexed
SQRDMULHv2i32
SQRDMULHv2i32_indexed
SQRDMULHv4i16
SQRDMULHv4i16_indexed
SQRSHLv1i64
SQRSHLv2i32
SQRSHLv4i16
SQRSHLv8i8
SQRSHRNv2i32_shift
SQRSHRNv4i16_shift
SQRSHRNv8i8_shift
SQRSHRUNv2i32_shift
SQRSHRUNv4i16_shift
SQRSHRUNv8i8_shift
SQSHLUd
SQSHLUv2i32_shift
SQSHLUv4i16_shift
SQSHLUv8i8_shift
SQSHLd
SQSHLv1i64
SQSHLv2i32
SQSHLv2i32_shift
SQSHLv4i16
SQSHLv4i16_shift
SQSHLv8i8
SQSHLv8i8_shift
SQSHRNv2i32_shift
SQSHRNv4i16_shift
SQSHRNv8i8_shift
SQSHRUNv2i32_shift
SQSHRUNv4i16_shift
SQSHRUNv8i8_shift
SQSUBv1i64
SQSUBv2i32
SQSUBv4i16
SQSUBv8i8
SQXTNv2i32
SQXTNv4i16
SQXTNv8i8
SQXTUNv2i32
SQXTUNv4i16
SQXTUNv8i8
SRHADDv2i32
SRHADDv4i16
SRHADDv8i8
SRId
SRIv2i32_shift
SRIv4i16_shift
SRIv8i8_shift
SRSHLv1i64
SRSHLv2i32
SRSHLv4i16
SRSHLv8i8
SRSHRd
SRSHRv2i32_shift
SRSHRv4i16_shift
SRSHRv8i8_shift
SRSRAd
SRSRAv2i32_shift
SRSRAv4i16_shift
SRSRAv8i8_shift
SSHLv1i64
SSHLv2i32
SSHLv4i16
SSHLv8i8
SSHRd
SSHRv2i32_shift
SSHRv4i16_shift
SSHRv8i8_shift
SSRAd
SSRAv2i32_shift
SSRAv4i16_shift
SSRAv8i8_shift
SUBHNv2i64_v2i32
SUBHNv4i32_v4i16
SUBHNv8i16_v8i8
SUBv1i64
SUBv2i32
SUBv4i16
SUBv8i8
SUDOTlanev8i8
SUQADDv1i64
SUQADDv2i32
SUQADDv4i16
SUQADDv8i8
TBLv8i8Four
TBLv8i8One
TBLv8i8Three
TBLv8i8Two
TBXv8i8Four
TBXv8i8One
TBXv8i8Three
TBXv8i8Two
TRN1v2i32
TRN1v4i16
TRN1v8i8
TRN2v2i32
TRN2v4i16
TRN2v8i8
UABAv2i32
UABAv4i16
UABAv8i8
UABDv2i32
UABDv4i16
UABDv8i8
UADALPv2i32_v1i64
UADALPv4i16_v2i32
UADALPv8i8_v4i16
UADDLPv2i32_v1i64
UADDLPv4i16_v2i32
UADDLPv8i8_v4i16
UADDLVv4i32v
UCVTFSWDri
UCVTFSXDri
UCVTFUWDri
UCVTFUXDri
UCVTFd
UCVTFv1i64
UCVTFv2f32
UCVTFv2i32_shift
UCVTFv4f16
UCVTFv4i16_shift
UDOTlanev8i8
UDOTv8i8
UHADDv2i32
UHADDv4i16
UHADDv8i8
UHSUBv2i32
UHSUBv4i16
UHSUBv8i8
UMAXPv2i32
UMAXPv4i16
UMAXPv8i8
UMAXv2i32
UMAXv4i16
UMAXv8i8
UMINPv2i32
UMINPv4i16
UMINPv8i8
UMINv2i32
UMINv4i16
UMINv8i8
UQADDv1i64
UQADDv2i32
UQADDv4i16
UQADDv8i8
UQRSHLv1i64
UQRSHLv2i32
UQRSHLv4i16
UQRSHLv8i8
UQRSHRNv2i32_shift
UQRSHRNv4i16_shift
UQRSHRNv8i8_shift
UQSHLd
UQSHLv1i64
UQSHLv2i32
UQSHLv2i32_shift
UQSHLv4i16
UQSHLv4i16_shift
UQSHLv8i8
UQSHLv8i8_shift
UQSHRNv2i32_shift
UQSHRNv4i16_shift
UQSHRNv8i8_shift
UQSUBv1i64
UQSUBv2i32
UQSUBv4i16
UQSUBv8i8
UQXTNv2i32
UQXTNv4i16
UQXTNv8i8
URECPEv2i32
URHADDv2i32
URHADDv4i16
URHADDv8i8
URSHLv1i64
URSHLv2i32
URSHLv4i16
URSHLv8i8
URSHRd
URSHRv2i32_shift
URSHRv4i16_shift
URSHRv8i8_shift
URSQRTEv2i32
URSRAd
URSRAv2i32_shift
URSRAv4i16_shift
URSRAv8i8_shift
USDOTlanev8i8
USDOTv8i8
USHLv1i64
USHLv2i32
USHLv4i16
USHLv8i8
USHRd
USHRv2i32_shift
USHRv4i16_shift
USHRv8i8_shift
USQADDv1i64
USQADDv2i32
USQADDv4i16
USQADDv8i8
USRAd
USRAv2i32_shift
USRAv4i16_shift
USRAv8i8_shift
UZP1v2i32
UZP1v4i16
UZP1v8i8
UZP2v2i32
UZP2v4i16
UZP2v8i8
XTNv2i32
XTNv4i16
XTNv8i8
ZIP1v2i32
ZIP1v4i16
ZIP1v8i8
ZIP2v2i32
ZIP2v4i16
ZIP2v8i8

Diff Detail

Event Timeline

dmgreen created this revision. · May 1 2023, 2:25 PM
Herald added a project: Restricted Project. · View Herald Transcript · May 1 2023, 2:25 PM
dmgreen requested review of this revision. · May 1 2023, 2:25 PM
Herald added a project: Restricted Project. · View Herald Transcript · May 1 2023, 2:25 PM
SjoerdMeijer accepted this revision. · May 2 2023, 12:27 AM

Looks like a good idea.

This revision is now accepted and ready to land. · May 2 2023, 12:27 AM
georges added a subscriber: georges. · May 2 2023, 2:49 AM
dewen added a subscriber: dewen. · Aug 22 2023, 2:01 AM