Index: llvm/lib/CodeGen/MachineCopyPropagation.cpp =================================================================== --- llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -385,6 +385,8 @@ bool hasOverlappingMultipleDef(const MachineInstr &MI, const MachineOperand &MODef, Register Def); + bool hasSameRegClass(Register Def, Register Src); + /// Candidates for deletion. SmallSetVector<MachineInstr *, 8> MaybeDeadCopies; @@ -422,6 +424,14 @@ } } +bool MachineCopyPropagation::hasSameRegClass(Register Def, Register Src) { + for (const TargetRegisterClass *RC : TRI->regclasses()) { + if (RC->contains(Def) && RC->contains(Src)) + return true; + } + return false; +} + /// Return true if \p PreviousCopy did copy register \p Src to register \p Def. /// This fact may have been obscured by sub register usage or may not be true at /// all even though Src and Def are subregisters of the registers used in @@ -539,6 +549,17 @@ /// so we have reduced the number of cross-class COPYs and potentially /// introduced a nop COPY that can be removed. + // We don't propagate for SubReg in cross-class COPYs. + // For example: (ecx is SubReg of rcx) + // renamable $rcx = COPY killed renamable $k0 + // renamable $k1 = COPY renamable $ecx + Register CopyDstReg = CopyOperands->Destination->getReg(); + Register UseSrcReg = UseICopyOperands->Source->getReg(); + if ((TRI->getRegSizeInBits(CopyDstReg, *MRI) != + TRI->getRegSizeInBits(UseSrcReg, *MRI)) && + !hasSameRegClass(CopyDstReg, CopySrcReg)) + return false; + // Allow forwarding if src and dst belong to any common class, so long as they // don't belong to any (possibly smaller) common class that requires copies to // go via a different class. @@ -560,7 +581,6 @@ return true; // The forwarded copy would be cross-class. Only do this if the original copy // was also cross-class. 
- Register CopyDstReg = CopyOperands->Destination->getReg(); for (const TargetRegisterClass *RC : TRI->regclasses()) { if (RC->contains(CopySrcReg) && RC->contains(CopyDstReg) && TRI->getCrossCopyRegClass(RC) != RC) @@ -1147,14 +1167,6 @@ if (CopySourceInvalid.count(Reload)) return; - auto CheckCopyConstraint = [this](Register Def, Register Src) { - for (const TargetRegisterClass *RC : TRI->regclasses()) { - if (RC->contains(Def) && RC->contains(Src)) - return true; - } - return false; - }; - auto UpdateReg = [](MachineInstr *MI, const MachineOperand *Old, const MachineOperand *New) { for (MachineOperand &MO : MI->operands()) { @@ -1171,10 +1183,10 @@ isCopyInstr(*RC[0], *TII, UseCopyInstr); std::optional<DestSourcePair> OuterMostReloadCopy = isCopyInstr(*RC.back(), *TII, UseCopyInstr); - if (!CheckCopyConstraint(OuterMostSpillCopy->Source->getReg(), - InnerMostSpillCopy->Source->getReg()) || - !CheckCopyConstraint(InnerMostReloadCopy->Destination->getReg(), - OuterMostReloadCopy->Destination->getReg())) + if (!hasSameRegClass(OuterMostSpillCopy->Source->getReg(), + InnerMostSpillCopy->Source->getReg()) || + !hasSameRegClass(InnerMostReloadCopy->Destination->getReg(), + OuterMostReloadCopy->Destination->getReg())) return; SpillageChainsLength += SC.size() + RC.size(); Index: llvm/test/CodeGen/X86/machine-copy-subreg.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/machine-copy-subreg.mir @@ -0,0 +1,207 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=machine-cp %s -o - | FileCheck %s +--- | + target triple = "x86_64-pc-windows-msvc19.29.30145" + + define void @foo(<256 x double> %0, ptr %1) #0 { + DIR.OMP.PARALLEL.LOOP.2: + %2 = fcmp olt <256 x double> %0, zeroinitializer + %3 = extractelement <256 x i1> %2, i64 64 + %4 = select i1 %3, double 0.000000e+00, double 1.000000e+00 + %5 = extractelement <256 x i1> %2, i64 65 + %6 = select i1 %5, double 0.000000e+00, double 1.000000e+00 + store double %4, ptr %1, align 16 + store 
double %6, ptr null, align 8 + ret void + } + + attributes #0 = { "target-cpu"="skylake-avx512" } + +... +--- +# CHECK-LABEL: name: foo +name: foo +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +debugInstrRef: true +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: + - { id: 0, type: default, offset: 256, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: default, offset: 248, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, type: default, offset: 240, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, type: default, offset: 232, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', 
debug-info-expression: '', debug-info-location: '' } + - { id: 4, type: default, offset: 224, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, type: default, offset: 216, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, type: default, offset: 208, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, type: default, offset: 200, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, type: default, offset: 192, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, type: default, offset: 184, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, type: default, offset: 176, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 11, type: default, offset: 168, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, 
callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 12, type: default, offset: 160, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 13, type: default, offset: 152, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 14, type: default, offset: 144, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 15, type: default, offset: 136, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 16, type: default, offset: 128, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 17, type: default, offset: 120, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 18, type: default, offset: 112, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 19, type: default, offset: 104, 
size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 20, type: default, offset: 96, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 21, type: default, offset: 88, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 22, type: default, offset: 80, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 23, type: default, offset: 72, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 24, type: default, offset: 64, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 25, type: default, offset: 56, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 26, type: default, offset: 48, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: 
'', debug-info-location: '' } + - { id: 27, type: default, offset: 40, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 28, type: default, offset: 32, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: + - id: 0 + value: 'double 1.000000e+00' + alignment: 8 + isTargetSpecific: false +machineFunctionInfo: {} +body: | + bb.0.DIR.OMP.PARALLEL.LOOP.2: + ; CHECK: renamable $k2 = KUNPCKBWrr killed renamable $k3, killed renamable $k2 + ; CHECK-NEXT: renamable $k1 = KUNPCKWDrr killed renamable $k2, killed renamable $k1 + ; CHECK-NEXT: renamable $k0 = KUNPCKDQrr killed renamable $k1, killed renamable $k0 + ; CHECK-NEXT: renamable $rcx = COPY killed renamable $k0 + ; CHECK-NEXT: renamable $k1 = COPY renamable $ecx + + renamable $rax = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0, align 16) + renamable $rcx = MOV64rm %fixed-stack.17, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.17) + renamable $rdx = MOV64rm %fixed-stack.18, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.18, align 16) + renamable $r8 = MOV64rm %fixed-stack.19, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.19) + renamable $r9 = MOV64rm %fixed-stack.20, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.20, align 16) + renamable $r10 = MOV64rm %fixed-stack.21, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.21) + renamable $r11 = MOV64rm %fixed-stack.22, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.22, align 16) + renamable $rsi = MOV64rm %fixed-stack.23, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.23) + renamable $rdi = 
MOV64rm %fixed-stack.24, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.24, align 16) + renamable $zmm0 = AVX512_512_SET0 + renamable $k0 = nofpexcept VCMPPDZrmi renamable $zmm0, killed renamable $rdi, 1, $noreg, 0, $noreg, 14, implicit $mxcsr :: (load (s512)) + renamable $k1 = nofpexcept VCMPPDZrmi renamable $zmm0, killed renamable $rsi, 1, $noreg, 0, $noreg, 14, implicit $mxcsr :: (load (s512)) + renamable $k0 = KUNPCKBWrr killed renamable $k1, killed renamable $k0 + renamable $k1 = nofpexcept VCMPPDZrmi renamable $zmm0, killed renamable $r11, 1, $noreg, 0, $noreg, 14, implicit $mxcsr :: (load (s512)) + renamable $k2 = nofpexcept VCMPPDZrmi renamable $zmm0, killed renamable $r10, 1, $noreg, 0, $noreg, 14, implicit $mxcsr :: (load (s512)) + renamable $k1 = KUNPCKBWrr killed renamable $k2, killed renamable $k1 + renamable $k0 = KUNPCKWDrr killed renamable $k1, killed renamable $k0 + renamable $k1 = nofpexcept VCMPPDZrmi renamable $zmm0, killed renamable $r9, 1, $noreg, 0, $noreg, 14, implicit $mxcsr :: (load (s512)) + renamable $k2 = nofpexcept VCMPPDZrmi renamable $zmm0, killed renamable $r8, 1, $noreg, 0, $noreg, 14, implicit $mxcsr :: (load (s512)) + renamable $k1 = KUNPCKBWrr killed renamable $k2, killed renamable $k1 + renamable $k2 = nofpexcept VCMPPDZrmi renamable $zmm0, killed renamable $rdx, 1, $noreg, 0, $noreg, 14, implicit $mxcsr :: (load (s512)) + renamable $k3 = nofpexcept VCMPPDZrmi killed renamable $zmm0, killed renamable $rcx, 1, $noreg, 0, $noreg, 14, implicit $mxcsr :: (load (s512)) + renamable $k2 = KUNPCKBWrr killed renamable $k3, killed renamable $k2 + renamable $k1 = KUNPCKWDrr killed renamable $k2, killed renamable $k1 + renamable $k0 = KUNPCKDQrr killed renamable $k1, killed renamable $k0 + renamable $rcx = COPY killed renamable $k0 + renamable $k1 = COPY renamable $ecx + renamable $ecx = KILL renamable $ecx, implicit killed $rcx + renamable $ecx = AND32ri8 killed renamable $ecx, 2, implicit-def dead $eflags + renamable $ecx = 
SHR32r1 killed renamable $ecx, implicit-def dead $eflags + renamable $xmm0 = AVX512_FsFLD0SD + renamable $xmm1 = VMOVSDZrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s64) from constant-pool) + renamable $xmm2 = COPY renamable $xmm1 + renamable $xmm2 = VMOVSDZrrk killed renamable $xmm2, killed renamable $k1, undef renamable $xmm0, renamable $xmm0 + renamable $k1 = COPY killed renamable $ecx + renamable $xmm1 = VMOVSDZrrk killed renamable $xmm1, killed renamable $k1, undef renamable $xmm0, killed renamable $xmm0 + VMOVSDZmr killed renamable $rax, 1, $noreg, 0, $noreg, killed renamable $xmm2 :: (store (s64) into %ir.1, align 16) + VMOVSDZmr $noreg, 1, $noreg, 0, $noreg, killed renamable $xmm1 :: (store (s64) into `ptr null`) + RET 0 + +...