Index: llvm/lib/CodeGen/RegAllocGreedy.cpp
===================================================================
--- llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1259,17 +1259,17 @@
 static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
                                        const TargetRegisterInfo &TRI,
-                                       const MachineInstr &MI, Register Reg) {
+                                       const MachineInstr &FirstMI, Register Reg) {
   LaneBitmask Mask;
-  for (const MachineOperand &MO : MI.operands()) {
-    if (!MO.isReg() || MO.getReg() != Reg)
-      continue;
+  SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
+  (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);
+  for (auto [MI, OpIdx] : Ops) {
+    const MachineOperand &MO = MI->getOperand(OpIdx);
+    assert(MO.isReg() && MO.getReg() == Reg);
     unsigned SubReg = MO.getSubReg();
-    if (SubReg == 0 && MO.isUse()) {
-      Mask |= MRI.getMaxLaneMaskForVReg(Reg);
-      continue;
-    }
+    if (SubReg == 0 && MO.isUse() && !MO.isUndef())
+      return MRI.getMaxLaneMaskForVReg(Reg);
 
     LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
     if (MO.isDef()) {
@@ -1287,8 +1287,9 @@
 static bool readsLaneSubset(const MachineRegisterInfo &MRI,
                             const MachineInstr *MI, const LiveInterval &VirtReg,
                             const TargetRegisterInfo *TRI, SlotIndex Use) {
-  // Early check the common case.
-  if (MI->isCopy() &&
+  // Early check the common case. Beware of the semi-formed bundles SplitKit
+  // creates by setting the bundle flag on copies without a matching BUNDLE.
+  if (MI->isCopy() && !MI->isBundled() &&
       MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
     return false;
 
@@ -1303,7 +1304,7 @@
 
   // If the live lanes aren't different from the lanes used by the instruction,
   // this doesn't help.
-  return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
+  return (LiveAtMask & ReadMask) != LiveAtMask;
 }
 
 /// tryInstructionSplit - Split a live range around individual instructions.
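Note (not part of the patch): the sketch below illustrates only the semantics of the new return expression in readsLaneSubset, i.e. that splitting is worthwhile exactly when the instruction reads a strict subset of the lanes that are live. It is a standalone toy, assuming a plain 64-bit integer as a stand-in for LLVM's LaneBitmask, and the helper name mirrors the real function only for readability.

```cpp
// Standalone sketch of the subset test the patch switches readsLaneSubset to.
// A uint64_t stands in for LaneBitmask; nothing here is LLVM API.
#include <cassert>
#include <cstdint>

// True when some live lane is not read by the instruction, i.e. the read
// lanes do not cover all live lanes. Lanes read but not live are irrelevant,
// which the intersection with LiveAtMask handles naturally.
static bool readsLaneSubsetSketch(uint64_t LiveAtMask, uint64_t ReadMask) {
  return (LiveAtMask & ReadMask) != LiveAtMask;
}

int main() {
  // sub0 and sub1 live, instruction reads only sub0: a subrange split can
  // shrink the live range, so the check fires.
  assert(readsLaneSubsetSketch(/*LiveAtMask=*/0b11, /*ReadMask=*/0b01));

  // Instruction reads every live lane: nothing to gain from splitting.
  assert(!readsLaneSubsetSketch(/*LiveAtMask=*/0b11, /*ReadMask=*/0b11));

  // Extra read lanes outside the live set do not change the answer.
  assert(!readsLaneSubsetSketch(/*LiveAtMask=*/0b011, /*ReadMask=*/0b111));
  return 0;
}
```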
Index: llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir +++ llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir @@ -21,15 +21,12 @@ ; CHECK-LABEL: name: split_instruction_subranges ; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: undef %8.sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub0 ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; CHECK-NEXT: S_NOP 0, implicit %9.sub1 - ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %11.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0 - ; CHECK-NEXT: S_NOP 0, implicit %11.sub0 - ; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1 - ; CHECK-NEXT: S_NOP 0, implicit %7.sub1 + ; CHECK-NEXT: undef %6.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit %8.sub0 + ; CHECK-NEXT: S_NOP 0, implicit %6.sub1 ; CHECK-NEXT: S_ENDPGM 0 %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) %2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) @@ -62,23 +59,23 @@ ; CHECK-LABEL: name: split_instruction_subranges_use_is_subreg_def ; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: undef %16.sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub0 + ; CHECK-NEXT: SI_SPILL_V64_SAVE %16, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR2]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: undef %10.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1 + ; CHECK-NEXT: SI_SPILL_V64_SAVE %10, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit-def 
[[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %13.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0 - ; CHECK-NEXT: S_NOP 0, implicit-def %13.sub1 - ; CHECK-NEXT: undef %15.sub0:vreg_64 = COPY %13.sub0 + ; CHECK-NEXT: undef %14.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0 + ; CHECK-NEXT: S_NOP 0, implicit-def %14.sub1 + ; CHECK-NEXT: undef %15.sub0:vreg_64 = COPY %14.sub0 ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE1:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[SI_SPILL_V64_RESTORE1]].sub1 - ; CHECK-NEXT: S_NOP 0, implicit-def %7.sub0 - ; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY %7.sub1 + ; CHECK-NEXT: undef %8.sub1:vreg_64 = COPY [[SI_SPILL_V64_RESTORE1]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit-def %8.sub0 + ; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY %8.sub1 ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; CHECK-NEXT: undef %14.sub0:vreg_64 = COPY %15.sub0 - ; CHECK-NEXT: S_NOP 0, implicit %14.sub0 - ; CHECK-NEXT: undef %8.sub1:vreg_64 = COPY %9.sub1 - ; CHECK-NEXT: S_NOP 0, implicit %8.sub1 + ; CHECK-NEXT: S_NOP 0, implicit %15.sub0 + ; CHECK-NEXT: S_NOP 0, implicit %9.sub1 ; CHECK-NEXT: S_ENDPGM 0 %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) %2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) Index: llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir +++ llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir @@ -30,28 +30,49 @@ ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1) ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, implicit $exec :: (load (s128), addrspace 1) ; CHECK-NEXT: } - ; CHECK-NEXT: undef %47.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec - ; CHECK-NEXT: undef %54.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec - ; CHECK-NEXT: undef %61.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec - ; CHECK-NEXT: undef %68.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec - ; CHECK-NEXT: undef %75.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec - ; CHECK-NEXT: undef %82.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec - ; CHECK-NEXT: undef %89.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec - ; CHECK-NEXT: undef %94.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec - ; CHECK-NEXT: undef %99.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec - ; CHECK-NEXT: undef %104.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec - ; CHECK-NEXT: undef %139.sub2:vreg_128 = V_LSHRREV_B32_e32 16, 
[[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec - ; CHECK-NEXT: undef %185.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec - ; CHECK-NEXT: undef %166.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec - ; CHECK-NEXT: undef %113.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec - ; CHECK-NEXT: undef %118.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec - ; CHECK-NEXT: undef %123.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec + ; CHECK-NEXT: undef %48.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec + ; CHECK-NEXT: undef %47.sub2:vreg_128 = COPY %48.sub2 + ; CHECK-NEXT: undef %56.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec + ; CHECK-NEXT: undef %55.sub2:vreg_128 = COPY %56.sub2 + ; CHECK-NEXT: undef %64.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec + ; CHECK-NEXT: undef %63.sub2:vreg_128 = COPY %64.sub2 + ; CHECK-NEXT: undef %72.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec + ; CHECK-NEXT: undef %71.sub2:vreg_128 = COPY %72.sub2 + ; CHECK-NEXT: undef %80.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec + ; CHECK-NEXT: undef %79.sub2:vreg_128 = COPY %80.sub2 + ; CHECK-NEXT: undef %88.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec + ; CHECK-NEXT: undef %87.sub2:vreg_128 = COPY %88.sub2 + ; CHECK-NEXT: undef %96.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec + ; CHECK-NEXT: undef %95.sub2:vreg_128 = COPY %96.sub2 + ; CHECK-NEXT: undef %102.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec + ; CHECK-NEXT: undef %101.sub2:vreg_128 = COPY %102.sub2 + ; CHECK-NEXT: undef %108.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec + ; CHECK-NEXT: undef %107.sub2:vreg_128 = COPY %108.sub2 + ; CHECK-NEXT: undef %114.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec + ; CHECK-NEXT: undef %113.sub2:vreg_128 = COPY %114.sub2 + ; CHECK-NEXT: undef %154.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec + ; CHECK-NEXT: undef %153.sub2:vreg_128 = COPY %154.sub2 + ; CHECK-NEXT: undef %200.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec + ; CHECK-NEXT: undef %199.sub2:vreg_128 = COPY %200.sub2 + ; CHECK-NEXT: undef %188.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec + ; CHECK-NEXT: undef %187.sub2:vreg_128 = COPY %188.sub2 + ; CHECK-NEXT: undef %124.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec + ; CHECK-NEXT: undef %123.sub2:vreg_128 = COPY %124.sub2 + ; CHECK-NEXT: undef %130.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec + ; CHECK-NEXT: undef %129.sub2:vreg_128 = COPY %130.sub2 + ; CHECK-NEXT: undef %136.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec + ; CHECK-NEXT: undef %135.sub2:vreg_128 = COPY %136.sub2 ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, implicit 
$exec :: (load (s128), align 64, addrspace 1) - ; CHECK-NEXT: undef %128.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec - ; CHECK-NEXT: undef %133.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec - ; CHECK-NEXT: undef %144.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK-NEXT: undef %149.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec + ; CHECK-NEXT: undef %142.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec + ; CHECK-NEXT: undef %141.sub2:vreg_128 = COPY %142.sub2 + ; CHECK-NEXT: undef %148.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec + ; CHECK-NEXT: undef %147.sub2:vreg_128 = COPY %148.sub2 + ; CHECK-NEXT: undef %162.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec + ; CHECK-NEXT: undef %161.sub2:vreg_128 = COPY %162.sub2 + ; CHECK-NEXT: undef %168.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec + ; CHECK-NEXT: undef %167.sub2:vreg_128 = COPY %168.sub2 ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, implicit $exec :: (load (s128), addrspace 1) + ; CHECK-NEXT: undef %195.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1) ; CHECK-NEXT: undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec ; CHECK-NEXT: undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec @@ -62,141 +83,137 @@ ; CHECK-NEXT: undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec ; CHECK-NEXT: undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; CHECK-NEXT: undef %48.sub2:vreg_128 = COPY %47.sub2 - ; CHECK-NEXT: %48.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec - ; CHECK-NEXT: undef %50.sub0:vreg_128 = COPY %48.sub0 { - ; CHECK-NEXT: internal %50.sub2:vreg_128 = COPY %48.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %50, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %55.sub2:vreg_128 = COPY %54.sub2 - ; CHECK-NEXT: %55.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec - ; CHECK-NEXT: undef %57.sub0:vreg_128 = COPY %55.sub0 { - ; CHECK-NEXT: internal %57.sub2:vreg_128 = COPY %55.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %57, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %62.sub2:vreg_128 = COPY %61.sub2 - ; CHECK-NEXT: %62.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec - ; CHECK-NEXT: undef %64.sub0:vreg_128 = COPY %62.sub0 { - ; CHECK-NEXT: internal %64.sub2:vreg_128 = COPY %62.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %64, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: undef 
%69.sub2:vreg_128 = COPY %68.sub2 - ; CHECK-NEXT: %69.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec - ; CHECK-NEXT: undef %71.sub0:vreg_128 = COPY %69.sub0 { - ; CHECK-NEXT: internal %71.sub2:vreg_128 = COPY %69.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %71, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: undef %76.sub2:vreg_128 = COPY %75.sub2 - ; CHECK-NEXT: %76.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec - ; CHECK-NEXT: undef %78.sub0:vreg_128 = COPY %76.sub0 { - ; CHECK-NEXT: internal %78.sub2:vreg_128 = COPY %76.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %78, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: undef %83.sub2:vreg_128 = COPY %82.sub2 - ; CHECK-NEXT: %83.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec - ; CHECK-NEXT: undef %85.sub0:vreg_128 = COPY %83.sub0 { - ; CHECK-NEXT: internal %85.sub2:vreg_128 = COPY %83.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %85, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: undef %90.sub2:vreg_128 = COPY %89.sub2 - ; CHECK-NEXT: %90.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec - ; CHECK-NEXT: undef %140.sub0:vreg_128 = COPY %90.sub0 { - ; CHECK-NEXT: internal %140.sub2:vreg_128 = COPY %90.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %140, %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef %95.sub2:vreg_128 = COPY %94.sub2 - ; CHECK-NEXT: %95.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec - ; CHECK-NEXT: undef %107.sub0:vreg_128 = COPY %95.sub0 { - ; CHECK-NEXT: internal %107.sub2:vreg_128 = COPY %95.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %107, %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: undef %100.sub2:vreg_128 = COPY %99.sub2 - ; CHECK-NEXT: %100.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec - ; CHECK-NEXT: undef %101.sub0:vreg_128 = COPY %100.sub0 { - ; CHECK-NEXT: internal %101.sub2:vreg_128 = COPY %100.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef %105.sub2:vreg_128 = COPY %104.sub2 - ; CHECK-NEXT: %105.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec - ; CHECK-NEXT: undef %106.sub0:vreg_128 = COPY %105.sub0 { - ; CHECK-NEXT: internal %106.sub2:vreg_128 = COPY %105.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %139.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec - ; CHECK-NEXT: undef %158.sub0:vreg_128 = COPY %139.sub0 { - ; CHECK-NEXT: internal %158.sub2:vreg_128 = COPY %139.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %158, %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef %186.sub2:vreg_128 = COPY %185.sub2 - ; CHECK-NEXT: %186.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec - ; CHECK-NEXT: undef %188.sub0:vreg_128 = COPY %186.sub0 { - ; CHECK-NEXT: internal %188.sub2:vreg_128 = COPY %186.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: 
SI_SPILL_V128_SAVE %188, %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: undef %167.sub2:vreg_128 = COPY %166.sub2 - ; CHECK-NEXT: %167.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec - ; CHECK-NEXT: undef %169.sub0:vreg_128 = COPY %167.sub0 { - ; CHECK-NEXT: internal %169.sub2:vreg_128 = COPY %167.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %169, %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef %114.sub2:vreg_128 = COPY %113.sub2 - ; CHECK-NEXT: %114.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec - ; CHECK-NEXT: undef %115.sub0:vreg_128 = COPY %114.sub0 { - ; CHECK-NEXT: internal %115.sub2:vreg_128 = COPY %114.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef %119.sub2:vreg_128 = COPY %118.sub2 - ; CHECK-NEXT: %119.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec - ; CHECK-NEXT: undef %181.sub0:vreg_128 = COPY %119.sub0 { - ; CHECK-NEXT: internal %181.sub2:vreg_128 = COPY %119.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE %181, %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef %124.sub2:vreg_128 = COPY %123.sub2 - ; CHECK-NEXT: %124.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec - ; CHECK-NEXT: undef %125.sub0:vreg_128 = COPY %124.sub0 { - ; CHECK-NEXT: internal %125.sub2:vreg_128 = COPY %124.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef %129.sub2:vreg_128 = COPY %128.sub2 - ; CHECK-NEXT: %129.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec - ; CHECK-NEXT: undef %130.sub0:vreg_128 = COPY %129.sub0 { - ; CHECK-NEXT: internal %130.sub2:vreg_128 = COPY %129.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef %134.sub2:vreg_128 = COPY %133.sub2 - ; CHECK-NEXT: %134.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec - ; CHECK-NEXT: undef %135.sub0:vreg_128 = COPY %134.sub0 { - ; CHECK-NEXT: internal %135.sub2:vreg_128 = COPY %134.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef %145.sub2:vreg_128 = COPY %144.sub2 - ; CHECK-NEXT: %145.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK-NEXT: undef %146.sub0:vreg_128 = COPY %145.sub0 { - ; CHECK-NEXT: internal %146.sub2:vreg_128 = COPY %145.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef %150.sub2:vreg_128 = COPY %149.sub2 - ; CHECK-NEXT: %150.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec - ; CHECK-NEXT: undef %151.sub0:vreg_128 = COPY %150.sub0 { - ; CHECK-NEXT: internal %151.sub2:vreg_128 = COPY %150.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef %157.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef %155.sub2:vreg_128 = COPY %157.sub2 - ; CHECK-NEXT: %155.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef %156.sub0:vreg_128 = COPY %155.sub0 { - ; CHECK-NEXT: internal %156.sub2:vreg_128 = COPY %155.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef %165.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK-NEXT: undef %163.sub2:vreg_128 = COPY %165.sub2 - ; CHECK-NEXT: 
%163.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK-NEXT: undef %49.sub2:vreg_128 = COPY %47.sub2 + ; CHECK-NEXT: %49.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec + ; CHECK-NEXT: undef %51.sub0:vreg_128 = COPY %49.sub0 { + ; CHECK-NEXT: internal %51.sub2:vreg_128 = COPY %49.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %51, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: undef %57.sub2:vreg_128 = COPY %55.sub2 + ; CHECK-NEXT: %57.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec + ; CHECK-NEXT: undef %59.sub0:vreg_128 = COPY %57.sub0 { + ; CHECK-NEXT: internal %59.sub2:vreg_128 = COPY %57.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %59, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: undef %65.sub2:vreg_128 = COPY %63.sub2 + ; CHECK-NEXT: %65.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec + ; CHECK-NEXT: undef %67.sub0:vreg_128 = COPY %65.sub0 { + ; CHECK-NEXT: internal %67.sub2:vreg_128 = COPY %65.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %67, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: undef %73.sub2:vreg_128 = COPY %71.sub2 + ; CHECK-NEXT: %73.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec + ; CHECK-NEXT: undef %75.sub0:vreg_128 = COPY %73.sub0 { + ; CHECK-NEXT: internal %75.sub2:vreg_128 = COPY %73.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %75, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) + ; CHECK-NEXT: undef %81.sub2:vreg_128 = COPY %79.sub2 + ; CHECK-NEXT: %81.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec + ; CHECK-NEXT: undef %83.sub0:vreg_128 = COPY %81.sub0 { + ; CHECK-NEXT: internal %83.sub2:vreg_128 = COPY %81.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %83, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) + ; CHECK-NEXT: undef %89.sub2:vreg_128 = COPY %87.sub2 + ; CHECK-NEXT: %89.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec + ; CHECK-NEXT: undef %91.sub0:vreg_128 = COPY %89.sub0 { + ; CHECK-NEXT: internal %91.sub2:vreg_128 = COPY %89.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %91, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) + ; CHECK-NEXT: undef %97.sub2:vreg_128 = COPY %95.sub2 + ; CHECK-NEXT: %97.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec + ; CHECK-NEXT: undef %157.sub0:vreg_128 = COPY %97.sub0 { + ; CHECK-NEXT: internal %157.sub2:vreg_128 = COPY %97.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %157, %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5) + ; CHECK-NEXT: undef %103.sub2:vreg_128 = COPY %101.sub2 + ; CHECK-NEXT: %103.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec + ; CHECK-NEXT: undef %117.sub0:vreg_128 = COPY %103.sub0 { + ; CHECK-NEXT: internal %117.sub2:vreg_128 = COPY %103.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %117, 
%stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) + ; CHECK-NEXT: undef %109.sub2:vreg_128 = COPY %107.sub2 + ; CHECK-NEXT: %109.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec + ; CHECK-NEXT: undef %110.sub0:vreg_128 = COPY %109.sub0 { + ; CHECK-NEXT: internal %110.sub2:vreg_128 = COPY %109.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %115.sub2:vreg_128 = COPY %113.sub2 + ; CHECK-NEXT: %115.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec + ; CHECK-NEXT: undef %116.sub0:vreg_128 = COPY %115.sub0 { + ; CHECK-NEXT: internal %116.sub2:vreg_128 = COPY %115.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %155.sub2:vreg_128 = COPY %153.sub2 + ; CHECK-NEXT: %155.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec + ; CHECK-NEXT: undef %191.sub0:vreg_128 = COPY %155.sub0 { + ; CHECK-NEXT: internal %191.sub2:vreg_128 = COPY %155.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %191, %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) + ; CHECK-NEXT: undef %201.sub2:vreg_128 = COPY %199.sub2 + ; CHECK-NEXT: %201.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec + ; CHECK-NEXT: undef %203.sub0:vreg_128 = COPY %201.sub0 { + ; CHECK-NEXT: internal %203.sub2:vreg_128 = COPY %201.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %203, %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) + ; CHECK-NEXT: undef %189.sub2:vreg_128 = COPY %187.sub2 + ; CHECK-NEXT: %189.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec + ; CHECK-NEXT: undef %190.sub0:vreg_128 = COPY %189.sub0 { + ; CHECK-NEXT: internal %190.sub2:vreg_128 = COPY %189.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %125.sub2:vreg_128 = COPY %123.sub2 + ; CHECK-NEXT: %125.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec + ; CHECK-NEXT: undef %126.sub0:vreg_128 = COPY %125.sub0 { + ; CHECK-NEXT: internal %126.sub2:vreg_128 = COPY %125.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %131.sub2:vreg_128 = COPY %129.sub2 + ; CHECK-NEXT: %131.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec + ; CHECK-NEXT: undef %177.sub0:vreg_128 = COPY %131.sub0 { + ; CHECK-NEXT: internal %177.sub2:vreg_128 = COPY %131.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %177, %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) + ; CHECK-NEXT: undef %137.sub2:vreg_128 = COPY %135.sub2 + ; CHECK-NEXT: %137.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec + ; CHECK-NEXT: undef %138.sub0:vreg_128 = COPY %137.sub0 { + ; CHECK-NEXT: internal %138.sub2:vreg_128 = COPY %137.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %143.sub2:vreg_128 = COPY %141.sub2 + ; CHECK-NEXT: %143.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec + ; CHECK-NEXT: undef %144.sub0:vreg_128 = COPY %143.sub0 { + ; CHECK-NEXT: internal %144.sub2:vreg_128 = COPY %143.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %149.sub2:vreg_128 = COPY %147.sub2 + ; CHECK-NEXT: %149.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec + ; CHECK-NEXT: undef 
%150.sub0:vreg_128 = COPY %149.sub0 { + ; CHECK-NEXT: internal %150.sub2:vreg_128 = COPY %149.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %163.sub2:vreg_128 = COPY %161.sub2 + ; CHECK-NEXT: %163.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec ; CHECK-NEXT: undef %164.sub0:vreg_128 = COPY %163.sub0 { ; CHECK-NEXT: internal %164.sub2:vreg_128 = COPY %163.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %176.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: undef %174.sub2:vreg_128 = COPY %176.sub2 - ; CHECK-NEXT: %174.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: undef %175.sub0:vreg_128 = COPY %174.sub0 { - ; CHECK-NEXT: internal %175.sub2:vreg_128 = COPY %174.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef %195.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec - ; CHECK-NEXT: undef %180.sub2:vreg_128 = COPY %195.sub2 - ; CHECK-NEXT: %180.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec - ; CHECK-NEXT: undef %194.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec - ; CHECK-NEXT: undef %193.sub2:vreg_128 = COPY %194.sub2 - ; CHECK-NEXT: %193.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec + ; CHECK-NEXT: undef %169.sub2:vreg_128 = COPY %167.sub2 + ; CHECK-NEXT: %169.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec + ; CHECK-NEXT: undef %170.sub0:vreg_128 = COPY %169.sub0 { + ; CHECK-NEXT: internal %170.sub2:vreg_128 = COPY %169.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %179.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec + ; CHECK-NEXT: undef %175.sub2:vreg_128 = COPY %179.sub2 + ; CHECK-NEXT: %175.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec + ; CHECK-NEXT: undef %176.sub0:vreg_128 = COPY %175.sub0 { + ; CHECK-NEXT: internal %176.sub2:vreg_128 = COPY %175.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %186.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK-NEXT: undef %184.sub2:vreg_128 = COPY %186.sub2 + ; CHECK-NEXT: %184.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK-NEXT: undef %185.sub0:vreg_128 = COPY %184.sub0 { + ; CHECK-NEXT: internal %185.sub2:vreg_128 = COPY %184.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %196.sub2:vreg_128 = COPY %195.sub2 + ; CHECK-NEXT: %196.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec + ; CHECK-NEXT: undef %215.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec + ; CHECK-NEXT: undef %208.sub2:vreg_128 = COPY %215.sub2 + ; CHECK-NEXT: %208.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec + ; CHECK-NEXT: undef %218.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec + ; CHECK-NEXT: undef %212.sub2:vreg_128 = COPY %218.sub2 + ; CHECK-NEXT: %212.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec ; CHECK-NEXT: %36.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec ; CHECK-NEXT: 
%37.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec ; CHECK-NEXT: %38.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec @@ -225,164 +242,163 @@ ; CHECK-NEXT: %36.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %36.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: undef %191.sub0:vreg_128 = COPY %193.sub0 { - ; CHECK-NEXT: internal %191.sub2:vreg_128 = COPY %193.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %191.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %191.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %191, %2, 0, 400, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %178.sub0:vreg_128 = COPY %180.sub0 { - ; CHECK-NEXT: internal %178.sub2:vreg_128 = COPY %180.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %178.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %178.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %178, %2, 0, 352, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef %172.sub0:vreg_128 = COPY %175.sub0 { - ; CHECK-NEXT: internal %172.sub2:vreg_128 = COPY %175.sub2 + ; CHECK-NEXT: undef %210.sub0:vreg_128 = COPY %212.sub0 { + ; CHECK-NEXT: internal %210.sub2:vreg_128 = COPY %212.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %210.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %210.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %210, %2, 0, 400, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %206.sub0:vreg_128 = COPY %208.sub0 { + ; CHECK-NEXT: internal %206.sub2:vreg_128 = COPY %208.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %206.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %206.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %206, %2, 0, 352, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: undef %194.sub0:vreg_128 = COPY %196.sub0 { + ; CHECK-NEXT: internal %194.sub2:vreg_128 = COPY %196.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %194.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %194.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %194, %2, 0, 368, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %181.sub0:vreg_128 = COPY %185.sub0 { + ; CHECK-NEXT: internal %181.sub2:vreg_128 = COPY %185.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %181.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %181.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %181, %2, 0, 320, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) + ; CHECK-NEXT: undef %172.sub0:vreg_128 = COPY %176.sub0 { + ; CHECK-NEXT: internal %172.sub2:vreg_128 = COPY %176.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %172.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %172.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %172, %2, 0, 368, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %161.sub0:vreg_128 = COPY %164.sub0 { - ; CHECK-NEXT: internal %161.sub2:vreg_128 = COPY %164.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %161.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %161.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %161, %2, 0, 320, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: undef %153.sub0:vreg_128 = COPY %156.sub0 { - ; CHECK-NEXT: internal 
%153.sub2:vreg_128 = COPY %156.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %153.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %153.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 336, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %148.sub0:vreg_128 = COPY %151.sub0 { - ; CHECK-NEXT: internal %148.sub2:vreg_128 = COPY %151.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %148.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %148.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 288, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef %143.sub0:vreg_128 = COPY %146.sub0 { - ; CHECK-NEXT: internal %143.sub2:vreg_128 = COPY %146.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %143.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %143.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 304, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %132.sub0:vreg_128 = COPY %135.sub0 { - ; CHECK-NEXT: internal %132.sub2:vreg_128 = COPY %135.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %132.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %132.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %132, %2, 0, 256, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) - ; CHECK-NEXT: undef %127.sub0:vreg_128 = COPY %130.sub0 { - ; CHECK-NEXT: internal %127.sub2:vreg_128 = COPY %130.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %127.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %127.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %127, %2, 0, 272, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %122.sub0:vreg_128 = COPY %125.sub0 { - ; CHECK-NEXT: internal %122.sub2:vreg_128 = COPY %125.sub2 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %172, %2, 0, 336, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %166.sub0:vreg_128 = COPY %170.sub0 { + ; CHECK-NEXT: internal %166.sub2:vreg_128 = COPY %170.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %166.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %166.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %166, %2, 0, 288, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: undef %160.sub0:vreg_128 = COPY %164.sub0 { + ; CHECK-NEXT: internal %160.sub2:vreg_128 = COPY %164.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %160.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %160.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %160, %2, 0, 304, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %146.sub0:vreg_128 = COPY %150.sub0 { + ; CHECK-NEXT: internal %146.sub2:vreg_128 = COPY %150.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %146.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %146.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %146, %2, 0, 256, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) + ; CHECK-NEXT: undef %140.sub0:vreg_128 = COPY %144.sub0 { + ; CHECK-NEXT: internal %140.sub2:vreg_128 = COPY %144.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %140.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %140.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %140, %2, 0, 272, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %134.sub0:vreg_128 = COPY %138.sub0 { + ; CHECK-NEXT: internal %134.sub2:vreg_128 = COPY %138.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: 
%134.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %134.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %134, %2, 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) + ; CHECK-NEXT: undef %128.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 { + ; CHECK-NEXT: internal %128.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %128.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %128.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %128, %2, 0, 240, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %122.sub0:vreg_128 = COPY %126.sub0 { + ; CHECK-NEXT: internal %122.sub2:vreg_128 = COPY %126.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %122.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %122.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %122, %2, 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef %117.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 { - ; CHECK-NEXT: internal %117.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %117.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %117.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %117, %2, 0, 240, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %112.sub0:vreg_128 = COPY %115.sub0 { - ; CHECK-NEXT: internal %112.sub2:vreg_128 = COPY %115.sub2 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %122, %2, 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) + ; CHECK-NEXT: undef %120.sub0:vreg_128 = COPY %190.sub0 { + ; CHECK-NEXT: internal %120.sub2:vreg_128 = COPY %190.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %120.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %120.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %120, %2, 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) + ; CHECK-NEXT: undef %198.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 { + ; CHECK-NEXT: internal %198.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %198.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %198.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %198, %2, 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) + ; CHECK-NEXT: undef %152.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 { + ; CHECK-NEXT: internal %152.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %152.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %152.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %152, %2, 0, 176, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %112.sub0:vreg_128 = COPY %116.sub0 { + ; CHECK-NEXT: internal 
%112.sub2:vreg_128 = COPY %116.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %112.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %112.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef %110.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 { - ; CHECK-NEXT: internal %110.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %110.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %110.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %110, %2, 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: undef %184.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 { - ; CHECK-NEXT: internal %184.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %184.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %184.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %184, %2, 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef %137.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 { - ; CHECK-NEXT: internal %137.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %137.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %137.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %137, %2, 0, 176, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %103.sub0:vreg_128 = COPY %106.sub0 { - ; CHECK-NEXT: internal %103.sub2:vreg_128 = COPY %106.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %103.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %103.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %103, %2, 0, 128, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: undef %98.sub0:vreg_128 = COPY %101.sub0 { - ; CHECK-NEXT: internal %98.sub2:vreg_128 = COPY %101.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %98.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %98.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %98, %2, 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: undef %93.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 { - ; CHECK-NEXT: internal %93.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %93.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %93.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %93, %2, 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef %88.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 { - 
; CHECK-NEXT: internal %88.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %88.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %88.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %88, %2, 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: undef %81.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 { - ; CHECK-NEXT: internal %81.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %81.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %81.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %81, %2, 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: undef %74.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 { - ; CHECK-NEXT: internal %74.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %74.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %74.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %74, %2, 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: undef %67.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 { - ; CHECK-NEXT: internal %67.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %67.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %67.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %67, %2, 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: undef %60.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 { - ; CHECK-NEXT: internal %60.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %60.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %60.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %60, %2, 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %53.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 { - ; CHECK-NEXT: internal %53.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %53.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %53.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %53, %2, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 { - ; CHECK-NEXT: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2 + ; CHECK-NEXT: 
BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 128, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ ; CHECK-NEXT: undef %106.sub0:vreg_128 = COPY %110.sub0 {
+ ; CHECK-NEXT: internal %106.sub2:vreg_128 = COPY %110.sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %106.sub1:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: %106.sub3:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %106, %2, 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5)
+ ; CHECK-NEXT: undef %100.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 {
+ ; CHECK-NEXT: internal %100.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %100.sub1:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: %100.sub3:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %100, %2, 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5)
+ ; CHECK-NEXT: undef %94.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 {
+ ; CHECK-NEXT: internal %94.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %94.sub1:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: %94.sub3:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5)
+ ; CHECK-NEXT: undef %86.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 {
+ ; CHECK-NEXT: internal %86.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %86.sub1:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: %86.sub3:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %86, %2, 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5)
+ ; CHECK-NEXT: undef %78.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 {
+ ; CHECK-NEXT: internal %78.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %78.sub1:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: %78.sub3:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %78, %2, 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5)
+ ; CHECK-NEXT: undef %70.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 {
+ ; CHECK-NEXT: internal %70.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %70.sub1:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: %70.sub3:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %70, %2, 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: undef %62.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 {
+ ; CHECK-NEXT: internal %62.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %62.sub1:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: %62.sub3:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %62, %2, 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: undef %54.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 {
+ ; CHECK-NEXT: internal %54.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %54.sub1:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: %54.sub3:vreg_128 = COPY %43.sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %54, %2, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 {
+ ; CHECK-NEXT: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2
 ; CHECK-NEXT: }
 ; CHECK-NEXT: %46.sub1:vreg_128 = COPY %43.sub1
 ; CHECK-NEXT: %46.sub3:vreg_128 = COPY %43.sub1
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -1556,9 +1556,9 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: slli a1, a1, 5
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV32-NEXT: addi a1, a0, 128
 ; RV32-NEXT: li a2, 32
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
@@ -1568,21 +1568,42 @@
 ; RV32-NEXT: vslidedown.vi v24, v8, 16
 ; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vslidedown.vi v0, v16, 16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmv4r.v v8, v0
-; RV32-NEXT: vwadd.vv v0, v24, v8
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 3
 ; RV32-NEXT: add a0, sp, a0
 ; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vwadd.vv v0, v24, v8
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vwadd.vv v0, v8, v16
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: slli a0, a0, 4
 ; RV32-NEXT: add a0, sp, a0
 ; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -1594,7 +1615,7 @@
 ; RV32-NEXT: vsrl.vx v8, v8, a2
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 4
+; RV32-NEXT: slli a2, a2, 5
 ; RV32-NEXT: add sp, sp, a2
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -1604,9 +1625,9 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: slli a1, a1, 5
 ; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV64-NEXT: addi a1, a0, 128
 ; RV64-NEXT: li a2, 32
 ; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
@@ -1616,21 +1637,42 @@
 ; RV64-NEXT: vslidedown.vi v24, v8, 16
 ; RV64-NEXT: addi a0, sp, 16
 ; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-NEXT: vslidedown.vi v0, v16, 16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: vslidedown.vi v24, v16, 16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vmv4r.v v8, v0
-; RV64-NEXT: vwadd.vv v0, v24, v8
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 3
 ; RV64-NEXT: add a0, sp, a0
 ; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: vwadd.vv v0, v24, v8
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
 ; RV64-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
 ; RV64-NEXT: addi a0, sp, 16
 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: vwadd.vv v0, v8, v16
 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: slli a0, a0, 4
 ; RV64-NEXT: add a0, sp, a0
 ; RV64-NEXT: addi a0, a0, 16
 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -1639,7 +1681,7 @@
 ; RV64-NEXT: vredsum.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: slli a1, a1, 5
 ; RV64-NEXT: add sp, sp, a1
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -1655,9 +1697,9 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: slli a1, a1, 5
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV32-NEXT: addi a1, a0, 128
 ; RV32-NEXT: li a2, 32
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
@@ -1667,21 +1709,42 @@
 ; RV32-NEXT: vslidedown.vi v24, v8, 16
 ; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vslidedown.vi v0, v16, 16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmv4r.v v8, v0
-; RV32-NEXT: vwaddu.vv v0, v24, v8
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 3
 ; RV32-NEXT: add a0, sp, a0
 ; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vwaddu.vv v0, v24, v8
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vwaddu.vv v0, v8, v16
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: slli a0, a0, 4
 ; RV32-NEXT: add a0, sp, a0
 ; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -1693,7 +1756,7 @@
 ; RV32-NEXT: vsrl.vx v8, v8, a2
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 4
+; RV32-NEXT: slli a2, a2, 5
 ; RV32-NEXT: add sp, sp, a2
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -1703,9 +1766,9 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: slli a1, a1, 5
 ; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV64-NEXT: addi a1, a0, 128
 ; RV64-NEXT: li a2, 32
 ; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
@@ -1715,21 +1778,42 @@
 ; RV64-NEXT: vslidedown.vi v24, v8, 16
 ; RV64-NEXT: addi a0, sp, 16
 ; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-NEXT: vslidedown.vi v0, v16, 16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: vslidedown.vi v24, v16, 16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vmv4r.v v8, v0
-; RV64-NEXT: vwaddu.vv v0, v24, v8
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 3
 ; RV64-NEXT: add a0, sp, a0
 ; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: vwaddu.vv v0, v24, v8
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
 ; RV64-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
 ; RV64-NEXT: addi a0, sp, 16
 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: vwaddu.vv v0, v8, v16
 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: slli a0, a0, 4
 ; RV64-NEXT: add a0, sp, a0
 ; RV64-NEXT: addi a0, a0, 16
 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -1738,7 +1822,7 @@
 ; RV64-NEXT: vredsum.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: slli a1, a1, 5
 ; RV64-NEXT: add sp, sp, a1
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
Index: llvm/test/CodeGen/Thumb2/mve-vst3.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -195,46 +195,45 @@
 ; CHECK-NEXT: vmov.f32 s19, s10
 ; CHECK-NEXT: vmov.f32 s13, s8
 ; CHECK-NEXT: vldrw.u32 q2, [sp, #80] @ 16-byte Reload
-; CHECK-NEXT: vmov.f32 s20, s12
+; CHECK-NEXT: vmov.f32 s18, s6
 ; CHECK-NEXT: vmov.f64 d14, d4
+; CHECK-NEXT: vmov.f32 s15, s5
+; CHECK-NEXT: vmov.f32 s5, s27
+; CHECK-NEXT: vmov.f32 s8, s24
+; CHECK-NEXT: vmov.f32 s6, s3
+; CHECK-NEXT: vmov.f32 s9, s0
+; CHECK-NEXT: vmov.f32 s24, s1
+; CHECK-NEXT: vmov.f32 s27, s2
+; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload
 ; CHECK-NEXT: vmov r0, r3, d14
 ; CHECK-NEXT: vldrw.u32 q7, [sp, #48] @ 16-byte Reload
 ; CHECK-NEXT: vmov.f32 s7, s11
-; CHECK-NEXT: vmov.f32 s8, s24
-; CHECK-NEXT: vmov.f32 s9, s0
+; CHECK-NEXT: vstrw.32 q0, [r1, #128]
 ; CHECK-NEXT: vmov.f32 s11, s25
+; CHECK-NEXT: vldrw.u32 q0, [sp, #96] @ 16-byte Reload
+; CHECK-NEXT: vmov.f32 s20, s12
+; CHECK-NEXT: vmov.32 q6[1], r3
 ; CHECK-NEXT: vmov.f32 s12, s4
+; CHECK-NEXT: vstrw.32 q6, [r1, #64]
 ; CHECK-NEXT: vmov.f32 s4, s10
 ; CHECK-NEXT: vmov.32 q2[2], r0
 ; CHECK-NEXT: vmov r0, lr, d14
 ; CHECK-NEXT: vldrw.u32 q7, [sp, #144] @ 16-byte Reload
-; CHECK-NEXT: vmov.f32 s18, s6
-; CHECK-NEXT: vmov.32 q5[2], r0
-; CHECK-NEXT: vmov.f64 d12, d14
-; CHECK-NEXT: vstrw.32 q2, [r1, #48]
-; CHECK-NEXT: vstrw.32 q5, [r1, #144]
-; CHECK-NEXT: vmov.f32 s15, s5
-; CHECK-NEXT: vmov.f32 s5, s27
-; CHECK-NEXT: vmov.f32 s6, s3
-; CHECK-NEXT: vmov.f32 s24, s1
-; CHECK-NEXT: vstrw.32 q1, [r1, #80]
-; CHECK-NEXT: vmov.f32 s27, s2
-; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload
-; CHECK-NEXT: vmov r2, r4, d14
-; CHECK-NEXT: vmov.32 q6[1], r3
-; CHECK-NEXT: vstrw.32 q0, [r1, #128]
-; CHECK-NEXT: vldrw.u32 q0, [sp, #96] @ 16-byte Reload
-; CHECK-NEXT: vmov.32 q3[2], r2
-; CHECK-NEXT: vmov.32 q4[1], r4
 ; CHECK-NEXT: vmov.32 q0[1], lr
-; CHECK-NEXT: vstrw.32 q6, [r1, #64]
+; CHECK-NEXT: vmov.32 q5[2], r0
 ; CHECK-NEXT: vstrw.32 q0, [r1, #160]
 ; CHECK-NEXT: vldrw.u32 q0, [sp, #112] @ 16-byte Reload
-; CHECK-NEXT: vstrw.32 q3, [r1, #96]
-; CHECK-NEXT: vstrw.32 q4, [r1, #112]
+; CHECK-NEXT: vmov r2, r4, d14
+; CHECK-NEXT: vstrw.32 q2, [r1, #48]
 ; CHECK-NEXT: vstrw.32 q0, [r1, #176]
 ; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload
+; CHECK-NEXT: vmov.32 q3[2], r2
+; CHECK-NEXT: vmov.32 q4[1], r4
 ; CHECK-NEXT: vmov.32 q0[2], r12
+; CHECK-NEXT: vstrw.32 q1, [r1, #80]
+; CHECK-NEXT: vstrw.32 q3, [r1, #96]
+; CHECK-NEXT: vstrw.32 q4, [r1, #112]
+; CHECK-NEXT: vstrw.32 q5, [r1, #144]
 ; CHECK-NEXT: vstrw.32 q0, [r1]
 ; CHECK-NEXT: add sp, #160
 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}