diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -88,6 +88,10 @@ GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_ds_64bit_4byte_aligned : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_mubuf_addr64 : GIComplexOperandMatcher, GIComplexPatternEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -178,6 +178,8 @@ InstructionSelector::ComplexRendererFns selectDS1Addr1Offset(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectDS64Bit4ByteAligned(MachineOperand &Root) const; std::pair getPtrBaseWithConstantOffset(Register Root, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2338,6 +2338,61 @@ }}; } +/// Select the (base, offset0, offset1) operands of a 64-bit DS access that is +/// only 4-byte aligned, so it is performed as two dword accesses. offset0 and +/// offset1 are 8-bit immediates counted in dwords. A constant byte offset on +/// \p Root is folded only when it is a positive multiple of 4 and offset1 is +/// legal; otherwise \p Root is the base and the offsets are 0 and 1. +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const { + const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg()); + if (!RootDef) { + // No defining instruction to look through: address dwords 0 and 1 of Root. + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(1); } + }}; + } + + int64_t ConstAddr = 0; + Register PtrBase; + int64_t Offset; + + std::tie(PtrBase, Offset) = + getPtrBaseWithConstantOffset(Root.getReg(), *MRI); + + if (Offset) { + int64_t DWordOffset0 = Offset / 4; + int64_t DWordOffset1 = DWordOffset0 + 1; + // The offsets are unsigned dword counts: a negative or non-multiple-of-4 + // byte offset cannot be encoded, and Offset / 4 would silently truncate it + // and address the wrong location. Such offsets stay in the base register. + if (Offset > 0 && Offset % 4 == 0 && + isDSOffsetLegal(PtrBase, DWordOffset1, 8)) { + // (add n0, c0) + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(DWordOffset0); }, 
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(DWordOffset1); } + }}; + } + } else if (RootDef->getOpcode() == AMDGPU::G_SUB) { + // TODO + + } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) { + // TODO + + } + + // Nothing was folded: use Root itself as the base with dword offsets 0 and 1. + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(1); } + }}; +} + /// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right hand side, return /// the base value with the constant offset. There may be intervening copies /// between \p Root and the identified constant. Returns \p Root, 0 if this does diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -737,31 +737,35 @@ def : DSWritePat ; } - -class DS64Bit4ByteAlignedReadPat : GCNPat < - (v2i32 (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))), +class DS64Bit4ByteAlignedReadPat : GCNPat < + (vt:$value (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))), (inst $ptr, $offset0, $offset1, (i1 0)) >; -class DS64Bit4ByteAlignedWritePat : GCNPat< - (frag v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)), - (inst $ptr, (i32 (EXTRACT_SUBREG $value, sub0)), - (i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1, +class DS64Bit4ByteAlignedWritePat : GCNPat< + (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)), + (inst $ptr, (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)), + (i32 (EXTRACT_SUBREG VReg_64:$value, sub1)), $offset0, $offset1, (i1 0)) >; -// v2i32 loads are split into i32 loads on SI during lowering, due to a bug -// related to bounds checking. 
-let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in { -def : DS64Bit4ByteAlignedReadPat; -def : DS64Bit4ByteAlignedWritePat; -} +multiclass DS64Bit4ByteAlignedPat_mc { + let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in { + def : DS64Bit4ByteAlignedReadPat; + def : DS64Bit4ByteAlignedWritePat; + } -let OtherPredicates = [NotLDSRequiresM0Init] in { -def : DS64Bit4ByteAlignedReadPat; -def : DS64Bit4ByteAlignedWritePat; + let OtherPredicates = [NotLDSRequiresM0Init] in { + def : DS64Bit4ByteAlignedReadPat; + def : DS64Bit4ByteAlignedWritePat; + } } +// v2i32 loads are split into i32 loads on SI during lowering, due to a bug +// related to bounds checking. +foreach vt = VReg_64.RegTypes in { +defm : DS64Bit4ByteAlignedPat_mc; +} let AddedComplexity = 100 in { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -28,12 +28,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] - ; GFX7-DS128-LABEL: name: load_local_s32_from_4 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) - ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -68,12 +62,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U16_]] - ; GFX7-DS128-LABEL: name: load_local_s32_from_2 - ; 
GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3) - ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U16_]] ; GFX9-LABEL: name: load_local_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -112,12 +100,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] - ; GFX7-DS128-LABEL: name: load_local_s32_from_1 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) - ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -152,12 +134,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] - ; GFX7-DS128-LABEL: name: load_local_v2s32 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) - ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_v2s32 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -188,21 +164,15 @@ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_local_v2s32_align4 ; GFX7: liveins: $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; 
GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX7-DS128-LABEL: name: load_local_v2s32_align4 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_v2s32_align4 ; GFX9: liveins: $vgpr0 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -232,12 +202,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] - ; GFX7-DS128-LABEL: name: load_local_s64 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) - ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_s64 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY 
$vgpr0 @@ -268,21 +232,15 @@ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4 ; GFX7: liveins: $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX7-DS128-LABEL: name: load_local_s64_align4 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_s64_align4 ; GFX9: liveins: $vgpr0 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -312,12 +270,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] - ; GFX7-DS128-LABEL: name: load_local_p3_from_4 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, 
implicit $exec :: (load 4, addrspace 3) - ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -352,12 +304,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] - ; GFX7-DS128-LABEL: name: load_local_p5_from_4 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) - ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -392,12 +338,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] - ; GFX7-DS128-LABEL: name: load_local_p1_align8 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) - ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -428,21 +368,15 @@ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_local_p1_align4 ; GFX7: liveins: $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) - ; GFX7-DS128-LABEL: name: 
load_local_p1_align4 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_p1_align4 ; GFX9: liveins: $vgpr0 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -472,12 +406,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) - ; GFX7-DS128-LABEL: name: load_local_p999_from_8 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_local_p999_from_8 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 @@ -512,12 +440,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - ; GFX7-DS128-LABEL: name: load_local_v2p3 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = 
COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_local_v2p3 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 @@ -552,12 +474,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] - ; GFX7-DS128-LABEL: name: load_local_v2s16 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) - ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -592,12 +508,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] - ; GFX7-DS128-LABEL: name: load_local_v4s16 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) - ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_v4s16 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -659,12 +569,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] - ; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_65535 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 
= COPY $vgpr0 - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) - ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -750,14 +654,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] - ; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_65536 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX7-DS128: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) - ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -800,14 +696,6 @@ ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] - ; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_m1 - ; GFX7-DS128: liveins: $vgpr0 - ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-DS128: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec - ; GFX7-DS128: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX7-DS128: $m0 = S_MOV_B32 -1 - ; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) - ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]] ; 
GFX9-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -822,3 +710,83 @@ $vgpr0 = COPY %3 ... + +--- + +name: load_local_s64_align4_from_1_gep_1016 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1016 + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016 + ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1016 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 1016 + %2:vgpr(p3) = G_PTR_ADD %0, %1 + %3:vgpr(s64) = G_LOAD %2 :: (load 8, align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %3 + +... 
+ +--- + +name: load_local_s64_align4_from_1_gep_1020 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1020 + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020 + ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1020 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec + ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1020 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 1020 + %2:vgpr(p3) = G_PTR_ADD %0, %1 + %3:vgpr(s64) = G_LOAD %2 :: (load 8, align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %3 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir @@ -284,15 +284,19 @@ ; GFX6: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) ; GFX7-LABEL: name: store_local_s64_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store 8, align 4, addrspace 3) @@ -322,15 +326,19 @@ ; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) ; GFX7-LABEL: name: store_local_p1_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: 
[[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) ; GFX9-LABEL: name: store_local_p1_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store 8, align 4, addrspace 3) @@ -360,15 +368,19 @@ ; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) ; GFX7-LABEL: name: store_local_v2s32_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) ; GFX9-LABEL: 
name: store_local_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store 8, align 4, addrspace 3) @@ -398,15 +410,19 @@ ; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) ; GFX7-LABEL: name: store_local_v4s16_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) ; GFX9-LABEL: name: store_local_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 
+ ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store 8, align 4, addrspace 3) @@ -564,3 +580,99 @@ G_STORE %0, %1 :: (store 8, align 8, addrspace 3) ... + +--- + +name: store_local_s64_align4_from_1_gep_1016 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1016 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016 + ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1016 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) + ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1016 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store 8, align 4, addrspace 3) + 
%0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + %2:vgpr(s32) = G_CONSTANT i32 1016 + %3:vgpr(p3) = G_PTR_ADD %1, %2 + G_STORE %0, %3 :: (store 8, align 4, addrspace 3) + +... + +--- + +name: store_local_s64_align4_from_1_gep_1020 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1020 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020 + ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store 8, align 4, addrspace 3) + ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1020 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec + ; GFX7: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: DS_WRITE2_B32 %3, [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) + ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1020 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec 
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + %2:vgpr(s32) = G_CONSTANT i32 1020 + %3:vgpr(p3) = G_PTR_ADD %1, %2 + G_STORE %0, %3 :: (store 8, align 4, addrspace 3) + +...