Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2886,13 +2886,14 @@ MachineMemOperand &MMO = LoadMI.getMMO(); LLT MemTy = MMO.getMemoryType(); MachineFunction &MF = MIRBuilder.getMF(); - if (MemTy.isVector()) - return UnableToLegalize; unsigned MemSizeInBits = MemTy.getSizeInBits(); unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes(); if (MemSizeInBits != MemStoreSizeInBits) { + if (MemTy.isVector()) + return UnableToLegalize; + // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. LLT WideMemTy = LLT::scalar(MemStoreSizeInBits); @@ -2928,9 +2929,6 @@ return Legalized; } - if (DstTy.isVector()) - return UnableToLegalize; - // Big endian lowering not implemented. if (MIRBuilder.getDataLayout().isBigEndian()) return UnableToLegalize; @@ -2953,9 +2951,12 @@ uint64_t LargeSplitSize, SmallSplitSize; if (!isPowerOf2_32(MemSizeInBits)) { + // This load needs splitting into power of 2 sized loads. LargeSplitSize = PowerOf2Floor(MemSizeInBits); SmallSplitSize = MemSizeInBits - LargeSplitSize; } else { + // This is already a power of 2, but we still need to split this in half. + // // Assume we're being asked to decompose an unaligned load. // TODO: If this requires multiple splits, handle them all at once. auto &Ctx = MF.getFunction().getContext(); @@ -2965,6 +2966,16 @@ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2; } + if (MemTy.isVector()) { + // TODO: Handle vector extloads + if (MemTy != DstTy) + return UnableToLegalize; + + // TODO: We can do better than scalarizing the vector and at least split it + // in half. 
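+    // Calling reduceLoadStoreWidth with the element type as the narrow type
+    // lowers this into one element-sized load per lane, i.e. full
+    // scalarization of the memory access.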
+ return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType()); + } + MachineMemOperand *LargeMMO = MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); MachineMemOperand *SmallMMO = Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -4152,21 +4152,84 @@ ; CI-LABEL: name: test_load_constant_v3s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) + ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 4) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir =================================================================== --- 
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -3898,21 +3898,84 @@ ; CI-LABEL: name: test_load_flat_v3s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], 
[[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 0) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -6602,39 +6602,165 @@ ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; SI-LABEL: name: test_load_global_v3s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], 
[[LOAD]](<3 x s16>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-HSA: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 
(s16), align 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 
x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1) @@ -6681,9 +6807,33 @@ ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 2, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI-HSA: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -6747,9 +6897,24 @@ ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 2, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -6846,9 +7011,33 @@ ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x 
s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 1, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -6964,9 +7153,24 @@ ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 1, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) + ; 
GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -7789,64 +7993,218 @@ ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; CI-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; CI-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-HSA: 
[[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; CI-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; CI-MESA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; CI-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; CI-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from 
unknown-address + 2, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-LABEL: name: test_load_global_v5s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), 
[[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; VI: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; VI: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; VI: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align8 
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; GFX9-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; GFX9-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), 
[[COPY6]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; GFX9-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: 
[[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 8, addrspace 1) %2:_(<5 x s16>) = G_IMPLICIT_DEF @@ -7910,64 +8268,218 @@ ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; CI-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; CI-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST]], [[C4]](s32) + ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; CI-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; CI-MESA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; CI-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; CI-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-LABEL: name: test_load_global_v5s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; VI: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; VI: $vgpr1 = COPY [[UV7]](<2 x 
s16>) - ; VI: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; 
GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; GFX9-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; GFX9-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = 
G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; GFX9-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: $vgpr2 = COPY 
[[BUILD_VECTOR_TRUNC2]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 4, addrspace 1) %2:_(<5 x s16>) = G_IMPLICIT_DEF @@ -8035,16 +8547,52 @@ ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 2, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; CI-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; CI-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) + ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; 
CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) @@ -8143,16 +8691,39 @@ ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 2, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; GFX9-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; GFX9-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 
(s16) from unknown-address + 8, addrspace 1) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) @@ -8302,16 +8873,52 @@ ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 1, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; CI-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; CI-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from 
unknown-address + 4, align 1, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1) + ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1) + ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -8494,16 +9101,39 @@ ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 1, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), 
[[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>) - ; GFX9-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) - ; GFX9-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -9544,82 +10174,342 @@ ; SI-LABEL: name: test_load_global_v7s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD 
[[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; SI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; SI: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; SI: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; SI: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; SI: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[COPY2]], [[C7]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; CI-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; CI-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA: $vgpr1 = COPY 
[[BITCAST2]](<2 x s16>) + ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v7s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; CI-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; CI-MESA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; CI-MESA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; CI-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; CI-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x 
s16>) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; VI: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; VI: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; VI: $vgpr2 = COPY 
[[UV10]](<2 x s16>) - ; VI: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) 
= COPY [[BITCAST]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; GFX9-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; GFX9-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; GFX9-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = 
G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; GFX9-MESA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; GFX9-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; GFX9-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, 
addrspace 1) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 8, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF @@ -9640,82 +10530,342 @@ ; SI-LABEL: name: test_load_global_v7s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: 
[[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; SI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; SI: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; SI: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; SI: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; SI: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], 
[[SHL]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; CI-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; CI-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 
+ ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) ; 
CI-MESA-LABEL: name: test_load_global_v7s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; CI-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; CI-MESA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; CI-MESA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; CI-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; CI-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 
16 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; VI: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; VI: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; VI: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; VI: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), 
align 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], 
[[C6]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; GFX9-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; GFX9-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; GFX9-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = 
G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; GFX9-MESA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; GFX9-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; GFX9-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C1]](s64) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 4, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF @@ -9893,17 +11043,66 @@ ; SI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 2, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; CI-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; CI-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) + ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) + ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[AND1]], [[C6]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v7s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) @@ -10220,17 +11419,49 @@ ; VI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 2, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; GFX9-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; GFX9-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; GFX9-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) + ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) @@ -10565,17 +11796,66 @@ ; SI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align1 ; 
CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 1, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) - ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) - ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>) - ; CI-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>) - ; CI-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>) - ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1) + ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1) + ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1) + ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1) + ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C6]](s32)
+ ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+ ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+ ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+ ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+ ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+ ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+ ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+ ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
 ; CI-MESA-LABEL: name: test_load_global_v7s16_align1
 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -10994,17 +12274,49 @@
 ; VI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>)
 ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1
 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 1, addrspace 1)
- ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
- ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
- ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
- ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
- ; GFX9-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>)
- ; GFX9-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>)
- ; GFX9-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>)
- ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+ ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+ ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+ ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+ ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
+ ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+ ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
+ ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+ ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+ ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1)
+ ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+ ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+ ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
+ ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+ ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+ ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+ ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+ ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
 ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align1
 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -9660,9 +9660,24 @@
 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 2, addrspace 3)
- ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
+ ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+ ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+ ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+ ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+ ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+ ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
 ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
 ; GFX10-LABEL: name: test_load_local_v3s16_align2
 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -9999,9 +10014,24 @@
 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 1, addrspace 3)
- ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
+ ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+ ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3)
+ ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+ ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3)
+ ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+ ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
 ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
 ; GFX10-LABEL: name: test_load_local_v3s16_align1
 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0