Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -479,6 +479,18 @@
       return UnableToLegalize;
 
     const auto &MMO = **MI.memoperands_begin();
+    unsigned DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+
+    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
+      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+      auto &MMO = **MI.memoperands_begin();
+      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
+      MIRBuilder.buildAnyExt(DstReg, TmpReg);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
     // This implementation doesn't work for atomics. Give up instead of doing
     // something invalid.
     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
@@ -508,8 +520,8 @@
       DstRegs.push_back(DstReg);
     }
 
-    unsigned DstReg = MI.getOperand(0).getReg();
-    if(MRI.getType(DstReg).isVector())
+
+    if (DstTy.isVector())
       MIRBuilder.buildBuildVector(DstReg, DstRegs);
     else
       MIRBuilder.buildMerge(DstReg, DstRegs);
@@ -550,6 +562,19 @@
       return UnableToLegalize;
 
     const auto &MMO = **MI.memoperands_begin();
+
+    unsigned SrcReg = MI.getOperand(0).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+
+    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
+      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+      auto &MMO = **MI.memoperands_begin();
+      MIRBuilder.buildTrunc(TmpReg, SrcReg);
+      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
     // This implementation doesn't work for atomics. Give up instead of doing
     // something invalid.
     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -36,6 +36,7 @@
   const LLT S16 = LLT::scalar(16);
   const LLT S32 = LLT::scalar(32);
   const LLT S64 = LLT::scalar(64);
+  const LLT S128 = LLT::scalar(128);
   const LLT S256 = LLT::scalar(256);
   const LLT S512 = LLT::scalar(512);
 
@@ -162,7 +163,9 @@
 
   getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
     .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
-               {S32, S1}, {S64, S1}, {S16, S1}});
+               {S32, S1}, {S64, S1}, {S16, S1},
+               // FIXME: Hack
+               {S128, S32}});
 
   setAction({G_FPTOSI, S32}, Legal);
   setAction({G_FPTOSI, 1, S32}, Legal);
@@ -212,12 +215,30 @@
   });
 
   getActionDefinitionsBuilder({G_LOAD, G_STORE})
+    .narrowScalarIf([](const LegalityQuery &Query) {
+        unsigned Size = Query.Types[0].getSizeInBits();
+        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+        return (Size > 32 && MemSize < Size);
+      },
+      [](const LegalityQuery &Query) {
+        return std::make_pair(0, LLT::scalar(32));
+      })
     .legalIf([=, &ST](const LegalityQuery &Query) {
         const LLT &Ty0 = Query.Types[0];
 
+        unsigned Size = Ty0.getSizeInBits();
+        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+        if (Size > 32 && MemSize < Size)
+          return false;
+
+        if (Ty0.isVector() && Size != MemSize)
+          return false;
+
        // TODO: Decompose private loads into 4-byte components.
        // TODO: Illegal flat loads on SI
-        switch (Ty0.getSizeInBits()) {
+        switch (MemSize) {
+        case 8:
+        case 16:
         case 32:
         case 64:
         case 128:
@@ -233,7 +254,8 @@
         default:
           return false;
         }
-      });
+      })
+    .clampScalar(0, S32, S64);
 
   auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
@@ -129,3 +129,71 @@
 
     $vgpr0_vgpr1_vgpr2 = COPY %1
 ...
+
+---
+name: test_ext_load_global_s64_from_1_align1
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_ext_load_global_s64_from_1_align1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_LOAD %0 :: (load 1, addrspace 1, align 4)
+
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_ext_load_global_s64_from_2_align2
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_ext_load_global_s64_from_2_align2
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_LOAD %0 :: (load 2, addrspace 1, align 4)
+
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_ext_load_global_s64_from_4_align4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_ext_load_global_s64_from_4_align4
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
+
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_ext_load_global_s128_from_4_align4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_ext_load_global_s128_from_4_align4
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s128) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
+
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -120,3 +120,67 @@
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store 12, align 4, addrspace 1)
 ...
+
+---
+name: test_truncstore_global_s64_to_s8
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 1, addrspace 1)
+...
+
+---
+name: test_truncstore_global_s64_to_s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 2, addrspace 1)
+...
+
+---
+name: test_truncstore_global_s64_to_s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 4, addrspace 1)
+...
+
+---
+name: test_truncstore_global_s128_to_s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: test_truncstore_global_s128_to_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 2, addrspace 1)
+...