Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2808,60 +2808,62 @@ return Legalized; } - if (DstTy.getSizeInBits() != MMO.getSizeInBits()) + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) return UnableToLegalize; + if (isPowerOf2_32(MemSizeInBits)) + return UnableToLegalize; // Don't know what we're being asked to do. - if (MI.getOpcode() == TargetOpcode::G_LOAD) { - // This load needs splitting into power of 2 sized loads. - if (DstTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(DstTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Our strategy here is to generate anyextending loads for the smaller - // types up to next power-2 result type, and then combine the two larger - // result values together, before truncating back down to the non-pow-2 - // type. - // E.g. v1 = i24 load => - // v2 = i32 zextload (2 byte) - // v3 = i32 load (1 byte) - // v4 = i32 shl v3, 16 - // v5 = i32 or v4, v2 - // v1 = i24 trunc v5 - // By doing this we generate the correct truncate which should get - // combined away as an artifact with a matching extend. - uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); - uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + // Big endian lowering not implemented. + if (MIRBuilder.getDataLayout().isBigEndian()) + return UnableToLegalize; - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = MF.getMachineMemOperand( - &MMO, LargeSplitSize / 8, SmallSplitSize / 8); - - LLT PtrTy = MRI.getType(PtrReg); - unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); - LLT AnyExtTy = LLT::scalar(AnyExtSize); - auto LargeLoad = MIRBuilder.buildLoadInstr( - TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO); - - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); - auto SmallLoad = MIRBuilder.buildLoad(AnyExtTy, SmallPtr, - *SmallMMO); - - auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); - auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 zextload (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. + uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits); + uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize; + + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + auto LargeLoad = MIRBuilder.buildLoadInstr( + TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO); + + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); + auto SmallLoad = MIRBuilder.buildLoadInstr( + MI.getOpcode(), AnyExtTy, SmallPtr, *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + + if (AnyExtTy == DstTy) + MIRBuilder.buildOr(DstReg, Shift, LargeLoad); + else { auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); MIRBuilder.buildTrunc(DstReg, {Or}); - MI.eraseFromParent(); - return Legalized; } - return UnableToLegalize; + MI.eraseFromParent(); + return Legalized; } LegalizerHelper::LegalizeResult Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1274,6 +1274,7 @@ .lower(); } + // FIXME: Unaligned accesses not lowered. auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) .legalForTypesWithMemDesc({{S32, GlobalPtr, S8, 8}, {S32, GlobalPtr, S16, 2 * 8}, @@ -1302,7 +1303,6 @@ ExtLoads.clampScalar(0, S32, S32) .widenScalarToNextPow2(0) - .unsupportedIfMemSizeNotPow2() .lower(); auto &Atomics = getActionDefinitionsBuilder( Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -15691,6 +15691,374 @@ $vgpr0 = COPY %1 ... +--- +name: test_ext_load_global_s32_from_s24_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_ext_load_global_s32_from_s24_align1 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) + ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; SI: $vgpr0 = COPY [[COPY4]](s32) + ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1 + ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-HSA: $vgpr0 = COPY [[OR]](s32) + ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1 + ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CI-MESA: $vgpr0 = COPY [[COPY4]](s32) + ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align1 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) + ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) + ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1 + ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-HSA: $vgpr0 = COPY [[OR]](s32) + ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1 + ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_LOAD %0 :: (load (s24), align 1, addrspace 1) + $vgpr0 = COPY %1 +... +--- +name: test_ext_load_global_s32_from_s24_align2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_ext_load_global_s32_from_s24_align2 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; SI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; SI: $vgpr0 = COPY [[COPY5]](s32) + ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2 + ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-HSA: $vgpr0 = COPY [[OR]](s32) + ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2 + ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) + ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CI-MESA: $vgpr0 = COPY [[COPY5]](s32) + ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align2 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; VI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2 + ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-HSA: $vgpr0 = COPY [[OR]](s32) + ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2 + ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_LOAD %0 :: (load (s24), align 2, addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_ext_load_global_s32_from_s24_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_ext_load_global_s32_from_s24_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; SI: $vgpr0 = COPY [[LOAD]](s32) + ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4 + ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32) + ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4 + ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) + ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; VI: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4 + ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4 + ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1) + $vgpr0 = COPY %1 +... + --- name: test_ext_load_global_s64_from_1_align4 body: | Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir @@ -6,7 +6,6 @@ # FIXME: Run with and without unaligned access turned on # ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: %1:_(s32) = G_SEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_sextload_global_i32_i24) # ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i16_from_v2s8) # ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s8) # ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s16) @@ -64,12 +63,24 @@ ; GFX8-LABEL: name: test_sextload_global_i32_i24 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1) - ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX8: $vgpr0 = COPY [[OR]](s32) ; GFX6-LABEL: name: test_sextload_global_i32_i24 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1) - ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX6: $vgpr0 = COPY [[OR]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s24), addrspace 1) $vgpr0 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir @@ -6,7 +6,6 @@ # FIXME: Run with and without unaligned access turned on # ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: %1:_(s32) = G_ZEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_zextload_global_i32_i24) # ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i16_from_2) # ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i32_from_2) # ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s32), addrspace 1) (in function: test_zextload_global_v2i32_from_4) @@ -65,12 +64,24 @@ ; GFX8-LABEL: name: test_zextload_global_i32_i24 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1) - ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX8: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX8: $vgpr0 = COPY [[OR]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i24 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1) - ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX6: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX6: $vgpr0 = COPY [[OR]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s24), addrspace 1) $vgpr0 = COPY %1