Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2744,7 +2744,51 @@ Register DstReg = MI.getOperand(0).getReg(); Register PtrReg = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(DstReg); - auto &MMO = **MI.memoperands_begin(); + MachineMemOperand &MMO = **MI.memoperands_begin(); + LLT MemTy = MMO.getMemoryType(); + MachineFunction &MF = MIRBuilder.getMF(); + if (MemTy.isVector()) + return UnableToLegalize; + + unsigned MemSizeInBits = MemTy.getSizeInBits(); + unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes(); + + if (MemSizeInBits != MemStoreSizeInBits) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + LLT WideMemTy = LLT::scalar(MemStoreSizeInBits); + MachineMemOperand *NewMMO = + MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy); + + Register LoadReg = DstReg; + LLT LoadTy = DstTy; + + // If this wasn't already an extending load, we need to widen the result + // register to avoid creating a load with a narrower result than the source. + if (MemStoreSizeInBits > DstTy.getSizeInBits()) { + LoadTy = WideMemTy; + LoadReg = MRI.createGenericVirtualRegister(WideMemTy); + } + + if (MI.getOpcode() == TargetOpcode::G_SEXTLOAD) { + auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO); + MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits); + } else if (MI.getOpcode() == TargetOpcode::G_ZEXTLOAD || + WideMemTy == DstTy) { + auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO); + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from Wide thus automatically gives zext from MemVT. 
+ MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits); + } else { + MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO); + } + + if (DstTy != LoadTy) + MIRBuilder.buildTrunc(DstReg, LoadReg); + + MI.eraseFromParent(); + return Legalized; + } if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { if (MI.getOpcode() == TargetOpcode::G_LOAD) { @@ -2816,20 +2860,46 @@ Register SrcReg = MI.getOperand(0).getReg(); Register PtrReg = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(SrcReg); + MachineFunction &MF = MIRBuilder.getMF(); MachineMemOperand &MMO = **MI.memoperands_begin(); - if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) - return UnableToLegalize; + LLT MemTy = MMO.getMemoryType(); + if (SrcTy.isVector()) return UnableToLegalize; - if (isPowerOf2_32(SrcTy.getSizeInBits())) + + unsigned StoreWidth = MemTy.getSizeInBits(); + unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes(); + + if (StoreWidth != StoreSizeInBits) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + LLT WideTy = LLT::scalar(StoreSizeInBits); + + if (StoreSizeInBits > SrcTy.getSizeInBits()) { + // Avoid creating a store with a narrower source than result. + SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0); + SrcTy = WideTy; + } + + auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth); + + MachineMemOperand *NewMMO = + MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy); + MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO); + MI.eraseFromParent(); + return Legalized; + } + + if (isPowerOf2_32(MemTy.getSizeInBits())) return UnableToLegalize; // Don't know what we're being asked to do. // Extend to the next pow-2. 
- const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + const LLT ExtendTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits())); auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); // Obtain the smaller value by shifting away the larger value. - uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); - uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits()); + uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize; auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); @@ -2842,7 +2912,6 @@ auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); - MachineFunction &MF = MIRBuilder.getMF(); MachineMemOperand *LargeMMO = MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); MachineMemOperand *SmallMMO = Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir @@ -6,6 +6,7 @@ # FIXME: Run with and without unaligned access turned on # ERR-NOT: remark +# ERR: remark: :0:0: unable to legalize instruction: %1:_(s32) = G_SEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_sextload_global_i32_i24) # ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i16_from_v2s8) # ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s8) # ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s16) @@ -14,6 +15,100 @@ # ERR-NEXT: remark: :0:0: unable to legalize
instruction: %1:_(s128) = G_SEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_sextload_global_s128_8) # ERR-NOT: remark +--- +name: test_sextload_global_i32_i1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_i32_i1 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-LABEL: name: test_sextload_global_i32_i1 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_SEXTLOAD %0 :: (load (s1), addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_sextload_global_i32_i7 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_i32_i7 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) + ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-LABEL: name: test_sextload_global_i32_i7 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) + ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_SEXTLOAD %0 :: (load (s7), addrspace 1) + $vgpr0 = COPY %1 +... 
+--- +name: test_sextload_global_i32_i24 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_i32_i24 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-LABEL: name: test_sextload_global_i32_i24 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1) + ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_SEXTLOAD %0 :: (load (s24), addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_sextload_global_i32_i30 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_i32_i30 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) + ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-LABEL: name: test_sextload_global_i32_i30 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) + ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_SEXTLOAD %0 :: (load (s30), addrspace 1) + $vgpr0 = COPY %1 +... 
+ +--- +name: test_sextload_global_i32_i31 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_i32_i31 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) + ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-LABEL: name: test_sextload_global_i32_i31 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) + ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_SEXTLOAD %0 :: (load (s31), addrspace 1) + $vgpr0 = COPY %1 +... + --- name: test_sextload_global_i32_i8 body: | @@ -32,6 +127,7 @@ %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) $vgpr0 = COPY %1 ... + --- name: test_sextload_global_i32_i16 body: | Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir @@ -6,6 +6,7 @@ # FIXME: Run with and without unaligned access turned on # ERR-NOT: remark +# ERR: remark: :0:0: unable to legalize instruction: %1:_(s32) = G_ZEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_zextload_global_i32_i24) # ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i16_from_2) # ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i32_from_2) # ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s32), addrspace 1) (in function: test_zextload_global_v2i32_from_4) @@ -14,6 +15,101 @@ # ERR-NEXT: remark: :0:0: unable to legalize instruction: 
%1:_(s128) = G_ZEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_zextload_global_s128_8) # ERR-NOT: remark +--- +name: test_zextload_global_i32_i1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_i32_i1 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-LABEL: name: test_zextload_global_i32_i1 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s1), addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_zextload_global_i32_i7 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_i32_i7 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) + ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-LABEL: name: test_zextload_global_i32_i7 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) + ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s7), addrspace 1) + $vgpr0 = COPY %1 +... 
+ +--- +name: test_zextload_global_i32_i24 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_i32_i24 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-LABEL: name: test_zextload_global_i32_i24 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1) + ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s24), addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_zextload_global_i32_i30 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_i32_i30 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) + ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-LABEL: name: test_zextload_global_i32_i30 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) + ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s30), addrspace 1) + $vgpr0 = COPY %1 +... 
+ +--- +name: test_zextload_global_i32_i31 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_i32_i31 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) + ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-LABEL: name: test_zextload_global_i32_i31 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) + ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s31), addrspace 1) + $vgpr0 = COPY %1 +... + --- name: test_zextload_global_i32_i8 body: |