Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -733,6 +733,7 @@
   else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
     return false;
 
+  // TODO: Could check if it's legal with the reduced or original memory size.
   if (!isLegalOrBeforeLegalizer(
           {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
     return false;
@@ -813,21 +814,24 @@
     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
 
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT RegTy = MRI.getType(DstReg);
+
   // Only supports scalars for now.
-  if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+  if (RegTy.isVector())
     return false;
 
   Register SrcReg = MI.getOperand(1).getReg();
   auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
-  if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
-      !LoadDef->isSimple())
+  if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
     return false;
 
+  uint64_t MemBits = LoadDef->getMemSizeInBits();
+
   // If the sign extend extends from a narrower width than the load's width,
   // then we can narrow the load width when we combine to a G_SEXTLOAD.
   // Avoid widening the load at all.
-  unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
-                                  LoadDef->getMemSizeInBits());
+  unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
 
   // Don't generate G_SEXTLOADs with a < 1 byte width.
   if (NewSizeBits < 8)
@@ -839,7 +843,15 @@
   const MachineMemOperand &MMO = LoadDef->getMMO();
   LegalityQuery::MemDesc MMDesc(MMO);
-  MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+
+  // Don't modify the memory access size if this is atomic/volatile, but we can
+  // still adjust the opcode to indicate the high bit behavior.
+  if (LoadDef->isSimple())
+    MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+  else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
+    return false;
+
+  // TODO: Could check if it's legal with the reduced or original memory size.
   if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
                                  {MRI.getType(LoadDef->getDstReg()),
                                   MRI.getType(LoadDef->getPointerReg())},
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
@@ -132,9 +132,9 @@
     ; CHECK: liveins: $vgpr0_vgpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
     ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
-    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8
-    ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
    %0:_(p1) = COPY $vgpr0_vgpr1
    %1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1)
    %2:_(s32) = G_SEXT_INREG %1, 8
@@ -171,9 +171,9 @@
    ; CHECK: liveins: $vgpr0_vgpr1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
-    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 16
-    ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
    %0:_(p1) = COPY $vgpr0_vgpr1
    %1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1)
    %2:_(s32) = G_SEXT_INREG %1, 16
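
For reference, the sketch below is a minimal standalone model (not LLVM code) of the width decision this patch makes in matchSextInRegOfLoad: for a simple load the memory type is narrowed to the sign-extended width, while for a volatile/atomic load only the opcode may change, so the fold is rejected unless the G_SEXT_INREG width covers the whole memory access and the access is narrower than the register. The names SextLoadPlan and planSextLoadFold are hypothetical and exist only for this illustration.

// Standalone sketch of the combiner's width decision; assumptions noted above.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <optional>

struct SextLoadPlan {
  uint64_t NewMemBits; // memory width the resulting G_SEXTLOAD would use
};

// RegBits:  width of the loaded register (e.g. 32 for s32)
// MemBits:  width of the load's memory access (e.g. 8 for a (s8) load)
// SextBits: immediate operand of G_SEXT_INREG
// IsSimple: false for volatile/atomic loads
std::optional<SextLoadPlan> planSextLoadFold(uint64_t RegBits, uint64_t MemBits,
                                             uint64_t SextBits, bool IsSimple) {
  // Never widen the load; at most narrow it to the sign-extended width.
  uint64_t NewSizeBits = std::min(SextBits, MemBits);

  // Don't generate G_SEXTLOADs with a < 1 byte width.
  if (NewSizeBits < 8)
    return std::nullopt;

  // Simple loads: shrink the memory access to the sign-extended width.
  if (IsSimple)
    return SextLoadPlan{NewSizeBits};

  // Volatile/atomic: the memory size must stay untouched. Only flip the
  // opcode when the sign extension covers the whole memory access and there
  // are high bits left for the G_SEXTLOAD to define.
  if (MemBits > NewSizeBits || MemBits == RegBits)
    return std::nullopt;
  return SextLoadPlan{MemBits};
}

int main() {
  // s32 = G_SEXT_INREG(volatile load (s8)), 8  -> volatile G_SEXTLOAD (s8)
  assert(planSextLoadFold(32, 8, 8, /*IsSimple=*/false));
  // s32 = G_SEXT_INREG(volatile load (s16)), 8 -> would need narrowing: reject
  assert(!planSextLoadFold(32, 16, 8, /*IsSimple=*/false));
  // s32 = G_SEXT_INREG(load (s16)), 8          -> narrow to a (s8) G_SEXTLOAD
  assert(planSextLoadFold(32, 16, 8, /*IsSimple=*/true)->NewMemBits == 8);
  std::cout << "all cases behave as expected\n";
}

This mirrors why the test updates above keep the volatile G_LOAD in the output: the volatile access cannot be deleted as dead, so the combiner only adds the G_SEXTLOAD and rewrites the use.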