Index: include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -237,6 +237,10 @@
     return *State.MF;
   }
 
+  const DataLayout &getDataLayout() const {
+    return getMF().getFunction().getParent()->getDataLayout();
+  }
+
   /// Getter for DebugLoc
   const DebugLoc &getDL() { return State.DL; }
 
@@ -465,6 +469,11 @@
   /// \return The newly created instruction.
   MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op);
 
+  /// Build and insert a G_PTRTOINT instruction.
+  MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src});
+  }
+
   /// \return The opcode of the extension the target wants to use for boolean
   /// values.
   unsigned getBoolExtOp(bool IsVec, bool IsFP) const;
@@ -655,7 +664,7 @@
   /// \pre \p Res and \p Src must be generic virtual registers.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildExtract(unsigned Res, unsigned Src, uint64_t Index);
+  MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index);
 
   /// Build and insert \p Res = IMPLICIT_DEF.
   MachineInstrBuilder buildUndef(const DstOp &Res);
Index: include/llvm/IR/DataLayout.h
===================================================================
--- include/llvm/IR/DataLayout.h
+++ include/llvm/IR/DataLayout.h
@@ -345,10 +345,13 @@
     return NonIntegralAddressSpaces;
   }
 
-  bool isNonIntegralPointerType(PointerType *PT) const {
+  bool isNonIntegralAddressSpace(unsigned AddrSpace) const {
     ArrayRef<unsigned> NonIntegralSpaces = getNonIntegralAddressSpaces();
-    return find(NonIntegralSpaces, PT->getAddressSpace()) !=
-           NonIntegralSpaces.end();
+    return find(NonIntegralSpaces, AddrSpace) != NonIntegralSpaces.end();
+  }
+
+  bool isNonIntegralPointerType(PointerType *PT) const {
+    return isNonIntegralAddressSpace(PT->getAddressSpace());
   }
 
   bool isNonIntegralPointerType(Type *Ty) const {
Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -846,20 +846,62 @@
 LegalizerHelper::LegalizeResult
 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT WideTy) {
-  if (TypeIdx != 1)
-    return UnableToLegalize;
-
+  unsigned DstReg = MI.getOperand(0).getReg();
   unsigned SrcReg = MI.getOperand(1).getReg();
   LLT SrcTy = MRI.getType(SrcReg);
+
+  LLT DstTy = MRI.getType(DstReg);
+  unsigned Offset = MI.getOperand(2).getImm();
+
+  if (TypeIdx == 0) {
+    if (SrcTy.isVector() || DstTy.isVector())
+      return UnableToLegalize;
+
+    SrcOp Src(SrcReg);
+    if (SrcTy.isPointer()) {
+      // Extracts from pointers can be handled only if they are really just
+      // simple integers.
+      const DataLayout &DL = MIRBuilder.getDataLayout();
+      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+        return UnableToLegalize;
+
+      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
+      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
+      SrcTy = SrcAsIntTy;
+    }
+
+    if (DstTy.isPointer())
+      return UnableToLegalize;
+
+    if (Offset == 0) {
+      // Avoid a shift in the degenerate case.
+      MIRBuilder.buildTrunc(DstReg,
+                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
+    // Do a shift in the source type.
+    LLT ShiftTy = SrcTy;
+    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
+      Src = MIRBuilder.buildAnyExt(WideTy, Src);
+      ShiftTy = WideTy;
+    } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
+      return UnableToLegalize;
+
+    auto LShr = MIRBuilder.buildLShr(
+        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
+    MIRBuilder.buildTrunc(DstReg, LShr);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
   if (!SrcTy.isVector())
     return UnableToLegalize;
 
-  unsigned DstReg = MI.getOperand(0).getReg();
-  LLT DstTy = MRI.getType(DstReg);
   if (DstTy != SrcTy.getElementType())
     return UnableToLegalize;
 
-  unsigned Offset = MI.getOperand(2).getImm();
   if (Offset % SrcTy.getScalarSizeInBits() != 0)
     return UnableToLegalize;
 
Index: lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
===================================================================
--- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -452,26 +452,29 @@
   return buildInstr(Opcode, Dst, Src);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst,
+                                                   const SrcOp &Src,
                                                    uint64_t Index) {
+  LLT SrcTy = Src.getLLTTy(*getMRI());
+  LLT DstTy = Dst.getLLTTy(*getMRI());
+
 #ifndef NDEBUG
-  assert(getMRI()->getType(Src).isValid() && "invalid operand type");
-  assert(getMRI()->getType(Res).isValid() && "invalid operand type");
-  assert(Index + getMRI()->getType(Res).getSizeInBits() <=
-         getMRI()->getType(Src).getSizeInBits() &&
+  assert(SrcTy.isValid() && "invalid operand type");
+  assert(DstTy.isValid() && "invalid operand type");
+  assert(Index + DstTy.getSizeInBits() <= SrcTy.getSizeInBits() &&
          "extracting off end of register");
 #endif
 
-  if (getMRI()->getType(Res).getSizeInBits() ==
-      getMRI()->getType(Src).getSizeInBits()) {
+  if (DstTy.getSizeInBits() == SrcTy.getSizeInBits()) {
     assert(Index == 0 && "insertion past the end of a register");
-    return buildCast(Res, Src);
+    return buildCast(Dst, Src);
   }
 
-  return buildInstr(TargetOpcode::G_EXTRACT)
-      .addDef(Res)
-      .addUse(Src)
-      .addImm(Index);
+  auto Extract = buildInstr(TargetOpcode::G_EXTRACT);
+  Dst.addDefToMIB(*getMRI(), Extract);
+  Src.addSrcToMIB(Extract);
+  Extract.addImm(Index);
+  return Extract;
 }
 
 void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -516,9 +516,10 @@
     .widenScalarIf(
       [=](const LegalityQuery &Query) {
         const LLT Ty1 = Query.Types[1];
-        return (Ty1.getScalarSizeInBits() < 16);
+        return Ty1.isVector() && Ty1.getScalarSizeInBits() < 16;
       },
-      LegalizeMutations::widenScalarOrEltToNextPow2(1, 16));
+      LegalizeMutations::widenScalarOrEltToNextPow2(1, 16))
+    .clampScalar(0, S16, S256);
 
   // TODO: vectors of pointers
   getActionDefinitionsBuilder(G_BUILD_VECTOR)
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
@@ -256,3 +256,296 @@
     %1:_(<2 x s16>) = G_EXTRACT %0, 32
     $vgpr0 = COPY %1
 ...
+
+---
+name: test_extract_s8_s16_offset0
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_s16_offset0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s8) = G_EXTRACT %1, 0
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_extract_s8_s16_offset1
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_s16_offset1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s8) = G_EXTRACT %1, 1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_extract_s8_s16_offset8
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_s16_offset8
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s8) = G_EXTRACT %1, 8
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_extract_s8_s32_offset0
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_s32_offset0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s8) = G_EXTRACT %0, 0
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s8_s32_offset1
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_s32_offset1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s8) = G_EXTRACT %0, 1
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s8_s32_offset8
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_s32_offset8
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s8) = G_EXTRACT %0, 8
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s8_s32_offset16
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_s32_offset16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s8) = G_EXTRACT %0, 16
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s8_s32_offset24
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_s32_offset24
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s8) = G_EXTRACT %0, 24
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s8_p3_offset0
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_p3_offset0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s8) = G_EXTRACT %0, 0
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s8_p3_offset8
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s8_p3_offset8
+    ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s8) = G_EXTRACT %0, 8
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s1_s8_offset0
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s1_s8_offset0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s8) = G_TRUNC %0
+    %2:_(s1) = G_EXTRACT %1, 0
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_extract_s1_s8_offset2
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_extract_s1_s8_offset2
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s8) = G_TRUNC %0
+    %2:_(s1) = G_EXTRACT %1, 2
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_extract_s8_s64_offset2
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_extract_s8_s64_offset2
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; CHECK: $vgpr0 = COPY [[TRUNC1]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s8) = G_EXTRACT %0, 2
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s8_s64_offset16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_extract_s8_s64_offset16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; CHECK: $vgpr0 = COPY [[TRUNC1]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s8) = G_EXTRACT %0, 16
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s16_s64_offset16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_extract_s16_s64_offset16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 16
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s16) = G_EXTRACT %0, 16
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s16_s64_offset32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_extract_s16_s64_offset32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 32
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s16) = G_EXTRACT %0, 32
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_extract_s16_s64_offset48
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_extract_s16_s64_offset48
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 48
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s16) = G_EXTRACT %0, 48
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
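
Note (illustration only, not part of the patch): the new TypeIdx == 0 path reduces a scalar G_EXTRACT at a non-zero bit offset to a logical shift right by the offset, done in the (possibly pointer-cast and any-extended) source type, followed by a truncation to the destination width; offset 0 degenerates to a plain truncate. The standalone C++ sketch below models only that scalar arithmetic; the helper name extractBits and the sample values are hypothetical and are not LLVM API.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Model of the legalized sequence for a scalar G_EXTRACT: shift right by the
// bit offset (the G_LSHR the legalizer builds), then keep the low DstBits
// bits (the G_TRUNC). Offset == 0 is just a truncate.
static uint64_t extractBits(uint64_t Src, unsigned SrcBits, unsigned DstBits,
                            unsigned Offset) {
  assert(Offset + DstBits <= SrcBits && "extracting off end of register");
  (void)SrcBits;
  uint64_t Shifted = Offset == 0 ? Src : Src >> Offset;           // G_LSHR
  uint64_t Mask = DstBits >= 64 ? ~0ULL : ((1ULL << DstBits) - 1);
  return Shifted & Mask;                                          // G_TRUNC
}

int main() {
  // Mirrors test_extract_s8_s32_offset8: bits [15:8] of an s32 value.
  printf("0x%llx\n", (unsigned long long)extractBits(0x12345678u, 32, 8, 8));
  return 0;
}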