Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -235,6 +235,7 @@
   LegalizeResult lowerShuffleVector(MachineInstr &MI);
   LegalizeResult lowerDynStackAlloc(MachineInstr &MI);
   LegalizeResult lowerExtract(MachineInstr &MI);
+  LegalizeResult lowerInsert(MachineInstr &MI);
 
 private:
   MachineRegisterInfo &MRI;
Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2272,6 +2272,8 @@
     return lowerDynStackAlloc(MI);
   case G_EXTRACT:
     return lowerExtract(MI);
+  case G_INSERT:
+    return lowerInsert(MI);
   }
 }
 
@@ -4218,3 +4220,47 @@
 
   return UnableToLegalize;
 }
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  Register InsertSrc = MI.getOperand(2).getReg();
+  uint64_t Offset = MI.getOperand(3).getImm();
+
+  LLT DstTy = MRI.getType(Src);
+  LLT InsertTy = MRI.getType(InsertSrc);
+
+  // Only handle a scalar inserted into a scalar, or into a vector with a
+  // matching element type; the work is done in the equivalent integer type.
+  if (InsertTy.isScalar() &&
+      (DstTy.isScalar() ||
+       (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
+    LLT IntDstTy = DstTy;
+    if (!DstTy.isScalar()) {
+      IntDstTy = LLT::scalar(DstTy.getSizeInBits());
+      Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
+    }
+
+    // Zero-extend the inserted value to the full width and shift it into
+    // position.
+    Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
+    if (Offset != 0) {
+      auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
+      ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
+    }
+
+    // Keep every source bit outside the window [Offset, Offset + size).
+    APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
+                                       Offset + InsertTy.getSizeInBits());
+
+    auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
+    auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
+    auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
+
+    MIRBuilder.buildBitcast(Dst, Or);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  return UnableToLegalize;
+}
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -917,13 +917,9 @@
   unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;
 
   // FIXME: Doesn't handle extract of illegal sizes.
-  auto &Builder = getActionDefinitionsBuilder(Op);
-
-  // FIXME: Cleanup when G_INSERT lowering implemented.
-  if (Op == G_EXTRACT)
-    Builder.lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32)));
-
-  Builder
+  auto &Builder = getActionDefinitionsBuilder(Op)
+    .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32)))
+    // FIXME: Multiples of 16 should not be legal.
     .legalIf([=](const LegalityQuery &Query) {
       const LLT BigTy = Query.Types[BigTyIdx];
       const LLT LitTy = Query.Types[LitTyIdx];
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
@@ -762,15 +762,48 @@
     ; CHECK-LABEL: name: test_insert_v2s16_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 0
-    ; CHECK: $vgpr0 = COPY [[INSERT]](<2 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]]
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %1
     %3:_(<2 x s16>) = G_INSERT %0, %2, 0
     $vgpr0 = COPY %3
 ...
+
+---
+name: test_insert_v2s16_s16_offset1
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_insert_v2s16_s16_offset1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %1
+    %3:_(<2 x s16>) = G_INSERT %0, %2, 1
+    $vgpr0 = COPY %3
+...
 ---
 name: test_insert_v2s16_s16_offset16
 body: |
@@ -780,9 +813,17 @@
     ; CHECK-LABEL: name: test_insert_v2s16_s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 16
-    ; CHECK: $vgpr0 = COPY [[INSERT]](<2 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %1
@@ -1247,3 +1288,104 @@
     %3:_(s64) = G_INSERT %0, %2, 48
     $vgpr0_vgpr1 = COPY %3
 ...
+---
+name: test_insert_s32_s16_offset0
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_insert_s32_s16_offset0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]]
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %1
+    %3:_(s32) = G_INSERT %1, %2, 0
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_insert_s32_s16_offset1
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_insert_s32_s16_offset1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %1
+    %3:_(s32) = G_INSERT %1, %2, 1
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_insert_s32_s16_offset8
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_insert_s32_s16_offset8
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -16776961
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %1
+    %3:_(s32) = G_INSERT %1, %2, 8
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_insert_s32_s16_offset16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_insert_s32_s16_offset16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %1
+    %3:_(s32) = G_INSERT %1, %2, 16
+    $vgpr0 = COPY %3
+...
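
For reviewers: the keep-mask that lowerInsert materializes as a G_CONSTANT can be sanity-checked in isolation. Below is a minimal standalone sketch, not part of the patch; the file layout and main() are mine, and it assumes only llvm/ADT/APInt.h plus the usual LLVM include flags. Note that APInt::getBitsSet sets the half-open range [loBit, hiBit), so the upper bound must be Offset + InsertTy.getSizeInBits().

// Minimal standalone sketch (not part of the patch): prints the keep-mask
// lowerInsert builds for an s16 inserted into a 32-bit destination, for each
// offset the s32/s16 tests above exercise.
#include "llvm/ADT/APInt.h"
#include <cstdio>

int main() {
  const unsigned DstSize = 32, InsertSize = 16;
  for (unsigned Offset : {0u, 1u, 8u, 16u}) {
    // Bits [Offset, Offset + InsertSize) receive the inserted value; the
    // complement keeps every other bit of the original source.
    llvm::APInt Mask =
        ~llvm::APInt::getBitsSet(DstSize, Offset, Offset + InsertSize);
    std::printf("offset %2u -> mask 0x%08llx (%lld)\n", Offset,
                (unsigned long long)Mask.getZExtValue(),
                (long long)Mask.getSExtValue());
  }
  return 0;
}

This prints 0xffff0000, 0xfffe0001, 0xff0000ff, and 0x0000ffff for offsets 0, 1, 8, and 16; their signed forms (-65536, -131071, -16776961, 65535) are exactly the G_CONSTANT mask values the CHECK lines above expect.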