Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -469,6 +469,38 @@ MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_CONSTANT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + const APInt &Val = MI.getOperand(1).getCImm()->getValue(); + unsigned TotalSize = Ty.getSizeInBits(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + int NumParts = TotalSize / NarrowSize; + + SmallVector PartRegs; + for (int I = 0; I != NumParts; ++I) { + unsigned Offset = I * NarrowSize; + auto K = MIRBuilder.buildConstant(NarrowTy, + Val.lshr(Offset).trunc(NarrowSize)); + PartRegs.push_back(K.getReg(0)); + } + + LLT LeftoverTy; + unsigned LeftoverBits = TotalSize - NumParts * NarrowSize; + SmallVector LeftoverRegs; + if (LeftoverBits != 0) { + LeftoverTy = LLT::scalar(LeftoverBits); + auto K = MIRBuilder.buildConstant( + LeftoverTy, + Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits)); + LeftoverRegs.push_back(K.getReg(0)); + } + + insertParts(MI.getOperand(0).getReg(), + Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs); + + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_ADD: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. @@ -611,31 +643,6 @@ return reduceLoadStoreWidth(MI, 0, NarrowTy); } - case TargetOpcode::G_CONSTANT: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); - LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - - SmallVector DstRegs; - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - ConstantInt *CI = - ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize)); - MIRBuilder.buildConstant(DstReg, *CI); - DstRegs.push_back(DstReg); - } - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); - MI.eraseFromParent(); - return Legalized; - } case TargetOpcode::G_SELECT: return narrowScalarSelect(MI, TypeIdx, NarrowTy); case TargetOpcode::G_AND: Index: test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll =================================================================== --- test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -215,14 +215,6 @@ ret void } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %0:_(s96) = G_CONSTANT i96 0 (in function: nonpow2_constant_narrowing) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_constant_narrowing -; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_constant_narrowing: -define void @nonpow2_constant_narrowing() { - store i96 0, i96* undef - ret void -} - ; Currently can't handle vector lengths that aren't an exact multiple of ; natively supported vector lengths. Test that the fall-back works for those. ; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: :0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements Index: test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir @@ -25,6 +25,23 @@ ... +--- +name: test_constant_s96 +body: | + bb.0: + + ; CHECK-LABEL: name: test_constant_s96 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4780896129847249538 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -547834910 + ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[C]](s64), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[C1]](s32), 64 + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) + %0:_(s96) = G_CONSTANT i96 -10105770365747857631829412482 + $vgpr0_vgpr1_vgpr2 = COPY %0 + +... + --- name: test_constant_s1 body: |