Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1239,13 +1239,20 @@
   switch (Opc) {
   default:
     return UnableToLegalize;
+  case TargetOpcode::G_CONSTANT:
+  case TargetOpcode::G_FCONSTANT:
   case TargetOpcode::G_IMPLICIT_DEF: {
     SmallVector<unsigned, 2> DstRegs;
     unsigned NarrowSize = NarrowTy.getSizeInBits();
     unsigned DstReg = MI.getOperand(0).getReg();
-    unsigned Size = MRI.getType(DstReg).getSizeInBits();
+    LLT Ty = MRI.getType(DstReg);
+    unsigned Size = Ty.getSizeInBits();
     int NumParts = Size / NarrowSize;
+
+    assert((NarrowTy.isScalar() && NarrowTy == Ty.getElementType()) ||
+           NarrowTy.getElementType() == Ty.getElementType());
+
     // FIXME: Don't know how to handle the situation where the small vectors
     // aren't all the same size yet.
     if (Size % NarrowSize != 0)
@@ -1253,7 +1260,13 @@
 
     for (int i = 0; i < NumParts; ++i) {
       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
-      MIRBuilder.buildUndef(TmpReg);
+
+      if (Opc == TargetOpcode::G_IMPLICIT_DEF)
+        MIRBuilder.buildUndef(TmpReg);
+      else if (Opc == TargetOpcode::G_CONSTANT)
+        MIRBuilder.buildConstant(TmpReg, *MI.getOperand(1).getCImm());
+      else
+        MIRBuilder.buildFConstant(TmpReg, *MI.getOperand(1).getFPImm());
       DstRegs.push_back(TmpReg);
     }
Index: lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
===================================================================
--- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -263,8 +263,6 @@
 
 MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
                                                     const ConstantFP &Val) {
-  assert(Res.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
-
   auto MIB = buildInstr(TargetOpcode::G_FCONSTANT);
   Res.addDefToMIB(*getMRI(), MIB);
   MIB.addFPImm(&Val);
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -109,9 +109,6 @@
   setAction({G_BITCAST, S32}, Legal);
   setAction({G_BITCAST, 1, V2S16}, Legal);
 
-  getActionDefinitionsBuilder(G_FCONSTANT)
-    .legalFor({S32, S64});
-
   // G_IMPLICIT_DEF is a no-op so we can make it legal for any value type that
   // can fit in a register.
   // FIXME: We need to legalize several more operations before we can add
@@ -123,15 +120,57 @@
     .clampScalar(0, S1, S512);
 
+  auto vectorIsLarge = [](const LegalityQuery &Query) {
+    LLT Ty = Query.Types[0];
+    if (!Ty.isVector())
+      return false;
+    return Ty.getSizeInBits() > 64 ||
+           !isPowerOf2_32(Ty.getNumElements());
+  };
+
+  auto splitVectorTypes = [=](const LegalityQuery &Query) {
+    LLT Ty = Query.Types[0];
+    LLT EltTy = Ty.getElementType();
+    unsigned Size = Ty.getSizeInBits();
+    unsigned EltSize = EltTy.getSizeInBits();
+
+    if (EltSize >= 64)
+      return std::make_pair(0, EltTy);
+
+    if (EltSize == 16) {
+      if (Size % 64 == 0)
+        return std::make_pair(0, V4S16);
+
+      if (Size % 32 == 0)
+        return std::make_pair(0, V2S16);
+
+      // FIXME: Handle odd splits
+      return std::make_pair(0, S16);
+    }
+
+    if (EltSize == 32) {
+      if (Size % 64 == 0)
+        return std::make_pair(0, V2S32);
+      return std::make_pair(0, S32);
+    }
+
+    return std::make_pair(0, EltTy);
+  };
+
   // FIXME: i1 operands to intrinsics should always be legal, but other i1
   // values may not be legal. We need to figure out how to distinguish
   // between these two scenarios.
   // FIXME: Pointer types
+
+  getActionDefinitionsBuilder(G_FCONSTANT)
+    .legalFor({S1, S32, S64, V2S32, S16, V2S16, V4S16})
+    .fewerElementsIf(vectorIsLarge, splitVectorTypes);
+
   getActionDefinitionsBuilder(G_CONSTANT)
-    .legalFor({S1, S32, S64, V2S32, V2S16})
+    .legalFor({S1, S32, S64, V2S32, V2S16, V4S16})
     .clampScalar(0, S32, S64)
-    .widenScalarToNextPow2(0);
-
+    .widenScalarToNextPow2(0)
+    .fewerElementsIf(vectorIsLarge, splitVectorTypes);
 
   setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
 
   getActionDefinitionsBuilder(
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
@@ -12,39 +12,98 @@
     %0:_(s32) = G_CONSTANT i32 5
     $vgpr0 = COPY %0
 ...
+
 ---
-name: test_constant_i64
+name: test_constant_v2s32
 body: |
   bb.0:
 
-    ; CHECK-LABEL: name: test_constant_i64
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](s64)
-    %0:_(s64) = G_CONSTANT i64 5
+    ; CHECK-LABEL: name: test_constant_v2s32
+    ; CHECK: [[C:%[0-9]+]]:_(<2 x s32>) = G_CONSTANT i32 5
+    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](<2 x s32>)
+    %0:_(<2 x s32>) = G_CONSTANT i32 5
     $vgpr0_vgpr1 = COPY %0
 ...
 
 ---
-name: test_fconstant_f32
+name: test_constant_v3s32
 body: |
   bb.0:
 
-    ; CHECK-LABEL: name: test_fconstant_f32
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
-    %0:_(s32) = G_FCONSTANT float 1.0
-    $vgpr0 = COPY %0
+    ; CHECK-LABEL: name: test_constant_v3s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s32>) = G_CONSTANT i32 5
+    $vgpr0_vgpr1_vgpr2 = COPY %0
+
 ...
+
 ---
-name: test_fconstant_f64
+name: test_constant_v4s32
 body: |
   bb.0:
 
-    ; CHECK-LABEL: name: test_fconstant_f64
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-LABEL: name: test_constant_v4s32
+    ; CHECK: [[C:%[0-9]+]]:_(<2 x s32>) = G_CONSTANT i32 5
+    ; CHECK: [[C1:%[0-9]+]]:_(<2 x s32>) = G_CONSTANT i32 5
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[C]](<2 x s32>), [[C1]](<2 x s32>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    %0:_(<4 x s32>) = G_CONSTANT i32 5
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0
+
+...
+
+---
+name: test_constant_v5s32
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_constant_v5s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 234
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 234
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 234
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 234
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 234
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
+    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<5 x s32>), [[C5]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<5 x s32>) = G_CONSTANT i32 234
+    %1:_(s32) = G_CONSTANT i32 4
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+
+...
+
+---
+name: test_constant_i64
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_constant_i64
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
     ; CHECK: $vgpr0_vgpr1 = COPY [[C]](s64)
-    %0:_(s64) = G_FCONSTANT double 1.0
+    %0:_(s64) = G_CONSTANT i64 5
     $vgpr0_vgpr1 = COPY %0
+
+...
+
+---
+name: test_constant_v2s64
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_constant_v2s64
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s64>) = G_CONSTANT i64 5
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0
+
 ...
@@ -116,25 +175,69 @@
 ...
 
 ---
-name: test_constant_v2s32
+name: test_constant_v2s16
 body: |
   bb.0:
 
-    ; CHECK-LABEL: name: test_constant_v2s32
-    ; CHECK: [[C:%[0-9]+]]:_(<2 x s32>) = G_CONSTANT i32 5
-    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](<2 x s32>)
-    %0:_(<2 x s32>) = G_CONSTANT i32 5
+    ; CHECK-LABEL: name: test_constant_v2s16
+    ; CHECK: [[C:%[0-9]+]]:_(<2 x s16>) = G_CONSTANT i32 5
+    ; CHECK: $vgpr0 = COPY [[C]](<2 x s16>)
+    %0:_(<2 x s16>) = G_CONSTANT i32 5
+    $vgpr0 = COPY %0
+...
+
+---
+name: test_constant_v3s16
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_constant_v3s16
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %5(<3 x s32>), [[C3]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
+    ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
+    ; CHECK: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC2]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32), [[SEXT2]](s32)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    %0:_(<3 x s16>) = G_CONSTANT i16 5
+    %1:_(s32) = G_CONSTANT i32 2
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+
+...
+
+---
+name: test_constant_v4s16
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_constant_v4s16
+    ; CHECK: [[C:%[0-9]+]]:_(<4 x s16>) = G_CONSTANT i16 5
+    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](<4 x s16>)
+    %0:_(<4 x s16>) = G_CONSTANT i16 5
     $vgpr0_vgpr1 = COPY %0
 ...
+
 ---
-name: test_constant_v2s16
+name: test_constant_v8s16
 body: |
   bb.0:
 
-    ; CHECK-LABEL: name: test_constant_v2s16
-    ; CHECK: [[C:%[0-9]+]]:_(<2 x s16>) = G_CONSTANT i32 5
-    ; CHECK: $vgpr0 = COPY [[C]](<2 x s16>)
-    %0:_(<2 x s16>) = G_CONSTANT i32 5
-    $vgpr0 = COPY %0
+    ; CHECK-LABEL: name: test_constant_v8s16
+    ; CHECK: [[C:%[0-9]+]]:_(<4 x s16>) = G_CONSTANT i16 5
+    ; CHECK: [[C1:%[0-9]+]]:_(<4 x s16>) = G_CONSTANT i16 5
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[C]](<4 x s16>), [[C1]](<4 x s16>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
+    %0:_(<8 x s16>) = G_CONSTANT i16 5
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0
+
 ...