diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1008,6 +1008,11 @@ MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef Ops); + /// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ... where each OpN is + /// built with G_CONSTANT. + MachineInstrBuilder buildBuildVectorConstant(const DstOp &Res, + ArrayRef Ops); + /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill /// the number of elements MachineInstrBuilder buildSplatVector(const DstOp &Res, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -269,13 +269,10 @@ const MachineRegisterInfo &MRI); /// Tries to constant fold a vector binop with sources \p Op1 and \p Op2. -/// If successful, returns the G_BUILD_VECTOR representing the folded vector -/// constant. \p MIB should have an insertion point already set to create new -/// G_CONSTANT instructions as needed. -Register ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI, - MachineIRBuilder &MIB); +/// Returns an empty vector on failure. +SmallVector ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI); Optional ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI); diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -203,10 +203,10 @@ if (SrcTy.isVector()) { // Try to constant fold vector constants. - Register VecCst = ConstantFoldVectorBinop( - Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this); - if (VecCst) - return buildCopy(DstOps[0], VecCst); + SmallVector VecCst = ConstantFoldVectorBinop( + Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()); + if (!VecCst.empty()) + return buildBuildVectorConstant(DstOps[0], VecCst); break; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -664,6 +664,17 @@ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } +MachineInstrBuilder +MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, + ArrayRef Ops) { + SmallVector TmpVec; + TmpVec.reserve(Ops.size()); + LLT EltTy = Res.getLLTTy(*getMRI()).getElementType(); + for (auto &Op : Ops) + TmpVec.push_back(buildConstant(EltTy, Op)); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); +} + MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, const SrcOp &Src) { SmallVector TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -608,33 +608,27 @@ return None; } -Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI, - MachineIRBuilder &MIB) { +SmallVector +llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI) { auto *SrcVec2 = getOpcodeDef(Op2, MRI); if (!SrcVec2) - return Register(); + return SmallVector(); auto *SrcVec1 = getOpcodeDef(Op1, MRI); if (!SrcVec1) - return Register(); + return SmallVector(); - const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0)); - - SmallVector FoldedElements; + SmallVector FoldedElements; for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) { auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx), SrcVec2->getSourceReg(Idx), MRI); if (!MaybeCst) - return Register(); - auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0); - FoldedElements.emplace_back(FoldedCstReg); + return SmallVector(); + FoldedElements.push_back(*MaybeCst); } - // Create the new vector constant. - auto CstVec = - MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements); - return CstVec.getReg(0); + return FoldedElements; } bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll @@ -12,8 +12,7 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64), [[C2]](s64), [[C2]](s64) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY [[BUILD_VECTOR1]](<4 x s64>) - ; CHECK-NEXT: G_STORE [[COPY]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 entry: %add = add <4 x i64> zeroinitializer, zeroinitializer diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -184,10 +184,9 @@ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR4]](<2 x s64>) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[COPY8]](<2 x s64>) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)