Index: llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp +++ llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp @@ -89,6 +89,76 @@ StringRef getPassName() const override { return AARCH64_VECTOR_BY_ELEMENT_OPT_NAME; } + + static const unsigned StmtReplTableSize = 12; + static const unsigned MaxNumParam = 5; + + // This enumeration type describes how the operands of the new instructions are + // obtained from the operands of the original instruction. + typedef enum { + Zero, // get from operand 0 + First, // get from operand 1 + Second, // get from operand 2 + Third, // get from operand 3 + Fourth, // get from operand 4 + New, // Create a new operand register + UseNew, // Use the recently created operand register + NO // No operand (ends the operand list) + } RegAction; + + struct NewInstInfo + { + unsigned FirstInst; // Instruction Id for first new instruction + unsigned SecondInst; // Instruction Id for second new instruction + // Info describing the mapping of the old instruction operands into the + // new instructions' operands. + // Outer index 0 for first instruction mapping, and 1 for second + // instruction mapping. + RegAction Regmapping[2][MaxNumParam]; + }; + + // Table containing instruction id of all instructions that should be replaced + unsigned TableOriginal[StmtReplTableSize] = { + AArch64::FMLAv4i32_indexed, + AArch64::FMLSv4i32_indexed, + AArch64::FMULXv4i32_indexed, + AArch64::FMULv4i32_indexed, + AArch64::FMLAv2i64_indexed, + AArch64::FMLSv2i64_indexed, + AArch64::FMULXv2i64_indexed, + AArch64::FMULv2i64_indexed, + AArch64::FMLAv2i32_indexed, + AArch64::FMLSv2i32_indexed, + AArch64::FMULXv2i32_indexed, + AArch64::FMULv2i32_indexed + }; + // Table of replacement instructions; entry i pairs with TableOriginal[i]. 
+ struct NewInstInfo TableNew[StmtReplTableSize] = { + { AArch64::DUPv4i32lane, AArch64::FMLAv4f32, + { {New, Third, Fourth, NO, NO}, {Zero, First, Second, UseNew, NO} } }, + { AArch64::DUPv4i32lane, AArch64::FMLSv4f32, + { {New, Third, Fourth, NO, NO}, {Zero, First, Second, UseNew, NO} } }, + { AArch64::DUPv4i32lane, AArch64::FMULXv4f32, + { {New, Second, Third, NO, NO}, {Zero, First, UseNew, NO, NO} } }, + { AArch64::DUPv4i32lane, AArch64::FMULv4f32, + { {New, Second, Third, NO, NO}, {Zero, First, UseNew, NO, NO} } }, + { AArch64::DUPv2i64lane, AArch64::FMLAv2f64, + { {New, Third, Fourth, NO, NO}, {Zero, First, Second, UseNew, NO} } }, + { AArch64::DUPv2i64lane, AArch64::FMLSv2f64, + { {New, Third, Fourth, NO, NO}, {Zero, First, Second, UseNew, NO} } }, + { AArch64::DUPv2i64lane, AArch64::FMULXv2f64, + { {New, Second, Third, NO, NO}, {Zero, First, UseNew, NO, NO} } }, + { AArch64::DUPv2i64lane, AArch64::FMULv2f64, + { {New, Second, Third, NO, NO}, {Zero, First, UseNew, NO, NO} } }, + { AArch64::DUPv2i32lane, AArch64::FMLAv2f32, + { {New, Third, Fourth, NO, NO}, {Zero, First, Second, UseNew, NO} } }, + { AArch64::DUPv2i32lane, AArch64::FMLSv2f32, + { {New, Third, Fourth, NO, NO}, {Zero, First, Second, UseNew, NO} } }, + { AArch64::DUPv2i32lane, AArch64::FMULXv2f32, + { {New, Second, Third, NO, NO}, {Zero, First, UseNew, NO, NO} } }, + { AArch64::DUPv2i32lane, AArch64::FMULv2f32, + { {New, Second, Third, NO, NO}, {Zero, First, UseNew, NO, NO} } } + }; }; char AArch64VectorByElementOpt::ID = 0; } // namespace @@ -152,9 +222,9 @@ /// Return true if early exit of the pass is recommended. 
bool AArch64VectorByElementOpt::earlyExitVectElement(MachineFunction *MF) { std::map VecInstElemTable; - const MCInstrDesc *IndexMulMCID = &TII->get(AArch64::FMLAv4i32_indexed); - const MCInstrDesc *DupMCID = &TII->get(AArch64::DUPv4i32lane); - const MCInstrDesc *MulMCID = &TII->get(AArch64::FMULv4f32); + const MCInstrDesc *IndexMulMCID = &TII->get(TableOriginal[0]); + const MCInstrDesc *DupMCID = &TII->get(TableNew[0].FirstInst); + const MCInstrDesc *MulMCID = &TII->get(TableNew[0].SecondInst); if (!shouldReplaceInstruction(MF, IndexMulMCID, DupMCID, MulMCID, VecInstElemTable)) @@ -200,122 +270,99 @@ /// Return true if the SIMD instruction is modified. bool AArch64VectorByElementOpt::optimizeVectElement( MachineInstr &MI, std::map *VecInstElemTable) const { - const MCInstrDesc *MulMCID, *DupMCID; - const TargetRegisterClass *RC = &AArch64::FPR128RegClass; - - switch (MI.getOpcode()) { - default: - return false; - - // 4X32 instructions - case AArch64::FMLAv4i32_indexed: - DupMCID = &TII->get(AArch64::DUPv4i32lane); - MulMCID = &TII->get(AArch64::FMLAv4f32); - break; - case AArch64::FMLSv4i32_indexed: - DupMCID = &TII->get(AArch64::DUPv4i32lane); - MulMCID = &TII->get(AArch64::FMLSv4f32); - break; - case AArch64::FMULXv4i32_indexed: - DupMCID = &TII->get(AArch64::DUPv4i32lane); - MulMCID = &TII->get(AArch64::FMULXv4f32); - break; - case AArch64::FMULv4i32_indexed: - DupMCID = &TII->get(AArch64::DUPv4i32lane); - MulMCID = &TII->get(AArch64::FMULv4f32); - break; - - // 2X64 instructions - case AArch64::FMLAv2i64_indexed: - DupMCID = &TII->get(AArch64::DUPv2i64lane); - MulMCID = &TII->get(AArch64::FMLAv2f64); - break; - case AArch64::FMLSv2i64_indexed: - DupMCID = &TII->get(AArch64::DUPv2i64lane); - MulMCID = &TII->get(AArch64::FMLSv2f64); - break; - case AArch64::FMULXv2i64_indexed: - DupMCID = &TII->get(AArch64::DUPv2i64lane); - MulMCID = &TII->get(AArch64::FMULXv2f64); - break; - case AArch64::FMULv2i64_indexed: - DupMCID = &TII->get(AArch64::DUPv2i64lane); - 
MulMCID = &TII->get(AArch64::FMULv2f64); - break; - - // 2X32 instructions - case AArch64::FMLAv2i32_indexed: - RC = &AArch64::FPR64RegClass; - DupMCID = &TII->get(AArch64::DUPv2i32lane); - MulMCID = &TII->get(AArch64::FMLAv2f32); - break; - case AArch64::FMLSv2i32_indexed: - RC = &AArch64::FPR64RegClass; - DupMCID = &TII->get(AArch64::DUPv2i32lane); - MulMCID = &TII->get(AArch64::FMLSv2f32); - break; - case AArch64::FMULXv2i32_indexed: - RC = &AArch64::FPR64RegClass; - DupMCID = &TII->get(AArch64::DUPv2i32lane); - MulMCID = &TII->get(AArch64::FMULXv2f32); - break; - case AArch64::FMULv2i32_indexed: - RC = &AArch64::FPR64RegClass; - DupMCID = &TII->get(AArch64::DUPv2i32lane); - MulMCID = &TII->get(AArch64::FMULv2f32); - break; - } - - if (!shouldReplaceInstruction(MI.getParent()->getParent(), - &TII->get(MI.getOpcode()), DupMCID, MulMCID, - *VecInstElemTable)) - return false; + const MCInstrDesc* NewInstMCID[2]; + bool Found = false; + unsigned InstrID; + + // Check if instruction MI can potentially be replaced + for (InstrID=0; InstrIDget(TableNew[InstrID].FirstInst); + NewInstMCID[1] = &TII->get(TableNew[InstrID].SecondInst); + Found = true; + break; + } + } + if (!Found) + return false; + + if (!shouldReplaceInstruction(MI.getParent()->getParent(), + &TII->get(MI.getOpcode()), NewInstMCID[0], NewInstMCID[1], + *VecInstElemTable)) + return false; const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock &MBB = *MI.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - - // get the operands of the current SIMD arithmetic instruction. - unsigned MulDest = MI.getOperand(0).getReg(); - unsigned SrcReg0 = MI.getOperand(1).getReg(); - unsigned Src0IsKill = getKillRegState(MI.getOperand(1).isKill()); - unsigned SrcReg1 = MI.getOperand(2).getReg(); - unsigned Src1IsKill = getKillRegState(MI.getOperand(2).isKill()); - unsigned DupDest; - - // Instructions of interest have either 4 or 5 operands. 
- if (MI.getNumOperands() == 5) { - unsigned SrcReg2 = MI.getOperand(3).getReg(); - unsigned Src2IsKill = getKillRegState(MI.getOperand(3).isKill()); - unsigned LaneNumber = MI.getOperand(4).getImm(); - - // Create a new DUP instruction. Note that if an equivalent DUP instruction - // has already been created before, then use that one instread of creating - // a new one. - if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg2, LaneNumber, &DupDest)) { - DupDest = MRI.createVirtualRegister(RC); - BuildMI(MBB, MI, DL, *DupMCID, DupDest) - .addReg(SrcReg2, Src2IsKill) - .addImm(LaneNumber); - } - BuildMI(MBB, MI, DL, *MulMCID, MulDest) - .addReg(SrcReg0, Src0IsKill) - .addReg(SrcReg1, Src1IsKill) - .addReg(DupDest, Src2IsKill); - } else if (MI.getNumOperands() == 4) { - unsigned LaneNumber = MI.getOperand(3).getImm(); - if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg1, LaneNumber, &DupDest)) { - DupDest = MRI.createVirtualRegister(RC); - BuildMI(MBB, MI, DL, *DupMCID, DupDest) - .addReg(SrcReg1, Src1IsKill) - .addImm(LaneNumber); - } - BuildMI(MBB, MI, DL, *MulMCID, MulDest) - .addReg(SrcReg0, Src0IsKill) - .addReg(DupDest, Src1IsKill); - } else { - return false; - } + const TargetRegisterClass *RC; + + MachineInstrBuilder MIB; + unsigned InstReg, InstReg_Save; + unsigned InstIsKill; + bool IsImmed; + for (int i=0; i<2; i++) // Two instructions to be created + { + bool MoreOperands = true; + unsigned j=0; + while (MoreOperands) + { + IsImmed = false; + RegAction Oper = TableNew[InstrID].Regmapping[i][j]; + + // Process the info on how to build operand j + switch (Oper) { + case NO: + MoreOperands = false; + break; + case New: + RC = MRI.getRegClass(MI.getOperand(0).getReg()); + InstReg = MRI.createVirtualRegister(RC); + InstReg_Save = InstReg; + break; + case UseNew: + InstReg = InstReg_Save; + InstIsKill = getKillRegState(MI.getOperand(j).isKill()); + break; + case Zero: + case First: + case Second: + case Third: + case Fourth: + if (MI.getOperand(Oper).isImm()) + { + 
InstReg = MI.getOperand(Oper).getImm(); + IsImmed = true; + } + else { + InstReg = MI.getOperand(Oper).getReg(); + if (j != 0) + InstIsKill = getKillRegState(MI.getOperand(Oper).isKill()); + } + break; + default: + llvm_unreachable("Invalid Operand Info"); + } + + // Add operands to the new instruction + if (MoreOperands) + { + // Creating instruction for the first time + if (j == 0) + MIB = BuildMI(MBB, MI, DL, *NewInstMCID[i], InstReg); + // Adding operands to an instruction that has already been created + else + { + if (IsImmed) + MIB = MIB.addImm(InstReg); + else + MIB = MIB.addReg(InstReg, InstIsKill); + } + j++; + } + } + } ++NumModifiedInstr; return true;