Index: lib/Target/Mips/MipsRegisterInfo.cpp =================================================================== --- lib/Target/Mips/MipsRegisterInfo.cpp +++ lib/Target/Mips/MipsRegisterInfo.cpp @@ -126,11 +126,11 @@ BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { static const MCPhysReg ReservedGPR32[] = { - Mips::ZERO, Mips::K0, Mips::K1, Mips::SP + Mips::ZERO, Mips::K0, Mips::K1, Mips::SP, Mips::AT }; static const MCPhysReg ReservedGPR64[] = { - Mips::ZERO_64, Mips::K0_64, Mips::K1_64, Mips::SP_64 + Mips::ZERO_64, Mips::K0_64, Mips::K1_64, Mips::SP_64, Mips::AT_64 }; BitVector Reserved(getNumRegs()); Index: lib/Target/Mips/MipsSERegisterInfo.cpp =================================================================== --- lib/Target/Mips/MipsSERegisterInfo.cpp +++ lib/Target/Mips/MipsSERegisterInfo.cpp @@ -143,7 +143,6 @@ // - If the frame object is any of the following, its offset must be adjusted // by adding the size of the stack: // incoming argument, callee-saved register location or local variable. - bool IsKill = false; int64_t Offset; Offset = SPOffset + (int64_t)StackSize; @@ -166,39 +165,36 @@ MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - const TargetRegisterClass *RC = - Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; - MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); - unsigned Reg = RegInfo.createVirtualRegister(RC); + unsigned AT = Subtarget.isABI_N64() ? 
Mips::AT_64 : Mips::AT; const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>( MBB.getParent()->getSubtarget().getInstrInfo()); - BuildMI(MBB, II, DL, TII.get(ADDiu), Reg).addReg(FrameReg).addImm(Offset); + BuildMI(MBB, II, DL, TII.get(ADDiu), AT).addReg(FrameReg).addImm(Offset); - FrameReg = Reg; + FrameReg = AT; Offset = 0; - IsKill = true; } else if (!isInt<16>(Offset)) { // Otherwise split the offset into 16-bit pieces and add it in multiple // instructions. MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned NewImm = 0; + unsigned AT = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; + unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi; + + int64_t Hi = (Offset + (1 << 15)) >> 16; + Offset -= (Hi << 16); + const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>( MBB.getParent()->getSubtarget().getInstrInfo()); - unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, - OffsetBitSize == 16 ? &NewImm : nullptr); - BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg) - .addReg(Reg, RegState::Kill); - - FrameReg = Reg; - Offset = SignExtend64<16>(NewImm); - IsKill = true; + BuildMI(MBB, II, DL, TII.get(LUi), AT).addImm(Hi); + BuildMI(MBB, II, DL, TII.get(ADDu), AT).addReg(FrameReg).addReg(AT); + + FrameReg = AT; } } - MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill); + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false); MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); } Index: test/CodeGen/Mips/vector-multiply.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/vector-multiply.ll @@ -0,0 +1,30 @@ +; RUN: llc -march=mipsel -O0 < %s +; RUN: llc -march=mips64el -O0 < %s + +define i32 @main(i32 %argc, i8** %argh) "no-frame-pointer-elim"="true" { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argh.addr = alloca i8**, align 8 + %v0 = alloca <16 x i8>, align 16 + 
%.compoundliteral = alloca <16 x i8>, align 16 + %v1 = alloca <16 x i8>, align 16 + %.compoundliteral1 = alloca <16 x i8>, align 16 + %unused_variable = alloca [16384 x i32], align 16 + %result = alloca <16 x i8>, align 16 + store i32 0, i32* %retval + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argh, i8*** %argh.addr, align 8 + store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>* %.compoundliteral + %0 = load <16 x i8>* %.compoundliteral + store <16 x i8> %0, <16 x i8>* %v0, align 16 + store <16 x i8> zeroinitializer, <16 x i8>* %.compoundliteral1 + %1 = load <16 x i8>* %.compoundliteral1 + store <16 x i8> %1, <16 x i8>* %v1, align 16 + %2 = load <16 x i8>* %v0, align 16 + %3 = load <16 x i8>* %v1, align 16 + %mul = mul <16 x i8> %2, %3 + store <16 x i8> %mul, <16 x i8>* %result, align 16 + ret i32 0 +} +