Index: lib/Target/Mips/MipsRegisterInfo.td
===================================================================
--- lib/Target/Mips/MipsRegisterInfo.td
+++ lib/Target/Mips/MipsRegisterInfo.td
@@ -388,6 +388,8 @@
                             (sequence "W%u", 0, 31)>;
 def MSA128D: RegisterClass<"Mips", [v2i64, v2f64], 128,
                            (sequence "W%u", 0, 31)>;
+def MSA128WEvens: RegisterClass<"Mips", [v4i32, v4f32], 128,
+                                (decimate (sequence "W%u", 0, 31), 2)>;
 def MSACtrl: RegisterClass<"Mips", [i32], 32, (add
   MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;
 
Index: lib/Target/Mips/MipsSEISelLowering.cpp
===================================================================
--- lib/Target/Mips/MipsSEISelLowering.cpp
+++ lib/Target/Mips/MipsSEISelLowering.cpp
@@ -2879,10 +2879,21 @@
   unsigned Ws = MI->getOperand(1).getReg();
   unsigned Lane = MI->getOperand(2).getImm();
 
-  if (Lane == 0)
-    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo);
-  else {
-    unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+  if (Lane == 0) {
+    unsigned Wt = Ws;
+    if (!Subtarget.useOddSPReg()) {
+      // We must copy to an even-numbered MSA register so that the
+      // single-precision sub-register is also guaranteed to be even-numbered.
+      Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
+
+      BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
+    }
+
+    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
+  } else {
+    unsigned Wt = RegInfo.createVirtualRegister(
+        Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
+                                  &Mips::MSA128WEvensRegClass);
 
     BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
     BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
@@ -2944,7 +2955,9 @@
   unsigned Wd_in = MI->getOperand(1).getReg();
   unsigned Lane = MI->getOperand(2).getImm();
   unsigned Fs = MI->getOperand(3).getReg();
-  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+  unsigned Wt = RegInfo.createVirtualRegister(
+      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
+                                &Mips::MSA128WEvensRegClass);
 
   BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
     .addImm(0)
Index: test/CodeGen/Mips/no-odd-spreg-msa.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Mips/no-odd-spreg-msa.ll
@@ -0,0 +1,131 @@
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,-nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,+nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG
+
+@v4f32 = global <4 x float> zeroinitializer
+
+define void @msa_insert_0(float %a) {
+entry:
+  ; Force the float into an odd-numbered register using named registers and
+  ; load the vector.
+  %b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a)
+  %0 = load volatile <4 x float>* @v4f32
+
+  ; Clobber all except $f12/$w12 and $f13
+  ;
+  ; The intention is that if odd single precision registers are permitted, the
+  ; allocator will choose $f12/$w12 for the vector and $f13 for the float to
+  ; avoid the spill/reload.
+  ;
+  ; On the other hand, if odd single precision registers are not permitted, it
+  ; must copy $f13 to an even-numbered register before inserting into the
+  ; vector.
+  call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+  %1 = insertelement <4 x float> %0, float %b, i32 0
+  store <4 x float> %1, <4 x float>* @v4f32
+  ret void
+}
+
+; ALL-LABEL: msa_insert_0:
+; ALL: mov.s $f13, $f12
+; ALL: lw $[[R0:[0-9]+]], %got(v4f32)(
+; ALL: ld.w $w[[W0:[0-9]+]], 0($[[R0]])
+; NOODDSPREG: mov.s $f[[F0:[0-9]+]], $f13
+; NOODDSPREG: insve.w $w[[W0]][0], $w[[F0]][0]
+; ODDSPREG: insve.w $w[[W0]][0], $w13[0]
+; ALL: # Clobber
+; ALL-NOT: sdc1
+; ALL-NOT: ldc1
+; ALL: st.w $w[[W0]], 0($[[R0]])
+
+define void @msa_insert_1(float %a) {
+entry:
+  ; Force the float into an odd-numbered register using named registers and
+  ; load the vector.
+  %b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a)
+  %0 = load volatile <4 x float>* @v4f32
+
+  ; Clobber all except $f12/$w12 and $f13
+  ;
+  ; The intention is that if odd single precision registers are permitted, the
+  ; allocator will choose $f12/$w12 for the vector and $f13 for the float to
+  ; avoid the spill/reload.
+  ;
+  ; On the other hand, if odd single precision registers are not permitted, it
+  ; must copy $f13 to an even-numbered register before inserting into the
+  ; vector.
+  call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+  %1 = insertelement <4 x float> %0, float %b, i32 1
+  store <4 x float> %1, <4 x float>* @v4f32
+  ret void
+}
+
+; ALL-LABEL: msa_insert_1:
+; ALL: mov.s $f13, $f12
+; ALL: lw $[[R0:[0-9]+]], %got(v4f32)(
+; ALL: ld.w $w[[W0:[0-9]+]], 0($[[R0]])
+; NOODDSPREG: mov.s $f[[F0:[0-9]+]], $f13
+; NOODDSPREG: insve.w $w[[W0]][1], $w[[F0]][0]
+; ODDSPREG: insve.w $w[[W0]][1], $w13[0]
+; ALL: # Clobber
+; ALL-NOT: sdc1
+; ALL-NOT: ldc1
+; ALL: st.w $w[[W0]], 0($[[R0]])
+
+define float @msa_extract_0() {
+entry:
+  %0 = load volatile <4 x float>* @v4f32
+  %1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0)
+
+  ; Clobber all except $f12 and $f13
+  ;
+  ; The intention is that if odd single precision registers are permitted, the
+  ; allocator will choose $f13/$w13 for the vector since that saves on moves.
+  ;
+  ; On the other hand, if odd single precision registers are not permitted, it
+  ; must move it to $f12/$w12.
+  call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+
+  %2 = extractelement <4 x float> %1, i32 0
+  ret float %2
+}
+
+; ALL-LABEL: msa_extract_0:
+; ALL: lw $[[R0:[0-9]+]], %got(v4f32)(
+; ALL: ld.w $w12, 0($[[R0]])
+; ALL: move.v $w[[W0:13]], $w12
+; NOODDSPREG: move.v $w[[W0:12]], $w13
+; ALL: # Clobber
+; ALL-NOT: st.w
+; ALL-NOT: ld.w
+; ALL: mov.s $f0, $f[[W0]]
+
+define float @msa_extract_1() {
+entry:
+  %0 = load volatile <4 x float>* @v4f32
+  %1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0)
+
+  ; Clobber all except $f13
+  ;
+  ; The intention is that if odd single precision registers are permitted, the
+  ; allocator will choose $f13/$w13 for the vector since that saves on moves.
+  ;
+  ; On the other hand, if odd single precision registers are not permitted, it
+  ; must be spilled.
+  call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+
+  %2 = extractelement <4 x float> %1, i32 1
+  ret float %2
+}
+
+; ALL-LABEL: msa_extract_1:
+; ALL: lw $[[R0:[0-9]+]], %got(v4f32)(
+; ALL: ld.w $w12, 0($[[R0]])
+; ALL: splati.w $w[[W0:[0-9]+]], $w13[1]
+; NOODDSPREG: st.w $w[[W0]], 0($sp)
+; ODDSPREG-NOT: st.w
+; ODDSPREG-NOT: ld.w
+; ALL: # Clobber
+; ODDSPREG-NOT: st.w
+; ODDSPREG-NOT: ld.w
+; NOODDSPREG: ld.w $w0, 0($sp)
+; ODDSPREG: mov.s $f0, $f[[W0]]
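
Note on the MSA128WEvens definition: TableGen's decimate operator keeps every
Nth member of a register list starting with the first, so
(decimate (sequence "W%u", 0, 31), 2) expands to W0, W2, ..., W30. Each MSA
register Wn overlaps the FPU register Fn (the Mips::sub_lo sub-register of
W13 is F13), so constraining a virtual register to this class guarantees that
the single-precision sub-register the expansion reads or writes is
even-numbered, which is exactly the invariant +nooddspreg requires.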
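
As a quick usage sketch (assuming an llc built with the Mips target; repro.ll
and extract_lane0 are illustrative names, not part of this patch), IR like the
following goes through the Lane == 0 path of the first MipsSEISelLowering.cpp
hunk, where the value is produced by a COPY of the sub_lo single-precision
sub-register of an MSA register:

  ; repro.ll: extract lane 0 of a v4f32.
  @v = global <4 x float> zeroinitializer

  define float @extract_lane0() {
  entry:
    %0 = load volatile <4 x float>* @v
    %1 = extractelement <4 x float> %0, i32 0
    ret float %1
  }

  ; llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,+nooddspreg < repro.ll

On its own this is unlikely to make the even-register copy visible, since the
allocator may simply place the vector in an even-numbered register from the
start; the regression test above pins values to odd registers with inline asm
precisely so that the +nooddspreg and -nooddspreg behaviours diverge in an
observable way.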