Index: lib/Target/Mips/MipsSEISelDAGToDAG.cpp =================================================================== --- lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -934,6 +934,9 @@ // same set/ of registers. Similarly, ldi.h isn't capable of producing { // 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can. + const MipsABIInfo &ABI = + static_cast(TM).getABI(); + BuildVectorSDNode *BVN = cast(Node); APInt SplatValue, SplatUndef; unsigned SplatBitSize; @@ -971,13 +974,233 @@ break; } - if (!SplatValue.isSignedIntN(10)) - return false; - - SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, - ViaVecTy.getVectorElementType()); + SDNode *Res; - SDNode *Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm); + // If we have a signed 10 bit integer, we can splat it directly. + // + // If we have something bigger we can synthesize the value into a GPR and + // splat from there. + if (SplatValue.isSignedIntN(10)) { + SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, + ViaVecTy.getVectorElementType()); + + Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm); + } else if (SplatValue.isSignedIntN(16) && + ((ABI.IsO32() && SplatBitSize < 64) || + (ABI.IsN32() || ABI.IsN64()))) { + // Only handle signed 16 bit values when the element size is GPR width. + // MIPS64 can handle all the cases but MIPS32 would need to handle + // negative cases specifically here. Instead, handle those cases as + // 64bit values. + + bool Is32BitSplat = ABI.IsO32() || SplatBitSize < 64; + const unsigned ADDiuOp = Is32BitSplat ? Mips::ADDiu : Mips::DADDiu; + const MVT SplatMVT = Is32BitSplat ? MVT::i32 : MVT::i64; + SDValue ZeroVal = CurDAG->getRegister( + Is32BitSplat ? Mips::ZERO : Mips::ZERO_64, SplatMVT); + + const unsigned FILLOp = + SplatBitSize == 16 + ? Mips::FILL_H + : (SplatBitSize == 32 ? Mips::FILL_W + : (SplatBitSize == 64 ? Mips::FILL_D : 0)); + + assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!"); + assert((!ABI.IsO32() || (ABI.IsO32() && FILLOp != Mips::FILL_D)) && + "Attempting to use fill.d on MIPS32!"); + + const unsigned Lo = SplatValue.getLoBits(16).getZExtValue(); + SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT); + + Res = CurDAG->getMachineNode(ADDiuOp, DL, SplatMVT, ZeroVal, LoVal); + Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0)); + + } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) { + // Only handle the cases where the splat size agrees with the size + // of the SplatValue here. + const unsigned Lo = SplatValue.getLoBits(16).getZExtValue(); + const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue(); + SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32); + + SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); + SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); + + if (Hi) + Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal); + + if (Lo) + Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32, + Hi ? SDValue(Res, 0) : ZeroVal, LoVal); + + assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); + Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32, SDValue(Res, 0)); + + } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 64 && + (ABI.IsN32() || ABI.IsN64())) { + // N32 and N64 can perform some tricks that O32 can't for signed 32 bit + // integers due to having 64bit registers. lui will cause the necessary + // zero/sign extension. + const unsigned Lo = SplatValue.getLoBits(16).getZExtValue(); + const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue(); + SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32); + + SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); + SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); + + if (Hi) + Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal); + + if (Lo) + Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32, + Hi ? SDValue(Res, 0) : ZeroVal, LoVal); + + Res = CurDAG->getMachineNode( + Mips::SUBREG_TO_REG, DL, MVT::i64, + CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64), + SDValue(Res, 0), + CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64)); + + Res = + CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0)); + + } else if (SplatValue.isSignedIntN(64)) { + // If we have a 64 bit Splat value, we perform a similar sequence to the + // above: + // + // MIPS32: MIPS64: + // lui $res, %highest(val) lui $res, %highest(val) + // ori $res, $res, %higher(val) ori $res, $res, %higher(val) + // lui $res2, %hi(val) lui $res2, %hi(val) + // ori $res2, %res2, %lo(val) ori $res2, %res2, %lo(val) + // $res3 = fill $res2 dinsu $res, $res2, 0, 32 + // $res4 = insert.w $res3[1], $res fill.d $res + // splat.d $res4, 0 + // + // The ability to use dinsu is guaranteed as MSA requires MIPSR5. This saves + // having to materialize the value by shifts and ors. + // + // FIXME: Implement the preferred sequence for MIPS64R6: + // + // MIPS64R6: + // ori $res, $zero, %lo(val) + // daui $res, $res, %hi(val) + // dahi $res, $res, %higher(val) + // dati $res, $res, %highest(cal) + // fill.d $res + // + + const unsigned Lo = SplatValue.getLoBits(16).getZExtValue(); + const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue(); + const unsigned Higher = SplatValue.lshr(32).getLoBits(16).getZExtValue(); + const unsigned Highest = SplatValue.lshr(48).getLoBits(16).getZExtValue(); + + SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); + SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); + SDValue HigherVal = CurDAG->getTargetConstant(Higher, DL, MVT::i32); + SDValue HighestVal = CurDAG->getTargetConstant(Highest, DL, MVT::i32); + SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32); + + // Independent of whether we're targeting MIPS64 or not, the basic + // operations are the same. Also, directly use the $zero register if + // the 16 bit chunk is zero. + // + // For optimization purposes we always synthesize the splat value as + // an i32 value, then if we're targetting MIPS64, use SUBREG_TO_REG + // just before combining the values with dinsu to produce an i64. This + // enables SelectionDAG to aggressively share components of splat values + // where possible. + // + // FIXME: This is the general constant synthesis problem. This code + // should be factored out into a class shared between all the + // classes that need it. Specifically, for a splat size of 64 + // bits that's a negative number we can do better than LUi/ORi + // for the upper 32bits. + + if (Hi) + Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal); + + if (Lo) + Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32, + Hi ? SDValue(Res, 0) : ZeroVal, LoVal); + + SDNode *HiRes; + if (Highest) + HiRes = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HighestVal); + + if (Higher) + HiRes = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32, + Highest ? SDValue(HiRes, 0) : ZeroVal, + HigherVal); + + + if (ABI.IsO32()) { + Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32, + (Hi || Lo) ? SDValue(Res, 0) : ZeroVal); + + Res = CurDAG->getMachineNode( + Mips::INSERT_W, DL, MVT::v4i32, SDValue(Res, 0), + (Highest || Higher) ? SDValue(HiRes, 0) : ZeroVal, + CurDAG->getTargetConstant(1, DL, MVT::i32)); + + const TargetLowering *TLI = getTargetLowering(); + const TargetRegisterClass *RC = + TLI->getRegClassFor(ViaVecTy.getSimpleVT()); + + Res = CurDAG->getMachineNode( + Mips::COPY_TO_REGCLASS, DL, ViaVecTy, SDValue(Res, 0), + CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32)); + + Res = CurDAG->getMachineNode( + Mips::SPLATI_D, DL, MVT::v2i64, SDValue(Res, 0), + CurDAG->getTargetConstant(0, DL, MVT::i32)); + } else if (ABI.IsN64() || ABI.IsN32()) { + + SDValue Zero64Val = CurDAG->getRegister(Mips::ZERO_64, MVT::i64); + const bool HiResNonZero = Highest || Higher; + const bool ResNonZero = Hi || Lo; + + if (HiResNonZero) + HiRes = CurDAG->getMachineNode( + Mips::SUBREG_TO_REG, DL, MVT::i64, + CurDAG->getTargetConstant(((Highest >> 15) & 0x1), DL, MVT::i64), + SDValue(HiRes, 0), + CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64)); + + if (ResNonZero) + Res = CurDAG->getMachineNode( + Mips::SUBREG_TO_REG, DL, MVT::i64, + CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64), + SDValue(Res, 0), + CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64)); + + // We have 3 cases: + // The HiRes is nonzero but Res is $zero => dsll32 HiRes, 0 + // The Res is nonzero but HiRes is $zero => dinsu Res, $zero, 32, 32 + // Both are non zero => dinsu Res, HiRes, 32, 32 + // + // The obvious "missing" case is when both are zero, but that case is + // handled by the ldi case. + if (ResNonZero) { + SDValue Ops[4] = {HiResNonZero ? SDValue(HiRes, 0) : Zero64Val, + CurDAG->getTargetConstant(64, DL, MVT::i32), + CurDAG->getTargetConstant(32, DL, MVT::i32), + SDValue(Res, 0)}; + + Res = CurDAG->getMachineNode(Mips::DINSU, DL, MVT::i64, Ops); + } else if (HiResNonZero) { + Res = CurDAG->getMachineNode( + Mips::DSLL32, DL, MVT::i64, SDValue(HiRes, 0), + CurDAG->getTargetConstant(0, DL, MVT::i32)); + } else + llvm_unreachable( + "Zero splat value handled by non-zero 64bit splat synthesis!"); + + Res = CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0)); + } else + llvm_unreachable("Unknown ABI in MipsISelDAGToDAG!"); + + } else + return false; if (ResVecTy != ViaVecTy) { // If LdiOp is writing to a different register class to ResVecTy, then Index: lib/Target/Mips/MipsSEISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsSEISelLowering.cpp +++ lib/Target/Mips/MipsSEISelLowering.cpp @@ -2529,11 +2529,10 @@ SplatBitSize != 64) return SDValue(); - // If the value fits into a simm10 then we can use ldi.[bhwd] - // However, if it isn't an integer type we will have to bitcast from an - // integer type first. Also, if there are any undefs, we must lower them - // to defined values first. - if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10)) + // If the value isn't an integer type we will have to bitcast + // from an integer type first. Also, if there are any undefs, we must + // lower them to defined values first. + if (ResTy.isInteger() && !HasAnyUndefs) return Op; EVT ViaVecTy; Index: test/CodeGen/Mips/msa/basic_operations.ll =================================================================== --- test/CodeGen/Mips/msa/basic_operations.ll +++ test/CodeGen/Mips/msa/basic_operations.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=mips -mattr=+msa,+fp64 -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s | \ -; RUN: FileCheck -check-prefixes=ALL,O32,MIPS32,ALL-BE %s +; RUN: FileCheck -check-prefixes=ALL,O32,MIPS32,ALL-BE,O32-BE %s ; RUN: llc -march=mipsel -mattr=+msa,+fp64 -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s | \ -; RUN: FileCheck -check-prefixes=ALL,O32,MIPS32,ALL-LE %s +; RUN: FileCheck -check-prefixes=ALL,O32,MIPS32,ALL-LE,O32-LE %s ; RUN: llc -march=mips64 -target-abi n32 -mattr=+msa,+fp64 \ ; RUN: -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck -check-prefixes=ALL,N32,MIPS64,ALL-BE %s @@ -58,10 +58,19 @@ ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]] store volatile <16 x i8> , <16 x i8>*@v16i8 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]]) + ; ALL-BE-DAG: lui [[R3:\$[0-9]+]], 1286 + ; ALL-LE-DAG: lui [[R3:\$[0-9]+]], 2055 + ; ALL-BE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 1800 + ; ALL-LE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 1541 + ; O32-BE: fill.w [[R1:\$w[0-9]+]], [[R4]] + + ; O32: insert.w [[R1]][1], [[R2]] + ; O32: splati.d $w{{.*}}, [[R1]][0] + + ; MIPS64-BE: dinsu [[R4]], [[R2]], 32, 32 + ; MIPS64-LE: dinsu [[R2]], [[R4]], 32, 32 + ; MIPS64-BE: fill.d $w{{.*}}, [[R4]] + ; MIPS64-LE: fill.d $w{{.*}}, [[R2]] ret void } @@ -92,10 +101,19 @@ ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]] store volatile <8 x i16> , <8 x i16>*@v8i16 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]]) + ; ALL-BE-DAG: lui [[R3:\$[0-9]+]], 3 + ; ALL-LE-DAG: lui [[R3:\$[0-9]+]], 4 + ; ALL-BE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 4 + ; ALL-LE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 3 + + ; O32-BE: fill.w [[R1:\$w[0-9]+]], [[R4]] + ; O32: insert.w [[R1]][1], [[R2]] + ; O32: splati.d $w{{.*}}, [[R1]][0] + + ; MIPS64-BE: dinsu [[R4]], [[R2]], 32, 32 + ; MIPS64-LE: dinsu [[R2]], [[R4]], 32, 32 + ; MIPS64-BE: fill.d $w{{.*}}, [[R4]] + ; MIPS64-LE: fill.d $w{{.*}}, [[R2]] ret void } @@ -122,10 +140,23 @@ ; ALL: ldi.h [[R1:\$w[0-9]+]], 1 store volatile <4 x i32> , <4 x i32>*@v4i32 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) + ; -BE-DAG: ori [[R2:\$[0-9]+]], $zero, 1 + ; O32-BE-DAG: ori [[R3:\$[0-9]+]], $zero, 1 + ; O32-BE-DAG: ori [[R4:\$[0-9]+]], $zero, 2 + ; O32-LE-DAG: ori [[R3:\$[0-9]+]], $zero, 2 + ; O32-LE-DAG: ori [[R4:\$[0-9]+]], $zero, 1 + ; O32: fill.w [[W0:\$w[0-9]+]], [[R4]] + ; O32: insert.w [[W0]][1], [[R3]] + ; O32: splati.d [[W1:\$w[0-9]+]], [[W0]] + + ; MIPS64-DAG: ori [[R5:\$[0-9]+]], $zero, 2 + ; MIPS64-DAG: ori [[R6:\$[0-9]+]], $zero, 1 + + ; MIPS64-BE: dinsu [[R5]], [[R6]], 32, 32 + ; MIPS64-LE: dinsu [[R6]], [[R5]], 32, 32 + ; MIPS64-BE: fill.d $w{{.*}}, [[R4]] + ; MIPS64-LE: fill.d $w{{.*}}, [[R2]] + store volatile <4 x i32> , <4 x i32>*@v4i32 ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ Index: test/CodeGen/Mips/msa/immediates.ll =================================================================== --- test/CodeGen/Mips/msa/immediates.ll +++ test/CodeGen/Mips/msa/immediates.ll @@ -920,7 +920,7 @@ define void @bclri_d(<2 x i64> * %ptr) { entry: ; CHECK-LABEL: bclri_d: -; CHECK: and.v +; CHECK: bclri.d %a = load <2 x i64>, <2 x i64> * %ptr, align 16 %r = call <2 x i64> @llvm.mips.bclri.d(<2 x i64> %a, i32 16) store <2 x i64> %r, <2 x i64> * %ptr, align 16 @@ -930,7 +930,7 @@ define void @binsli_d(<2 x i64> * %ptr, <2 x i64> * %ptr2) { entry: ; CHECK-LABEL: binsli_d: -; CHECK: bsel.v +; CHECK: binsli.d %a = load <2 x i64>, <2 x i64> * %ptr, align 16 %b = load <2 x i64>, <2 x i64> * %ptr2, align 16 %r = call <2 x i64> @llvm.mips.binsli.d(<2 x i64> %a, <2 x i64> %b, i32 4) @@ -952,7 +952,7 @@ define void @bnegi_d(<2 x i64> * %ptr) { entry: ; CHECK-LABEL: bnegi_d: -; CHECK: xor.v +; CHECK: bnegi.d %a = load <2 x i64>, <2 x i64> * %ptr, align 16 %r = call <2 x i64> @llvm.mips.bnegi.d(<2 x i64> %a, i32 9) store <2 x i64> %r, <2 x i64> * %ptr, align 16 @@ -962,7 +962,7 @@ define void @bseti_d(<2 x i64> * %ptr) { entry: ; CHECK-LABEL: bseti_d: -; CHECK: or.v +; CHECK: bseti.d %a = load <2 x i64>, <2 x i64> * %ptr, align 16 %r = call <2 x i64> @llvm.mips.bseti.d(<2 x i64> %a, i32 25) store <2 x i64> %r, <2 x i64> * %ptr, align 16 Index: test/CodeGen/Mips/msa/splats.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/msa/splats.ll @@ -0,0 +1,666 @@ +; RUN: llc -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa < %s | FileCheck %s --check-prefixes=MIPS32-BE,MIPSXX-BE,ALL +; RUN: llc -march=mipsel -mcpu=mips32r5 -mattr=+fp64,+msa < %s | FileCheck %s --check-prefixes=MIPS32-LE,MIPSXX-LE,ALL +; RUN: llc -march=mips64 -mcpu=mips64r5 -mattr=+fp64,+msa < %s | FileCheck %s --check-prefixes=MIPS64-BE,MIPSXX-BE,ALL +; RUN: llc -march=mips64el -mcpu=mips64r5 -mattr=+fp64,+msa < %s | FileCheck %s --check-prefixes=MIPS64-LE,MIPSXX-EL,ALL + +@v16i8 = global <16 x i8> + +@v8i16 = global <8 x i16> + +@v4i32 = global <4 x i32> + +@v2i64 = global <2 x i64> + +define void @splat_i8_positive() { +; ALL-LABEL: splat_i8_positive: + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; ALL: ldi.b [[W0:\$w[0-9]]], 0 + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.h [[W0:\$w[0-9]]], 1 + +; MIPSXX-LE: ldi.h [[W0:\$w[0-9]]], 256 + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.w [[W0:\$w[0-9]]], 1 + +; MIPSXX-LE: lui [[R0:\$[0-9]]], 256 +; MIPSXX-LE: fill.w [[W0:\$w[0-9]]], [[R0]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.d [[W0:\$w[0-9]]], 1 + +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], $zero +; MIPS32-LE: move.v [[W1:\$w[0-9]]], [[W0]] +; MIPS32-LE: insert.w [[W1]][1], [[R0]] +; MIPS32-LE: splati.d [[W2:\$w[0-9]]], [[W1]][0] + +; MIPS64-LE: dsll32 [[R2:\$[0-9]]], [[R2]], 0 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.h [[W0:\$w[0-9]]], 255 +; MIPSXX-LE: ldi.h [[W1:\$w[0-9]]], -256 + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.w [[W0:\$w[0-9]]], 255 +; MIPSXX-LE: lui [[R2:\$[0-9]]], 65280 +; MIPSXX-LE: fill.w [[W1:\$w[0-9]]], [[R2]] + +; MIPS64-LE: dsll32 [[R3:\$[0-9]]], [[R2]], 0 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R3]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.d [[W0:\$w[0-9]]], 255 + +; MIPS32-LE: move.v [[W2:\$w[0-9]]], [[W0]] +; MIPS32-LE: insert.w [[W2]][1], [[R2]] +; MIPS32-LE: splati.d [[W2:\$w[0-9]]], [[W2]][0] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: addiu [[R2:\$[0-9]]], $zero, 8191 +; MIPSXX-BE: fill.h [[W0:\$w[0-9]]], [[R2]] + +; MIPSXX-LE: ldi.h [[W2:\$w[0-9]]], -225 + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: fill.w [[W0:\$w[0-9]]], [[R2]] + +; MIPSXX-LE: lui [[R2:\$[0-9]]], 65311 +; MIPSXX-LE: fill.w [[W2:\$w[0-9]]], [[R2]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPS32-BE: ori [[R3:\$[0-9]]], $zero, 8191 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R3]] + +; MIPS32-LE: move.v [[W2:\$w[0-9]]], [[W0]] +; MIPS32-LE: insert.w [[W2]][1], [[R2]] +; MIPS32-LE: splati.d [[W2:\$w[0-9]]], [[W2]][0] + +; MIPS64-BE: daddiu [[R2:\$[0-9]]], $zero, 8191 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: dsll32 [[R3:\$[0-9]]], [[R3]], 0 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R3]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.b [[W0:\$w[0-9]]], -1 + +; MIPSXX-LE: ldi.b [[W2:\$w[0-9]]], -1 + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ori [[R2:\$[0-9]]], $zero, 65535 +; MIPSXX-BE: fill.w [[W0:\$w[0-9]]], [[R2]] + +; MIPSXX-LE: lui [[R2:\$[0-9]]], 65535 +; MIPSXX-LE: fill.w [[W2:\$w[0-9]]], [[R2]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPS32-BE: insert.w [[W0]][1], $zero +; MIPS32-BE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: move.v [[W2:\$w[0-9]]], [[W0]] +; MIPS32-LE: insert.w [[W2]][1], [[R2]] +; MIPS32-LE: splati.d [[W3:\$w[0-9]]], [[W2]][0] + +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: dsll32 [[R4:\$[0-9]]], [[R3]], 0 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R4]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: lui [[R2:\$[0-9]]], 255 +; MIPSXX-BE: ori [[R3:\$[0-9]]], [[R2]], 65535 +; MIPSXX-BE: fill.w [[W0:\$w[0-9]]], [[R3]] + +; MIPSXX-LE: ldi.w [[W2:\$w[0-9]]], -256 + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPS32-BE: insert.w [[W0]][1], $zero +; MIPS32-BE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: ori [[R3:\$[0-9]]], [[R2]], 65280 +; MIPS32-LE: move.v [[W2:\$w[0-9]]], [[W0]] +; MIPS32-LE: insert.w [[W2]][1], [[R3]] +; MIPS32-LE: splati.d [[W3:\$w[0-9]]], [[W2]][0] + +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: ori [[R4:\$[0-9]]], [[R3]], 65280 +; MIPS64-LE: dsll32 [[R4:\$[0-9]]], [[R4]], 0 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R4]] + + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPS32-BE: lui [[R2:\$[0-9]]], 65535 +; MIPS32-BE: ori [[R3:\$[0-9]]], [[R2]], 65535 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-BE: move.v [[W1:\$w[0-9]]], $w0 +; MIPS32-BE: insert.w [[W1:\$w[0-9]]][1], $zero +; MIPS32-BE: splati.d [[W1:\$w[0-9]]], $w1[0] + +; MIPS32-LE: ori [[R3:\$[0-9]]], [[R2]], 65535 +; MIPS32-LE: insert.w [[W0]][1], [[R3]] +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: lui [[R2:\$[0-9]]], 65535 +; MIPS64-BE: ori [[R2:\$[0-9]]], [[R2]], 65535 +; MIPS64-BE: move [[R3:\$[0-9]]], [[R2]] +; MIPS64-BE: dinsu [[R3]], $zero, 32, 32 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R3]] + +; MIPS64-LE: ori [[R3:\$[0-9]]], [[R3]], 65535 +; MIPS64-LE: dsll32 [[R3:\$[0-9]]], [[R3]], 0 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R3]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPS32-BE: ori [[R2:\$[0-9]]], $zero, 255 +; MIPS32-BE: insert.w [[W0:\$w[0-9]]][1], [[R2:\$[0-9]]] +; MIPS32-BE: splati.d [[W0:\$w[0-9]]], $w0[0] +; MIPS32-LE: insert.w [[W1:\$w[0-9]]][1], [[R2:\$[0-9]]] +; MIPS32-LE: splati.d [[W0:\$w[0-9]]], [[W1:\$w[0-9]]][0] + + ret void +} + + +define void @test_i8_negative() { + +; Negative i8 + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.b [[W0:\$w[0-9]+]], -54 +; MIPSXX-LE: ldi.b [[W0:\$w[0-9]+]], -54 + +; Negative i16 + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.h [[W0:\$w[0-9]+]], -128 +; MIPSXX-LE: addiu [[R2:\$[0-9]]], $zero, 33023 +; MIPSXX-LE: fill.h [[W0:\$w[0-9]]], [[R2]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: addiu [[R2:\$[0-9]]], $zero, 49152 +; MIPSXX-BE: fill.h [[W:\$w[0-9]]], [[R2]] +; MIPSXX-LE: ldi.h [[W0:\$w[0-9]]], 192 + +; Negative i32 + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.w [[W0:\$w[0-9]+]], -54 +; MIPSXX-LE: lui [[R2:\$[0-9]]], 51967 +; MIPSXX-LE: ori [[R2:\$[0-9]]], [[R2]], 65535 +; MIPSXX-LE: fill.w [[W0:\$w[0-9]]], [[R2:\$[0-9]]] + + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: lui [[R0:\$[0-9]]], 65409 +; MIPSXX-BE: ori [[R1:\$[0-9]]], [[R0]], 51914 +; MIPSXX-BE: fill.w [[W0:\$w[0-9]]], [[R1]] +; MIPSXX-LE: lui [[R3:\$[0-9]]], 51914 +; MIPSXX-LE: ori [[R4:\$[0-9]]], [[R3]], 33279 +; MIPSXX-LE: fill.w [[W0:\$w[0-9]]], [[R4]] + +; Negative i64 + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPSXX-BE: ldi.d [[W0:\$w[0-9]+]], -54 +; MIPSXX-LE: lui [[R4:\$[0-9]]], 65535 +; MIPSXX-LE: ori [[R4:\$[0-9]]], [[R4]], 65535 +; MIPSXX-LE: fill.w [[W0:\$w[0-9]]], [[R4]] +; MIPSXX-LE: insert.w [[W0:\$w[0-9]]][1], [[R2]] +; MIPSXX-LE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; Negative i64 + store volatile <16 x i8> , <16 x i8>* @v16i8 +; MIPS32-BE: ori [[R2:\$[0-9]]], [[R0]], 49407 +; MIPS32-BE: lui [[R3:\$[0-9]]], 65535 +; MIPS32-BE: ori [[R4:\$[0-9]]], [[R3]], 51914 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R4]] +; MIPS32-BE: insert.w [[W0]][1], [[R2]] +; MIPS32-BE: splati.d [[W1:\$w[0-9]]], [[W0]][0] +; MIPS32-LE: ori [[R2:\$[0-9]]], [[R3]], 65535 +; MIPS32-LE: lui [[R3:\$[0-9]]], 65472 +; MIPS32-LE: ori [[R3:\$[0-9]]], [[R3]], 33279 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-LE: insert.w [[W0:\$w[0-9]]][1], [[R2]] +; MIPS32-LE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: lui [[R3:\$[0-9]]], 65535 +; MIPS64-BE: ori [[R3:\$[0-9]]], [[R3]], 51914 +; MIPS64-BE: ori [[R2:\$[0-9]]], [[R2]], 49407 +; MIPS64-BE: dinsu [[R3]], [[R2]], 32, 32 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R3]] + +; MIPS64-LE: lui [[R2:\$[0-9]]], 65472 +; MIPS64-LE: ori [[R2:\$[0-9]]], [[R2]], 33279 +; MIPS64-LE: ori [[R3:\$[0-9]]], [[R3]], 65535 +; MIPS64-LE: dinsu [[R2]], [[R3]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + ret void +} + +define void @splat_i16_positive() { +; ALL-LABEL: splat_i16_positive: + + store volatile <8 x i16> , <8 x i16>* @v8i16 +; ALL: ldi.b [[W0:\$w[0-9]]], 0 + + store volatile <8 x i16> , <8 x i16>* @v8i16 +; ALL: ldi.h [[W0:\$w[0-9]]], 339 + + store volatile <8 x i16> , <8 x i16>* @v8i16 +; ALL: addiu [[R2:\$[0-9]]], $zero, 1413 +; ALL: fill.h [[W0:\$w[0-9]]], [[R2]] + + + store volatile <8 x i16> , <8 x i16>* @v8i16 +; MIPSXX-BE: ldi.w [[W0:\$w[0-9]]], 1 + +; MIPSXX-LE: lui [[R2:\$[0-9]]], 1 +; MIPSXX-LE: fill.w [[W0:\$w[0-9]]], [[R2:\$[0-9]]] + + store volatile <8 x i16> , <8 x i16>* @v8i16 +; MIPSXX-BE: lui [[R2:\$[0-9]]], 1028 +; MIPSXX-BE: ori [[R2:\$[0-9]]], [[R2]], 1029 +; MIPSXX-BE: fill.w [[W0:\$w[0-9]]], [[R2]] + +; MIPSXX-LE: lui [[R3:\$[0-9]]], 1029 +; MIPSXX-LE: ori [[R3:\$[0-9]]], [[R3]], 1028 +; MIPSXX-LE: fill.w [[W0:\$w[0-9]]], [[R3]] + + store volatile <8 x i16> , <8 x i16>* @v8i16 +; MIPSXX-BE: ldi.d [[W0:\$w[0-9]]], 1 + +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], $zero +; MIPS32-LE: insert.w [[W0:\$w[0-9]]][1], [[R2]] +; MIPS32-LE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS64-LE: dsll32 [[R2:\$[0-9]]], [[R2]], 0 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + + store volatile <8 x i16> , <8 x i16>* @v8i16 +; MIPS32-BE: lui [[R2:\$[0-9]]], 1 +; MIPS32-BE: ori [[R2:\$[0-9]]], [[R2]], 2 +; MIPS32-BE: lui [[R3:\$[0-9]]], 3 +; MIPS32-BE: ori [[R3:\$[0-9]]], [[R3]], 4 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-BE: insert.w [[W0:\$w[0-9]]][1], [[R2]] +; MIPS32-BE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: lui [[R2:\$[0-9]]], 4 +; MIPS32-LE: ori [[R2:\$[0-9]]], [[R2]], 3 +; MIPS32-LE: lui [[R3:\$[0-9]]], 2 +; MIPS32-LE: ori [[R3:\$[0-9]]], [[R3]], 1 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-LE: insert.w [[W0:\$w[0-9]]][1], [[R2]] +; MIPS32-LE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: lui [[R2:\$[0-9]]], 3 +; MIPS64-BE: ori [[R2:\$[0-9]]], [[R2]], 4 +; MIPS64-BE: lui [[R3:\$[0-9]]], 1 +; MIPS64-BE: ori [[R3:\$[0-9]]], [[R3]], 2 +; MIPS64-BE: dinsu [[R2]], [[R3]], 32, 32 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: lui [[R2:\$[0-9]]], 2 +; MIPS64-LE: ori [[R2:\$[0-9]]], [[R2]], 1 +; MIPS64-LE: lui [[R3:\$[0-9]]], 4 +; MIPS64-LE: ori [[R3:\$[0-9]]], [[R3]], 3 +; MIPS64-LE: dinsu [[R2]], [[R3]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + + ret void +} + +define void @test_i16_negative() { +; Negative i16 + store volatile <8 x i16> , <8 x i16>* @v8i16 +; ALL: ldi.h [[W0:\$w[0-9]]], -331 + +; Negative i32 + store volatile <8 x i16> , <8 x i16>* @v8i16 +; MIPSXX-BE: ldi.w [[W0:\$w[0-9]]], -331 + +; MIPSXX-LE: lui [[R2:\$[0-9]]], 65205 +; MIPSXX-LE: ori [[R2:\$[0-9]]], [[R2]], 65535 +; MIPSXX-LE: fill.w [[W0:\$w[0-9]]], [[R2]] + +; Negative i64 + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPSXX-BE: ldi.d [[W0:\$w[0-9]]], -332 + +; MIPS32-LE: lui [[R2:\$[0-9]]], 65204 +; MIPS32-LE: ori [[R3:\$[0-9]]], [[R2]], 65535 +; MIPS32-LE: lui [[R4:\$[0-9]]], 65535 +; MIPS32-LE: ori [[R4:\$[0-9]]], [[R4]], 65535 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R4]] +; MIPS32-LE: move.v [[W1:\$w[0-9]]], [[W0]] +; MIPS32-LE: insert.w [[W1]][1], [[R3]] +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W1]][0] + +; MIPS64-LE: lui [[R2:\$[0-9]]], 65535 +; MIPS64-LE: ori [[R2:\$[0-9]]], [[R2]], 65535 +; MIPS64-LE: lui [[R3:\$[0-9]]], 65204 +; MIPS64-LE: ori [[R4:\$[0-9]]], [[R3]], 65535 +; MIPS64-LE: move [[R5:\$[0-9]]], [[R2]] +; MIPS64-LE: dinsu [[R5]], [[R4]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R5]] + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPS32-BE: lui [[R2:\$[0-9]]], 65535 +; MIPS32-BE: ori [[R3:\$[0-9]]], [[R2]], 65535 +; MIPS32-BE: ori [[R4:\$[0-9]]], [[R2]], 32768 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R4]] +; MIPS32-BE: insert.w [[W0]][1], [[R3]] +; MIPS32-BE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: lui [[R4:\$[0-9]]], 32768 +; MIPS32-LE: ori [[R4:\$[0-9]]], [[R4]], 65535 +; MIPS32-LE: move.v [[W1:\$w[0-9]]], [[W0]] +; MIPS32-LE: insert.w [[W1]][1], [[R4]] +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W1]][0] + +; MIPS64-BE: daddiu [[R2:\$[0-9]]], $zero, 32768 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: lui [[R5:\$[0-9]]], 32768 +; MIPS64-LE: ori [[R5:\$[0-9]]], [[R5]], 65535 +; MIPS64-LE: move [[R6:\$[0-9]]], [[R2]] +; MIPS64-LE: dinsu [[R6]], [[R5]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R6]] + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPS32-BE: ori [[R4:\$[0-9]]], $zero, 65204 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R4]] +; MIPS32-BE: insert.w [[W0]][1], [[R3]] +; MIPS32-BE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: insert.w [[W0]][1], [[R2]] +; MIPS32-LE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: ori [[R2:\$[0-9]]], $zero, 65204 +; MIPS64-BE: lui [[R3:\$[0-9]]], 65535 +; MIPS64-BE: ori [[R4:\$[0-9]]], [[R3]], 65535 +; MIPS64-BE: dinsu [[R2]], [[R4]], 32, 32 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: dinsu [[R2]], [[R3]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPS32-BE: ori [[R3:\$[0-9]]], [[R2]], 65204 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-BE: insert.w [[W0]][1], [[R2]] +; MIPS32-BE: splati.d [[W0:\$w[0-9]]], [[W0]][0] +; MIPS32-LE: ori [[R2:\$[0-9]]], $zero, 65535 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R2]] +; MIPS32-LE: insert.w [[W0]][1], [[R3]] +; MIPS32-LE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: ori [[R2:\$[0-9]]], [[R3]], 65204 +; MIPS64-BE: dinsu [[R2]], [[R3]], 32, 32 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: ori [[R2:\$[0-9]]], $zero, 65535 +; MIPS64-LE: dinsu [[R2]], [[R4]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + + ret void +} + +define void @splat_i32_positive() { +; ALL-LABEL: splat_i32_positive: + + store volatile <4 x i32> , <4 x i32>* @v4i32 +; ALL: ldi.w [[W0:\$w[0-9]]], 339 + + store volatile <4 x i32> , <4 x i32>* @v4i32 +; ALL: addiu [[R2:\$[0-9]]], $zero, 1413 +; ALL: fill.w [[W0:\$w[0-9]]], [[R2]] + + store volatile <4 x i32> , <4 x i32>* @v4i32 +; ALL: lui [[R2:\$[0-9]]], 1 +; ALL: ori [[R3:\$[0-9]]], [[R2]], 65535 +; ALL: fill.w [[W0:\$w[0-9]]], [[R3]] + + store volatile <4 x i32> , <4 x i32>* @v4i32 +; MIPS32-BE: ori [[R2:\$[0-9]]], $zero, 1028 +; MIPS32-BE: ori [[R3:\$[0-9]]], $zero, 1029 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-BE: insert.w [[W0]][1], [[R2]] +; MIPS32-BE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: ori [[R2:\$[0-9]]], $zero, 1029 +; MIPS32-LE: ori [[R3:\$[0-9]]], $zero, 1028 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-LE: insert.w [[W0]][1], [[R2]] +; MIPS32-LE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: ori [[R2:\$[0-9]]], $zero, 1029 +; MIPS64-BE: ori [[R3:\$[0-9]]], $zero, 1028 +; MIPS64-BE: dinsu [[R2]], [[R3]], 32, 32 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: ori [[R2:\$[0-9]]], $zero, 1028 +; MIPS64-LE: ori [[R3:\$[0-9]]], $zero, 1029 +; MIPS64-LE: dinsu [[R2]], [[R3]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + + ret void +} + +define void @test_i32_negative() { +; Negative i32 + store volatile <4 x i32> , <4 x i32>* @v4i32 +; ALL: ldi.w [[W0:\$w[0-9]]], -16 + + store volatile <4 x i32> , <4 x i32> * @v4i32 +; ALL: addiu [[R2:\$[0-9]]], $zero, 63472 +; ALL: fill.w [[W0:\$w[0-9]]], [[R2]] + +; Negative i64 + store volatile <4 x i32> , <4 x i32>* @v4i32 +; MIPSXX-BE: ldi.d [[W0:\$w[0-9]]], -35 + +; MIPS32-LE: lui [[R2:\$[0-9]]], 65535 +; MIPS32-LE: ori [[R3:\$[0-9]]], [[R2]], 65501 +; MIPS32-LE: ori [[R4:\$[0-9]]], [[R2]], 65535 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R4]] +; MIPS32-LE: move.v [[W1:\$w[0-9]]], [[W0]] +; MIPS32-LE: insert.w [[W1]][1], [[R3]] +; MIPS32-LE: splati.d [[W2:\$w[0-9]]], [[W1]][0] + +; MIPS64-LE: lui [[R2:\$[0-9]]], 65535 +; MIPS64-LE: ori [[R3:\$[0-9]]], [[R2]], 65535 +; MIPS64-LE: ori [[R4:\$[0-9]]], [[R2]], 65501 +; MIPS64-LE: move [[R5:\$[0-9]]], [[R3]] +; MIPS64-LE: dinsu [[R5]], [[R4]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R5]] + + store volatile <4 x i32> , <4 x i32> * @v4i32 +; MIPS32-BE: lui [[R2:\$[0-9]]], 65535 +; MIPS32-BE: ori [[R3:\$[0-9]]], [[R2]], 65535 +; MIPS32-BE: ori [[R4:\$[0-9]]], [[R2]], 63997 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R4]] +; MIPS32-BE: insert.w [[W0:\$w[0-9]]][1], [[R3]] +; MIPS32-BE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: ori [[R3:\$[0-9]]], [[R2]], 63997 +; MIPS32-LE: insert.w [[W0]][1], [[R3]] +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: daddiu [[R2:\$[0-9]]], $zero, 63997 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: ori [[R4:\$[0-9]]], [[R2]], 63997 +; MIPS64-LE: dinsu [[R3]], [[R4]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R3]] + + store volatile <4 x i32> , <4 x i32> * @v4i32 +; MIPS32-BE: ori [[R3:\$[0-9]]], [[R2]], 65525 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], $zero +; MIPS32-BE: insert.w [[W0:\$w[0-9]]][1], [[R3]] +; MIPS32-BE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: ori [[R3:\$[0-9]]], [[R2]], 65525 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-LE: insert.w [[W0]][1], $zero +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: lui [[R2:\$[0-9]]], 65535 +; MIPS64-BE: ori [[R2:\$[0-9]]], [[R2]], 65525 +; MIPS64-BE: dsll32 [[R2:\$[0-9]]], [[R2]], 0 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: ori [[R2:\$[0-9]]], [[R2]], 65525 +; MIPS64-LE: dinsu [[R2]], $zero, 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + + ret void +} + +define void @splat_i64_positive() { +; ALL-LABEL: splat_i64_positive: + + store volatile <2 x i64> , <2 x i64>* @v2i64 +; ALL: ldi.d [[W0:\$w[0-9]]], 339 + + store volatile <2 x i64> , <2 x i64>* @v2i64 +; MIPS32-BE: ori [[R2:\$[0-9]]], $zero, 16374 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R2]] +; MIPS32-BE: insert.w [[W0]][1], $zero +; MIPS32-BE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: ori [[R2:\$[0-9]]], $zero, 16374 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R2]] +; MIPS32-LE: insert.w [[W0]][1], $zero +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: daddiu [[R2:\$[0-9]]], $zero, 16374 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: daddiu [[R2:\$[0-9]]], $zero, 16374 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + + store volatile <2 x i64> , <2 x i64>* @v2i64 +; MIPS32-BE: lui [[R2:\$[0-9]]], 1 +; MIPS32-BE: ori [[R3:\$[0-9]]], [[R2]], 50838 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-BE: insert.w [[W0]][1], $zero +; MIPS32-BE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: lui [[R2:\$[0-9]]], 1 +; MIPS32-LE: ori [[R3:\$[0-9]]], [[R2]], 50838 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-LE: insert.w [[W0]][1], $zero +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: lui [[R2:\$[0-9]]], 1 +; MIPS64-BE: ori [[R3:\$[0-9]]], [[R2]], 50838 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R3]] + + store volatile <2 x i64> , <2 x i64>* @v2i64 +; ALL: addiu [[R2:\$[0-9]]], $zero, 16384 +; ALL: fill.h [[W0:\$w[0-9]]], [[R2:\$[0-9]]] + + ret void +} + +; Zero testing + +define void @splat_zero_tests() { + store volatile <8 x i16> , <8 x i16> * @v8i16 +; ALL: ldi.b [[W0:\$w[0-9]]], 0 + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPSXX-BE: lui [[R2:\$[0-9]]], 1 +; MIPSXX-BE: fill.w [[W0:\$w[0-9]]], [[R2]] +; MIPSXX-LE: ldi.w [[W0:\$w[0-9]]], 1 + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPSXX-BE: ldi.w [[W0:\$w[0-9]]], 1 +; MIPSXX-LE: lui [[R2:\$[0-9]]], 1 +; MIPSXX-LE: fill.w [[W0:\$w[0-9]]], [[R2]] + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPS32-BE: lui [[R3:\$[0-9]]], 2 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-BE: insert.w [[W0]][1], [[R2]] +; MIPS32-BE: splati.d [[W0:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: ori [[R3:\$[0-9]]], $zero, 2 +; MIPS32-LE: ori [[R4:\$[0-9]]], $zero, 1 +; MIPS32-LE: fill.w [[W1:\$w[0-9]]], [[R4]] +; MIPS32-LE: insert.w [[W1]][1], [[R3]] +; MIPS32-LE: splati.d [[W2:\$w[0-9]]], [[W1]][0] + +; MIPS64-BE: lui [[R3:\$[0-9]]], 2 +; MIPS64-BE: dinsu [[R3]], [[R2]], 32, 32 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R3]] + +; MIPS64-LE: ori [[R3:\$[0-9]]], $zero, 1 +; MIPS64-LE: ori [[R4:\$[0-9]]], $zero, 2 +; MIPS64-LE: dinsu [[R3]], [[R4]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R3]] + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPS32-BE: ori [[R3:\$[0-9]]], $zero, 1 +; MIPS32-BE: ori [[R4:\$[0-9]]], $zero, 2 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], [[R4]] +; MIPS32-BE: insert.w [[W0]][1], [[R3]] +; MIPS32-BE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: lui [[R3:\$[0-9]]], 2 +; MIPS32-LE: insert.w [[W0]][1], [[R3]] +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: ori [[R3:\$[0-9]]], $zero, 2 +; MIPS64-BE: ori [[R4:\$[0-9]]], $zero, 1 +; MIPS64-BE: dinsu [[R3]], [[R4]], 32, 32 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R3]] + +; MIPS64-LE: lui [[R3:\$[0-9]]], 2 +; MIPS64-LE: lui [[R4:\$[0-9]]], 1 +; MIPS64-LE: dinsu [[R4]], [[R3]], 32, 32 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R4]] + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPS32-BE: fill.w [[W0:\$w[0-9]]], $zero +; MIPS32-BE: move.v [[W1:\$w[0-9]]], [[W0]] +; MIPS32-BE: insert.w [[W1]][1], [[R2]] +; MIPS32-BE: splati.d [[W2:\$w[0-9]]], [[W1]][0] + +; MIPS32-LE: ldi.d [[W0:\$w[0-9]]], 1 + +; MIPS64-BE: dsll32 [[R2:\$[0-9]]], [[R2]], 0 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: ldi.d [[W0:\$w[0-9]]], 1 + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPS32-BE: lui [[R2:\$[0-9]]], 32768 +; MIPS32-BE: insert.w [[W0]][1], [[R2]] +; MIPS32-BE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS32-LE: ori [[R3:\$[0-9]]], $zero, 32768 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], [[R3]] +; MIPS32-LE: insert.w [[W0]][1], $zero +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: lui [[R2:\$[0-9]]], 32768 +; MIPS64-BE: dsll32 [[R2:\$[0-9]]], [[R2]], 0 +; MIPS64-BE: fill.d [[W0:\$w[0-9]]], [[R2]] + +; MIPS64-LE: ori [[R3:\$[0-9]]], $zero, 32768 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R3]] + + store volatile <8 x i16> , <8 x i16> * @v8i16 +; MIPS32-BE: ldi.d [[W0:\$w[0-9]]], 1 +; MIPS32-LE: fill.w [[W0:\$w[0-9]]], $zero + +; MIPS32-LE: insert.w [[W0]][1], [[R2]] +; MIPS32-LE: splati.d [[W1:\$w[0-9]]], [[W0]][0] + +; MIPS64-BE: ldi.d [[W0:\$w[0-9]]], 1 + +; MIPS64-LE: dsll32 [[R2:\$[0-9]]], [[R2]], 0 +; MIPS64-LE: fill.d [[W0:\$w[0-9]]], [[R2]] + ret void +}