Index: llvm/trunk/lib/Target/Mips/MipsMSAInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/Mips/MipsMSAInstrInfo.td +++ llvm/trunk/lib/Target/Mips/MipsMSAInstrInfo.td @@ -3781,6 +3781,80 @@ ISA_MIPS1_NOT_32R6_64R6; } +def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ + APInt Imm; + SDNode *BV = N->getOperand(0).getNode(); + EVT EltTy = N->getValueType(0).getVectorElementType(); + + return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; +}]>; + +def immi32Cst7 : ImmLeaf(Imm) && Imm == 7;}]>; +def immi32Cst15 : ImmLeaf(Imm) && Imm == 15;}]>; +def immi32Cst31 : ImmLeaf(Imm) && Imm == 31;}]>; + +def vsplati8imm7 : PatFrag<(ops node:$wt), + (and node:$wt, (vsplati8 immi32Cst7))>; +def vsplati16imm15 : PatFrag<(ops node:$wt), + (and node:$wt, (vsplati16 immi32Cst15))>; +def vsplati32imm31 : PatFrag<(ops node:$wt), + (and node:$wt, (vsplati32 immi32Cst31))>; +def vsplati64imm63 : PatFrag<(ops node:$wt), + (and node:$wt, vsplati64_imm_eq_63)>; + +class MSAShiftPat : + MSAPat<(VT (Node VT:$ws, (VT (and VT:$wt, Vec)))), + (VT (Insn VT:$ws, VT:$wt))>; + +class MSABitPat : + MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))), + (VT (Insn VT:$ws, VT:$wt))>; + +multiclass MSAShiftPats { + def : MSAShiftPat(Insn#_B), + (vsplati8 immi32Cst7)>; + def : MSAShiftPat(Insn#_H), + (vsplati16 immi32Cst15)>; + def : MSAShiftPat(Insn#_W), + (vsplati32 immi32Cst31)>; + def : MSAPat<(v2i64 (Node v2i64:$ws, (v2i64 (and v2i64:$wt, + vsplati64_imm_eq_63)))), + (v2i64 (!cast(Insn#_D) v2i64:$ws, v2i64:$wt))>; +} + +multiclass MSABitPats { + def : MSABitPat(Insn#_B), vsplati8imm7>; + def : MSABitPat(Insn#_H), vsplati16imm15>; + def : MSABitPat(Insn#_W), vsplati32imm31>; + def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1), + (vsplati64imm63 v2i64:$wt))), + (v2i64 (!cast(Insn#_D) v2i64:$ws, v2i64:$wt))>; +} + +defm : MSAShiftPats; +defm : MSAShiftPats; +defm : MSAShiftPats; +defm : MSABitPats; +defm : MSABitPats; + +def : MSAPat<(and v16i8:$ws, (xor (shl vsplat_imm_eq_1, + (vsplati8imm7 v16i8:$wt)), + immAllOnesV)), + (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>; +def : MSAPat<(and v8i16:$ws, (xor (shl vsplat_imm_eq_1, + (vsplati16imm15 v8i16:$wt)), + immAllOnesV)), + (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>; +def : MSAPat<(and v4i32:$ws, (xor (shl vsplat_imm_eq_1, + (vsplati32imm31 v4i32:$wt)), + immAllOnesV)), + (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>; +def : MSAPat<(and v2i64:$ws, (xor (shl (v2i64 vsplati64_imm_eq_1), + (vsplati64imm63 v2i64:$wt)), + (bitconvert (v4i32 immAllOnesV)))), + (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>; + // Vector extraction with fixed index. // // Extracting 32-bit values on MSA32 should always use COPY_S_W rather than Index: llvm/trunk/lib/Target/Mips/MipsSEISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/Mips/MipsSEISelLowering.cpp +++ llvm/trunk/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1547,11 +1547,24 @@ return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); } +static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + SDValue Vec = Op->getOperand(2); + bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); + MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; + SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, + DL, ResEltTy); + SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG); + + return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); +} + static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { EVT ResTy = Op->getValueType(0); SDLoc DL(Op); SDValue One = DAG.getConstant(1, DL, ResTy); - SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2)); + SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), DAG.getNOT(DL, Bit, ResTy)); @@ -1687,7 +1700,7 @@ return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), DAG.getNode(ISD::SHL, DL, VecTy, One, - Op->getOperand(2))); + truncateVecElts(Op, DAG))); } case Intrinsic::mips_bnegi_b: case Intrinsic::mips_bnegi_h: @@ -1723,7 +1736,7 @@ return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), DAG.getNode(ISD::SHL, DL, VecTy, One, - Op->getOperand(2))); + truncateVecElts(Op, DAG))); } case Intrinsic::mips_bseti_b: case Intrinsic::mips_bseti_h: @@ -2210,7 +2223,7 @@ case Intrinsic::mips_sll_w: case Intrinsic::mips_sll_d: return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + truncateVecElts(Op, DAG)); case Intrinsic::mips_slli_b: case Intrinsic::mips_slli_h: case Intrinsic::mips_slli_w: @@ -2240,7 +2253,7 @@ case Intrinsic::mips_sra_w: case Intrinsic::mips_sra_d: return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + truncateVecElts(Op, DAG)); case Intrinsic::mips_srai_b: case Intrinsic::mips_srai_h: case Intrinsic::mips_srai_w: @@ -2270,7 +2283,7 @@ case Intrinsic::mips_srl_w: case Intrinsic::mips_srl_d: return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + truncateVecElts(Op, DAG)); case Intrinsic::mips_srli_b: case Intrinsic::mips_srli_h: case Intrinsic::mips_srli_w: Index: llvm/trunk/test/CodeGen/Mips/msa/shift_constant_pool.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/msa/shift_constant_pool.ll +++ llvm/trunk/test/CodeGen/Mips/msa/shift_constant_pool.ll @@ -0,0 +1,171 @@ +; Test whether the following functions, with vectors featuring negative or values larger than the element +; bit size have their results of operations generated correctly when placed into constant pools + +; RUN: llc -march=mips64 -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,MIPS64 %s +; RUN: llc -march=mips -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,MIPS32 %s +; RUN: llc -march=mips64el -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,MIPS64 %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,MIPS32 %s + +@llvm_mips_bclr_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16 + +define void @llvm_mips_bclr_w_test_const_vec() nounwind { +entry: + %0 = tail call <4 x i32> @llvm.mips.bclr.w(<4 x i32> , <4 x i32> ) + store <4 x i32> %0, <4 x i32>* @llvm_mips_bclr_w_test_const_vec_res + ret void +} + +declare <4 x i32> @llvm.mips.bclr.w(<4 x i32>, <4 x i32>) nounwind + +; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]: +; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]: +; ALL: .4byte 1 # 0x1 +; ALL: .4byte 1 # 0x1 +; ALL: .4byte 3 # 0x3 +; ALL: .4byte 3 # 0x3 +; ALL-LABEL: llvm_mips_bclr_w_test_const_vec: +; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]]) +; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]]) +; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_bclr_w_test_const_vec_res)($[[R1]]) +; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]]) +; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]]) +; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_bclr_w_test_const_vec_res)($[[R1]]) +; ALL: ld.w $w0, 0($[[R2]]) +; ALL: st.w $w0, 0($[[R3]]) + + +@llvm_mips_bneg_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16 + +define void @llvm_mips_bneg_w_test_const_vec() nounwind { +entry: + %0 = tail call <4 x i32> @llvm.mips.bneg.w(<4 x i32> , <4 x i32> ) + store <4 x i32> %0, <4 x i32>* @llvm_mips_bneg_w_test_const_vec_res + ret void +} + +declare <4 x i32> @llvm.mips.bneg.w(<4 x i32>, <4 x i32>) nounwind + +; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]: +; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]: +; ALL: .4byte 1 # 0x1 +; ALL: .4byte 1 # 0x1 +; ALL: .4byte 3 # 0x3 +; ALL: .4byte 3 # 0x3 +; ALL-LABEL: llvm_mips_bneg_w_test_const_vec: +; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]]) +; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]]) +; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_bneg_w_test_const_vec_res)($[[R1]]) +; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]]) +; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]]) +; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_bneg_w_test_const_vec_res)($[[R1]]) +; ALL: ld.w $w0, 0($[[R2]]) +; ALL: st.w $w0, 0($[[R3]]) + + +@llvm_mips_bset_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16 + +define void @llvm_mips_bset_w_test_const_vec() nounwind { +entry: + %0 = tail call <4 x i32> @llvm.mips.bset.w(<4 x i32> , <4 x i32> ) + store <4 x i32> %0, <4 x i32>* @llvm_mips_bset_w_test_const_vec_res + ret void +} + +declare <4 x i32> @llvm.mips.bset.w(<4 x i32>, <4 x i32>) nounwind + +; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]: +; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]: +; ALL: .4byte 2147483648 # 0x80000000 +; ALL: .4byte 2147483648 # 0x80000000 +; ALL: .4byte 4 # 0x4 +; ALL: .4byte 4 # 0x4 +; ALL-LABEL: llvm_mips_bset_w_test_const_vec: +; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]]) +; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]]) +; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_bset_w_test_const_vec_res)($[[R1]]) +; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]]) +; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]]) +; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_bset_w_test_const_vec_res)($[[R1]]) +; ALL: ld.w $w0, 0($[[R2]]) +; ALL: st.w $w0, 0($[[R3]]) + +@llvm_mips_sll_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16 + +define void @llvm_mips_sll_w_test_const_vec() nounwind { +entry: + %0 = tail call <4 x i32> @llvm.mips.sll.w(<4 x i32> , <4 x i32> ) + store <4 x i32> %0, <4 x i32>* @llvm_mips_sll_w_test_const_vec_res + ret void +} + +declare <4 x i32> @llvm.mips.sll.w(<4 x i32>, <4 x i32>) nounwind + +; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]: +; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]: +; ALL: .4byte 2147483648 # 0x80000000 +; ALL: .4byte 2147483648 # 0x80000000 +; ALL: .4byte 4 # 0x4 +; ALL: .4byte 4 # 0x4 +; ALL-LABEL: llvm_mips_sll_w_test_const_vec: +; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]]) +; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]]) +; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_sll_w_test_const_vec_res)($[[R1]]) +; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]]) +; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]]) +; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_sll_w_test_const_vec_res)($[[R1]]) +; ALL: ld.w $w0, 0($[[R2]]) +; ALL: st.w $w0, 0($[[R3]]) + +@llvm_mips_sra_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16 + +define void @llvm_mips_sra_w_test_const_vec() nounwind { +entry: + %0 = tail call <4 x i32> @llvm.mips.sra.w(<4 x i32> , <4 x i32> ) + store <4 x i32> %0, <4 x i32>* @llvm_mips_sra_w_test_const_vec_res + ret void +} + +declare <4 x i32> @llvm.mips.sra.w(<4 x i32>, <4 x i32>) nounwind + +; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]: +; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]: +; ALL: .4byte 4294967292 # 0xfffffffc +; ALL: .4byte 4 # 0x4 +; ALL: .4byte 8 # 0x8 +; ALL: .4byte 8 # 0x8 +; ALL-LABEL: llvm_mips_sra_w_test_const_vec: +; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]]) +; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]]) +; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_sra_w_test_const_vec_res)($[[R1]]) +; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]]) +; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]]) +; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_sra_w_test_const_vec_res)($[[R1]]) +; ALL: ld.w $w0, 0($[[R2]]) +; ALL: st.w $w0, 0($[[R3]]) + +@llvm_mips_srl_w_test_const_vec_res = global <4 x i32> zeroinitializer, align 16 + +define void @llvm_mips_srl_w_test_const_vec() nounwind { +entry: + %0 = tail call <4 x i32> @llvm.mips.srl.w(<4 x i32> , <4 x i32> ) + store <4 x i32> %0, <4 x i32>* @llvm_mips_srl_w_test_const_vec_res + ret void +} + +declare <4 x i32> @llvm.mips.srl.w(<4 x i32>, <4 x i32>) nounwind + +; MIPS32: [[LABEL:\$CPI[0-9]+_[0-9]+]]: +; MIPS64: [[LABEL:\.LCPI[0-9]+_[0-9]+]]: +; ALL: .4byte 1073741820 # 0x3ffffffc +; ALL: .4byte 4 # 0x4 +; ALL: .4byte 8 # 0x8 +; ALL: .4byte 8 # 0x8 +; ALL-LABEL: llvm_mips_srl_w_test_const_vec: +; MIPS32: lw $[[R2:[0-9]+]], %got([[LABEL]])($[[R1:[0-9]+]]) +; MIPS32: addiu $[[R2]], $[[R2]], %lo([[LABEL]]) +; MIPS32: lw $[[R3:[0-9]+]], %got(llvm_mips_srl_w_test_const_vec_res)($[[R1]]) +; MIPS64: ld $[[R2:[0-9]+]], %got_page([[LABEL]])($[[R1:[0-9]+]]) +; MIPS64: daddiu $[[R2]], $[[R2]], %got_ofst([[LABEL]]) +; MIPS64: ld $[[R3:[0-9]+]], %got_disp(llvm_mips_srl_w_test_const_vec_res)($[[R1]]) +; ALL: ld.w $w0, 0($[[R2]]) +; ALL: st.w $w0, 0($[[R3]]) Index: llvm/trunk/test/CodeGen/Mips/msa/shift_no_and.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/msa/shift_no_and.ll +++ llvm/trunk/test/CodeGen/Mips/msa/shift_no_and.ll @@ -0,0 +1,460 @@ +; Test the absence of the andi.b / and.v instructions + +; RUN: llc -march=mips -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck %s + +@llvm_mips_bclr_b_ARG1 = global <16 x i8> , align 16 +@llvm_mips_bclr_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_bclr_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_bclr_b_test() nounwind { +entry: + %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bclr_b_ARG1 + %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bclr_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bclr.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bclr_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bclr.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK-LABEL: llvm_mips_bclr_b_test: +; CHECK-NOT: andi.b +; CHECK: bclr.b + +@llvm_mips_bclr_h_ARG1 = global <8 x i16> , align 16 +@llvm_mips_bclr_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_bclr_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_bclr_h_test() nounwind { +entry: + %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bclr_h_ARG1 + %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bclr_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.bclr.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_bclr_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bclr.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK-LABEL: llvm_mips_bclr_h_test: +; CHECK-NOT: and.v +; CHECK: bclr.h + +@llvm_mips_bclr_w_ARG1 = global <4 x i32> , align 16 +@llvm_mips_bclr_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_bclr_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_bclr_w_test() nounwind { +entry: + %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bclr_w_ARG1 + %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bclr_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.bclr.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_bclr_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bclr.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK-LABEL: llvm_mips_bclr_w_test: +; CHECK-NOT: and.v +; CHECK: bclr.w + +@llvm_mips_bclr_d_ARG1 = global <2 x i64> , align 16 +@llvm_mips_bclr_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_bclr_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_bclr_d_test() nounwind { +entry: + %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bclr_d_ARG1 + %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bclr_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.bclr.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_bclr_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bclr.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK-LABEL: llvm_mips_bclr_d_test: +; CHECK-NOT: and.v +; CHECK: bclr.d + +@llvm_mips_bneg_b_ARG1 = global <16 x i8> , align 16 +@llvm_mips_bneg_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_bneg_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_bneg_b_test() nounwind { +entry: + %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bneg_b_ARG1 + %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bneg_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bneg.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bneg_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bneg.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK-LABEL: llvm_mips_bneg_b_test: +; CHECK-NOT: andi.b +; CHECK: bneg.b + +@llvm_mips_bneg_h_ARG1 = global <8 x i16> , align 16 +@llvm_mips_bneg_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_bneg_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_bneg_h_test() nounwind { +entry: + %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bneg_h_ARG1 + %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bneg_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.bneg.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_bneg_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bneg.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK-LABEL: llvm_mips_bneg_h_test: +; CHECK-NOT: and.v +; CHECK: bneg.h + +@llvm_mips_bneg_w_ARG1 = global <4 x i32> , align 16 +@llvm_mips_bneg_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_bneg_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_bneg_w_test() nounwind { +entry: + %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bneg_w_ARG1 + %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bneg_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.bneg.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_bneg_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bneg.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK-LABEL: llvm_mips_bneg_w_test: +; CHECK-NOT: and.v +; CHECK: bneg.w + +@llvm_mips_bneg_d_ARG1 = global <2 x i64> , align 16 +@llvm_mips_bneg_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_bneg_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_bneg_d_test() nounwind { +entry: + %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bneg_d_ARG1 + %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bneg_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.bneg.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_bneg_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bneg.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK-LABEL: llvm_mips_bneg_d_test: +; CHECK-NOT: and.v +; CHECK: bneg.d + +@llvm_mips_bset_b_ARG1 = global <16 x i8> , align 16 +@llvm_mips_bset_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_bset_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_bset_b_test() nounwind { +entry: + %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bset_b_ARG1 + %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bset_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bset.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bset_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bset.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK-LABEL: llvm_mips_bset_b_test: +; CHECK-NOT: andi.b +; CHECK: bset.b + +@llvm_mips_bset_h_ARG1 = global <8 x i16> , align 16 +@llvm_mips_bset_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_bset_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_bset_h_test() nounwind { +entry: + %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bset_h_ARG1 + %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bset_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.bset.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_bset_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bset.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK-LABEL: llvm_mips_bset_h_test: +; CHECK-NOT: and.v +; CHECK: bset.h + +@llvm_mips_bset_w_ARG1 = global <4 x i32> , align 16 +@llvm_mips_bset_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_bset_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_bset_w_test() nounwind { +entry: + %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bset_w_ARG1 + %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bset_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.bset.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_bset_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bset.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK-LABEL: llvm_mips_bset_w_test: +; CHECK-NOT: and.v +; CHECK: bset.w + +@llvm_mips_bset_d_ARG1 = global <2 x i64> , align 16 +@llvm_mips_bset_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_bset_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_bset_d_test() nounwind { +entry: + %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bset_d_ARG1 + %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bset_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.bset.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_bset_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bset.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK-LABEL: llvm_mips_bset_d_test: +; CHECK-NOT: and.v +; CHECK: bset.d + +@llvm_mips_sll_b_ARG1 = global <16 x i8> , align 16 +@llvm_mips_sll_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_sll_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_sll_b_test() nounwind { +entry: + %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG1 + %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.sll.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.sll.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK-LABEL: llvm_mips_sll_b_test: +; CHECK-NOT: andi.b +; CHECK: sll.b + +@llvm_mips_sll_h_ARG1 = global <8 x i16> , align 16 +@llvm_mips_sll_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_sll_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_sll_h_test() nounwind { +entry: + %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG1 + %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.sll.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.sll.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK-LABEL: llvm_mips_sll_h_test: +; CHECK-NOT: and.v +; CHECK: sll.h + +@llvm_mips_sll_w_ARG1 = global <4 x i32> , align 16 +@llvm_mips_sll_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_sll_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_sll_w_test() nounwind { +entry: + %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG1 + %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.sll.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.sll.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK-LABEL: llvm_mips_sll_w_test: +; CHECK-NOT: and.v +; CHECK: sll.w + +@llvm_mips_sll_d_ARG1 = global <2 x i64> , align 16 +@llvm_mips_sll_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_sll_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_sll_d_test() nounwind { +entry: + %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG1 + %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.sll.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.sll.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK-LABEL: llvm_mips_sll_d_test: +; CHECK-NOT: and.v +; CHECK: sll.d + +@llvm_mips_sra_b_ARG1 = global <16 x i8> , align 16 +@llvm_mips_sra_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_sra_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_sra_b_test() nounwind { +entry: + %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG1 + %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.sra.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.sra.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK-LABEL: llvm_mips_sra_b_test: +; CHECK-NOT: andi.b +; CHECK: sra.b + +@llvm_mips_sra_h_ARG1 = global <8 x i16> , align 16 +@llvm_mips_sra_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_sra_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_sra_h_test() nounwind { +entry: + %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG1 + %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.sra.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.sra.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK-LABEL: llvm_mips_sra_h_test: +; CHECK-NOT: and.v +; CHECK: sra.h + +@llvm_mips_sra_w_ARG1 = global <4 x i32> , align 16 +@llvm_mips_sra_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_sra_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_sra_w_test() nounwind { +entry: + %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG1 + %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.sra.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.sra.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK-LABEL: llvm_mips_sra_w_test: +; CHECK-NOT: and.v +; CHECK: sra.w + +@llvm_mips_sra_d_ARG1 = global <2 x i64> , align 16 +@llvm_mips_sra_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_sra_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_sra_d_test() nounwind { +entry: + %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG1 + %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.sra.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.sra.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK-LABEL: llvm_mips_sra_d_test: +; CHECK-NOT: and.v +; CHECK: sra.d + +@llvm_mips_srl_b_ARG1 = global <16 x i8> , align 16 +@llvm_mips_srl_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_srl_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_srl_b_test() nounwind { +entry: + %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG1 + %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.srl.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_srl_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.srl.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK-LABEL: llvm_mips_srl_b_test: +; CHECK-NOT: andi.b +; CHECK: srl.b + +@llvm_mips_srl_h_ARG1 = global <8 x i16> , align 16 +@llvm_mips_srl_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_srl_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_srl_h_test() nounwind { +entry: + %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG1 + %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.srl.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.srl.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK-LABEL: llvm_mips_srl_h_test: +; CHECK-NOT: and.v +; CHECK: srl.h + +@llvm_mips_srl_w_ARG1 = global <4 x i32> , align 16 +@llvm_mips_srl_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_srl_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_srl_w_test() nounwind { +entry: + %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG1 + %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.srl.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.srl.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK-LABEL: llvm_mips_srl_w_test: +; CHECK-NOT: and.v +; CHECK: srl.w + +@llvm_mips_srl_d_ARG1 = global <2 x i64> , align 16 +@llvm_mips_srl_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_srl_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_srl_d_test() nounwind { +entry: + %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG1 + %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.srl.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.srl.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK-LABEL: llvm_mips_srl_d_test: +; CHECK-NOT: and.v +; CHECK: srl.d