Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.h
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.h
@@ -284,6 +284,8 @@
     bool isCheapToSpeculateCttz() const override;
     bool isCheapToSpeculateCtlz() const override;
+    bool shouldFoldShiftPairToMask(const SDNode *N,
+                                   CombineLevel Level) const override;
 
     /// Return the register type for a given MVT, ensuring vectors are treated
     /// as a series of gpr sized integers.
Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
@@ -1190,6 +1190,13 @@
   return Subtarget.hasMips32();
 }
 
+bool MipsTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
+                                                   CombineLevel Level) const {
+  if (N->getOperand(0).getValueType().isVector())
+    return false;
+  return true;
+}
+
 void MipsTargetLowering::LowerOperationWrapper(SDNode *N,
                                                SmallVectorImpl<SDValue> &Results,
Index: llvm/trunk/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll
+++ llvm/trunk/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll
@@ -0,0 +1,211 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=mips64el-linux-gnu -mcpu=mips64r6 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL64R6
+; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL32R5
+
+declare <2 x i64> @llvm.mips.slli.d(<2 x i64>, i32)
+declare <2 x i64> @llvm.mips.srli.d(<2 x i64>, i32)
+
+declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32)
+declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32)
+
+; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2), MASK) if C1 < C2
+; MASK_TYPE1 = C2-C1 0s | 1s | ends with C1 0s
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 52
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 51
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 52
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 51
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+  %0 = load <2 x i64>, <2 x i64>* %a
+  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 52)
+  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 51)
+  store <2 x i64> %2, <2 x i64>* %b
+  ret void
+}
+
+; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2), MASK) if C1 < C2
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 6
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 4
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 6
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 4
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+  %0 = load <2 x i64>, <2 x i64>* %a
+  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 6)
+  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 4)
+  store <2 x i64> %2, <2 x i64>* %b
+  ret void
+}
+
+; do not fold (shl (srl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) if C1 >= C2
+; MASK_TYPE2 = 1s | C1 zeros
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 4
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 6
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 4
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 6
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+  %0 = load <2 x i64>, <2 x i64>* %a
+  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 4)
+  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 6)
+  store <2 x i64> %2, <2 x i64>* %b
+  ret void
+}
+
+; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2), MASK) if C1 < C2
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long(<4 x i32>* %a, <4 x i32>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.w $w0, $w0, 7
+; MIPSEL64R6-NEXT: slli.w $w0, $w0, 3
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.w $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.w $w0, $w0, 7
+; MIPSEL32R5-NEXT: slli.w $w0, $w0, 3
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.w $w0, 0($5)
+entry:
+  %0 = load <4 x i32>, <4 x i32>* %a
+  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7)
+  %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 3)
+  store <4 x i32> %2, <4 x i32>* %b
+  ret void
+}
+
+; do not fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type2_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 38
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 38
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 38
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 38
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+  %0 = load <2 x i64>, <2 x i64>* %a
+  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 38)
+  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 38)
+  store <2 x i64> %2, <2 x i64>* %b
+  ret void
+}
+
+; do not fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type2_i64(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 3
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 3
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 3
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 3
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+  %0 = load <2 x i64>, <2 x i64>* %a
+  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 3)
+  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 3)
+  store <2 x i64> %2, <2 x i64>* %b
+  ret void
+}
+
+; do not fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_a(<4 x i32>* %a, <4 x i32>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.w $w0, $w0, 5
+; MIPSEL64R6-NEXT: slli.w $w0, $w0, 5
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.w $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.w $w0, $w0, 5
+; MIPSEL32R5-NEXT: slli.w $w0, $w0, 5
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.w $w0, 0($5)
entry:
+  %0 = load <4 x i32>, <4 x i32>* %a
+  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 5)
+  %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 5)
+  store <4 x i32> %2, <4 x i32>* %b
+  ret void
+}
+
+; do not fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_b(<4 x i32>* %a, <4 x i32>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.w $w0, $w0, 30
+; MIPSEL64R6-NEXT: slli.w $w0, $w0, 30
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.w $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.w $w0, $w0, 30
+; MIPSEL32R5-NEXT: slli.w $w0, $w0, 30
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.w $w0, 0($5)
+entry:
+  %0 = load <4 x i32>, <4 x i32>* %a
+  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 30)
+  %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 30)
+  store <4 x i32> %2, <4 x i32>* %b
+  ret void
+}
Index: llvm/trunk/test/CodeGen/Mips/msa/vector_shift_combines.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/msa/vector_shift_combines.ll
+++ llvm/trunk/test/CodeGen/Mips/msa/vector_shift_combines.ll
@@ -1,232 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mips64el-linux-gnu -mcpu=mips64r6 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL64R6
-; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL32R5
-
-declare <2 x i64> @llvm.mips.slli.d(<2 x i64>, i32)
-declare <2 x i64> @llvm.mips.srli.d(<2 x i64>, i32)
-
-declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32)
-declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32)
-
-; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2), MASK) if C1 < C2
-; MASK_TYPE1 = C2-C1 0s | 1s | ends with C1 0s
-define void @combine_shifts_to_shift_plus_and_mask_type1_i64(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: srli.d $w0, $w0, 1
-; MIPSEL64R6-NEXT: lui $1, 32760
-; MIPSEL64R6-NEXT: dsll32 $1, $1, 0
-; MIPSEL64R6-NEXT: fill.d $w1, $1
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 52
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 51
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
-  %0 = load <2 x i64>, <2 x i64>* %a
-  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 52)
-  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 51)
-  store <2 x i64> %2, <2 x i64>* %b
-  ret void
-}
-
-; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2), MASK) if C1 < C2
-define void @combine_shifts_to_shift_plus_and_mask_type1_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64_long:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: lui $1, 65535
-; MIPSEL64R6-NEXT: ori $1, $1, 65520
-; MIPSEL64R6-NEXT: lui $2, 16383
-; MIPSEL64R6-NEXT: ori $2, $2, 65535
-; MIPSEL64R6-NEXT: dinsu $1, $2, 32, 32
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: srli.d $w0, $w0, 2
-; MIPSEL64R6-NEXT: fill.d $w1, $1
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64_long:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 6
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 4
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
-  %0 = load <2 x i64>, <2 x i64>* %a
-  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 6)
-  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 4)
-  store <2 x i64> %2, <2 x i64>* %b
-  ret void
-}
-
-; fold (shl (srl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) if C1 >= C2
-; MASK_TYPE2 = 1s | C1 zeros
-define void @combine_shifts_to_shift_plus_and_mask_type2_i32(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type2_i32:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: slli.d $w0, $w0, 2
-; MIPSEL64R6-NEXT: ldi.d $w1, -64
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type2_i32:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 4
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 6
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
-  %0 = load <2 x i64>, <2 x i64>* %a
-  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 4)
-  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 6)
-  store <2 x i64> %2, <2 x i64>* %b
-  ret void
-}
-
-; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2), MASK) if C1 < C2
-define void @combine_shifts_to_shift_plus_and_mask_type1_i32_long(<4 x i32>* %a, <4 x i32>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i32_long:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
-; MIPSEL64R6-NEXT: srli.w $w0, $w0, 4
-; MIPSEL64R6-NEXT: lui $1, 4095
-; MIPSEL64R6-NEXT: ori $1, $1, 65528
-; MIPSEL64R6-NEXT: fill.w $w1, $1
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.w $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i32_long:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.w $w0, $w0, 4
-; MIPSEL32R5-NEXT: lui $1, 4095
-; MIPSEL32R5-NEXT: ori $1, $1, 65528
-; MIPSEL32R5-NEXT: fill.w $w1, $1
-; MIPSEL32R5-NEXT: and.v $w0, $w0, $w1
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.w $w0, 0($5)
-entry:
-  %0 = load <4 x i32>, <4 x i32>* %a
-  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7)
-  %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 3)
-  store <4 x i32> %2, <4 x i32>* %b
-  ret void
-}
-
-; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type2_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type2_i64_long:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: lui $1, 65535
-; MIPSEL64R6-NEXT: ori $1, $1, 65472
-; MIPSEL64R6-NEXT: dsll32 $1, $1, 0
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: fill.d $w1, $1
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type2_i64_long:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 38
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 38
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
-  %0 = load <2 x i64>, <2 x i64>* %a
-  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 38)
-  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 38)
-  store <2 x i64> %2, <2 x i64>* %b
-  ret void
-}
-
-; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type2_i64(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type2_i64:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: ldi.d $w1, -8
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type2_i64:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 3
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 3
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
-  %0 = load <2 x i64>, <2 x i64>* %a
-  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 3)
-  %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 3)
-  store <2 x i64> %2, <2 x i64>* %b
-  ret void
-}
-
-; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type1_long_i32_a(<4 x i32>* %a, <4 x i32>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type1_long_i32_a:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
-; MIPSEL64R6-NEXT: ldi.w $w1, -32
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.w $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type1_long_i32_a:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
-; MIPSEL32R5-NEXT: ldi.w $w1, -32
-; MIPSEL32R5-NEXT: and.v $w0, $w0, $w1
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.w $w0, 0($5)
-entry:
-  %0 = load <4 x i32>, <4 x i32>* %a
-  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 5)
-  %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 5)
-  store <4 x i32> %2, <4 x i32>* %b
-  ret void
-}
-
-; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type1_long_i32_b(<4 x i32>* %a, <4 x i32>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type1_long_i32_b:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: lui $1, 49152
-; MIPSEL64R6-NEXT: fill.w $w0, $1
-; MIPSEL64R6-NEXT: ld.w $w1, 0($4)
-; MIPSEL64R6-NEXT: and.v $w0, $w1, $w0
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.w $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type1_long_i32_b:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: lui $1, 49152
-; MIPSEL32R5-NEXT: fill.w $w0, $1
-; MIPSEL32R5-NEXT: ld.w $w1, 0($4)
-; MIPSEL32R5-NEXT: and.v $w0, $w1, $w0
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.w $w0, 0($5)
-entry:
-  %0 = load <4 x i32>, <4 x i32>* %a
-  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 30)
-  %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 30)
-  store <4 x i32> %2, <4 x i32>* %b
-  ret void
-}
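
Note (commentary, not part of the patch): the MipsTargetLowering override added above implements the generic
TargetLowering::shouldFoldShiftPairToMask() hook, which DAGCombiner consults before it rewrites a
(shl (srl x, c1), c2) pair into a single shift plus an AND with a constant mask. A rough sketch of the
relationship is shown below; the DAGCombiner guard is paraphrased from memory rather than quoted from trunk.

  // Base hook in TargetLowering: folding the shift pair is allowed by default.
  virtual bool shouldFoldShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const {
    return true;
  }

  // Paraphrased guard around the (shl (srl x, c1), c2) combine in DAGCombiner:
  // the shift+mask rewrite is only attempted when the target does not object.
  if (TLI.shouldFoldShiftPairToMask(N, Level)) {
    // ... build the srl/shl + and replacement ...
  }

For MSA vectors the AND mask has to be materialized into a vector register first (the lui/fill.d/and.v
sequences visible in the deleted checks above), so the Mips override declines the fold for vector types
and keeps the pair of vector shifts instead.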