Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3128,11 +3128,12 @@ // X / 1 -> X // X % 1 -> 0 - if (N1C && N1C->isOne()) - return IsDiv ? N0 : DAG.getConstant(0, DL, VT); // If this is a boolean op (single-bit element type), we can't have // division-by-zero or remainder-by-zero, so assume the divisor is 1. - // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1. + // TODO: Similarly, if we're zero-extending a boolean divisor, then assume + // it's a 1. + if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) + return IsDiv ? N0 : DAG.getConstant(0, DL, VT); return SDValue(); } Index: test/CodeGen/Mips/llvm-ir/urem.ll =================================================================== --- test/CodeGen/Mips/llvm-ir/urem.ll +++ test/CodeGen/Mips/llvm-ir/urem.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -march=mips -mcpu=mips2 -relocation-model=pic | FileCheck %s \ ; RUN: -check-prefixes=ALL,GP32,NOT-R6,NOT-R2-R6 ; RUN: llc < %s -march=mips -mcpu=mips32 -relocation-model=pic | FileCheck %s \ @@ -32,178 +33,597 @@ ; RUN: -check-prefixes=ALL,MMR6,MM32 define signext i1 @urem_i1(i1 signext %a, i1 signext %b) { +; GP32-LABEL: urem_i1: +; GP32: # %bb.0: # %entry +; GP32-NEXT: jr $ra +; GP32-NEXT: addiu $2, $zero, 0 +; GP32-NEXT: .set at +; GP32-NEXT: .set macro +; GP32-NEXT: .set reorder +; GP32-NEXT: .end urem_i1 +; +; GP64-NOT-R6-LABEL: urem_i1: +; GP64-NOT-R6: # %bb.0: # %entry +; GP64-NOT-R6-NEXT: jr $ra +; GP64-NOT-R6-NEXT: addiu $2, $zero, 0 +; GP64-NOT-R6-NEXT: .set at +; GP64-NOT-R6-NEXT: .set macro +; GP64-NOT-R6-NEXT: .set reorder +; GP64-NOT-R6-NEXT: .end urem_i1 +; +; R2-R5-LABEL: urem_i1: +; R2-R5: # %bb.0: # %entry +; R2-R5-NEXT: jr $ra +; R2-R5-NEXT: addiu $2, $zero, 0 +; R2-R5-NEXT: .set at +; R2-R5-NEXT: .set macro +; R2-R5-NEXT: .set reorder +; R2-R5-NEXT: .end urem_i1 +; +; 64R6-LABEL: urem_i1: +; 64R6: # %bb.0: # %entry +; 64R6-NEXT: jr $ra +; 64R6-NEXT: addiu $2, $zero, 0 +; 64R6-NEXT: .set at +; 64R6-NEXT: .set macro +; 64R6-NEXT: .set reorder +; 64R6-NEXT: .end urem_i1 +; +; MMR3-LABEL: urem_i1: +; MMR3: # %bb.0: # %entry +; MMR3-NEXT: li16 $2, 0 +; MMR3-NEXT: jrc $ra +; MMR3-NEXT: .set at +; MMR3-NEXT: .set macro +; MMR3-NEXT: .set reorder +; MMR3-NEXT: .end urem_i1 +; +; MMR6-LABEL: urem_i1: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: li16 $2, 0 +; MMR6-NEXT: jrc $ra +; MMR6-NEXT: .set at +; MMR6-NEXT: .set macro +; MMR6-NEXT: .set reorder +; MMR6-NEXT: .end urem_i1 entry: -; ALL-LABEL: urem_i1: - ; NOT-R6: andi $[[T0:[0-9]+]], $5, 1 - ; NOT-R6: andi $[[T1:[0-9]+]], $4, 1 - ; NOT-R6: divu $zero, $[[T1]], $[[T0]] - ; NOT-R6: teq $[[T0]], $zero, 7 - ; NOT-R6: mfhi $[[T2:[0-9]+]] - ; NOT-R6: andi $[[T0]], $[[T0]], 1 - ; NOT-R6: negu $2, $[[T0]] - ; R6: andi $[[T0:[0-9]+]], $5, 1 - ; R6: andi $[[T1:[0-9]+]], $4, 1 - ; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]] - ; R6: teq $[[T0]], $zero, 7 - ; R6: negu $2, $[[T2]] - ; MMR3: andi16 $[[T0:[0-9]+]], $5, 1 - ; MMR3: andi16 $[[T1:[0-9]+]], $4, 1 - ; MMR3: divu $zero, $[[T1]], $[[T0]] - ; MMR3: teq $[[T0]], $zero, 7 - ; MMR3: mfhi16 $[[T2:[0-9]+]] - ; MMR3: andi16 $[[T0]], $[[T0]], 1 - ; MMR3: li16 $[[T1:[0-9]+]], 0 - ; MMR3: subu16 $2, $[[T1]], $[[T0]] - ; MMR6: andi16 $[[T0:[0-9]+]], $5, 1 - ; MMR6: andi16 $[[T1:[0-9]+]], $4, 1 - ; MMR6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]] 
- ; MMR6: teq $[[T0]], $zero, 7 - ; MMR6: li16 $[[T3:[0-9]+]], 0 - ; MMR6: subu16 $2, $[[T3]], $[[T2]] %r = urem i1 %a, %b ret i1 %r } define signext i8 @urem_i8(i8 signext %a, i8 signext %b) { +; NOT-R6-LABEL: urem_i8: +; NOT-R6: # %bb.0: # %entry +; NOT-R6-NEXT: andi $1, $5, 255 +; NOT-R6-NEXT: andi $2, $4, 255 +; NOT-R6-NEXT: divu $zero, $2, $1 +; NOT-R6-NEXT: teq $1, $zero, 7 +; NOT-R6-NEXT: mfhi $1 +; NOT-R6-NEXT: jr $ra +; NOT-R6-NEXT: seb $2, $1 +; NOT-R6-NEXT: .set at +; NOT-R6-NEXT: .set macro +; NOT-R6-NEXT: .set reorder +; NOT-R6-NEXT: .end urem_i8 +; +; R2-R5-LABEL: urem_i8: +; R2-R5: # %bb.0: # %entry +; R2-R5-NEXT: andi $1, $5, 255 +; R2-R5-NEXT: andi $2, $4, 255 +; R2-R5-NEXT: divu $zero, $2, $1 +; R2-R5-NEXT: teq $1, $zero, 7 +; R2-R5-NEXT: mfhi $1 +; R2-R5-NEXT: jr $ra +; R2-R5-NEXT: seb $2, $1 +; R2-R5-NEXT: .set at +; R2-R5-NEXT: .set macro +; R2-R5-NEXT: .set reorder +; R2-R5-NEXT: .end urem_i8 +; +; R6-LABEL: urem_i8: +; R6: # %bb.0: # %entry +; R6-NEXT: andi $1, $5, 255 +; R6-NEXT: andi $2, $4, 255 +; R6-NEXT: modu $2, $2, $1 +; R6-NEXT: teq $1, $zero, 7 +; R6-NEXT: jr $ra +; R6-NEXT: seb $2, $2 +; R6-NEXT: .set at +; R6-NEXT: .set macro +; R6-NEXT: .set reorder +; R6-NEXT: .end urem_i8 +; +; 64R6-LABEL: urem_i8: +; 64R6: # %bb.0: # %entry +; 64R6-NEXT: andi $1, $5, 255 +; 64R6-NEXT: andi $2, $4, 255 +; 64R6-NEXT: modu $2, $2, $1 +; 64R6-NEXT: teq $1, $zero, 7 +; 64R6-NEXT: jr $ra +; 64R6-NEXT: seb $2, $2 +; 64R6-NEXT: .set at +; 64R6-NEXT: .set macro +; 64R6-NEXT: .set reorder +; 64R6-NEXT: .end urem_i8 +; +; MMR3-LABEL: urem_i8: +; MMR3: # %bb.0: # %entry +; MMR3-NEXT: andi16 $2, $5, 255 +; MMR3-NEXT: andi16 $3, $4, 255 +; MMR3-NEXT: divu $zero, $3, $2 +; MMR3-NEXT: teq $2, $zero, 7 +; MMR3-NEXT: mfhi16 $1 +; MMR3-NEXT: jr $ra +; MMR3-NEXT: seb $2, $1 +; MMR3-NEXT: .set at +; MMR3-NEXT: .set macro +; MMR3-NEXT: .set reorder +; MMR3-NEXT: .end urem_i8 +; +; MMR6-LABEL: urem_i8: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: andi16 $2, $5, 255 +; MMR6-NEXT: andi16 $3, $4, 255 +; MMR6-NEXT: modu $1, $3, $2 +; MMR6-NEXT: teq $2, $zero, 7 +; MMR6-NEXT: seb $2, $1 +; MMR6-NEXT: jrc $ra +; MMR6-NEXT: .set at +; MMR6-NEXT: .set macro +; MMR6-NEXT: .set reorder +; MMR6-NEXT: .end urem_i8 entry: -; ALL-LABEL: urem_i8: - ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 255 - ; NOT-R2-R6: andi $[[T1:[0-9]+]], $4, 255 - ; NOT-R2-R6: divu $zero, $[[T1]], $[[T0]] - ; NOT-R2-R6: teq $[[T0]], $zero, 7 - ; NOT-R2-R6: mfhi $[[T2:[0-9]+]] - ; NOT-R2-R6: sll $[[T3:[0-9]+]], $[[T2]], 24 - ; NOT-R2-R6: sra $2, $[[T3]], 24 - ; R2-R5: andi $[[T0:[0-9]+]], $5, 255 - ; R2-R5: andi $[[T1:[0-9]+]], $4, 255 - ; R2-R5: divu $zero, $[[T1]], $[[T0]] - ; R2-R5: teq $[[T0]], $zero, 7 - ; R2-R5: mfhi $[[T2:[0-9]+]] - ; R2-R5: seb $2, $[[T2]] - ; R6: andi $[[T0:[0-9]+]], $5, 255 - ; R6: andi $[[T1:[0-9]+]], $4, 255 - ; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]] - ; R6: teq $[[T0]], $zero, 7 - ; R6: seb $2, $[[T2]] - ; MMR3: andi16 $[[T0:[0-9]+]], $5, 255 - ; MMR3: andi16 $[[T1:[0-9]+]], $4, 255 - ; MMR3: divu $zero, $[[T1]], $[[T0]] - ; MMR3: teq $[[T0]], $zero, 7 - ; MMR3: mfhi16 $[[T2:[0-9]+]] - ; MMR3: seb $2, $[[T2]] - ; MMR6: andi16 $[[T0:[0-9]+]], $5, 255 - ; MMR6: andi16 $[[T1:[0-9]+]], $4, 255 - ; MMR6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]] - ; MMR6: teq $[[T0]], $zero, 7 - ; MMR6: seb $2, $[[T2]] %r = urem i8 %a, %b ret i8 %r } define signext i16 @urem_i16(i16 signext %a, i16 signext %b) { +; NOT-R6-LABEL: urem_i16: +; NOT-R6: # %bb.0: # %entry +; NOT-R6-NEXT: andi $1, $5, 65535 +; NOT-R6-NEXT: andi $2, $4, 
65535 +; NOT-R6-NEXT: divu $zero, $2, $1 +; NOT-R6-NEXT: teq $1, $zero, 7 +; NOT-R6-NEXT: mfhi $1 +; NOT-R6-NEXT: jr $ra +; NOT-R6-NEXT: seh $2, $1 +; NOT-R6-NEXT: .set at +; NOT-R6-NEXT: .set macro +; NOT-R6-NEXT: .set reorder +; NOT-R6-NEXT: .end urem_i16 +; +; R2-R5-LABEL: urem_i16: +; R2-R5: # %bb.0: # %entry +; R2-R5-NEXT: andi $1, $5, 65535 +; R2-R5-NEXT: andi $2, $4, 65535 +; R2-R5-NEXT: divu $zero, $2, $1 +; R2-R5-NEXT: teq $1, $zero, 7 +; R2-R5-NEXT: mfhi $1 +; R2-R5-NEXT: jr $ra +; R2-R5-NEXT: seh $2, $1 +; R2-R5-NEXT: .set at +; R2-R5-NEXT: .set macro +; R2-R5-NEXT: .set reorder +; R2-R5-NEXT: .end urem_i16 +; +; R6-LABEL: urem_i16: +; R6: # %bb.0: # %entry +; R6-NEXT: andi $1, $5, 65535 +; R6-NEXT: andi $2, $4, 65535 +; R6-NEXT: modu $2, $2, $1 +; R6-NEXT: teq $1, $zero, 7 +; R6-NEXT: jr $ra +; R6-NEXT: seh $2, $2 +; R6-NEXT: .set at +; R6-NEXT: .set macro +; R6-NEXT: .set reorder +; R6-NEXT: .end urem_i16 +; +; 64R6-LABEL: urem_i16: +; 64R6: # %bb.0: # %entry +; 64R6-NEXT: andi $1, $5, 65535 +; 64R6-NEXT: andi $2, $4, 65535 +; 64R6-NEXT: modu $2, $2, $1 +; 64R6-NEXT: teq $1, $zero, 7 +; 64R6-NEXT: jr $ra +; 64R6-NEXT: seh $2, $2 +; 64R6-NEXT: .set at +; 64R6-NEXT: .set macro +; 64R6-NEXT: .set reorder +; 64R6-NEXT: .end urem_i16 +; +; MMR3-LABEL: urem_i16: +; MMR3: # %bb.0: # %entry +; MMR3-NEXT: andi16 $2, $5, 65535 +; MMR3-NEXT: andi16 $3, $4, 65535 +; MMR3-NEXT: divu $zero, $3, $2 +; MMR3-NEXT: teq $2, $zero, 7 +; MMR3-NEXT: mfhi16 $1 +; MMR3-NEXT: jr $ra +; MMR3-NEXT: seh $2, $1 +; MMR3-NEXT: .set at +; MMR3-NEXT: .set macro +; MMR3-NEXT: .set reorder +; MMR3-NEXT: .end urem_i16 +; +; MMR6-LABEL: urem_i16: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: andi16 $2, $5, 65535 +; MMR6-NEXT: andi16 $3, $4, 65535 +; MMR6-NEXT: modu $1, $3, $2 +; MMR6-NEXT: teq $2, $zero, 7 +; MMR6-NEXT: seh $2, $1 +; MMR6-NEXT: jrc $ra +; MMR6-NEXT: .set at +; MMR6-NEXT: .set macro +; MMR6-NEXT: .set reorder +; MMR6-NEXT: .end urem_i16 entry: -; ALL-LABEL: urem_i16: - ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 65535 - ; NOT-R2-R6: andi $[[T1:[0-9]+]], $4, 65535 - ; NOT-R2-R6: divu $zero, $[[T1]], $[[T0]] - ; NOT-R2-R6: teq $[[T0]], $zero, 7 - ; NOT-R2-R6: mfhi $[[T2:[0-9]+]] - ; NOT-R2-R6: sll $[[T3:[0-9]+]], $[[T2]], 16 - ; NOT-R2-R6: sra $2, $[[T3]], 16 - ; R2-R5: andi $[[T0:[0-9]+]], $5, 65535 - ; R2-R5: andi $[[T1:[0-9]+]], $4, 65535 - ; R2-R5: divu $zero, $[[T1]], $[[T0]] - ; R2-R5: teq $[[T0]], $zero, 7 - ; R2-R5: mfhi $[[T3:[0-9]+]] - ; R2-R5: seh $2, $[[T2]] - ; R6: andi $[[T0:[0-9]+]], $5, 65535 - ; R6: andi $[[T1:[0-9]+]], $4, 65535 - ; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]] - ; R6: teq $[[T0]], $zero, 7 - ; R6: seh $2, $[[T2]] - ; MMR3: andi16 $[[T0:[0-9]+]], $5, 65535 - ; MMR3: andi16 $[[T1:[0-9]+]], $4, 65535 - ; MMR3: divu $zero, $[[T1]], $[[T0]] - ; MMR3: teq $[[T0]], $zero, 7 - ; MMR3: mfhi16 $[[T2:[0-9]+]] - ; MMR3: seh $2, $[[T2]] - ; MMR6: andi16 $[[T0:[0-9]+]], $5, 65535 - ; MMR6: andi16 $[[T1:[0-9]+]], $4, 65535 - ; MMR6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]] - ; MMR6: teq $[[T0]], $zero, 7 - ; MMR6: seh $2, $[[T2]] %r = urem i16 %a, %b ret i16 %r } define signext i32 @urem_i32(i32 signext %a, i32 signext %b) { +; NOT-R6-LABEL: urem_i32: +; NOT-R6: # %bb.0: # %entry +; NOT-R6-NEXT: divu $zero, $4, $5 +; NOT-R6-NEXT: teq $5, $zero, 7 +; NOT-R6-NEXT: jr $ra +; NOT-R6-NEXT: mfhi $2 +; NOT-R6-NEXT: .set at +; NOT-R6-NEXT: .set macro +; NOT-R6-NEXT: .set reorder +; NOT-R6-NEXT: .end urem_i32 +; +; R2-R5-LABEL: urem_i32: +; R2-R5: # %bb.0: # %entry +; R2-R5-NEXT: divu $zero, $4, $5 +; 
R2-R5-NEXT: teq $5, $zero, 7 +; R2-R5-NEXT: jr $ra +; R2-R5-NEXT: mfhi $2 +; R2-R5-NEXT: .set at +; R2-R5-NEXT: .set macro +; R2-R5-NEXT: .set reorder +; R2-R5-NEXT: .end urem_i32 +; +; R6-LABEL: urem_i32: +; R6: # %bb.0: # %entry +; R6-NEXT: modu $2, $4, $5 +; R6-NEXT: teq $5, $zero, 7 +; R6-NEXT: jrc $ra +; R6-NEXT: .set at +; R6-NEXT: .set macro +; R6-NEXT: .set reorder +; R6-NEXT: .end urem_i32 +; +; GP64-NOT-R6-LABEL: urem_i32: +; GP64-NOT-R6: # %bb.0: # %entry +; GP64-NOT-R6-NEXT: divu $zero, $4, $5 +; GP64-NOT-R6-NEXT: teq $5, $zero, 7 +; GP64-NOT-R6-NEXT: jr $ra +; GP64-NOT-R6-NEXT: mfhi $2 +; GP64-NOT-R6-NEXT: .set at +; GP64-NOT-R6-NEXT: .set macro +; GP64-NOT-R6-NEXT: .set reorder +; GP64-NOT-R6-NEXT: .end urem_i32 +; +; 64R6-LABEL: urem_i32: +; 64R6: # %bb.0: # %entry +; 64R6-NEXT: modu $2, $4, $5 +; 64R6-NEXT: teq $5, $zero, 7 +; 64R6-NEXT: jrc $ra +; 64R6-NEXT: .set at +; 64R6-NEXT: .set macro +; 64R6-NEXT: .set reorder +; 64R6-NEXT: .end urem_i32 +; +; MMR3-LABEL: urem_i32: +; MMR3: # %bb.0: # %entry +; MMR3-NEXT: divu $zero, $4, $5 +; MMR3-NEXT: teq $5, $zero, 7 +; MMR3-NEXT: mfhi16 $2 +; MMR3-NEXT: jrc $ra +; MMR3-NEXT: .set at +; MMR3-NEXT: .set macro +; MMR3-NEXT: .set reorder +; MMR3-NEXT: .end urem_i32 +; +; MMR6-LABEL: urem_i32: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: modu $2, $4, $5 +; MMR6-NEXT: teq $5, $zero, 7 +; MMR6-NEXT: jrc $ra +; MMR6-NEXT: .set at +; MMR6-NEXT: .set macro +; MMR6-NEXT: .set reorder +; MMR6-NEXT: .end urem_i32 entry: -; ALL-LABEL: urem_i32: - ; NOT-R6: divu $zero, $4, $5 - ; NOT-R6: teq $5, $zero, 7 - ; NOT-R6: mfhi $2 - ; R6: modu $2, $4, $5 - ; R6: teq $5, $zero, 7 - ; MMR3: divu $zero, $4, $5 - ; MMR3: teq $5, $zero, 7 - ; MMR3: mfhi16 $2 - ; MMR6: modu $2, $4, $5 - ; MMR6: teq $5, $zero, 7 %r = urem i32 %a, %b ret i32 %r } define signext i64 @urem_i64(i64 signext %a, i64 signext %b) { +; NOT-R6-LABEL: urem_i64: +; NOT-R6: # %bb.0: # %entry +; NOT-R6-NEXT: ddivu $zero, $4, $5 +; NOT-R6-NEXT: teq $5, $zero, 7 +; NOT-R6-NEXT: jr $ra +; NOT-R6-NEXT: mfhi $2 +; NOT-R6-NEXT: .set at +; NOT-R6-NEXT: .set macro +; NOT-R6-NEXT: .set reorder +; NOT-R6-NEXT: .end urem_i64 +; +; R2-R6-LABEL: urem_i64: +; R2-R6: # %bb.0: # %entry +; R2-R6-NEXT: dmodu $2, $4, $5 +; R2-R6-NEXT: teq $5, $zero, 7 +; R2-R6-NEXT: jrc $ra +; R2-R6-NEXT: .set at +; R2-R6-NEXT: .set macro +; R2-R6-NEXT: .set reorder +; R2-R6-NEXT: .end urem_i64 +; +; GP64-NOT-R6-LABEL: urem_i64: +; GP64-NOT-R6: # %bb.0: # %entry +; GP64-NOT-R6-NEXT: ddivu $zero, $4, $5 +; GP64-NOT-R6-NEXT: teq $5, $zero, 7 +; GP64-NOT-R6-NEXT: jr $ra +; GP64-NOT-R6-NEXT: mfhi $2 +; GP64-NOT-R6-NEXT: .set at +; GP64-NOT-R6-NEXT: .set macro +; GP64-NOT-R6-NEXT: .set reorder +; GP64-NOT-R6-NEXT: .end urem_i64 +; +; 64R6-LABEL: urem_i64: +; 64R6: # %bb.0: # %entry +; 64R6-NEXT: dmodu $2, $4, $5 +; 64R6-NEXT: teq $5, $zero, 7 +; 64R6-NEXT: jrc $ra +; 64R6-NEXT: .set at +; 64R6-NEXT: .set macro +; 64R6-NEXT: .set reorder +; 64R6-NEXT: .end urem_i64 +; +; MMR3-LABEL: urem_i64: +; MMR3: # %bb.0: # %entry +; MMR3-NEXT: lui $2, %hi(_gp_disp) +; MMR3-NEXT: addiu $2, $2, %lo(_gp_disp) +; MMR3-NEXT: addiusp -24 +; MMR3-NEXT: .cfi_def_cfa_offset 24 +; MMR3-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR3-NEXT: .cfi_offset 31, -4 +; MMR3-NEXT: addu $2, $2, $25 +; MMR3-NEXT: lw $25, %call16(__umoddi3)($2) +; MMR3-NEXT: move $gp, $2 +; MMR3-NEXT: jalr $25 +; MMR3-NEXT: nop +; MMR3-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR3-NEXT: addiusp 24 +; MMR3-NEXT: jrc $ra +; MMR3-NEXT: .set at +; MMR3-NEXT: .set macro +; 
MMR3-NEXT: .set reorder +; MMR3-NEXT: .end urem_i64 +; +; MMR6-LABEL: urem_i64: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: lui $2, %hi(_gp_disp) +; MMR6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MMR6-NEXT: addiu $sp, $sp, -24 +; MMR6-NEXT: .cfi_def_cfa_offset 24 +; MMR6-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR6-NEXT: .cfi_offset 31, -4 +; MMR6-NEXT: addu $2, $2, $25 +; MMR6-NEXT: lw $25, %call16(__umoddi3)($2) +; MMR6-NEXT: move $gp, $2 +; MMR6-NEXT: jalr $25 +; MMR6-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR6-NEXT: addiu $sp, $sp, 24 +; MMR6-NEXT: jrc $ra +; MMR6-NEXT: .set at +; MMR6-NEXT: .set macro +; MMR6-NEXT: .set reorder +; MMR6-NEXT: .end urem_i64 entry: -; ALL-LABEL: urem_i64: - ; GP32: lw $25, %call16(__umoddi3)($gp) - ; GP64-NOT-R6: ddivu $zero, $4, $5 - ; GP64-NOT-R6: teq $5, $zero, 7 - ; GP64-NOT-R6: mfhi $2 - ; 64R6: dmodu $2, $4, $5 - ; 64R6: teq $5, $zero, 7 - ; MM32: lw $25, %call16(__umoddi3)($2) %r = urem i64 %a, %b ret i64 %r } define signext i128 @urem_i128(i128 signext %a, i128 signext %b) { +; NOT-R6-LABEL: urem_i128: +; NOT-R6: # %bb.0: # %entry +; NOT-R6-NEXT: daddiu $sp, $sp, -16 +; NOT-R6-NEXT: .cfi_def_cfa_offset 16 +; NOT-R6-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; NOT-R6-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; NOT-R6-NEXT: .cfi_offset 31, -8 +; NOT-R6-NEXT: .cfi_offset 28, -16 +; NOT-R6-NEXT: lui $1, %hi(%neg(%gp_rel(urem_i128))) +; NOT-R6-NEXT: daddu $1, $1, $25 +; NOT-R6-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(urem_i128))) +; NOT-R6-NEXT: ld $25, %call16(__umodti3)($gp) +; NOT-R6-NEXT: jalr $25 +; NOT-R6-NEXT: nop +; NOT-R6-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; NOT-R6-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; NOT-R6-NEXT: jr $ra +; NOT-R6-NEXT: daddiu $sp, $sp, 16 +; NOT-R6-NEXT: .set at +; NOT-R6-NEXT: .set macro +; NOT-R6-NEXT: .set reorder +; NOT-R6-NEXT: .end urem_i128 +; +; R2-R6-LABEL: urem_i128: +; R2-R6: # %bb.0: # %entry +; R2-R6-NEXT: daddiu $sp, $sp, -16 +; R2-R6-NEXT: .cfi_def_cfa_offset 16 +; R2-R6-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; R2-R6-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; R2-R6-NEXT: .cfi_offset 31, -8 +; R2-R6-NEXT: .cfi_offset 28, -16 +; R2-R6-NEXT: lui $1, %hi(%neg(%gp_rel(urem_i128))) +; R2-R6-NEXT: daddu $1, $1, $25 +; R2-R6-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(urem_i128))) +; R2-R6-NEXT: ld $25, %call16(__umodti3)($gp) +; R2-R6-NEXT: jalrc $25 +; R2-R6-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; R2-R6-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; R2-R6-NEXT: jr $ra +; R2-R6-NEXT: daddiu $sp, $sp, 16 +; R2-R6-NEXT: .set at +; R2-R6-NEXT: .set macro +; R2-R6-NEXT: .set reorder +; R2-R6-NEXT: .end urem_i128 +; +; GP64-NOT-R6-LABEL: urem_i128: +; GP64-NOT-R6: # %bb.0: # %entry +; GP64-NOT-R6-NEXT: daddiu $sp, $sp, -16 +; GP64-NOT-R6-NEXT: .cfi_def_cfa_offset 16 +; GP64-NOT-R6-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; GP64-NOT-R6-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; GP64-NOT-R6-NEXT: .cfi_offset 31, -8 +; GP64-NOT-R6-NEXT: .cfi_offset 28, -16 +; GP64-NOT-R6-NEXT: lui $1, %hi(%neg(%gp_rel(urem_i128))) +; GP64-NOT-R6-NEXT: daddu $1, $1, $25 +; GP64-NOT-R6-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(urem_i128))) +; GP64-NOT-R6-NEXT: ld $25, %call16(__umodti3)($gp) +; GP64-NOT-R6-NEXT: jalr $25 +; GP64-NOT-R6-NEXT: nop +; GP64-NOT-R6-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; GP64-NOT-R6-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; GP64-NOT-R6-NEXT: jr $ra +; GP64-NOT-R6-NEXT: daddiu $sp, $sp, 16 +; GP64-NOT-R6-NEXT: .set at +; GP64-NOT-R6-NEXT: .set macro +; 
GP64-NOT-R6-NEXT: .set reorder +; GP64-NOT-R6-NEXT: .end urem_i128 +; +; 64R6-LABEL: urem_i128: +; 64R6: # %bb.0: # %entry +; 64R6-NEXT: daddiu $sp, $sp, -16 +; 64R6-NEXT: .cfi_def_cfa_offset 16 +; 64R6-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; 64R6-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; 64R6-NEXT: .cfi_offset 31, -8 +; 64R6-NEXT: .cfi_offset 28, -16 +; 64R6-NEXT: lui $1, %hi(%neg(%gp_rel(urem_i128))) +; 64R6-NEXT: daddu $1, $1, $25 +; 64R6-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(urem_i128))) +; 64R6-NEXT: ld $25, %call16(__umodti3)($gp) +; 64R6-NEXT: jalrc $25 +; 64R6-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; 64R6-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; 64R6-NEXT: jr $ra +; 64R6-NEXT: daddiu $sp, $sp, 16 +; 64R6-NEXT: .set at +; 64R6-NEXT: .set macro +; 64R6-NEXT: .set reorder +; 64R6-NEXT: .end urem_i128 +; +; MMR3-LABEL: urem_i128: +; MMR3: # %bb.0: # %entry +; MMR3-NEXT: lui $2, %hi(_gp_disp) +; MMR3-NEXT: addiu $2, $2, %lo(_gp_disp) +; MMR3-NEXT: addiusp -48 +; MMR3-NEXT: .cfi_def_cfa_offset 48 +; MMR3-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MMR3-NEXT: swp $16, 36($sp) +; MMR3-NEXT: .cfi_offset 31, -4 +; MMR3-NEXT: .cfi_offset 17, -8 +; MMR3-NEXT: .cfi_offset 16, -12 +; MMR3-NEXT: addu $16, $2, $25 +; MMR3-NEXT: move $1, $7 +; MMR3-NEXT: lw $7, 68($sp) +; MMR3-NEXT: lw $17, 72($sp) +; MMR3-NEXT: lw $3, 76($sp) +; MMR3-NEXT: move $2, $sp +; MMR3-NEXT: sw16 $3, 28($2) +; MMR3-NEXT: sw16 $17, 24($2) +; MMR3-NEXT: sw16 $7, 20($2) +; MMR3-NEXT: lw $3, 64($sp) +; MMR3-NEXT: sw16 $3, 16($2) +; MMR3-NEXT: lw $25, %call16(__umodti3)($16) +; MMR3-NEXT: move $7, $1 +; MMR3-NEXT: move $gp, $16 +; MMR3-NEXT: jalr $25 +; MMR3-NEXT: nop +; MMR3-NEXT: lwp $16, 36($sp) +; MMR3-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; MMR3-NEXT: addiusp 48 +; MMR3-NEXT: jrc $ra +; MMR3-NEXT: .set at +; MMR3-NEXT: .set macro +; MMR3-NEXT: .set reorder +; MMR3-NEXT: .end urem_i128 +; +; MMR6-LABEL: urem_i128: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: lui $2, %hi(_gp_disp) +; MMR6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MMR6-NEXT: addiu $sp, $sp, -48 +; MMR6-NEXT: .cfi_def_cfa_offset 48 +; MMR6-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $17, 40($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $16, 36($sp) # 4-byte Folded Spill +; MMR6-NEXT: .cfi_offset 31, -4 +; MMR6-NEXT: .cfi_offset 17, -8 +; MMR6-NEXT: .cfi_offset 16, -12 +; MMR6-NEXT: addu $16, $2, $25 +; MMR6-NEXT: move $1, $7 +; MMR6-NEXT: lw $7, 68($sp) +; MMR6-NEXT: lw $17, 72($sp) +; MMR6-NEXT: lw $3, 76($sp) +; MMR6-NEXT: move $2, $sp +; MMR6-NEXT: sw16 $3, 28($2) +; MMR6-NEXT: sw16 $17, 24($2) +; MMR6-NEXT: sw16 $7, 20($2) +; MMR6-NEXT: lw $3, 64($sp) +; MMR6-NEXT: sw16 $3, 16($2) +; MMR6-NEXT: lw $25, %call16(__umodti3)($16) +; MMR6-NEXT: move $7, $1 +; MMR6-NEXT: move $gp, $16 +; MMR6-NEXT: jalr $25 +; MMR6-NEXT: lw $16, 36($sp) # 4-byte Folded Reload +; MMR6-NEXT: lw $17, 40($sp) # 4-byte Folded Reload +; MMR6-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; MMR6-NEXT: addiu $sp, $sp, 48 +; MMR6-NEXT: jrc $ra +; MMR6-NEXT: .set at +; MMR6-NEXT: .set macro +; MMR6-NEXT: .set reorder +; MMR6-NEXT: .end urem_i128 entry: - ; ALL-LABEL: urem_i128: - ; GP32: lw $25, %call16(__umodti3)($gp) - ; GP64-NOT-R6: ld $25, %call16(__umodti3)($gp) - ; 64R6: ld $25, %call16(__umodti3)($gp) - ; MM32: lw $25, %call16(__umodti3)($16) %r = urem i128 %a, %b ret i128 %r Index: test/CodeGen/X86/combine-sdiv.ll =================================================================== --- test/CodeGen/X86/combine-sdiv.ll +++ 
test/CodeGen/X86/combine-sdiv.ll @@ -3289,322 +3289,16 @@ ; CHECK-LABEL: bool_sdiv: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: negb %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax -; CHECK-NEXT: cbtw -; CHECK-NEXT: andb $1, %sil -; CHECK-NEXT: negb %sil -; CHECK-NEXT: idivb %sil ; CHECK-NEXT: retq %r = sdiv i1 %x, %y ret i1 %r } define <4 x i1> @boolvec_sdiv(<4 x i1> %x, <4 x i1> %y) { -; SSE2-LABEL: boolvec_sdiv: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $31, %xmm1 -; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: pslld $31, %xmm0 -; SSE2-NEXT: psrad $31, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3] -; SSE2-NEXT: movd %xmm2, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] -; SSE2-NEXT: movd %xmm2, %ecx -; SSE2-NEXT: cltd -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm3, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] -; SSE2-NEXT: movd %xmm3, %ecx -; SSE2-NEXT: cltd -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: cltd -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: cltd -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: boolvec_sdiv: -; SSE41: # %bb.0: -; SSE41-NEXT: pslld $31, %xmm1 -; SSE41-NEXT: psrad $31, %xmm1 -; SSE41-NEXT: pslld $31, %xmm0 -; SSE41-NEXT: psrad $31, %xmm0 -; SSE41-NEXT: pextrd $1, %xmm0, %eax -; SSE41-NEXT: pextrd $1, %xmm1, %ecx -; SSE41-NEXT: cltd -; SSE41-NEXT: idivl %ecx -; SSE41-NEXT: movl %eax, %ecx -; SSE41-NEXT: movd %xmm0, %eax -; SSE41-NEXT: movd %xmm1, %esi -; SSE41-NEXT: cltd -; SSE41-NEXT: idivl %esi -; SSE41-NEXT: movd %eax, %xmm2 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm2 -; SSE41-NEXT: pextrd $2, %xmm0, %eax -; SSE41-NEXT: pextrd $2, %xmm1, %ecx -; SSE41-NEXT: cltd -; SSE41-NEXT: idivl %ecx -; SSE41-NEXT: pinsrd $2, %eax, %xmm2 -; SSE41-NEXT: pextrd $3, %xmm0, %eax -; SSE41-NEXT: pextrd $3, %xmm1, %ecx -; SSE41-NEXT: cltd -; SSE41-NEXT: idivl %ecx -; SSE41-NEXT: pinsrd $3, %eax, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: boolvec_sdiv: -; AVX1: # %bb.0: -; AVX1-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 -; AVX1-NEXT: vpextrd $1, %xmm0, %eax -; AVX1-NEXT: vpextrd $1, %xmm1, %ecx -; AVX1-NEXT: cltd -; AVX1-NEXT: idivl %ecx -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: vmovd %xmm1, %esi -; AVX1-NEXT: cltd -; AVX1-NEXT: idivl %esi -; AVX1-NEXT: vmovd %eax, %xmm2 -; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $2, %xmm0, %eax -; AVX1-NEXT: vpextrd $2, %xmm1, %ecx -; AVX1-NEXT: cltd -; AVX1-NEXT: idivl %ecx -; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $3, %xmm0, %eax -; AVX1-NEXT: vpextrd $3, %xmm1, %ecx -; AVX1-NEXT: cltd -; AVX1-NEXT: idivl %ecx -; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: boolvec_sdiv: -; AVX2: # %bb.0: 
-; AVX2-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 -; AVX2-NEXT: vpextrd $1, %xmm0, %eax -; AVX2-NEXT: vpextrd $1, %xmm1, %ecx -; AVX2-NEXT: cltd -; AVX2-NEXT: idivl %ecx -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: vmovd %xmm1, %esi -; AVX2-NEXT: cltd -; AVX2-NEXT: idivl %esi -; AVX2-NEXT: vmovd %eax, %xmm2 -; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $2, %xmm0, %eax -; AVX2-NEXT: vpextrd $2, %xmm1, %ecx -; AVX2-NEXT: cltd -; AVX2-NEXT: idivl %ecx -; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $3, %xmm0, %eax -; AVX2-NEXT: vpextrd $3, %xmm1, %ecx -; AVX2-NEXT: cltd -; AVX2-NEXT: idivl %ecx -; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: boolvec_sdiv: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k3 -; AVX512F-NEXT: kshiftrw $3, %k3, %k0 -; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k4 -; AVX512F-NEXT: kshiftrw $3, %k4, %k1 -; AVX512F-NEXT: kshiftrw $2, %k3, %k2 -; AVX512F-NEXT: kshiftrw $2, %k4, %k5 -; AVX512F-NEXT: kmovw %k5, %ecx -; AVX512F-NEXT: kshiftrw $1, %k3, %k5 -; AVX512F-NEXT: kmovw %k3, %edi -; AVX512F-NEXT: kshiftrw $1, %k4, %k3 -; AVX512F-NEXT: kmovw %k4, %esi -; AVX512F-NEXT: kmovw %k5, %edx -; AVX512F-NEXT: kmovw %k3, %eax -; AVX512F-NEXT: andb $1, %al -; AVX512F-NEXT: negb %al -; AVX512F-NEXT: # kill: def $al killed $al killed $eax -; AVX512F-NEXT: cbtw -; AVX512F-NEXT: andb $1, %dl -; AVX512F-NEXT: negb %dl -; AVX512F-NEXT: idivb %dl -; AVX512F-NEXT: movl %eax, %edx -; AVX512F-NEXT: andb $1, %sil -; AVX512F-NEXT: negb %sil -; AVX512F-NEXT: movl %esi, %eax -; AVX512F-NEXT: cbtw -; AVX512F-NEXT: andb $1, %dil -; AVX512F-NEXT: negb %dil -; AVX512F-NEXT: idivb %dil -; AVX512F-NEXT: movl %eax, %esi -; AVX512F-NEXT: andb $1, %cl -; AVX512F-NEXT: negb %cl -; AVX512F-NEXT: movl %ecx, %eax -; AVX512F-NEXT: cbtw -; AVX512F-NEXT: kmovw %k2, %ecx -; AVX512F-NEXT: andb $1, %cl -; AVX512F-NEXT: negb %cl -; AVX512F-NEXT: idivb %cl -; AVX512F-NEXT: movl %eax, %ecx -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: andb $1, %al -; AVX512F-NEXT: negb %al -; AVX512F-NEXT: # kill: def $al killed $al killed $eax -; AVX512F-NEXT: cbtw -; AVX512F-NEXT: kmovw %k0, %edi -; AVX512F-NEXT: andb $1, %dil -; AVX512F-NEXT: negb %dil -; AVX512F-NEXT: idivb %dil -; AVX512F-NEXT: # kill: def $al killed $al def $eax -; AVX512F-NEXT: kmovw %edx, %k0 -; AVX512F-NEXT: kmovw %esi, %k1 -; AVX512F-NEXT: kshiftrw $1, %k1, %k2 -; AVX512F-NEXT: kxorw %k0, %k2, %k0 -; AVX512F-NEXT: kshiftlw $15, %k0, %k0 -; AVX512F-NEXT: kshiftrw $14, %k0, %k0 -; AVX512F-NEXT: kxorw %k0, %k1, %k0 -; AVX512F-NEXT: kshiftrw $2, %k0, %k1 -; AVX512F-NEXT: kmovw %ecx, %k2 -; AVX512F-NEXT: kxorw %k2, %k1, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $13, %k1, %k1 -; AVX512F-NEXT: kxorw %k1, %k0, %k0 -; AVX512F-NEXT: kshiftlw $13, %k0, %k0 -; AVX512F-NEXT: kshiftrw $13, %k0, %k0 -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $3, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: boolvec_sdiv: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX512BW-NEXT: vptestmd %xmm1, %xmm1, %k3 -; 
AVX512BW-NEXT: kshiftrw $3, %k3, %k0 -; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k4 -; AVX512BW-NEXT: kshiftrw $3, %k4, %k1 -; AVX512BW-NEXT: kshiftrw $2, %k3, %k2 -; AVX512BW-NEXT: kshiftrw $2, %k4, %k5 -; AVX512BW-NEXT: kmovd %k5, %ecx -; AVX512BW-NEXT: kshiftrw $1, %k3, %k5 -; AVX512BW-NEXT: kmovd %k3, %edi -; AVX512BW-NEXT: kshiftrw $1, %k4, %k3 -; AVX512BW-NEXT: kmovd %k4, %esi -; AVX512BW-NEXT: kmovd %k5, %edx -; AVX512BW-NEXT: kmovd %k3, %eax -; AVX512BW-NEXT: andb $1, %al -; AVX512BW-NEXT: negb %al -; AVX512BW-NEXT: # kill: def $al killed $al killed $eax -; AVX512BW-NEXT: cbtw -; AVX512BW-NEXT: andb $1, %dl -; AVX512BW-NEXT: negb %dl -; AVX512BW-NEXT: idivb %dl -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: andb $1, %sil -; AVX512BW-NEXT: negb %sil -; AVX512BW-NEXT: movl %esi, %eax -; AVX512BW-NEXT: cbtw -; AVX512BW-NEXT: andb $1, %dil -; AVX512BW-NEXT: negb %dil -; AVX512BW-NEXT: idivb %dil -; AVX512BW-NEXT: movl %eax, %esi -; AVX512BW-NEXT: andb $1, %cl -; AVX512BW-NEXT: negb %cl -; AVX512BW-NEXT: movl %ecx, %eax -; AVX512BW-NEXT: cbtw -; AVX512BW-NEXT: kmovd %k2, %ecx -; AVX512BW-NEXT: andb $1, %cl -; AVX512BW-NEXT: negb %cl -; AVX512BW-NEXT: idivb %cl -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: kmovd %k1, %eax -; AVX512BW-NEXT: andb $1, %al -; AVX512BW-NEXT: negb %al -; AVX512BW-NEXT: # kill: def $al killed $al killed $eax -; AVX512BW-NEXT: cbtw -; AVX512BW-NEXT: kmovd %k0, %edi -; AVX512BW-NEXT: andb $1, %dil -; AVX512BW-NEXT: negb %dil -; AVX512BW-NEXT: idivb %dil -; AVX512BW-NEXT: # kill: def $al killed $al def $eax -; AVX512BW-NEXT: kmovd %edx, %k0 -; AVX512BW-NEXT: kmovd %esi, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512BW-NEXT: kxorw %k0, %k2, %k0 -; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512BW-NEXT: kmovd %ecx, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512BW-NEXT: kxorw %k1, %k0, %k0 -; AVX512BW-NEXT: kshiftlw $13, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $13, %k0, %k0 -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: kshiftlw $3, %k1, %k1 -; AVX512BW-NEXT: korw %k1, %k0, %k1 -; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; XOP-LABEL: boolvec_sdiv: -; XOP: # %bb.0: -; XOP-NEXT: vpslld $31, %xmm1, %xmm1 -; XOP-NEXT: vpsrad $31, %xmm1, %xmm1 -; XOP-NEXT: vpslld $31, %xmm0, %xmm0 -; XOP-NEXT: vpsrad $31, %xmm0, %xmm0 -; XOP-NEXT: vpextrd $1, %xmm0, %eax -; XOP-NEXT: vpextrd $1, %xmm1, %ecx -; XOP-NEXT: cltd -; XOP-NEXT: idivl %ecx -; XOP-NEXT: movl %eax, %ecx -; XOP-NEXT: vmovd %xmm0, %eax -; XOP-NEXT: vmovd %xmm1, %esi -; XOP-NEXT: cltd -; XOP-NEXT: idivl %esi -; XOP-NEXT: vmovd %eax, %xmm2 -; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; XOP-NEXT: vpextrd $2, %xmm0, %eax -; XOP-NEXT: vpextrd $2, %xmm1, %ecx -; XOP-NEXT: cltd -; XOP-NEXT: idivl %ecx -; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; XOP-NEXT: vpextrd $3, %xmm0, %eax -; XOP-NEXT: vpextrd $3, %xmm1, %ecx -; XOP-NEXT: cltd -; XOP-NEXT: idivl %ecx -; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; XOP-NEXT: retq +; CHECK-LABEL: boolvec_sdiv: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = sdiv <4 x i1> %x, %y ret <4 x i1> %r } Index: test/CodeGen/X86/combine-srem.ll =================================================================== --- test/CodeGen/X86/combine-srem.ll +++ 
test/CodeGen/X86/combine-srem.ll @@ -462,16 +462,7 @@ define i1 @bool_srem(i1 %x, i1 %y) { ; CHECK-LABEL: bool_srem: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: negb %al -; CHECK-NEXT: # kill: def $al killed $al killed $eax -; CHECK-NEXT: cbtw -; CHECK-NEXT: andb $1, %sil -; CHECK-NEXT: negb %sil -; CHECK-NEXT: idivb %sil -; CHECK-NEXT: movsbl %ah, %eax -; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %r = srem i1 %x, %y ret i1 %r @@ -479,61 +470,12 @@ define <4 x i1> @boolvec_srem(<4 x i1> %x, <4 x i1> %y) { ; SSE-LABEL: boolvec_srem: ; SSE: # %bb.0: -; SSE-NEXT: pslld $31, %xmm1 -; SSE-NEXT: psrad $31, %xmm1 -; SSE-NEXT: pslld $31, %xmm0 -; SSE-NEXT: psrad $31, %xmm0 -; SSE-NEXT: pextrd $1, %xmm0, %eax -; SSE-NEXT: pextrd $1, %xmm1, %ecx -; SSE-NEXT: cltd -; SSE-NEXT: idivl %ecx -; SSE-NEXT: movl %edx, %ecx -; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: movd %xmm1, %esi -; SSE-NEXT: cltd -; SSE-NEXT: idivl %esi -; SSE-NEXT: movd %edx, %xmm2 -; SSE-NEXT: pinsrd $1, %ecx, %xmm2 -; SSE-NEXT: pextrd $2, %xmm0, %eax -; SSE-NEXT: pextrd $2, %xmm1, %ecx -; SSE-NEXT: cltd -; SSE-NEXT: idivl %ecx -; SSE-NEXT: pinsrd $2, %edx, %xmm2 -; SSE-NEXT: pextrd $3, %xmm0, %eax -; SSE-NEXT: pextrd $3, %xmm1, %ecx -; SSE-NEXT: cltd -; SSE-NEXT: idivl %ecx -; SSE-NEXT: pinsrd $3, %edx, %xmm2 -; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: boolvec_srem: ; AVX: # %bb.0: -; AVX-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 -; AVX-NEXT: vpextrd $1, %xmm0, %eax -; AVX-NEXT: vpextrd $1, %xmm1, %ecx -; AVX-NEXT: cltd -; AVX-NEXT: idivl %ecx -; AVX-NEXT: movl %edx, %ecx -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: vmovd %xmm1, %esi -; AVX-NEXT: cltd -; AVX-NEXT: idivl %esi -; AVX-NEXT: vmovd %edx, %xmm2 -; AVX-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX-NEXT: vpextrd $2, %xmm0, %eax -; AVX-NEXT: vpextrd $2, %xmm1, %ecx -; AVX-NEXT: cltd -; AVX-NEXT: idivl %ecx -; AVX-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; AVX-NEXT: vpextrd $3, %xmm0, %eax -; AVX-NEXT: vpextrd $3, %xmm1, %ecx -; AVX-NEXT: cltd -; AVX-NEXT: idivl %ecx -; AVX-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %r = srem <4 x i1> %x, %y ret <4 x i1> %r Index: test/CodeGen/X86/combine-udiv.ll =================================================================== --- test/CodeGen/X86/combine-udiv.ll +++ test/CodeGen/X86/combine-udiv.ll @@ -911,166 +911,17 @@ define i1 @bool_udiv(i1 %x, i1 %y) { ; CHECK-LABEL: bool_udiv: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %sil -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: # kill: def $eax killed $eax def $ax -; CHECK-NEXT: divb %sil +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %r = udiv i1 %x, %y ret i1 %r } define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) { -; SSE2-LABEL: boolvec_udiv: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3] -; SSE2-NEXT: movd %xmm2, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] -; SSE2-NEXT: movd %xmm2, %ecx -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm3, %eax -; 
SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] -; SSE2-NEXT: movd %xmm3, %ecx -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: boolvec_udiv: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; SSE41-NEXT: pand %xmm2, %xmm1 -; SSE41-NEXT: pand %xmm2, %xmm0 -; SSE41-NEXT: pextrd $1, %xmm0, %eax -; SSE41-NEXT: pextrd $1, %xmm1, %ecx -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %ecx -; SSE41-NEXT: movl %eax, %ecx -; SSE41-NEXT: movd %xmm0, %eax -; SSE41-NEXT: movd %xmm1, %esi -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %esi -; SSE41-NEXT: movd %eax, %xmm2 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm2 -; SSE41-NEXT: pextrd $2, %xmm0, %eax -; SSE41-NEXT: pextrd $2, %xmm1, %ecx -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %ecx -; SSE41-NEXT: pinsrd $2, %eax, %xmm2 -; SSE41-NEXT: pextrd $3, %xmm0, %eax -; SSE41-NEXT: pextrd $3, %xmm1, %ecx -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %ecx -; SSE41-NEXT: pinsrd $3, %eax, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: boolvec_udiv: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpextrd $1, %xmm0, %eax -; AVX1-NEXT: vpextrd $1, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: vmovd %xmm1, %esi -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %esi -; AVX1-NEXT: vmovd %eax, %xmm2 -; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $2, %xmm0, %eax -; AVX1-NEXT: vpextrd $2, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $3, %xmm0, %eax -; AVX1-NEXT: vpextrd $3, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: boolvec_udiv: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpextrd $1, %xmm0, %eax -; AVX2-NEXT: vpextrd $1, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: vmovd %xmm1, %esi -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %esi -; AVX2-NEXT: vmovd %eax, %xmm2 -; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $2, %xmm0, %eax -; AVX2-NEXT: vpextrd $2, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $3, %xmm0, %eax -; AVX2-NEXT: vpextrd $3, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX2-NEXT: retq -; -; XOP-LABEL: 
boolvec_udiv: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] -; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1 -; XOP-NEXT: vpand %xmm2, %xmm0, %xmm0 -; XOP-NEXT: vpextrd $1, %xmm0, %eax -; XOP-NEXT: vpextrd $1, %xmm1, %ecx -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %ecx -; XOP-NEXT: movl %eax, %ecx -; XOP-NEXT: vmovd %xmm0, %eax -; XOP-NEXT: vmovd %xmm1, %esi -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %esi -; XOP-NEXT: vmovd %eax, %xmm2 -; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; XOP-NEXT: vpextrd $2, %xmm0, %eax -; XOP-NEXT: vpextrd $2, %xmm1, %ecx -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %ecx -; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; XOP-NEXT: vpextrd $3, %xmm0, %eax -; XOP-NEXT: vpextrd $3, %xmm1, %ecx -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %ecx -; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; XOP-NEXT: retq +; CHECK-LABEL: boolvec_udiv: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = udiv <4 x i1> %x, %y ret <4 x i1> %r } Index: test/CodeGen/X86/combine-urem.ll =================================================================== --- test/CodeGen/X86/combine-urem.ll +++ test/CodeGen/X86/combine-urem.ll @@ -383,13 +383,7 @@ define i1 @bool_urem(i1 %x, i1 %y) { ; CHECK-LABEL: bool_urem: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %sil -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: # kill: def $eax killed $eax def $ax -; CHECK-NEXT: divb %sil -; CHECK-NEXT: movzbl %ah, %eax -; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %r = urem i1 %x, %y ret i1 %r @@ -398,88 +392,13 @@ define <4 x i1> @boolvec_urem(<4 x i1> %x, <4 x i1> %y) { ; SSE-LABEL: boolvec_urem: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: pextrd $1, %xmm0, %eax -; SSE-NEXT: pextrd $1, %xmm1, %ecx -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %ecx -; SSE-NEXT: movl %edx, %ecx -; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: movd %xmm1, %esi -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %esi -; SSE-NEXT: movd %edx, %xmm2 -; SSE-NEXT: pinsrd $1, %ecx, %xmm2 -; SSE-NEXT: pextrd $2, %xmm0, %eax -; SSE-NEXT: pextrd $2, %xmm1, %ecx -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %ecx -; SSE-NEXT: pinsrd $2, %edx, %xmm2 -; SSE-NEXT: pextrd $3, %xmm0, %eax -; SSE-NEXT: pextrd $3, %xmm1, %ecx -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %ecx -; SSE-NEXT: pinsrd $3, %edx, %xmm2 -; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: boolvec_urem: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpextrd $1, %xmm0, %eax -; AVX1-NEXT: vpextrd $1, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: movl %edx, %ecx -; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: vmovd %xmm1, %esi -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %esi -; AVX1-NEXT: vmovd %edx, %xmm2 -; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $2, %xmm0, %eax -; AVX1-NEXT: vpextrd $2, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $3, %xmm0, %eax -; AVX1-NEXT: vpextrd $3, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: boolvec_urem: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = 
[1,1,1,1] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpextrd $1, %xmm0, %eax -; AVX2-NEXT: vpextrd $1, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: movl %edx, %ecx -; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: vmovd %xmm1, %esi -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %esi -; AVX2-NEXT: vmovd %edx, %xmm2 -; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $2, %xmm0, %eax -; AVX2-NEXT: vpextrd $2, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $3, %xmm0, %eax -; AVX2-NEXT: vpextrd $3, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: boolvec_urem: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq %r = urem <4 x i1> %x, %y ret <4 x i1> %r }
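
Illustration only, not part of the patch: the DAGCombiner change folds division and remainder whenever the divisor is a known 1 or has a boolean (i1) scalar type, since an i1 divisor of 0 would be immediate undefined behavior and must therefore be 1. A hand-written IR sketch of the effect (the function names below are hypothetical and are not taken from the regenerated tests):

  define i1 @bool_rem_example(i1 %a, i1 %b) {
    ; an i1 divisor of 0 is UB, so %b is assumed to be 1 and X % 1 == 0;
    ; this is why bool_urem/bool_srem above now compile to "xorl %eax, %eax"
    %r = urem i1 %a, %b
    ret i1 %r
  }

  define i1 @bool_div_example(i1 %a, i1 %b) {
    ; likewise X / 1 == X, so the division folds to the dividend %a;
    ; this is why bool_udiv/bool_sdiv above now simply return the first argument
    %r = udiv i1 %a, %b
    ret i1 %r
  }

The same reasoning applies per element to vectors of i1 (the VT.getScalarType() == MVT::i1 check), which is why boolvec_udiv and boolvec_sdiv above collapse to returning the first operand while boolvec_urem and boolvec_srem collapse to an all-zeros vector.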