diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2790,6 +2790,17 @@ Amt, DAG.getConstant(0, dl, ShTy), ISD::SETEQ); + SDValue NVTBitsM1 = DAG.getConstant(NVTBits - 1, dl, ShTy); + + // Mask all shift amounts to be in a valid range. + // TODO: This is only really needed if the shift will be expanded to a libcall + // later. Otherwise an out of bounds shift produces poison, but the select + // won't let it propagate. + assert(isPowerOf2_32(NVTBits) && "Unexpected VT"); + AmtExcess = DAG.getNode(ISD::AND, dl, ShTy, AmtExcess, NVTBitsM1); + AmtLack = DAG.getNode(ISD::AND, dl, ShTy, AmtLack, NVTBitsM1); + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, NVTBitsM1); + SDValue LoS, HiS, LoL, HiL; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); @@ -2833,8 +2844,7 @@ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); // Long: ShAmt >= NVTBits - HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. - DAG.getConstant(NVTBits - 1, dl, ShTy)); + HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, NVTBitsM1); // Sign of Hi part. LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. Lo = DAG.getSelect(dl, NVT, isZero, InL, diff --git a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll @@ -396,109 +396,112 @@ define signext i128 @ashr_i128(i128 signext %a, i128 signext %b) { ; MIPS-LABEL: ashr_i128: ; MIPS: # %bb.0: # %entry -; MIPS-NEXT: lw $2, 28($sp) +; MIPS-NEXT: lw $9, 28($sp) +; MIPS-NEXT: andi $2, $9, 63 ; MIPS-NEXT: addiu $1, $zero, 64 -; MIPS-NEXT: subu $9, $1, $2 -; MIPS-NEXT: sllv $10, $5, $9 -; MIPS-NEXT: andi $13, $9, 32 -; MIPS-NEXT: andi $3, $2, 32 -; MIPS-NEXT: addiu $11, $zero, 0 -; MIPS-NEXT: bnez $13, $BB5_2 +; MIPS-NEXT: subu $1, $1, $9 +; MIPS-NEXT: andi $10, $1, 63 +; MIPS-NEXT: sllv $11, $5, $10 +; MIPS-NEXT: andi $14, $1, 32 +; MIPS-NEXT: andi $3, $9, 32 ; MIPS-NEXT: addiu $12, $zero, 0 +; MIPS-NEXT: bnez $14, $BB5_2 +; MIPS-NEXT: addiu $13, $zero, 0 ; MIPS-NEXT: # %bb.1: # %entry -; MIPS-NEXT: move $12, $10 +; MIPS-NEXT: move $13, $11 ; MIPS-NEXT: $BB5_2: # %entry ; MIPS-NEXT: not $8, $2 ; MIPS-NEXT: bnez $3, $BB5_5 -; MIPS-NEXT: srlv $14, $6, $2 +; MIPS-NEXT: srlv $15, $6, $2 ; MIPS-NEXT: # %bb.3: # %entry ; MIPS-NEXT: sll $1, $6, 1 -; MIPS-NEXT: srlv $11, $7, $2 +; MIPS-NEXT: srlv $12, $7, $2 ; MIPS-NEXT: sllv $1, $1, $8 -; MIPS-NEXT: or $15, $1, $11 -; MIPS-NEXT: bnez $13, $BB5_7 -; MIPS-NEXT: move $11, $14 +; MIPS-NEXT: or $24, $1, $12 +; MIPS-NEXT: bnez $14, $BB5_7 +; MIPS-NEXT: move $12, $15 ; MIPS-NEXT: # %bb.4: # %entry ; MIPS-NEXT: b $BB5_6 ; MIPS-NEXT: nop ; MIPS-NEXT: $BB5_5: -; MIPS-NEXT: bnez $13, $BB5_7 -; MIPS-NEXT: move $15, $14 +; MIPS-NEXT: bnez $14, $BB5_7 +; MIPS-NEXT: move $24, $15 ; MIPS-NEXT: $BB5_6: # %entry -; MIPS-NEXT: sllv $1, $4, $9 -; MIPS-NEXT: not $9, $9 -; MIPS-NEXT: srl $10, $5, 1 -; MIPS-NEXT: srlv $9, $10, $9 -; MIPS-NEXT: or $10, $1, $9 +; MIPS-NEXT: sllv $1, $4, $10 +; MIPS-NEXT: not $10, $10 +; MIPS-NEXT: srl $11, $5, 1 +; MIPS-NEXT: srlv $10, $11, $10 +; MIPS-NEXT: or $11, $1, $10 ; MIPS-NEXT: $BB5_7: # %entry -; MIPS-NEXT: addiu $24, $2, -64 -; MIPS-NEXT: sll $13, $4, 1 -; MIPS-NEXT: srav $14, $4, $24 -; MIPS-NEXT: andi $1, $24, 32 +; MIPS-NEXT: addiu $1, $9, -64 +; MIPS-NEXT: andi $25, $1, 63 +; MIPS-NEXT: sll $14, $4, 1 +; MIPS-NEXT: srav $15, $4, $25 +; MIPS-NEXT: andi $1, $1, 32 ; MIPS-NEXT: bnez $1, $BB5_10 -; MIPS-NEXT: sra $9, $4, 31 +; MIPS-NEXT: sra $10, $4, 31 ; MIPS-NEXT: # %bb.8: # %entry -; MIPS-NEXT: srlv $1, $5, $24 -; MIPS-NEXT: not $24, $24 -; MIPS-NEXT: sllv $24, $13, $24 -; MIPS-NEXT: or $25, $24, $1 -; MIPS-NEXT: move $24, $14 -; MIPS-NEXT: sltiu $14, $2, 64 -; MIPS-NEXT: beqz $14, $BB5_12 +; MIPS-NEXT: srlv $1, $5, $25 +; MIPS-NEXT: not $25, $25 +; MIPS-NEXT: sllv $25, $14, $25 +; MIPS-NEXT: or $gp, $25, $1 +; MIPS-NEXT: move $25, $15 +; MIPS-NEXT: sltiu $15, $9, 64 +; MIPS-NEXT: beqz $15, $BB5_12 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.9: # %entry ; MIPS-NEXT: b $BB5_11 ; MIPS-NEXT: nop ; MIPS-NEXT: $BB5_10: -; MIPS-NEXT: move $25, $14 -; MIPS-NEXT: sltiu $14, $2, 64 -; MIPS-NEXT: beqz $14, $BB5_12 -; MIPS-NEXT: move $24, $9 +; MIPS-NEXT: move $gp, $15 +; MIPS-NEXT: sltiu $15, $9, 64 +; MIPS-NEXT: beqz $15, $BB5_12 +; MIPS-NEXT: move $25, $10 ; MIPS-NEXT: $BB5_11: -; MIPS-NEXT: or $25, $15, $12 +; MIPS-NEXT: or $gp, $24, $13 ; MIPS-NEXT: $BB5_12: # %entry -; MIPS-NEXT: sltiu $12, $2, 1 -; MIPS-NEXT: beqz $12, $BB5_18 +; MIPS-NEXT: sltiu $9, $9, 1 +; MIPS-NEXT: beqz $9, $BB5_18 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.13: # %entry -; MIPS-NEXT: bnez $14, $BB5_19 +; MIPS-NEXT: bnez $15, $BB5_19 ; MIPS-NEXT: nop ; MIPS-NEXT: $BB5_14: # %entry -; MIPS-NEXT: beqz $12, $BB5_20 +; MIPS-NEXT: beqz $9, $BB5_20 ; MIPS-NEXT: nop ; MIPS-NEXT: $BB5_15: # %entry ; MIPS-NEXT: bnez $3, $BB5_21 ; MIPS-NEXT: srav $4, $4, $2 ; MIPS-NEXT: $BB5_16: # %entry ; MIPS-NEXT: srlv $1, $5, $2 -; MIPS-NEXT: sllv $2, $13, $8 +; MIPS-NEXT: sllv $2, $14, $8 ; MIPS-NEXT: or $3, $2, $1 -; MIPS-NEXT: bnez $14, $BB5_23 +; MIPS-NEXT: bnez $15, $BB5_23 ; MIPS-NEXT: move $2, $4 ; MIPS-NEXT: # %bb.17: # %entry ; MIPS-NEXT: b $BB5_22 ; MIPS-NEXT: nop ; MIPS-NEXT: $BB5_18: # %entry -; MIPS-NEXT: beqz $14, $BB5_14 -; MIPS-NEXT: move $7, $25 +; MIPS-NEXT: beqz $15, $BB5_14 +; MIPS-NEXT: move $7, $gp ; MIPS-NEXT: $BB5_19: -; MIPS-NEXT: bnez $12, $BB5_15 -; MIPS-NEXT: or $24, $11, $10 +; MIPS-NEXT: bnez $9, $BB5_15 +; MIPS-NEXT: or $25, $12, $11 ; MIPS-NEXT: $BB5_20: # %entry -; MIPS-NEXT: move $6, $24 +; MIPS-NEXT: move $6, $25 ; MIPS-NEXT: beqz $3, $BB5_16 ; MIPS-NEXT: srav $4, $4, $2 ; MIPS-NEXT: $BB5_21: -; MIPS-NEXT: move $2, $9 -; MIPS-NEXT: bnez $14, $BB5_23 +; MIPS-NEXT: move $2, $10 +; MIPS-NEXT: bnez $15, $BB5_23 ; MIPS-NEXT: move $3, $4 ; MIPS-NEXT: $BB5_22: # %entry -; MIPS-NEXT: move $2, $9 +; MIPS-NEXT: move $2, $10 ; MIPS-NEXT: $BB5_23: # %entry -; MIPS-NEXT: bnez $14, $BB5_25 +; MIPS-NEXT: bnez $15, $BB5_25 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.24: # %entry -; MIPS-NEXT: move $3, $9 +; MIPS-NEXT: move $3, $10 ; MIPS-NEXT: $BB5_25: # %entry ; MIPS-NEXT: move $4, $6 ; MIPS-NEXT: jr $ra @@ -506,188 +509,215 @@ ; ; MIPS32-LABEL: ashr_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $9, 28($sp) -; MIPS32-NEXT: srlv $1, $7, $9 -; MIPS32-NEXT: not $2, $9 -; MIPS32-NEXT: sll $3, $6, 1 -; MIPS32-NEXT: sllv $3, $3, $2 -; MIPS32-NEXT: addiu $8, $zero, 64 -; MIPS32-NEXT: or $1, $3, $1 -; MIPS32-NEXT: srlv $10, $6, $9 -; MIPS32-NEXT: subu $3, $8, $9 -; MIPS32-NEXT: sllv $11, $5, $3 -; MIPS32-NEXT: andi $12, $3, 32 -; MIPS32-NEXT: andi $13, $9, 32 -; MIPS32-NEXT: move $8, $11 -; MIPS32-NEXT: movn $8, $zero, $12 -; MIPS32-NEXT: movn $1, $10, $13 -; MIPS32-NEXT: addiu $14, $9, -64 -; MIPS32-NEXT: srlv $15, $5, $14 -; MIPS32-NEXT: sll $24, $4, 1 -; MIPS32-NEXT: not $25, $14 -; MIPS32-NEXT: sllv $25, $24, $25 -; MIPS32-NEXT: or $gp, $1, $8 -; MIPS32-NEXT: or $1, $25, $15 -; MIPS32-NEXT: srav $8, $4, $14 -; MIPS32-NEXT: andi $14, $14, 32 -; MIPS32-NEXT: movn $1, $8, $14 -; MIPS32-NEXT: sllv $15, $4, $3 -; MIPS32-NEXT: not $3, $3 -; MIPS32-NEXT: srl $25, $5, 1 -; MIPS32-NEXT: srlv $3, $25, $3 -; MIPS32-NEXT: sltiu $25, $9, 64 -; MIPS32-NEXT: movn $1, $gp, $25 -; MIPS32-NEXT: or $15, $15, $3 -; MIPS32-NEXT: srlv $3, $5, $9 -; MIPS32-NEXT: sllv $2, $24, $2 -; MIPS32-NEXT: or $5, $2, $3 -; MIPS32-NEXT: srav $24, $4, $9 -; MIPS32-NEXT: movn $5, $24, $13 +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 16, -4 +; MIPS32-NEXT: lw $9, 36($sp) +; MIPS32-NEXT: andi $2, $9, 63 +; MIPS32-NEXT: addiu $1, $zero, 64 +; MIPS32-NEXT: subu $1, $1, $9 +; MIPS32-NEXT: srlv $3, $7, $2 +; MIPS32-NEXT: sll $8, $6, 1 +; MIPS32-NEXT: not $10, $2 +; MIPS32-NEXT: sllv $8, $8, $10 +; MIPS32-NEXT: or $3, $8, $3 +; MIPS32-NEXT: srlv $11, $6, $2 +; MIPS32-NEXT: andi $12, $1, 63 +; MIPS32-NEXT: sllv $13, $5, $12 +; MIPS32-NEXT: andi $14, $1, 32 +; MIPS32-NEXT: andi $15, $9, 32 +; MIPS32-NEXT: move $1, $13 +; MIPS32-NEXT: movn $1, $zero, $14 +; MIPS32-NEXT: movn $3, $11, $15 +; MIPS32-NEXT: addiu $24, $9, -64 +; MIPS32-NEXT: andi $8, $24, 63 +; MIPS32-NEXT: srlv $25, $5, $8 +; MIPS32-NEXT: sll $gp, $4, 1 +; MIPS32-NEXT: not $16, $8 +; MIPS32-NEXT: sllv $16, $gp, $16 +; MIPS32-NEXT: or $3, $3, $1 +; MIPS32-NEXT: or $1, $16, $25 +; MIPS32-NEXT: srav $8, $4, $8 +; MIPS32-NEXT: andi $24, $24, 32 +; MIPS32-NEXT: movn $1, $8, $24 +; MIPS32-NEXT: sllv $25, $4, $12 +; MIPS32-NEXT: not $12, $12 +; MIPS32-NEXT: srl $16, $5, 1 +; MIPS32-NEXT: srlv $12, $16, $12 +; MIPS32-NEXT: sltiu $16, $9, 64 +; MIPS32-NEXT: movn $1, $3, $16 +; MIPS32-NEXT: or $12, $25, $12 +; MIPS32-NEXT: srlv $3, $5, $2 +; MIPS32-NEXT: sllv $5, $gp, $10 +; MIPS32-NEXT: or $5, $5, $3 +; MIPS32-NEXT: srav $10, $4, $2 +; MIPS32-NEXT: movn $5, $10, $15 ; MIPS32-NEXT: sra $2, $4, 31 ; MIPS32-NEXT: movz $1, $7, $9 ; MIPS32-NEXT: move $3, $2 -; MIPS32-NEXT: movn $3, $5, $25 -; MIPS32-NEXT: movn $15, $11, $12 -; MIPS32-NEXT: movn $10, $zero, $13 -; MIPS32-NEXT: or $4, $10, $15 -; MIPS32-NEXT: movn $8, $2, $14 -; MIPS32-NEXT: movn $8, $4, $25 +; MIPS32-NEXT: movn $3, $5, $16 +; MIPS32-NEXT: movn $12, $13, $14 +; MIPS32-NEXT: movn $11, $zero, $15 +; MIPS32-NEXT: or $4, $11, $12 +; MIPS32-NEXT: movn $8, $2, $24 +; MIPS32-NEXT: movn $8, $4, $16 ; MIPS32-NEXT: movz $8, $6, $9 -; MIPS32-NEXT: movn $24, $2, $13 -; MIPS32-NEXT: movn $2, $24, $25 +; MIPS32-NEXT: movn $10, $2, $15 +; MIPS32-NEXT: movn $2, $10, $16 ; MIPS32-NEXT: move $4, $8 -; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: move $5, $1 +; MIPS32-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 8 ; ; 32R2-LABEL: ashr_i128: ; 32R2: # %bb.0: # %entry -; 32R2-NEXT: lw $9, 28($sp) -; 32R2-NEXT: srlv $1, $7, $9 -; 32R2-NEXT: not $2, $9 -; 32R2-NEXT: sll $3, $6, 1 -; 32R2-NEXT: sllv $3, $3, $2 -; 32R2-NEXT: addiu $8, $zero, 64 -; 32R2-NEXT: or $1, $3, $1 -; 32R2-NEXT: srlv $10, $6, $9 -; 32R2-NEXT: subu $3, $8, $9 -; 32R2-NEXT: sllv $11, $5, $3 -; 32R2-NEXT: andi $12, $3, 32 -; 32R2-NEXT: andi $13, $9, 32 -; 32R2-NEXT: move $8, $11 -; 32R2-NEXT: movn $8, $zero, $12 -; 32R2-NEXT: movn $1, $10, $13 -; 32R2-NEXT: addiu $14, $9, -64 -; 32R2-NEXT: srlv $15, $5, $14 -; 32R2-NEXT: sll $24, $4, 1 -; 32R2-NEXT: not $25, $14 -; 32R2-NEXT: sllv $25, $24, $25 -; 32R2-NEXT: or $gp, $1, $8 -; 32R2-NEXT: or $1, $25, $15 -; 32R2-NEXT: srav $8, $4, $14 -; 32R2-NEXT: andi $14, $14, 32 -; 32R2-NEXT: movn $1, $8, $14 -; 32R2-NEXT: sllv $15, $4, $3 -; 32R2-NEXT: not $3, $3 -; 32R2-NEXT: srl $25, $5, 1 -; 32R2-NEXT: srlv $3, $25, $3 -; 32R2-NEXT: sltiu $25, $9, 64 -; 32R2-NEXT: movn $1, $gp, $25 -; 32R2-NEXT: or $15, $15, $3 -; 32R2-NEXT: srlv $3, $5, $9 -; 32R2-NEXT: sllv $2, $24, $2 -; 32R2-NEXT: or $5, $2, $3 -; 32R2-NEXT: srav $24, $4, $9 -; 32R2-NEXT: movn $5, $24, $13 +; 32R2-NEXT: addiu $sp, $sp, -8 +; 32R2-NEXT: .cfi_def_cfa_offset 8 +; 32R2-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; 32R2-NEXT: .cfi_offset 16, -4 +; 32R2-NEXT: lw $9, 36($sp) +; 32R2-NEXT: andi $2, $9, 63 +; 32R2-NEXT: addiu $1, $zero, 64 +; 32R2-NEXT: subu $1, $1, $9 +; 32R2-NEXT: srlv $3, $7, $2 +; 32R2-NEXT: sll $8, $6, 1 +; 32R2-NEXT: not $10, $2 +; 32R2-NEXT: sllv $8, $8, $10 +; 32R2-NEXT: or $3, $8, $3 +; 32R2-NEXT: srlv $11, $6, $2 +; 32R2-NEXT: andi $12, $1, 63 +; 32R2-NEXT: sllv $13, $5, $12 +; 32R2-NEXT: andi $14, $1, 32 +; 32R2-NEXT: andi $15, $9, 32 +; 32R2-NEXT: move $1, $13 +; 32R2-NEXT: movn $1, $zero, $14 +; 32R2-NEXT: movn $3, $11, $15 +; 32R2-NEXT: addiu $24, $9, -64 +; 32R2-NEXT: andi $8, $24, 63 +; 32R2-NEXT: srlv $25, $5, $8 +; 32R2-NEXT: sll $gp, $4, 1 +; 32R2-NEXT: not $16, $8 +; 32R2-NEXT: sllv $16, $gp, $16 +; 32R2-NEXT: or $3, $3, $1 +; 32R2-NEXT: or $1, $16, $25 +; 32R2-NEXT: srav $8, $4, $8 +; 32R2-NEXT: andi $24, $24, 32 +; 32R2-NEXT: movn $1, $8, $24 +; 32R2-NEXT: sllv $25, $4, $12 +; 32R2-NEXT: not $12, $12 +; 32R2-NEXT: srl $16, $5, 1 +; 32R2-NEXT: srlv $12, $16, $12 +; 32R2-NEXT: sltiu $16, $9, 64 +; 32R2-NEXT: movn $1, $3, $16 +; 32R2-NEXT: or $12, $25, $12 +; 32R2-NEXT: srlv $3, $5, $2 +; 32R2-NEXT: sllv $5, $gp, $10 +; 32R2-NEXT: or $5, $5, $3 +; 32R2-NEXT: srav $10, $4, $2 +; 32R2-NEXT: movn $5, $10, $15 ; 32R2-NEXT: sra $2, $4, 31 ; 32R2-NEXT: movz $1, $7, $9 ; 32R2-NEXT: move $3, $2 -; 32R2-NEXT: movn $3, $5, $25 -; 32R2-NEXT: movn $15, $11, $12 -; 32R2-NEXT: movn $10, $zero, $13 -; 32R2-NEXT: or $4, $10, $15 -; 32R2-NEXT: movn $8, $2, $14 -; 32R2-NEXT: movn $8, $4, $25 +; 32R2-NEXT: movn $3, $5, $16 +; 32R2-NEXT: movn $12, $13, $14 +; 32R2-NEXT: movn $11, $zero, $15 +; 32R2-NEXT: or $4, $11, $12 +; 32R2-NEXT: movn $8, $2, $24 +; 32R2-NEXT: movn $8, $4, $16 ; 32R2-NEXT: movz $8, $6, $9 -; 32R2-NEXT: movn $24, $2, $13 -; 32R2-NEXT: movn $2, $24, $25 +; 32R2-NEXT: movn $10, $2, $15 +; 32R2-NEXT: movn $2, $10, $16 ; 32R2-NEXT: move $4, $8 -; 32R2-NEXT: jr $ra ; 32R2-NEXT: move $5, $1 +; 32R2-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; 32R2-NEXT: jr $ra +; 32R2-NEXT: addiu $sp, $sp, 8 ; ; 32R6-LABEL: ashr_i128: ; 32R6: # %bb.0: # %entry -; 32R6-NEXT: lw $3, 28($sp) -; 32R6-NEXT: addiu $1, $zero, 64 -; 32R6-NEXT: subu $1, $1, $3 -; 32R6-NEXT: sllv $2, $5, $1 -; 32R6-NEXT: andi $8, $1, 32 -; 32R6-NEXT: selnez $9, $2, $8 -; 32R6-NEXT: sllv $10, $4, $1 -; 32R6-NEXT: not $1, $1 -; 32R6-NEXT: srl $11, $5, 1 -; 32R6-NEXT: srlv $1, $11, $1 -; 32R6-NEXT: or $1, $10, $1 -; 32R6-NEXT: seleqz $1, $1, $8 -; 32R6-NEXT: or $1, $9, $1 -; 32R6-NEXT: srlv $9, $7, $3 -; 32R6-NEXT: not $10, $3 -; 32R6-NEXT: sll $11, $6, 1 -; 32R6-NEXT: sllv $11, $11, $10 -; 32R6-NEXT: or $9, $11, $9 -; 32R6-NEXT: andi $11, $3, 32 +; 32R6-NEXT: addiu $sp, $sp, -8 +; 32R6-NEXT: .cfi_def_cfa_offset 8 +; 32R6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; 32R6-NEXT: .cfi_offset 16, -4 +; 32R6-NEXT: lw $1, 36($sp) +; 32R6-NEXT: addiu $2, $zero, 64 +; 32R6-NEXT: subu $2, $2, $1 +; 32R6-NEXT: andi $3, $2, 63 +; 32R6-NEXT: sllv $8, $4, $3 +; 32R6-NEXT: not $9, $3 +; 32R6-NEXT: srl $10, $5, 1 +; 32R6-NEXT: srlv $9, $10, $9 +; 32R6-NEXT: or $8, $8, $9 +; 32R6-NEXT: sllv $3, $5, $3 +; 32R6-NEXT: andi $2, $2, 32 +; 32R6-NEXT: selnez $9, $3, $2 +; 32R6-NEXT: seleqz $8, $8, $2 +; 32R6-NEXT: andi $10, $1, 63 +; 32R6-NEXT: srlv $11, $7, $10 +; 32R6-NEXT: sll $12, $6, 1 +; 32R6-NEXT: not $13, $10 +; 32R6-NEXT: sllv $12, $12, $13 +; 32R6-NEXT: or $8, $9, $8 +; 32R6-NEXT: or $9, $12, $11 +; 32R6-NEXT: andi $11, $1, 32 ; 32R6-NEXT: seleqz $9, $9, $11 -; 32R6-NEXT: srlv $12, $6, $3 -; 32R6-NEXT: selnez $13, $12, $11 +; 32R6-NEXT: srlv $12, $6, $10 +; 32R6-NEXT: selnez $14, $12, $11 ; 32R6-NEXT: seleqz $12, $12, $11 -; 32R6-NEXT: or $1, $12, $1 -; 32R6-NEXT: seleqz $2, $2, $8 -; 32R6-NEXT: or $8, $13, $9 -; 32R6-NEXT: addiu $9, $3, -64 -; 32R6-NEXT: srlv $12, $5, $9 -; 32R6-NEXT: sll $13, $4, 1 -; 32R6-NEXT: not $14, $9 -; 32R6-NEXT: sllv $14, $13, $14 -; 32R6-NEXT: sltiu $15, $3, 64 -; 32R6-NEXT: or $2, $8, $2 -; 32R6-NEXT: selnez $1, $1, $15 -; 32R6-NEXT: or $8, $14, $12 -; 32R6-NEXT: srav $12, $4, $9 -; 32R6-NEXT: andi $9, $9, 32 -; 32R6-NEXT: seleqz $14, $12, $9 -; 32R6-NEXT: sra $24, $4, 31 -; 32R6-NEXT: selnez $25, $24, $9 -; 32R6-NEXT: seleqz $8, $8, $9 -; 32R6-NEXT: or $14, $25, $14 -; 32R6-NEXT: seleqz $14, $14, $15 -; 32R6-NEXT: selnez $9, $12, $9 -; 32R6-NEXT: seleqz $12, $24, $15 -; 32R6-NEXT: or $1, $1, $14 -; 32R6-NEXT: selnez $14, $1, $3 -; 32R6-NEXT: selnez $1, $2, $15 -; 32R6-NEXT: or $2, $9, $8 -; 32R6-NEXT: srav $8, $4, $3 -; 32R6-NEXT: seleqz $4, $8, $11 -; 32R6-NEXT: selnez $9, $24, $11 +; 32R6-NEXT: sltiu $15, $1, 64 +; 32R6-NEXT: or $8, $12, $8 +; 32R6-NEXT: seleqz $2, $3, $2 +; 32R6-NEXT: or $3, $14, $9 +; 32R6-NEXT: sra $9, $4, 31 +; 32R6-NEXT: selnez $12, $9, $11 +; 32R6-NEXT: srav $14, $4, $10 +; 32R6-NEXT: seleqz $24, $14, $11 +; 32R6-NEXT: addiu $25, $1, -64 +; 32R6-NEXT: andi $gp, $25, 32 +; 32R6-NEXT: or $2, $3, $2 +; 32R6-NEXT: or $3, $12, $24 +; 32R6-NEXT: seleqz $12, $9, $15 +; 32R6-NEXT: selnez $8, $8, $15 +; 32R6-NEXT: selnez $9, $9, $gp +; 32R6-NEXT: andi $24, $25, 63 +; 32R6-NEXT: srav $25, $4, $24 +; 32R6-NEXT: seleqz $16, $25, $gp +; 32R6-NEXT: or $9, $9, $16 +; 32R6-NEXT: seleqz $9, $9, $15 +; 32R6-NEXT: or $8, $8, $9 +; 32R6-NEXT: selnez $8, $8, $1 +; 32R6-NEXT: selnez $3, $3, $15 +; 32R6-NEXT: seleqz $6, $6, $1 +; 32R6-NEXT: seleqz $7, $7, $1 +; 32R6-NEXT: selnez $2, $2, $15 +; 32R6-NEXT: srlv $9, $5, $24 +; 32R6-NEXT: sll $16, $4, 1 +; 32R6-NEXT: not $4, $24 +; 32R6-NEXT: sllv $4, $16, $4 +; 32R6-NEXT: or $4, $4, $9 +; 32R6-NEXT: seleqz $4, $4, $gp +; 32R6-NEXT: selnez $9, $25, $gp ; 32R6-NEXT: or $4, $9, $4 -; 32R6-NEXT: selnez $9, $4, $15 -; 32R6-NEXT: seleqz $2, $2, $15 -; 32R6-NEXT: seleqz $4, $6, $3 -; 32R6-NEXT: seleqz $6, $7, $3 -; 32R6-NEXT: or $1, $1, $2 -; 32R6-NEXT: selnez $1, $1, $3 -; 32R6-NEXT: or $1, $6, $1 -; 32R6-NEXT: or $4, $4, $14 -; 32R6-NEXT: or $2, $9, $12 -; 32R6-NEXT: srlv $3, $5, $3 -; 32R6-NEXT: sllv $5, $13, $10 +; 32R6-NEXT: seleqz $4, $4, $15 +; 32R6-NEXT: or $2, $2, $4 +; 32R6-NEXT: selnez $1, $2, $1 +; 32R6-NEXT: or $1, $7, $1 +; 32R6-NEXT: or $4, $6, $8 +; 32R6-NEXT: or $2, $3, $12 +; 32R6-NEXT: srlv $3, $5, $10 +; 32R6-NEXT: sllv $5, $16, $13 ; 32R6-NEXT: or $3, $5, $3 ; 32R6-NEXT: seleqz $3, $3, $11 -; 32R6-NEXT: selnez $5, $8, $11 +; 32R6-NEXT: selnez $5, $14, $11 ; 32R6-NEXT: or $3, $5, $3 ; 32R6-NEXT: selnez $3, $3, $15 ; 32R6-NEXT: or $3, $3, $12 -; 32R6-NEXT: jr $ra ; 32R6-NEXT: move $5, $1 +; 32R6-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; 32R6-NEXT: jr $ra +; 32R6-NEXT: addiu $sp, $sp, 8 ; ; MIPS3-LABEL: ashr_i128: ; MIPS3: # %bb.0: # %entry @@ -760,88 +790,97 @@ ; ; MMR3-LABEL: ashr_i128: ; MMR3: # %bb.0: # %entry -; MMR3-NEXT: addiusp -48 -; MMR3-NEXT: .cfi_def_cfa_offset 48 -; MMR3-NEXT: swp $16, 40($sp) +; MMR3-NEXT: addiusp -56 +; MMR3-NEXT: .cfi_def_cfa_offset 56 +; MMR3-NEXT: swp $16, 48($sp) ; MMR3-NEXT: .cfi_offset 17, -4 ; MMR3-NEXT: .cfi_offset 16, -8 ; MMR3-NEXT: move $8, $7 +; MMR3-NEXT: move $3, $6 ; MMR3-NEXT: sw $6, 32($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $5, 36($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $16, 76($sp) -; MMR3-NEXT: srlv $4, $7, $16 -; MMR3-NEXT: not16 $3, $16 -; MMR3-NEXT: sw $3, 24($sp) # 4-byte Folded Spill -; MMR3-NEXT: sll16 $2, $6, 1 -; MMR3-NEXT: sllv $3, $2, $3 +; MMR3-NEXT: sw $4, 40($sp) # 4-byte Folded Spill +; MMR3-NEXT: lw $4, 84($sp) +; MMR3-NEXT: sw $4, 36($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $6, $4, 63 +; MMR3-NEXT: sw $6, 20($sp) # 4-byte Folded Spill ; MMR3-NEXT: li16 $2, 64 -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: srlv $6, $6, $16 -; MMR3-NEXT: sw $6, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: subu16 $7, $2, $16 -; MMR3-NEXT: sllv $9, $5, $7 -; MMR3-NEXT: andi16 $2, $7, 32 -; MMR3-NEXT: sw $2, 28($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $5, $16, 32 -; MMR3-NEXT: sw $5, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: move $4, $9 +; MMR3-NEXT: subu16 $17, $2, $4 +; MMR3-NEXT: srlv $4, $7, $6 +; MMR3-NEXT: sll16 $2, $3, 1 +; MMR3-NEXT: not16 $7, $6 +; MMR3-NEXT: sw $7, 24($sp) # 4-byte Folded Spill +; MMR3-NEXT: sllv $2, $2, $7 +; MMR3-NEXT: or16 $2, $4 +; MMR3-NEXT: srlv $16, $3, $6 +; MMR3-NEXT: andi16 $3, $17, 63 +; MMR3-NEXT: sw $3, 12($sp) # 4-byte Folded Spill +; MMR3-NEXT: sllv $9, $5, $3 +; MMR3-NEXT: sw $5, 4($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $4, $17, 32 +; MMR3-NEXT: sw $4, 28($sp) # 4-byte Folded Spill +; MMR3-NEXT: lw $7, 36($sp) # 4-byte Folded Reload +; MMR3-NEXT: andi16 $3, $7, 32 +; MMR3-NEXT: sw $3, 44($sp) # 4-byte Folded Spill ; MMR3-NEXT: li16 $17, 0 -; MMR3-NEXT: movn $4, $17, $2 -; MMR3-NEXT: movn $3, $6, $5 -; MMR3-NEXT: addiu $2, $16, -64 -; MMR3-NEXT: lw $5, 36($sp) # 4-byte Folded Reload -; MMR3-NEXT: srlv $5, $5, $2 -; MMR3-NEXT: sw $5, 20($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $17, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: sll16 $6, $17, 1 -; MMR3-NEXT: sw $6, 4($sp) # 4-byte Folded Spill -; MMR3-NEXT: not16 $5, $2 -; MMR3-NEXT: sllv $5, $6, $5 -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: lw $4, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $5, $4 -; MMR3-NEXT: srav $1, $17, $2 -; MMR3-NEXT: andi16 $2, $2, 32 -; MMR3-NEXT: sw $2, 20($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $5, $1, $2 -; MMR3-NEXT: sllv $2, $17, $7 -; MMR3-NEXT: not16 $4, $7 +; MMR3-NEXT: move $6, $9 +; MMR3-NEXT: movn $6, $17, $4 +; MMR3-NEXT: movn $2, $16, $3 +; MMR3-NEXT: addiu $17, $7, -64 +; MMR3-NEXT: andi16 $4, $17, 63 +; MMR3-NEXT: srlv $3, $5, $4 +; MMR3-NEXT: sw $3, 16($sp) # 4-byte Folded Spill +; MMR3-NEXT: lw $3, 40($sp) # 4-byte Folded Reload +; MMR3-NEXT: sll16 $7, $3, 1 +; MMR3-NEXT: sw $7, 8($sp) # 4-byte Folded Spill +; MMR3-NEXT: not16 $5, $4 +; MMR3-NEXT: sllv $5, $7, $5 +; MMR3-NEXT: or16 $2, $6 +; MMR3-NEXT: lw $6, 16($sp) # 4-byte Folded Reload +; MMR3-NEXT: or16 $5, $6 +; MMR3-NEXT: srav $1, $3, $4 +; MMR3-NEXT: andi16 $4, $17, 32 +; MMR3-NEXT: sw $4, 16($sp) # 4-byte Folded Spill +; MMR3-NEXT: movn $5, $1, $4 +; MMR3-NEXT: lw $4, 12($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $6, $3, $4 +; MMR3-NEXT: not16 $4, $4 +; MMR3-NEXT: lw $3, 4($sp) # 4-byte Folded Reload +; MMR3-NEXT: srl16 $17, $3, 1 +; MMR3-NEXT: srlv $4, $17, $4 ; MMR3-NEXT: lw $7, 36($sp) # 4-byte Folded Reload -; MMR3-NEXT: srl16 $6, $7, 1 -; MMR3-NEXT: srlv $6, $6, $4 -; MMR3-NEXT: sltiu $10, $16, 64 -; MMR3-NEXT: movn $5, $3, $10 -; MMR3-NEXT: or16 $6, $2 -; MMR3-NEXT: srlv $2, $7, $16 +; MMR3-NEXT: sltiu $10, $7, 64 +; MMR3-NEXT: movn $5, $2, $10 +; MMR3-NEXT: or16 $4, $6 +; MMR3-NEXT: lw $6, 20($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $2, $3, $6 ; MMR3-NEXT: lw $3, 24($sp) # 4-byte Folded Reload -; MMR3-NEXT: lw $4, 4($sp) # 4-byte Folded Reload -; MMR3-NEXT: sllv $3, $4, $3 -; MMR3-NEXT: or16 $3, $2 -; MMR3-NEXT: srav $11, $17, $16 +; MMR3-NEXT: lw $17, 8($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $17, $17, $3 +; MMR3-NEXT: or16 $17, $2 +; MMR3-NEXT: lw $2, 40($sp) # 4-byte Folded Reload +; MMR3-NEXT: srav $11, $2, $6 +; MMR3-NEXT: lw $3, 44($sp) # 4-byte Folded Reload +; MMR3-NEXT: movn $17, $11, $3 +; MMR3-NEXT: sra $2, $2, 31 +; MMR3-NEXT: movz $5, $8, $7 +; MMR3-NEXT: move $3, $2 +; MMR3-NEXT: movn $3, $17, $10 +; MMR3-NEXT: lw $17, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: movn $4, $9, $17 +; MMR3-NEXT: lw $6, 44($sp) # 4-byte Folded Reload +; MMR3-NEXT: li16 $17, 0 +; MMR3-NEXT: movn $16, $17, $6 +; MMR3-NEXT: or16 $16, $4 ; MMR3-NEXT: lw $4, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $3, $11, $4 -; MMR3-NEXT: sra $2, $17, 31 -; MMR3-NEXT: movz $5, $8, $16 -; MMR3-NEXT: move $8, $2 -; MMR3-NEXT: movn $8, $3, $10 -; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $6, $9, $3 -; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: lw $7, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $7, $3, $4 -; MMR3-NEXT: or16 $7, $6 -; MMR3-NEXT: lw $3, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $1, $2, $3 -; MMR3-NEXT: movn $1, $7, $10 -; MMR3-NEXT: lw $3, 32($sp) # 4-byte Folded Reload -; MMR3-NEXT: movz $1, $3, $16 -; MMR3-NEXT: movn $11, $2, $4 +; MMR3-NEXT: movn $1, $2, $4 +; MMR3-NEXT: movn $1, $16, $10 +; MMR3-NEXT: lw $4, 32($sp) # 4-byte Folded Reload +; MMR3-NEXT: movz $1, $4, $7 +; MMR3-NEXT: movn $11, $2, $6 ; MMR3-NEXT: movn $2, $11, $10 -; MMR3-NEXT: move $3, $8 ; MMR3-NEXT: move $4, $1 -; MMR3-NEXT: lwp $16, 40($sp) -; MMR3-NEXT: addiusp 48 +; MMR3-NEXT: lwp $16, 48($sp) +; MMR3-NEXT: addiusp 56 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: ashr_i128: @@ -853,78 +892,86 @@ ; MMR6-NEXT: .cfi_offset 17, -4 ; MMR6-NEXT: .cfi_offset 16, -8 ; MMR6-NEXT: move $1, $7 -; MMR6-NEXT: lw $3, 44($sp) -; MMR6-NEXT: li16 $2, 64 -; MMR6-NEXT: subu16 $7, $2, $3 -; MMR6-NEXT: sllv $8, $5, $7 -; MMR6-NEXT: andi16 $2, $7, 32 -; MMR6-NEXT: selnez $9, $8, $2 -; MMR6-NEXT: sllv $10, $4, $7 -; MMR6-NEXT: not16 $7, $7 -; MMR6-NEXT: srl16 $16, $5, 1 -; MMR6-NEXT: srlv $7, $16, $7 -; MMR6-NEXT: or $7, $10, $7 -; MMR6-NEXT: seleqz $7, $7, $2 -; MMR6-NEXT: or $7, $9, $7 -; MMR6-NEXT: srlv $9, $1, $3 -; MMR6-NEXT: not16 $16, $3 -; MMR6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill -; MMR6-NEXT: sll16 $17, $6, 1 -; MMR6-NEXT: sllv $10, $17, $16 -; MMR6-NEXT: or $9, $10, $9 -; MMR6-NEXT: andi16 $17, $3, 32 -; MMR6-NEXT: seleqz $9, $9, $17 -; MMR6-NEXT: srlv $10, $6, $3 -; MMR6-NEXT: selnez $11, $10, $17 +; MMR6-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: move $6, $5 +; MMR6-NEXT: move $5, $4 +; MMR6-NEXT: lw $2, 44($sp) +; MMR6-NEXT: li16 $3, 64 +; MMR6-NEXT: subu16 $3, $3, $2 +; MMR6-NEXT: andi16 $7, $3, 63 +; MMR6-NEXT: sllv $8, $4, $7 +; MMR6-NEXT: not16 $16, $7 +; MMR6-NEXT: srl16 $17, $6, 1 +; MMR6-NEXT: srlv $9, $17, $16 +; MMR6-NEXT: or $8, $8, $9 +; MMR6-NEXT: sllv $9, $6, $7 +; MMR6-NEXT: andi16 $3, $3, 32 +; MMR6-NEXT: selnez $10, $9, $3 +; MMR6-NEXT: seleqz $8, $8, $3 +; MMR6-NEXT: andi16 $7, $2, 63 +; MMR6-NEXT: srlv $11, $1, $7 +; MMR6-NEXT: lw $4, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sll16 $17, $4, 1 +; MMR6-NEXT: not16 $16, $7 +; MMR6-NEXT: sw $16, 0($sp) # 4-byte Folded Spill +; MMR6-NEXT: sllv $12, $17, $16 +; MMR6-NEXT: or $8, $10, $8 +; MMR6-NEXT: or $10, $12, $11 +; MMR6-NEXT: andi16 $17, $2, 32 ; MMR6-NEXT: seleqz $10, $10, $17 -; MMR6-NEXT: or $10, $10, $7 -; MMR6-NEXT: seleqz $12, $8, $2 -; MMR6-NEXT: or $8, $11, $9 -; MMR6-NEXT: addiu $2, $3, -64 -; MMR6-NEXT: srlv $9, $5, $2 -; MMR6-NEXT: sll16 $7, $4, 1 -; MMR6-NEXT: not16 $16, $2 -; MMR6-NEXT: sllv $11, $7, $16 -; MMR6-NEXT: sltiu $13, $3, 64 -; MMR6-NEXT: or $8, $8, $12 -; MMR6-NEXT: selnez $10, $10, $13 -; MMR6-NEXT: or $9, $11, $9 -; MMR6-NEXT: srav $11, $4, $2 -; MMR6-NEXT: andi16 $2, $2, 32 -; MMR6-NEXT: seleqz $12, $11, $2 -; MMR6-NEXT: sra $14, $4, 31 -; MMR6-NEXT: selnez $15, $14, $2 -; MMR6-NEXT: seleqz $9, $9, $2 -; MMR6-NEXT: or $12, $15, $12 -; MMR6-NEXT: seleqz $12, $12, $13 -; MMR6-NEXT: selnez $2, $11, $2 -; MMR6-NEXT: seleqz $11, $14, $13 -; MMR6-NEXT: or $10, $10, $12 -; MMR6-NEXT: selnez $10, $10, $3 +; MMR6-NEXT: srlv $11, $4, $7 +; MMR6-NEXT: selnez $12, $11, $17 +; MMR6-NEXT: seleqz $11, $11, $17 +; MMR6-NEXT: sltiu $13, $2, 64 +; MMR6-NEXT: or $8, $11, $8 +; MMR6-NEXT: seleqz $9, $9, $3 +; MMR6-NEXT: or $10, $12, $10 +; MMR6-NEXT: sra $11, $5, 31 +; MMR6-NEXT: selnez $12, $11, $17 +; MMR6-NEXT: srav $14, $5, $7 +; MMR6-NEXT: seleqz $15, $14, $17 +; MMR6-NEXT: addiu $4, $2, -64 +; MMR6-NEXT: andi16 $3, $4, 32 +; MMR6-NEXT: or $9, $10, $9 +; MMR6-NEXT: or $10, $12, $15 +; MMR6-NEXT: seleqz $12, $11, $13 ; MMR6-NEXT: selnez $8, $8, $13 -; MMR6-NEXT: or $2, $2, $9 -; MMR6-NEXT: srav $9, $4, $3 -; MMR6-NEXT: seleqz $4, $9, $17 -; MMR6-NEXT: selnez $12, $14, $17 -; MMR6-NEXT: or $4, $12, $4 -; MMR6-NEXT: selnez $12, $4, $13 -; MMR6-NEXT: seleqz $2, $2, $13 -; MMR6-NEXT: seleqz $4, $6, $3 -; MMR6-NEXT: seleqz $1, $1, $3 -; MMR6-NEXT: or $2, $8, $2 -; MMR6-NEXT: selnez $2, $2, $3 +; MMR6-NEXT: selnez $11, $11, $3 +; MMR6-NEXT: andi16 $4, $4, 63 +; MMR6-NEXT: srav $15, $5, $4 +; MMR6-NEXT: seleqz $24, $15, $3 +; MMR6-NEXT: or $11, $11, $24 +; MMR6-NEXT: seleqz $11, $11, $13 +; MMR6-NEXT: or $8, $8, $11 +; MMR6-NEXT: selnez $8, $8, $2 +; MMR6-NEXT: selnez $10, $10, $13 +; MMR6-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: seleqz $11, $16, $2 +; MMR6-NEXT: seleqz $1, $1, $2 +; MMR6-NEXT: selnez $9, $9, $13 +; MMR6-NEXT: srlv $24, $6, $4 +; MMR6-NEXT: sll16 $5, $5, 1 +; MMR6-NEXT: not16 $4, $4 +; MMR6-NEXT: sllv $4, $5, $4 +; MMR6-NEXT: or $4, $4, $24 +; MMR6-NEXT: seleqz $4, $4, $3 +; MMR6-NEXT: selnez $3, $15, $3 +; MMR6-NEXT: or $3, $3, $4 +; MMR6-NEXT: seleqz $3, $3, $13 +; MMR6-NEXT: or $3, $9, $3 +; MMR6-NEXT: selnez $2, $3, $2 ; MMR6-NEXT: or $1, $1, $2 -; MMR6-NEXT: or $4, $4, $10 -; MMR6-NEXT: or $2, $12, $11 -; MMR6-NEXT: srlv $3, $5, $3 -; MMR6-NEXT: lw $5, 4($sp) # 4-byte Folded Reload -; MMR6-NEXT: sllv $5, $7, $5 +; MMR6-NEXT: or $4, $11, $8 +; MMR6-NEXT: or $2, $10, $12 +; MMR6-NEXT: srlv $3, $6, $7 +; MMR6-NEXT: lw $6, 0($sp) # 4-byte Folded Reload +; MMR6-NEXT: sllv $5, $5, $6 ; MMR6-NEXT: or $3, $5, $3 ; MMR6-NEXT: seleqz $3, $3, $17 -; MMR6-NEXT: selnez $5, $9, $17 +; MMR6-NEXT: selnez $5, $14, $17 ; MMR6-NEXT: or $3, $5, $3 ; MMR6-NEXT: selnez $3, $3, $13 -; MMR6-NEXT: or $3, $3, $11 +; MMR6-NEXT: or $3, $3, $12 ; MMR6-NEXT: move $5, $1 ; MMR6-NEXT: lw $16, 8($sp) # 4-byte Folded Reload ; MMR6-NEXT: lw $17, 12($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll @@ -396,111 +396,114 @@ define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) { ; MIPS2-LABEL: lshr_i128: ; MIPS2: # %bb.0: # %entry -; MIPS2-NEXT: lw $2, 28($sp) +; MIPS2-NEXT: lw $8, 28($sp) +; MIPS2-NEXT: andi $2, $8, 63 ; MIPS2-NEXT: addiu $1, $zero, 64 -; MIPS2-NEXT: subu $12, $1, $2 -; MIPS2-NEXT: sllv $10, $5, $12 -; MIPS2-NEXT: andi $15, $12, 32 -; MIPS2-NEXT: andi $8, $2, 32 +; MIPS2-NEXT: subu $1, $1, $8 +; MIPS2-NEXT: andi $13, $1, 63 +; MIPS2-NEXT: sllv $11, $5, $13 +; MIPS2-NEXT: andi $24, $1, 32 +; MIPS2-NEXT: andi $9, $8, 32 ; MIPS2-NEXT: addiu $3, $zero, 0 -; MIPS2-NEXT: bnez $15, $BB5_2 -; MIPS2-NEXT: addiu $13, $zero, 0 +; MIPS2-NEXT: bnez $24, $BB5_2 +; MIPS2-NEXT: addiu $14, $zero, 0 ; MIPS2-NEXT: # %bb.1: # %entry -; MIPS2-NEXT: move $13, $10 +; MIPS2-NEXT: move $14, $11 ; MIPS2-NEXT: $BB5_2: # %entry -; MIPS2-NEXT: not $9, $2 -; MIPS2-NEXT: bnez $8, $BB5_5 -; MIPS2-NEXT: srlv $24, $6, $2 +; MIPS2-NEXT: not $10, $2 +; MIPS2-NEXT: bnez $9, $BB5_5 +; MIPS2-NEXT: srlv $25, $6, $2 ; MIPS2-NEXT: # %bb.3: # %entry ; MIPS2-NEXT: sll $1, $6, 1 -; MIPS2-NEXT: srlv $11, $7, $2 -; MIPS2-NEXT: sllv $1, $1, $9 -; MIPS2-NEXT: or $14, $1, $11 -; MIPS2-NEXT: bnez $15, $BB5_7 -; MIPS2-NEXT: move $11, $24 +; MIPS2-NEXT: srlv $12, $7, $2 +; MIPS2-NEXT: sllv $1, $1, $10 +; MIPS2-NEXT: or $15, $1, $12 +; MIPS2-NEXT: bnez $24, $BB5_7 +; MIPS2-NEXT: move $12, $25 ; MIPS2-NEXT: # %bb.4: # %entry ; MIPS2-NEXT: b $BB5_6 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_5: -; MIPS2-NEXT: addiu $11, $zero, 0 -; MIPS2-NEXT: bnez $15, $BB5_7 -; MIPS2-NEXT: move $14, $24 +; MIPS2-NEXT: addiu $12, $zero, 0 +; MIPS2-NEXT: bnez $24, $BB5_7 +; MIPS2-NEXT: move $15, $25 ; MIPS2-NEXT: $BB5_6: # %entry -; MIPS2-NEXT: sllv $1, $4, $12 -; MIPS2-NEXT: not $10, $12 -; MIPS2-NEXT: srl $12, $5, 1 -; MIPS2-NEXT: srlv $10, $12, $10 -; MIPS2-NEXT: or $10, $1, $10 +; MIPS2-NEXT: sllv $1, $4, $13 +; MIPS2-NEXT: not $11, $13 +; MIPS2-NEXT: srl $13, $5, 1 +; MIPS2-NEXT: srlv $11, $13, $11 +; MIPS2-NEXT: or $11, $1, $11 ; MIPS2-NEXT: $BB5_7: # %entry -; MIPS2-NEXT: addiu $15, $2, -64 -; MIPS2-NEXT: sll $12, $4, 1 -; MIPS2-NEXT: andi $1, $15, 32 +; MIPS2-NEXT: addiu $1, $8, -64 +; MIPS2-NEXT: andi $24, $1, 63 +; MIPS2-NEXT: sll $13, $4, 1 +; MIPS2-NEXT: andi $1, $1, 32 ; MIPS2-NEXT: bnez $1, $BB5_10 -; MIPS2-NEXT: srlv $25, $4, $15 +; MIPS2-NEXT: srlv $gp, $4, $24 ; MIPS2-NEXT: # %bb.8: # %entry -; MIPS2-NEXT: srlv $1, $5, $15 -; MIPS2-NEXT: not $15, $15 -; MIPS2-NEXT: sllv $15, $12, $15 -; MIPS2-NEXT: or $24, $15, $1 -; MIPS2-NEXT: move $15, $25 -; MIPS2-NEXT: sltiu $25, $2, 64 -; MIPS2-NEXT: beqz $25, $BB5_12 +; MIPS2-NEXT: srlv $1, $5, $24 +; MIPS2-NEXT: not $24, $24 +; MIPS2-NEXT: sllv $24, $13, $24 +; MIPS2-NEXT: or $25, $24, $1 +; MIPS2-NEXT: move $24, $gp +; MIPS2-NEXT: sltiu $gp, $8, 64 +; MIPS2-NEXT: beqz $gp, $BB5_12 ; MIPS2-NEXT: nop ; MIPS2-NEXT: # %bb.9: # %entry ; MIPS2-NEXT: b $BB5_11 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_10: -; MIPS2-NEXT: move $24, $25 -; MIPS2-NEXT: sltiu $25, $2, 64 -; MIPS2-NEXT: beqz $25, $BB5_12 -; MIPS2-NEXT: addiu $15, $zero, 0 +; MIPS2-NEXT: move $25, $gp +; MIPS2-NEXT: sltiu $gp, $8, 64 +; MIPS2-NEXT: beqz $gp, $BB5_12 +; MIPS2-NEXT: addiu $24, $zero, 0 ; MIPS2-NEXT: $BB5_11: -; MIPS2-NEXT: or $24, $14, $13 +; MIPS2-NEXT: or $25, $15, $14 ; MIPS2-NEXT: $BB5_12: # %entry -; MIPS2-NEXT: sltiu $13, $2, 1 -; MIPS2-NEXT: beqz $13, $BB5_19 +; MIPS2-NEXT: sltiu $14, $8, 1 +; MIPS2-NEXT: beqz $14, $BB5_19 ; MIPS2-NEXT: nop ; MIPS2-NEXT: # %bb.13: # %entry -; MIPS2-NEXT: bnez $25, $BB5_20 +; MIPS2-NEXT: bnez $gp, $BB5_20 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_14: # %entry -; MIPS2-NEXT: bnez $13, $BB5_16 -; MIPS2-NEXT: addiu $10, $zero, 63 +; MIPS2-NEXT: bnez $14, $BB5_16 +; MIPS2-NEXT: addiu $11, $zero, 63 ; MIPS2-NEXT: $BB5_15: # %entry -; MIPS2-NEXT: move $6, $15 +; MIPS2-NEXT: move $6, $24 ; MIPS2-NEXT: $BB5_16: # %entry -; MIPS2-NEXT: sltu $10, $10, $2 -; MIPS2-NEXT: bnez $8, $BB5_22 +; MIPS2-NEXT: sltu $8, $11, $8 +; MIPS2-NEXT: bnez $9, $BB5_22 ; MIPS2-NEXT: srlv $11, $4, $2 ; MIPS2-NEXT: # %bb.17: # %entry ; MIPS2-NEXT: srlv $1, $5, $2 -; MIPS2-NEXT: sllv $2, $12, $9 +; MIPS2-NEXT: sllv $2, $13, $10 ; MIPS2-NEXT: or $4, $2, $1 ; MIPS2-NEXT: move $5, $11 -; MIPS2-NEXT: bnez $10, $BB5_24 +; MIPS2-NEXT: bnez $8, $BB5_24 ; MIPS2-NEXT: addiu $2, $zero, 0 ; MIPS2-NEXT: # %bb.18: # %entry ; MIPS2-NEXT: b $BB5_23 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_19: # %entry -; MIPS2-NEXT: beqz $25, $BB5_14 -; MIPS2-NEXT: move $7, $24 +; MIPS2-NEXT: beqz $gp, $BB5_14 +; MIPS2-NEXT: move $7, $25 ; MIPS2-NEXT: $BB5_20: -; MIPS2-NEXT: or $15, $11, $10 -; MIPS2-NEXT: bnez $13, $BB5_16 -; MIPS2-NEXT: addiu $10, $zero, 63 +; MIPS2-NEXT: or $24, $12, $11 +; MIPS2-NEXT: bnez $14, $BB5_16 +; MIPS2-NEXT: addiu $11, $zero, 63 ; MIPS2-NEXT: # %bb.21: ; MIPS2-NEXT: b $BB5_15 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_22: ; MIPS2-NEXT: addiu $5, $zero, 0 ; MIPS2-NEXT: move $4, $11 -; MIPS2-NEXT: bnez $10, $BB5_24 +; MIPS2-NEXT: bnez $8, $BB5_24 ; MIPS2-NEXT: addiu $2, $zero, 0 ; MIPS2-NEXT: $BB5_23: # %entry ; MIPS2-NEXT: move $2, $5 ; MIPS2-NEXT: $BB5_24: # %entry -; MIPS2-NEXT: bnez $10, $BB5_26 +; MIPS2-NEXT: bnez $8, $BB5_26 ; MIPS2-NEXT: nop ; MIPS2-NEXT: # %bb.25: # %entry ; MIPS2-NEXT: move $3, $4 @@ -511,109 +514,127 @@ ; ; MIPS32-LABEL: lshr_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $9, 28($sp) +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 16, -4 +; MIPS32-NEXT: lw $9, 36($sp) ; MIPS32-NEXT: addiu $1, $zero, 64 -; MIPS32-NEXT: subu $2, $1, $9 +; MIPS32-NEXT: subu $1, $1, $9 +; MIPS32-NEXT: andi $2, $1, 63 ; MIPS32-NEXT: sllv $10, $5, $2 -; MIPS32-NEXT: andi $11, $2, 32 +; MIPS32-NEXT: andi $11, $1, 32 +; MIPS32-NEXT: andi $12, $9, 63 ; MIPS32-NEXT: move $1, $10 ; MIPS32-NEXT: movn $1, $zero, $11 -; MIPS32-NEXT: srlv $3, $7, $9 -; MIPS32-NEXT: not $12, $9 +; MIPS32-NEXT: srlv $3, $7, $12 ; MIPS32-NEXT: sll $8, $6, 1 -; MIPS32-NEXT: sllv $8, $8, $12 +; MIPS32-NEXT: not $13, $12 +; MIPS32-NEXT: sllv $8, $8, $13 ; MIPS32-NEXT: or $3, $8, $3 -; MIPS32-NEXT: srlv $13, $6, $9 -; MIPS32-NEXT: andi $14, $9, 32 -; MIPS32-NEXT: movn $3, $13, $14 -; MIPS32-NEXT: addiu $15, $9, -64 +; MIPS32-NEXT: srlv $14, $6, $12 +; MIPS32-NEXT: andi $15, $9, 32 +; MIPS32-NEXT: movn $3, $14, $15 +; MIPS32-NEXT: addiu $24, $9, -64 +; MIPS32-NEXT: andi $8, $24, 63 ; MIPS32-NEXT: or $3, $3, $1 -; MIPS32-NEXT: srlv $1, $5, $15 -; MIPS32-NEXT: sll $24, $4, 1 -; MIPS32-NEXT: not $8, $15 -; MIPS32-NEXT: sllv $8, $24, $8 -; MIPS32-NEXT: or $1, $8, $1 -; MIPS32-NEXT: srlv $8, $4, $15 -; MIPS32-NEXT: andi $15, $15, 32 -; MIPS32-NEXT: movn $1, $8, $15 -; MIPS32-NEXT: sltiu $25, $9, 64 -; MIPS32-NEXT: movn $1, $3, $25 +; MIPS32-NEXT: srlv $1, $5, $8 +; MIPS32-NEXT: sll $25, $4, 1 +; MIPS32-NEXT: not $gp, $8 +; MIPS32-NEXT: sllv $gp, $25, $gp +; MIPS32-NEXT: or $1, $gp, $1 +; MIPS32-NEXT: srlv $8, $4, $8 +; MIPS32-NEXT: andi $24, $24, 32 +; MIPS32-NEXT: movn $1, $8, $24 +; MIPS32-NEXT: sltiu $gp, $9, 64 +; MIPS32-NEXT: movn $1, $3, $gp ; MIPS32-NEXT: sllv $3, $4, $2 ; MIPS32-NEXT: not $2, $2 -; MIPS32-NEXT: srl $gp, $5, 1 -; MIPS32-NEXT: srlv $2, $gp, $2 -; MIPS32-NEXT: or $gp, $3, $2 -; MIPS32-NEXT: srlv $2, $5, $9 -; MIPS32-NEXT: sllv $3, $24, $12 +; MIPS32-NEXT: srl $16, $5, 1 +; MIPS32-NEXT: srlv $2, $16, $2 +; MIPS32-NEXT: or $16, $3, $2 +; MIPS32-NEXT: srlv $2, $5, $12 +; MIPS32-NEXT: sllv $3, $25, $13 ; MIPS32-NEXT: or $3, $3, $2 -; MIPS32-NEXT: srlv $2, $4, $9 -; MIPS32-NEXT: movn $3, $2, $14 +; MIPS32-NEXT: srlv $2, $4, $12 +; MIPS32-NEXT: movn $3, $2, $15 ; MIPS32-NEXT: movz $1, $7, $9 -; MIPS32-NEXT: movz $3, $zero, $25 -; MIPS32-NEXT: movn $gp, $10, $11 -; MIPS32-NEXT: movn $13, $zero, $14 -; MIPS32-NEXT: or $4, $13, $gp -; MIPS32-NEXT: movn $8, $zero, $15 -; MIPS32-NEXT: movn $8, $4, $25 +; MIPS32-NEXT: movz $3, $zero, $gp +; MIPS32-NEXT: movn $16, $10, $11 +; MIPS32-NEXT: movn $14, $zero, $15 +; MIPS32-NEXT: or $4, $14, $16 +; MIPS32-NEXT: movn $8, $zero, $24 +; MIPS32-NEXT: movn $8, $4, $gp ; MIPS32-NEXT: movz $8, $6, $9 -; MIPS32-NEXT: movn $2, $zero, $14 -; MIPS32-NEXT: movz $2, $zero, $25 +; MIPS32-NEXT: movn $2, $zero, $15 +; MIPS32-NEXT: movz $2, $zero, $gp ; MIPS32-NEXT: move $4, $8 -; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: move $5, $1 +; MIPS32-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 8 ; ; MIPS32R2-LABEL: lshr_i128: ; MIPS32R2: # %bb.0: # %entry -; MIPS32R2-NEXT: lw $9, 28($sp) +; MIPS32R2-NEXT: addiu $sp, $sp, -8 +; MIPS32R2-NEXT: .cfi_def_cfa_offset 8 +; MIPS32R2-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; MIPS32R2-NEXT: .cfi_offset 16, -4 +; MIPS32R2-NEXT: lw $9, 36($sp) ; MIPS32R2-NEXT: addiu $1, $zero, 64 -; MIPS32R2-NEXT: subu $2, $1, $9 +; MIPS32R2-NEXT: subu $1, $1, $9 +; MIPS32R2-NEXT: andi $2, $1, 63 ; MIPS32R2-NEXT: sllv $10, $5, $2 -; MIPS32R2-NEXT: andi $11, $2, 32 +; MIPS32R2-NEXT: andi $11, $1, 32 +; MIPS32R2-NEXT: andi $12, $9, 63 ; MIPS32R2-NEXT: move $1, $10 ; MIPS32R2-NEXT: movn $1, $zero, $11 -; MIPS32R2-NEXT: srlv $3, $7, $9 -; MIPS32R2-NEXT: not $12, $9 +; MIPS32R2-NEXT: srlv $3, $7, $12 ; MIPS32R2-NEXT: sll $8, $6, 1 -; MIPS32R2-NEXT: sllv $8, $8, $12 +; MIPS32R2-NEXT: not $13, $12 +; MIPS32R2-NEXT: sllv $8, $8, $13 ; MIPS32R2-NEXT: or $3, $8, $3 -; MIPS32R2-NEXT: srlv $13, $6, $9 -; MIPS32R2-NEXT: andi $14, $9, 32 -; MIPS32R2-NEXT: movn $3, $13, $14 -; MIPS32R2-NEXT: addiu $15, $9, -64 +; MIPS32R2-NEXT: srlv $14, $6, $12 +; MIPS32R2-NEXT: andi $15, $9, 32 +; MIPS32R2-NEXT: movn $3, $14, $15 +; MIPS32R2-NEXT: addiu $24, $9, -64 +; MIPS32R2-NEXT: andi $8, $24, 63 ; MIPS32R2-NEXT: or $3, $3, $1 -; MIPS32R2-NEXT: srlv $1, $5, $15 -; MIPS32R2-NEXT: sll $24, $4, 1 -; MIPS32R2-NEXT: not $8, $15 -; MIPS32R2-NEXT: sllv $8, $24, $8 -; MIPS32R2-NEXT: or $1, $8, $1 -; MIPS32R2-NEXT: srlv $8, $4, $15 -; MIPS32R2-NEXT: andi $15, $15, 32 -; MIPS32R2-NEXT: movn $1, $8, $15 -; MIPS32R2-NEXT: sltiu $25, $9, 64 -; MIPS32R2-NEXT: movn $1, $3, $25 +; MIPS32R2-NEXT: srlv $1, $5, $8 +; MIPS32R2-NEXT: sll $25, $4, 1 +; MIPS32R2-NEXT: not $gp, $8 +; MIPS32R2-NEXT: sllv $gp, $25, $gp +; MIPS32R2-NEXT: or $1, $gp, $1 +; MIPS32R2-NEXT: srlv $8, $4, $8 +; MIPS32R2-NEXT: andi $24, $24, 32 +; MIPS32R2-NEXT: movn $1, $8, $24 +; MIPS32R2-NEXT: sltiu $gp, $9, 64 +; MIPS32R2-NEXT: movn $1, $3, $gp ; MIPS32R2-NEXT: sllv $3, $4, $2 ; MIPS32R2-NEXT: not $2, $2 -; MIPS32R2-NEXT: srl $gp, $5, 1 -; MIPS32R2-NEXT: srlv $2, $gp, $2 -; MIPS32R2-NEXT: or $gp, $3, $2 -; MIPS32R2-NEXT: srlv $2, $5, $9 -; MIPS32R2-NEXT: sllv $3, $24, $12 +; MIPS32R2-NEXT: srl $16, $5, 1 +; MIPS32R2-NEXT: srlv $2, $16, $2 +; MIPS32R2-NEXT: or $16, $3, $2 +; MIPS32R2-NEXT: srlv $2, $5, $12 +; MIPS32R2-NEXT: sllv $3, $25, $13 ; MIPS32R2-NEXT: or $3, $3, $2 -; MIPS32R2-NEXT: srlv $2, $4, $9 -; MIPS32R2-NEXT: movn $3, $2, $14 +; MIPS32R2-NEXT: srlv $2, $4, $12 +; MIPS32R2-NEXT: movn $3, $2, $15 ; MIPS32R2-NEXT: movz $1, $7, $9 -; MIPS32R2-NEXT: movz $3, $zero, $25 -; MIPS32R2-NEXT: movn $gp, $10, $11 -; MIPS32R2-NEXT: movn $13, $zero, $14 -; MIPS32R2-NEXT: or $4, $13, $gp -; MIPS32R2-NEXT: movn $8, $zero, $15 -; MIPS32R2-NEXT: movn $8, $4, $25 +; MIPS32R2-NEXT: movz $3, $zero, $gp +; MIPS32R2-NEXT: movn $16, $10, $11 +; MIPS32R2-NEXT: movn $14, $zero, $15 +; MIPS32R2-NEXT: or $4, $14, $16 +; MIPS32R2-NEXT: movn $8, $zero, $24 +; MIPS32R2-NEXT: movn $8, $4, $gp ; MIPS32R2-NEXT: movz $8, $6, $9 -; MIPS32R2-NEXT: movn $2, $zero, $14 -; MIPS32R2-NEXT: movz $2, $zero, $25 +; MIPS32R2-NEXT: movn $2, $zero, $15 +; MIPS32R2-NEXT: movz $2, $zero, $gp ; MIPS32R2-NEXT: move $4, $8 -; MIPS32R2-NEXT: jr $ra ; MIPS32R2-NEXT: move $5, $1 +; MIPS32R2-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: addiu $sp, $sp, 8 ; ; MIPS32R6-LABEL: lshr_i128: ; MIPS32R6: # %bb.0: # %entry @@ -622,70 +643,73 @@ ; MIPS32R6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill ; MIPS32R6-NEXT: .cfi_offset 16, -4 ; MIPS32R6-NEXT: lw $1, 36($sp) -; MIPS32R6-NEXT: srlv $2, $7, $1 -; MIPS32R6-NEXT: not $3, $1 +; MIPS32R6-NEXT: andi $2, $1, 63 +; MIPS32R6-NEXT: srlv $3, $7, $2 ; MIPS32R6-NEXT: sll $8, $6, 1 -; MIPS32R6-NEXT: sllv $8, $8, $3 -; MIPS32R6-NEXT: or $2, $8, $2 -; MIPS32R6-NEXT: addiu $8, $1, -64 -; MIPS32R6-NEXT: srlv $9, $5, $8 -; MIPS32R6-NEXT: sll $10, $4, 1 -; MIPS32R6-NEXT: not $11, $8 -; MIPS32R6-NEXT: sllv $11, $10, $11 -; MIPS32R6-NEXT: andi $12, $1, 32 -; MIPS32R6-NEXT: seleqz $2, $2, $12 -; MIPS32R6-NEXT: or $9, $11, $9 -; MIPS32R6-NEXT: srlv $11, $6, $1 -; MIPS32R6-NEXT: selnez $13, $11, $12 -; MIPS32R6-NEXT: addiu $14, $zero, 64 -; MIPS32R6-NEXT: subu $14, $14, $1 -; MIPS32R6-NEXT: sllv $15, $5, $14 -; MIPS32R6-NEXT: andi $24, $14, 32 -; MIPS32R6-NEXT: andi $25, $8, 32 -; MIPS32R6-NEXT: seleqz $9, $9, $25 -; MIPS32R6-NEXT: seleqz $gp, $15, $24 -; MIPS32R6-NEXT: or $2, $13, $2 -; MIPS32R6-NEXT: selnez $13, $15, $24 -; MIPS32R6-NEXT: sllv $15, $4, $14 -; MIPS32R6-NEXT: not $14, $14 -; MIPS32R6-NEXT: srl $16, $5, 1 -; MIPS32R6-NEXT: srlv $14, $16, $14 -; MIPS32R6-NEXT: or $14, $15, $14 -; MIPS32R6-NEXT: seleqz $14, $14, $24 -; MIPS32R6-NEXT: srlv $8, $4, $8 -; MIPS32R6-NEXT: or $13, $13, $14 -; MIPS32R6-NEXT: or $2, $2, $gp -; MIPS32R6-NEXT: srlv $5, $5, $1 -; MIPS32R6-NEXT: selnez $14, $8, $25 -; MIPS32R6-NEXT: sltiu $15, $1, 64 -; MIPS32R6-NEXT: selnez $2, $2, $15 -; MIPS32R6-NEXT: or $9, $14, $9 -; MIPS32R6-NEXT: sllv $3, $10, $3 -; MIPS32R6-NEXT: seleqz $10, $11, $12 -; MIPS32R6-NEXT: or $10, $10, $13 -; MIPS32R6-NEXT: or $3, $3, $5 -; MIPS32R6-NEXT: seleqz $5, $9, $15 -; MIPS32R6-NEXT: seleqz $9, $zero, $15 -; MIPS32R6-NEXT: srlv $4, $4, $1 -; MIPS32R6-NEXT: seleqz $11, $4, $12 -; MIPS32R6-NEXT: selnez $11, $11, $15 -; MIPS32R6-NEXT: seleqz $7, $7, $1 -; MIPS32R6-NEXT: or $2, $2, $5 -; MIPS32R6-NEXT: selnez $2, $2, $1 -; MIPS32R6-NEXT: or $5, $7, $2 -; MIPS32R6-NEXT: or $2, $9, $11 -; MIPS32R6-NEXT: seleqz $3, $3, $12 -; MIPS32R6-NEXT: selnez $7, $4, $12 -; MIPS32R6-NEXT: seleqz $4, $6, $1 -; MIPS32R6-NEXT: selnez $6, $10, $15 -; MIPS32R6-NEXT: seleqz $8, $8, $25 -; MIPS32R6-NEXT: seleqz $8, $8, $15 -; MIPS32R6-NEXT: or $6, $6, $8 -; MIPS32R6-NEXT: selnez $1, $6, $1 -; MIPS32R6-NEXT: or $4, $4, $1 +; MIPS32R6-NEXT: not $9, $2 +; MIPS32R6-NEXT: sllv $8, $8, $9 +; MIPS32R6-NEXT: or $3, $8, $3 +; MIPS32R6-NEXT: addiu $8, $zero, 64 +; MIPS32R6-NEXT: subu $8, $8, $1 +; MIPS32R6-NEXT: andi $10, $8, 63 +; MIPS32R6-NEXT: andi $11, $1, 32 +; MIPS32R6-NEXT: seleqz $3, $3, $11 +; MIPS32R6-NEXT: not $12, $10 +; MIPS32R6-NEXT: srl $13, $5, 1 +; MIPS32R6-NEXT: sllv $14, $4, $10 +; MIPS32R6-NEXT: srlv $12, $13, $12 +; MIPS32R6-NEXT: srlv $13, $6, $2 +; MIPS32R6-NEXT: selnez $15, $13, $11 +; MIPS32R6-NEXT: sllv $10, $5, $10 +; MIPS32R6-NEXT: andi $8, $8, 32 +; MIPS32R6-NEXT: seleqz $24, $10, $8 +; MIPS32R6-NEXT: or $3, $15, $3 +; MIPS32R6-NEXT: or $12, $14, $12 +; MIPS32R6-NEXT: selnez $10, $10, $8 +; MIPS32R6-NEXT: seleqz $8, $12, $8 +; MIPS32R6-NEXT: srlv $12, $4, $2 +; MIPS32R6-NEXT: sll $14, $4, 1 +; MIPS32R6-NEXT: addiu $15, $1, -64 +; MIPS32R6-NEXT: andi $25, $15, 63 +; MIPS32R6-NEXT: not $gp, $25 +; MIPS32R6-NEXT: or $3, $3, $24 +; MIPS32R6-NEXT: srlv $24, $5, $25 +; MIPS32R6-NEXT: sllv $gp, $14, $gp +; MIPS32R6-NEXT: seleqz $16, $12, $11 +; MIPS32R6-NEXT: srlv $2, $5, $2 +; MIPS32R6-NEXT: sllv $5, $14, $9 +; MIPS32R6-NEXT: sltiu $9, $1, 64 +; MIPS32R6-NEXT: or $8, $10, $8 +; MIPS32R6-NEXT: or $10, $5, $2 +; MIPS32R6-NEXT: seleqz $14, $zero, $9 +; MIPS32R6-NEXT: selnez $2, $16, $9 +; MIPS32R6-NEXT: seleqz $5, $7, $1 +; MIPS32R6-NEXT: selnez $3, $3, $9 +; MIPS32R6-NEXT: or $7, $gp, $24 +; MIPS32R6-NEXT: andi $15, $15, 32 +; MIPS32R6-NEXT: seleqz $7, $7, $15 +; MIPS32R6-NEXT: srlv $4, $4, $25 +; MIPS32R6-NEXT: selnez $24, $4, $15 +; MIPS32R6-NEXT: or $7, $24, $7 +; MIPS32R6-NEXT: seleqz $7, $7, $9 +; MIPS32R6-NEXT: or $3, $3, $7 +; MIPS32R6-NEXT: selnez $3, $3, $1 +; MIPS32R6-NEXT: or $5, $5, $3 +; MIPS32R6-NEXT: or $2, $14, $2 +; MIPS32R6-NEXT: seleqz $3, $10, $11 +; MIPS32R6-NEXT: selnez $7, $12, $11 +; MIPS32R6-NEXT: seleqz $6, $6, $1 +; MIPS32R6-NEXT: seleqz $10, $13, $11 +; MIPS32R6-NEXT: or $8, $10, $8 +; MIPS32R6-NEXT: selnez $8, $8, $9 +; MIPS32R6-NEXT: seleqz $4, $4, $15 +; MIPS32R6-NEXT: seleqz $4, $4, $9 +; MIPS32R6-NEXT: or $4, $8, $4 +; MIPS32R6-NEXT: selnez $1, $4, $1 +; MIPS32R6-NEXT: or $4, $6, $1 ; MIPS32R6-NEXT: or $1, $7, $3 -; MIPS32R6-NEXT: selnez $1, $1, $15 -; MIPS32R6-NEXT: or $3, $9, $1 +; MIPS32R6-NEXT: selnez $1, $1, $9 +; MIPS32R6-NEXT: or $3, $14, $1 ; MIPS32R6-NEXT: lw $16, 4($sp) # 4-byte Folded Reload ; MIPS32R6-NEXT: jr $ra ; MIPS32R6-NEXT: addiu $sp, $sp, 8 @@ -770,183 +794,194 @@ ; ; MMR3-LABEL: lshr_i128: ; MMR3: # %bb.0: # %entry -; MMR3-NEXT: addiusp -40 -; MMR3-NEXT: .cfi_def_cfa_offset 40 -; MMR3-NEXT: swp $16, 32($sp) +; MMR3-NEXT: addiusp -48 +; MMR3-NEXT: .cfi_def_cfa_offset 48 +; MMR3-NEXT: swp $16, 40($sp) ; MMR3-NEXT: .cfi_offset 17, -4 ; MMR3-NEXT: .cfi_offset 16, -8 ; MMR3-NEXT: move $8, $7 -; MMR3-NEXT: sw $6, 24($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $4, 28($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $16, 68($sp) +; MMR3-NEXT: sw $6, 28($sp) # 4-byte Folded Spill +; MMR3-NEXT: swp $4, 32($sp) +; MMR3-NEXT: lw $16, 76($sp) ; MMR3-NEXT: li16 $2, 64 -; MMR3-NEXT: subu16 $7, $2, $16 -; MMR3-NEXT: sllv $9, $5, $7 -; MMR3-NEXT: move $17, $5 -; MMR3-NEXT: sw $5, 0($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $3, $7, 32 +; MMR3-NEXT: subu16 $2, $2, $16 +; MMR3-NEXT: andi16 $3, $2, 63 ; MMR3-NEXT: sw $3, 20($sp) # 4-byte Folded Spill +; MMR3-NEXT: sllv $9, $5, $3 +; MMR3-NEXT: andi16 $4, $2, 32 +; MMR3-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $7, $16, 63 +; MMR3-NEXT: move $17, $16 ; MMR3-NEXT: li16 $2, 0 -; MMR3-NEXT: move $4, $9 -; MMR3-NEXT: movn $4, $2, $3 -; MMR3-NEXT: srlv $5, $8, $16 -; MMR3-NEXT: not16 $3, $16 -; MMR3-NEXT: sw $3, 16($sp) # 4-byte Folded Spill +; MMR3-NEXT: move $3, $9 +; MMR3-NEXT: movn $3, $2, $4 +; MMR3-NEXT: srlv $5, $8, $7 ; MMR3-NEXT: sll16 $2, $6, 1 -; MMR3-NEXT: sllv $2, $2, $3 +; MMR3-NEXT: not16 $4, $7 +; MMR3-NEXT: sw $4, 16($sp) # 4-byte Folded Spill +; MMR3-NEXT: sllv $2, $2, $4 ; MMR3-NEXT: or16 $2, $5 -; MMR3-NEXT: srlv $5, $6, $16 -; MMR3-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $3, $16, 32 -; MMR3-NEXT: sw $3, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $2, $5, $3 -; MMR3-NEXT: addiu $3, $16, -64 -; MMR3-NEXT: or16 $2, $4 -; MMR3-NEXT: srlv $4, $17, $3 -; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $4, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: sll16 $6, $4, 1 -; MMR3-NEXT: not16 $5, $3 -; MMR3-NEXT: sllv $5, $6, $5 -; MMR3-NEXT: lw $17, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $5, $17 -; MMR3-NEXT: srlv $1, $4, $3 -; MMR3-NEXT: andi16 $3, $3, 32 +; MMR3-NEXT: srlv $16, $6, $7 +; MMR3-NEXT: sw $17, 0($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $4, $17, 32 +; MMR3-NEXT: sw $4, 12($sp) # 4-byte Folded Spill +; MMR3-NEXT: movn $2, $16, $4 +; MMR3-NEXT: addiu $17, $17, -64 +; MMR3-NEXT: andi16 $4, $17, 63 +; MMR3-NEXT: or16 $2, $3 +; MMR3-NEXT: lw $3, 36($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $3, $3, $4 ; MMR3-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $5, $1, $3 -; MMR3-NEXT: sltiu $10, $16, 64 +; MMR3-NEXT: lw $3, 32($sp) # 4-byte Folded Reload +; MMR3-NEXT: sll16 $6, $3, 1 +; MMR3-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MMR3-NEXT: not16 $5, $4 +; MMR3-NEXT: sllv $5, $6, $5 +; MMR3-NEXT: lw $6, 8($sp) # 4-byte Folded Reload +; MMR3-NEXT: or16 $5, $6 +; MMR3-NEXT: srlv $1, $3, $4 +; MMR3-NEXT: andi16 $4, $17, 32 +; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill +; MMR3-NEXT: movn $5, $1, $4 +; MMR3-NEXT: lw $6, 0($sp) # 4-byte Folded Reload +; MMR3-NEXT: sltiu $10, $6, 64 ; MMR3-NEXT: movn $5, $2, $10 -; MMR3-NEXT: sllv $2, $4, $7 -; MMR3-NEXT: not16 $3, $7 -; MMR3-NEXT: lw $7, 0($sp) # 4-byte Folded Reload -; MMR3-NEXT: srl16 $4, $7, 1 +; MMR3-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $17, $3, $2 +; MMR3-NEXT: not16 $3, $2 +; MMR3-NEXT: lw $2, 36($sp) # 4-byte Folded Reload +; MMR3-NEXT: srl16 $4, $2, 1 ; MMR3-NEXT: srlv $4, $4, $3 -; MMR3-NEXT: or16 $4, $2 -; MMR3-NEXT: srlv $2, $7, $16 +; MMR3-NEXT: or16 $4, $17 +; MMR3-NEXT: srlv $2, $2, $7 ; MMR3-NEXT: lw $3, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: sllv $3, $6, $3 +; MMR3-NEXT: lw $17, 4($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $3, $17, $3 ; MMR3-NEXT: or16 $3, $2 -; MMR3-NEXT: lw $2, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: srlv $2, $2, $16 +; MMR3-NEXT: lw $2, 32($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $2, $2, $7 ; MMR3-NEXT: lw $17, 12($sp) # 4-byte Folded Reload ; MMR3-NEXT: movn $3, $2, $17 -; MMR3-NEXT: movz $5, $8, $16 -; MMR3-NEXT: li16 $6, 0 -; MMR3-NEXT: movz $3, $6, $10 -; MMR3-NEXT: lw $7, 20($sp) # 4-byte Folded Reload +; MMR3-NEXT: movz $5, $8, $6 +; MMR3-NEXT: li16 $7, 0 +; MMR3-NEXT: movz $3, $7, $10 +; MMR3-NEXT: lw $7, 24($sp) # 4-byte Folded Reload ; MMR3-NEXT: movn $4, $9, $7 -; MMR3-NEXT: lw $6, 4($sp) # 4-byte Folded Reload ; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $6, $7, $17 -; MMR3-NEXT: or16 $6, $4 +; MMR3-NEXT: movn $16, $7, $17 +; MMR3-NEXT: or16 $16, $4 ; MMR3-NEXT: lw $4, 8($sp) # 4-byte Folded Reload ; MMR3-NEXT: movn $1, $7, $4 ; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $1, $6, $10 -; MMR3-NEXT: lw $4, 24($sp) # 4-byte Folded Reload -; MMR3-NEXT: movz $1, $4, $16 +; MMR3-NEXT: movn $1, $16, $10 +; MMR3-NEXT: lw $4, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: movz $1, $4, $6 ; MMR3-NEXT: movn $2, $7, $17 ; MMR3-NEXT: li16 $4, 0 ; MMR3-NEXT: movz $2, $4, $10 ; MMR3-NEXT: move $4, $1 -; MMR3-NEXT: lwp $16, 32($sp) -; MMR3-NEXT: addiusp 40 +; MMR3-NEXT: lwp $16, 40($sp) +; MMR3-NEXT: addiusp 48 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: lshr_i128: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: addiu $sp, $sp, -32 -; MMR6-NEXT: .cfi_def_cfa_offset 32 -; MMR6-NEXT: sw $17, 28($sp) # 4-byte Folded Spill -; MMR6-NEXT: sw $16, 24($sp) # 4-byte Folded Spill +; MMR6-NEXT: addiu $sp, $sp, -40 +; MMR6-NEXT: .cfi_def_cfa_offset 40 +; MMR6-NEXT: sw $17, 36($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $16, 32($sp) # 4-byte Folded Spill ; MMR6-NEXT: .cfi_offset 17, -4 ; MMR6-NEXT: .cfi_offset 16, -8 ; MMR6-NEXT: move $1, $7 -; MMR6-NEXT: move $7, $5 -; MMR6-NEXT: lw $3, 60($sp) -; MMR6-NEXT: srlv $2, $1, $3 -; MMR6-NEXT: not16 $5, $3 -; MMR6-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MMR6-NEXT: move $17, $6 -; MMR6-NEXT: sw $6, 16($sp) # 4-byte Folded Spill -; MMR6-NEXT: sll16 $6, $6, 1 -; MMR6-NEXT: sllv $6, $6, $5 -; MMR6-NEXT: or $8, $6, $2 -; MMR6-NEXT: addiu $5, $3, -64 -; MMR6-NEXT: srlv $9, $7, $5 -; MMR6-NEXT: move $6, $4 -; MMR6-NEXT: sll16 $2, $4, 1 -; MMR6-NEXT: sw $2, 8($sp) # 4-byte Folded Spill -; MMR6-NEXT: not16 $16, $5 -; MMR6-NEXT: sllv $10, $2, $16 +; MMR6-NEXT: sw $6, 28($sp) # 4-byte Folded Spill +; MMR6-NEXT: move $17, $5 +; MMR6-NEXT: move $5, $4 +; MMR6-NEXT: lw $3, 68($sp) +; MMR6-NEXT: andi16 $2, $3, 63 +; MMR6-NEXT: srlv $4, $7, $2 +; MMR6-NEXT: sll16 $7, $6, 1 +; MMR6-NEXT: not16 $16, $2 +; MMR6-NEXT: sw $16, 24($sp) # 4-byte Folded Spill +; MMR6-NEXT: sllv $7, $7, $16 +; MMR6-NEXT: or $8, $7, $4 +; MMR6-NEXT: li16 $7, 64 +; MMR6-NEXT: subu16 $4, $7, $3 +; MMR6-NEXT: sw $4, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: andi16 $4, $4, 63 ; MMR6-NEXT: andi16 $16, $3, 32 ; MMR6-NEXT: seleqz $8, $8, $16 -; MMR6-NEXT: or $9, $10, $9 -; MMR6-NEXT: srlv $10, $17, $3 +; MMR6-NEXT: not16 $7, $4 +; MMR6-NEXT: sw $7, 16($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $17, 12($sp) # 4-byte Folded Spill +; MMR6-NEXT: srl16 $7, $17, 1 +; MMR6-NEXT: sw $7, 8($sp) # 4-byte Folded Spill +; MMR6-NEXT: sllv $9, $5, $4 +; MMR6-NEXT: lw $7, 16($sp) # 4-byte Folded Reload +; MMR6-NEXT: lw $6, 8($sp) # 4-byte Folded Reload +; MMR6-NEXT: srlv $13, $6, $7 +; MMR6-NEXT: lw $7, 28($sp) # 4-byte Folded Reload +; MMR6-NEXT: srlv $10, $7, $2 ; MMR6-NEXT: selnez $11, $10, $16 -; MMR6-NEXT: li16 $17, 64 -; MMR6-NEXT: subu16 $2, $17, $3 -; MMR6-NEXT: sllv $12, $7, $2 -; MMR6-NEXT: move $17, $7 -; MMR6-NEXT: andi16 $4, $2, 32 -; MMR6-NEXT: andi16 $7, $5, 32 -; MMR6-NEXT: sw $7, 20($sp) # 4-byte Folded Spill -; MMR6-NEXT: seleqz $9, $9, $7 -; MMR6-NEXT: seleqz $13, $12, $4 +; MMR6-NEXT: sllv $4, $17, $4 +; MMR6-NEXT: lw $6, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: andi16 $7, $6, 32 +; MMR6-NEXT: seleqz $12, $4, $7 ; MMR6-NEXT: or $8, $11, $8 -; MMR6-NEXT: selnez $11, $12, $4 -; MMR6-NEXT: sllv $12, $6, $2 -; MMR6-NEXT: move $7, $6 -; MMR6-NEXT: sw $6, 4($sp) # 4-byte Folded Spill -; MMR6-NEXT: not16 $2, $2 -; MMR6-NEXT: srl16 $6, $17, 1 +; MMR6-NEXT: or $11, $9, $13 +; MMR6-NEXT: selnez $9, $4, $7 +; MMR6-NEXT: seleqz $13, $11, $7 +; MMR6-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MMR6-NEXT: srlv $11, $5, $2 +; MMR6-NEXT: sll16 $17, $5, 1 +; MMR6-NEXT: addiu $4, $3, -64 +; MMR6-NEXT: andi16 $7, $4, 63 +; MMR6-NEXT: not16 $5, $7 +; MMR6-NEXT: or $8, $8, $12 +; MMR6-NEXT: lw $6, 12($sp) # 4-byte Folded Reload +; MMR6-NEXT: srlv $12, $6, $7 +; MMR6-NEXT: sllv $25, $17, $5 +; MMR6-NEXT: seleqz $14, $11, $16 ; MMR6-NEXT: srlv $2, $6, $2 -; MMR6-NEXT: or $2, $12, $2 -; MMR6-NEXT: seleqz $2, $2, $4 -; MMR6-NEXT: srlv $4, $7, $5 -; MMR6-NEXT: or $11, $11, $2 -; MMR6-NEXT: or $5, $8, $13 -; MMR6-NEXT: srlv $6, $17, $3 -; MMR6-NEXT: lw $2, 20($sp) # 4-byte Folded Reload -; MMR6-NEXT: selnez $7, $4, $2 -; MMR6-NEXT: sltiu $8, $3, 64 -; MMR6-NEXT: selnez $12, $5, $8 -; MMR6-NEXT: or $7, $7, $9 -; MMR6-NEXT: lw $5, 12($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MMR6-NEXT: sllv $9, $2, $5 -; MMR6-NEXT: seleqz $10, $10, $16 -; MMR6-NEXT: li16 $5, 0 -; MMR6-NEXT: or $10, $10, $11 -; MMR6-NEXT: or $6, $9, $6 -; MMR6-NEXT: seleqz $2, $7, $8 -; MMR6-NEXT: seleqz $7, $5, $8 -; MMR6-NEXT: lw $5, 4($sp) # 4-byte Folded Reload -; MMR6-NEXT: srlv $9, $5, $3 -; MMR6-NEXT: seleqz $11, $9, $16 -; MMR6-NEXT: selnez $11, $11, $8 +; MMR6-NEXT: lw $5, 24($sp) # 4-byte Folded Reload +; MMR6-NEXT: sllv $5, $17, $5 +; MMR6-NEXT: sltiu $15, $3, 64 +; MMR6-NEXT: li16 $17, 0 +; MMR6-NEXT: or $9, $9, $13 +; MMR6-NEXT: or $13, $5, $2 +; MMR6-NEXT: seleqz $24, $17, $15 +; MMR6-NEXT: selnez $2, $14, $15 ; MMR6-NEXT: seleqz $1, $1, $3 -; MMR6-NEXT: or $2, $12, $2 -; MMR6-NEXT: selnez $2, $2, $3 -; MMR6-NEXT: or $5, $1, $2 -; MMR6-NEXT: or $2, $7, $11 -; MMR6-NEXT: seleqz $1, $6, $16 -; MMR6-NEXT: selnez $6, $9, $16 -; MMR6-NEXT: lw $16, 16($sp) # 4-byte Folded Reload -; MMR6-NEXT: seleqz $9, $16, $3 -; MMR6-NEXT: selnez $10, $10, $8 -; MMR6-NEXT: lw $16, 20($sp) # 4-byte Folded Reload -; MMR6-NEXT: seleqz $4, $4, $16 -; MMR6-NEXT: seleqz $4, $4, $8 -; MMR6-NEXT: or $4, $10, $4 +; MMR6-NEXT: selnez $5, $8, $15 +; MMR6-NEXT: or $6, $25, $12 +; MMR6-NEXT: andi16 $4, $4, 32 +; MMR6-NEXT: seleqz $6, $6, $4 +; MMR6-NEXT: lw $17, 20($sp) # 4-byte Folded Reload +; MMR6-NEXT: srlv $7, $17, $7 +; MMR6-NEXT: selnez $8, $7, $4 +; MMR6-NEXT: or $6, $8, $6 +; MMR6-NEXT: seleqz $6, $6, $15 +; MMR6-NEXT: or $5, $5, $6 +; MMR6-NEXT: selnez $5, $5, $3 +; MMR6-NEXT: or $5, $1, $5 +; MMR6-NEXT: or $2, $24, $2 +; MMR6-NEXT: seleqz $1, $13, $16 +; MMR6-NEXT: selnez $6, $11, $16 +; MMR6-NEXT: lw $17, 28($sp) # 4-byte Folded Reload +; MMR6-NEXT: seleqz $8, $17, $3 +; MMR6-NEXT: seleqz $10, $10, $16 +; MMR6-NEXT: or $9, $10, $9 +; MMR6-NEXT: selnez $9, $9, $15 +; MMR6-NEXT: seleqz $4, $7, $4 +; MMR6-NEXT: seleqz $4, $4, $15 +; MMR6-NEXT: or $4, $9, $4 ; MMR6-NEXT: selnez $3, $4, $3 -; MMR6-NEXT: or $4, $9, $3 +; MMR6-NEXT: or $4, $8, $3 ; MMR6-NEXT: or $1, $6, $1 -; MMR6-NEXT: selnez $1, $1, $8 -; MMR6-NEXT: or $3, $7, $1 -; MMR6-NEXT: lw $16, 24($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $17, 28($sp) # 4-byte Folded Reload -; MMR6-NEXT: addiu $sp, $sp, 32 +; MMR6-NEXT: selnez $1, $1, $15 +; MMR6-NEXT: or $3, $24, $1 +; MMR6-NEXT: lw $16, 32($sp) # 4-byte Folded Reload +; MMR6-NEXT: lw $17, 36($sp) # 4-byte Folded Reload +; MMR6-NEXT: addiu $sp, $sp, 40 ; MMR6-NEXT: jrc $ra entry: diff --git a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll @@ -456,307 +456,344 @@ define signext i128 @shl_i128(i128 signext %a, i128 signext %b) { ; MIPS2-LABEL: shl_i128: ; MIPS2: # %bb.0: # %entry -; MIPS2-NEXT: addiu $sp, $sp, -8 -; MIPS2-NEXT: .cfi_def_cfa_offset 8 +; MIPS2-NEXT: addiu $sp, $sp, -16 +; MIPS2-NEXT: .cfi_def_cfa_offset 16 +; MIPS2-NEXT: sw $19, 12($sp) # 4-byte Folded Spill +; MIPS2-NEXT: sw $18, 8($sp) # 4-byte Folded Spill ; MIPS2-NEXT: sw $17, 4($sp) # 4-byte Folded Spill ; MIPS2-NEXT: sw $16, 0($sp) # 4-byte Folded Spill -; MIPS2-NEXT: .cfi_offset 17, -4 -; MIPS2-NEXT: .cfi_offset 16, -8 -; MIPS2-NEXT: lw $8, 36($sp) +; MIPS2-NEXT: .cfi_offset 19, -4 +; MIPS2-NEXT: .cfi_offset 18, -8 +; MIPS2-NEXT: .cfi_offset 17, -12 +; MIPS2-NEXT: .cfi_offset 16, -16 +; MIPS2-NEXT: lw $11, 44($sp) ; MIPS2-NEXT: addiu $1, $zero, 64 -; MIPS2-NEXT: subu $3, $1, $8 +; MIPS2-NEXT: subu $1, $1, $11 +; MIPS2-NEXT: andi $3, $1, 63 +; MIPS2-NEXT: addiu $13, $11, -64 +; MIPS2-NEXT: andi $8, $11, 63 ; MIPS2-NEXT: srlv $9, $6, $3 -; MIPS2-NEXT: andi $1, $3, 32 +; MIPS2-NEXT: andi $1, $1, 32 ; MIPS2-NEXT: bnez $1, $BB5_2 ; MIPS2-NEXT: addiu $2, $zero, 0 ; MIPS2-NEXT: # %bb.1: # %entry -; MIPS2-NEXT: srlv $1, $7, $3 -; MIPS2-NEXT: not $3, $3 +; MIPS2-NEXT: not $1, $3 ; MIPS2-NEXT: sll $10, $6, 1 -; MIPS2-NEXT: sllv $3, $10, $3 -; MIPS2-NEXT: or $3, $3, $1 +; MIPS2-NEXT: srlv $3, $7, $3 +; MIPS2-NEXT: sllv $1, $10, $1 +; MIPS2-NEXT: or $3, $1, $3 ; MIPS2-NEXT: b $BB5_3 -; MIPS2-NEXT: move $15, $9 +; MIPS2-NEXT: move $gp, $9 ; MIPS2-NEXT: $BB5_2: -; MIPS2-NEXT: addiu $15, $zero, 0 +; MIPS2-NEXT: addiu $gp, $zero, 0 ; MIPS2-NEXT: move $3, $9 ; MIPS2-NEXT: $BB5_3: # %entry -; MIPS2-NEXT: not $13, $8 +; MIPS2-NEXT: andi $15, $13, 63 +; MIPS2-NEXT: not $14, $8 ; MIPS2-NEXT: sllv $9, $5, $8 -; MIPS2-NEXT: andi $10, $8, 32 +; MIPS2-NEXT: andi $10, $11, 32 ; MIPS2-NEXT: bnez $10, $BB5_5 -; MIPS2-NEXT: move $25, $9 +; MIPS2-NEXT: move $17, $9 ; MIPS2-NEXT: # %bb.4: # %entry -; MIPS2-NEXT: sllv $1, $4, $8 -; MIPS2-NEXT: srl $11, $5, 1 -; MIPS2-NEXT: srlv $11, $11, $13 -; MIPS2-NEXT: or $25, $1, $11 +; MIPS2-NEXT: srl $1, $5, 1 +; MIPS2-NEXT: sllv $12, $4, $8 +; MIPS2-NEXT: srlv $1, $1, $14 +; MIPS2-NEXT: or $17, $12, $1 ; MIPS2-NEXT: $BB5_5: # %entry -; MIPS2-NEXT: addiu $14, $8, -64 -; MIPS2-NEXT: srl $24, $7, 1 -; MIPS2-NEXT: sllv $11, $7, $14 -; MIPS2-NEXT: andi $12, $14, 32 -; MIPS2-NEXT: bnez $12, $BB5_7 -; MIPS2-NEXT: move $gp, $11 +; MIPS2-NEXT: srl $25, $7, 1 +; MIPS2-NEXT: sllv $12, $7, $15 +; MIPS2-NEXT: andi $13, $13, 32 +; MIPS2-NEXT: bnez $13, $BB5_7 +; MIPS2-NEXT: move $18, $12 ; MIPS2-NEXT: # %bb.6: # %entry -; MIPS2-NEXT: sllv $1, $6, $14 -; MIPS2-NEXT: not $14, $14 -; MIPS2-NEXT: srlv $14, $24, $14 -; MIPS2-NEXT: or $gp, $1, $14 +; MIPS2-NEXT: not $1, $15 +; MIPS2-NEXT: sllv $15, $6, $15 +; MIPS2-NEXT: srlv $1, $25, $1 +; MIPS2-NEXT: or $18, $15, $1 ; MIPS2-NEXT: $BB5_7: # %entry -; MIPS2-NEXT: sltiu $14, $8, 64 -; MIPS2-NEXT: beqz $14, $BB5_9 -; MIPS2-NEXT: nop -; MIPS2-NEXT: # %bb.8: -; MIPS2-NEXT: or $gp, $25, $15 +; MIPS2-NEXT: sllv $16, $7, $8 +; MIPS2-NEXT: bnez $10, $BB5_9 +; MIPS2-NEXT: addiu $19, $zero, 0 +; MIPS2-NEXT: # %bb.8: # %entry +; MIPS2-NEXT: move $19, $16 ; MIPS2-NEXT: $BB5_9: # %entry -; MIPS2-NEXT: sllv $25, $7, $8 -; MIPS2-NEXT: bnez $10, $BB5_11 -; MIPS2-NEXT: addiu $17, $zero, 0 +; MIPS2-NEXT: addiu $7, $zero, 63 +; MIPS2-NEXT: sltiu $24, $11, 64 +; MIPS2-NEXT: bnez $24, $BB5_20 +; MIPS2-NEXT: sltiu $15, $11, 1 ; MIPS2-NEXT: # %bb.10: # %entry -; MIPS2-NEXT: move $17, $25 -; MIPS2-NEXT: $BB5_11: # %entry -; MIPS2-NEXT: addiu $1, $zero, 63 -; MIPS2-NEXT: sltiu $15, $8, 1 ; MIPS2-NEXT: beqz $15, $BB5_21 -; MIPS2-NEXT: sltu $16, $1, $8 -; MIPS2-NEXT: # %bb.12: # %entry -; MIPS2-NEXT: beqz $16, $BB5_22 +; MIPS2-NEXT: sltu $11, $7, $11 +; MIPS2-NEXT: $BB5_11: # %entry +; MIPS2-NEXT: beqz $11, $BB5_22 ; MIPS2-NEXT: addiu $7, $zero, 0 -; MIPS2-NEXT: $BB5_13: # %entry +; MIPS2-NEXT: $BB5_12: # %entry ; MIPS2-NEXT: beqz $10, $BB5_23 ; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_14: # %entry -; MIPS2-NEXT: beqz $16, $BB5_24 +; MIPS2-NEXT: $BB5_13: # %entry +; MIPS2-NEXT: beqz $11, $BB5_24 ; MIPS2-NEXT: addiu $6, $zero, 0 -; MIPS2-NEXT: $BB5_15: # %entry +; MIPS2-NEXT: $BB5_14: # %entry ; MIPS2-NEXT: beqz $10, $BB5_25 ; MIPS2-NEXT: addiu $8, $zero, 0 +; MIPS2-NEXT: $BB5_15: # %entry +; MIPS2-NEXT: beqz $13, $BB5_26 +; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_16: # %entry -; MIPS2-NEXT: beqz $12, $BB5_26 +; MIPS2-NEXT: bnez $24, $BB5_27 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_17: # %entry -; MIPS2-NEXT: bnez $14, $BB5_27 +; MIPS2-NEXT: bnez $15, $BB5_19 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_18: # %entry -; MIPS2-NEXT: bnez $15, $BB5_20 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_19: # %entry ; MIPS2-NEXT: move $5, $2 -; MIPS2-NEXT: $BB5_20: # %entry +; MIPS2-NEXT: $BB5_19: # %entry ; MIPS2-NEXT: move $2, $4 ; MIPS2-NEXT: move $3, $5 ; MIPS2-NEXT: move $4, $6 ; MIPS2-NEXT: move $5, $7 ; MIPS2-NEXT: lw $16, 0($sp) # 4-byte Folded Reload ; MIPS2-NEXT: lw $17, 4($sp) # 4-byte Folded Reload +; MIPS2-NEXT: lw $18, 8($sp) # 4-byte Folded Reload +; MIPS2-NEXT: lw $19, 12($sp) # 4-byte Folded Reload ; MIPS2-NEXT: jr $ra -; MIPS2-NEXT: addiu $sp, $sp, 8 +; MIPS2-NEXT: addiu $sp, $sp, 16 +; MIPS2-NEXT: $BB5_20: +; MIPS2-NEXT: or $18, $17, $gp +; MIPS2-NEXT: bnez $15, $BB5_11 +; MIPS2-NEXT: sltu $11, $7, $11 ; MIPS2-NEXT: $BB5_21: # %entry -; MIPS2-NEXT: move $4, $gp -; MIPS2-NEXT: bnez $16, $BB5_13 +; MIPS2-NEXT: move $4, $18 +; MIPS2-NEXT: bnez $11, $BB5_12 ; MIPS2-NEXT: addiu $7, $zero, 0 ; MIPS2-NEXT: $BB5_22: # %entry -; MIPS2-NEXT: bnez $10, $BB5_14 -; MIPS2-NEXT: move $7, $17 +; MIPS2-NEXT: bnez $10, $BB5_13 +; MIPS2-NEXT: move $7, $19 ; MIPS2-NEXT: $BB5_23: # %entry ; MIPS2-NEXT: sllv $1, $6, $8 -; MIPS2-NEXT: srlv $6, $24, $13 -; MIPS2-NEXT: or $25, $1, $6 -; MIPS2-NEXT: bnez $16, $BB5_15 +; MIPS2-NEXT: srlv $6, $25, $14 +; MIPS2-NEXT: or $16, $1, $6 +; MIPS2-NEXT: bnez $11, $BB5_14 ; MIPS2-NEXT: addiu $6, $zero, 0 ; MIPS2-NEXT: $BB5_24: # %entry -; MIPS2-NEXT: move $6, $25 -; MIPS2-NEXT: bnez $10, $BB5_16 +; MIPS2-NEXT: move $6, $16 +; MIPS2-NEXT: bnez $10, $BB5_15 ; MIPS2-NEXT: addiu $8, $zero, 0 ; MIPS2-NEXT: $BB5_25: # %entry -; MIPS2-NEXT: bnez $12, $BB5_17 +; MIPS2-NEXT: bnez $13, $BB5_16 ; MIPS2-NEXT: move $8, $9 ; MIPS2-NEXT: $BB5_26: # %entry -; MIPS2-NEXT: beqz $14, $BB5_18 -; MIPS2-NEXT: move $2, $11 +; MIPS2-NEXT: beqz $24, $BB5_17 +; MIPS2-NEXT: move $2, $12 ; MIPS2-NEXT: $BB5_27: -; MIPS2-NEXT: bnez $15, $BB5_20 +; MIPS2-NEXT: bnez $15, $BB5_19 ; MIPS2-NEXT: or $2, $8, $3 ; MIPS2-NEXT: # %bb.28: -; MIPS2-NEXT: b $BB5_19 +; MIPS2-NEXT: b $BB5_18 ; MIPS2-NEXT: nop ; ; MIPS32-LABEL: shl_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $8, 28($sp) +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 16, -4 +; MIPS32-NEXT: lw $8, 36($sp) ; MIPS32-NEXT: addiu $1, $zero, 64 ; MIPS32-NEXT: subu $1, $1, $8 -; MIPS32-NEXT: srlv $9, $6, $1 -; MIPS32-NEXT: andi $10, $1, 32 -; MIPS32-NEXT: move $2, $9 -; MIPS32-NEXT: movn $2, $zero, $10 -; MIPS32-NEXT: sllv $3, $4, $8 -; MIPS32-NEXT: not $11, $8 -; MIPS32-NEXT: srl $12, $5, 1 -; MIPS32-NEXT: srlv $12, $12, $11 -; MIPS32-NEXT: or $3, $3, $12 -; MIPS32-NEXT: sllv $12, $5, $8 -; MIPS32-NEXT: andi $13, $8, 32 -; MIPS32-NEXT: movn $3, $12, $13 -; MIPS32-NEXT: addiu $14, $8, -64 -; MIPS32-NEXT: or $15, $3, $2 -; MIPS32-NEXT: sllv $2, $6, $14 -; MIPS32-NEXT: srl $24, $7, 1 -; MIPS32-NEXT: not $3, $14 -; MIPS32-NEXT: srlv $3, $24, $3 +; MIPS32-NEXT: andi $9, $1, 63 +; MIPS32-NEXT: srlv $10, $6, $9 +; MIPS32-NEXT: andi $11, $1, 32 +; MIPS32-NEXT: andi $12, $8, 63 +; MIPS32-NEXT: move $1, $10 +; MIPS32-NEXT: movn $1, $zero, $11 +; MIPS32-NEXT: sllv $2, $4, $12 +; MIPS32-NEXT: srl $3, $5, 1 +; MIPS32-NEXT: not $13, $12 +; MIPS32-NEXT: srlv $3, $3, $13 ; MIPS32-NEXT: or $2, $2, $3 -; MIPS32-NEXT: sllv $3, $7, $14 -; MIPS32-NEXT: andi $14, $14, 32 -; MIPS32-NEXT: movn $2, $3, $14 -; MIPS32-NEXT: sltiu $25, $8, 64 -; MIPS32-NEXT: movn $2, $15, $25 -; MIPS32-NEXT: srlv $15, $7, $1 -; MIPS32-NEXT: not $1, $1 -; MIPS32-NEXT: sll $gp, $6, 1 -; MIPS32-NEXT: sllv $1, $gp, $1 -; MIPS32-NEXT: or $15, $1, $15 -; MIPS32-NEXT: sllv $1, $6, $8 -; MIPS32-NEXT: srlv $6, $24, $11 +; MIPS32-NEXT: sllv $14, $5, $12 +; MIPS32-NEXT: andi $15, $8, 32 +; MIPS32-NEXT: movn $2, $14, $15 +; MIPS32-NEXT: addiu $24, $8, -64 +; MIPS32-NEXT: andi $3, $24, 63 +; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: sllv $2, $6, $3 +; MIPS32-NEXT: srl $25, $7, 1 +; MIPS32-NEXT: not $gp, $3 +; MIPS32-NEXT: srlv $gp, $25, $gp +; MIPS32-NEXT: or $2, $2, $gp +; MIPS32-NEXT: sllv $3, $7, $3 +; MIPS32-NEXT: andi $24, $24, 32 +; MIPS32-NEXT: movn $2, $3, $24 +; MIPS32-NEXT: sltiu $gp, $8, 64 +; MIPS32-NEXT: movn $2, $1, $gp +; MIPS32-NEXT: srlv $1, $7, $9 +; MIPS32-NEXT: not $9, $9 +; MIPS32-NEXT: sll $16, $6, 1 +; MIPS32-NEXT: sllv $9, $16, $9 +; MIPS32-NEXT: or $9, $9, $1 +; MIPS32-NEXT: sllv $1, $6, $12 +; MIPS32-NEXT: srlv $6, $25, $13 ; MIPS32-NEXT: or $1, $1, $6 -; MIPS32-NEXT: sllv $6, $7, $8 -; MIPS32-NEXT: movn $1, $6, $13 +; MIPS32-NEXT: sllv $6, $7, $12 +; MIPS32-NEXT: movn $1, $6, $15 ; MIPS32-NEXT: movz $2, $4, $8 -; MIPS32-NEXT: movz $1, $zero, $25 -; MIPS32-NEXT: movn $15, $9, $10 -; MIPS32-NEXT: movn $12, $zero, $13 -; MIPS32-NEXT: or $4, $12, $15 -; MIPS32-NEXT: movn $3, $zero, $14 -; MIPS32-NEXT: movn $3, $4, $25 +; MIPS32-NEXT: movz $1, $zero, $gp +; MIPS32-NEXT: movn $9, $10, $11 +; MIPS32-NEXT: movn $14, $zero, $15 +; MIPS32-NEXT: or $4, $14, $9 +; MIPS32-NEXT: movn $3, $zero, $24 +; MIPS32-NEXT: movn $3, $4, $gp ; MIPS32-NEXT: movz $3, $5, $8 -; MIPS32-NEXT: movn $6, $zero, $13 -; MIPS32-NEXT: movz $6, $zero, $25 +; MIPS32-NEXT: movn $6, $zero, $15 +; MIPS32-NEXT: movz $6, $zero, $gp ; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: move $5, $6 +; MIPS32-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 8 ; ; MIPS32R2-LABEL: shl_i128: ; MIPS32R2: # %bb.0: # %entry -; MIPS32R2-NEXT: lw $8, 28($sp) +; MIPS32R2-NEXT: addiu $sp, $sp, -8 +; MIPS32R2-NEXT: .cfi_def_cfa_offset 8 +; MIPS32R2-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; MIPS32R2-NEXT: .cfi_offset 16, -4 +; MIPS32R2-NEXT: lw $8, 36($sp) ; MIPS32R2-NEXT: addiu $1, $zero, 64 ; MIPS32R2-NEXT: subu $1, $1, $8 -; MIPS32R2-NEXT: srlv $9, $6, $1 -; MIPS32R2-NEXT: andi $10, $1, 32 -; MIPS32R2-NEXT: move $2, $9 -; MIPS32R2-NEXT: movn $2, $zero, $10 -; MIPS32R2-NEXT: sllv $3, $4, $8 -; MIPS32R2-NEXT: not $11, $8 -; MIPS32R2-NEXT: srl $12, $5, 1 -; MIPS32R2-NEXT: srlv $12, $12, $11 -; MIPS32R2-NEXT: or $3, $3, $12 -; MIPS32R2-NEXT: sllv $12, $5, $8 -; MIPS32R2-NEXT: andi $13, $8, 32 -; MIPS32R2-NEXT: movn $3, $12, $13 -; MIPS32R2-NEXT: addiu $14, $8, -64 -; MIPS32R2-NEXT: or $15, $3, $2 -; MIPS32R2-NEXT: sllv $2, $6, $14 -; MIPS32R2-NEXT: srl $24, $7, 1 -; MIPS32R2-NEXT: not $3, $14 -; MIPS32R2-NEXT: srlv $3, $24, $3 +; MIPS32R2-NEXT: andi $9, $1, 63 +; MIPS32R2-NEXT: srlv $10, $6, $9 +; MIPS32R2-NEXT: andi $11, $1, 32 +; MIPS32R2-NEXT: andi $12, $8, 63 +; MIPS32R2-NEXT: move $1, $10 +; MIPS32R2-NEXT: movn $1, $zero, $11 +; MIPS32R2-NEXT: sllv $2, $4, $12 +; MIPS32R2-NEXT: srl $3, $5, 1 +; MIPS32R2-NEXT: not $13, $12 +; MIPS32R2-NEXT: srlv $3, $3, $13 ; MIPS32R2-NEXT: or $2, $2, $3 -; MIPS32R2-NEXT: sllv $3, $7, $14 -; MIPS32R2-NEXT: andi $14, $14, 32 -; MIPS32R2-NEXT: movn $2, $3, $14 -; MIPS32R2-NEXT: sltiu $25, $8, 64 -; MIPS32R2-NEXT: movn $2, $15, $25 -; MIPS32R2-NEXT: srlv $15, $7, $1 -; MIPS32R2-NEXT: not $1, $1 -; MIPS32R2-NEXT: sll $gp, $6, 1 -; MIPS32R2-NEXT: sllv $1, $gp, $1 -; MIPS32R2-NEXT: or $15, $1, $15 -; MIPS32R2-NEXT: sllv $1, $6, $8 -; MIPS32R2-NEXT: srlv $6, $24, $11 +; MIPS32R2-NEXT: sllv $14, $5, $12 +; MIPS32R2-NEXT: andi $15, $8, 32 +; MIPS32R2-NEXT: movn $2, $14, $15 +; MIPS32R2-NEXT: addiu $24, $8, -64 +; MIPS32R2-NEXT: andi $3, $24, 63 +; MIPS32R2-NEXT: or $1, $2, $1 +; MIPS32R2-NEXT: sllv $2, $6, $3 +; MIPS32R2-NEXT: srl $25, $7, 1 +; MIPS32R2-NEXT: not $gp, $3 +; MIPS32R2-NEXT: srlv $gp, $25, $gp +; MIPS32R2-NEXT: or $2, $2, $gp +; MIPS32R2-NEXT: sllv $3, $7, $3 +; MIPS32R2-NEXT: andi $24, $24, 32 +; MIPS32R2-NEXT: movn $2, $3, $24 +; MIPS32R2-NEXT: sltiu $gp, $8, 64 +; MIPS32R2-NEXT: movn $2, $1, $gp +; MIPS32R2-NEXT: srlv $1, $7, $9 +; MIPS32R2-NEXT: not $9, $9 +; MIPS32R2-NEXT: sll $16, $6, 1 +; MIPS32R2-NEXT: sllv $9, $16, $9 +; MIPS32R2-NEXT: or $9, $9, $1 +; MIPS32R2-NEXT: sllv $1, $6, $12 +; MIPS32R2-NEXT: srlv $6, $25, $13 ; MIPS32R2-NEXT: or $1, $1, $6 -; MIPS32R2-NEXT: sllv $6, $7, $8 -; MIPS32R2-NEXT: movn $1, $6, $13 +; MIPS32R2-NEXT: sllv $6, $7, $12 +; MIPS32R2-NEXT: movn $1, $6, $15 ; MIPS32R2-NEXT: movz $2, $4, $8 -; MIPS32R2-NEXT: movz $1, $zero, $25 -; MIPS32R2-NEXT: movn $15, $9, $10 -; MIPS32R2-NEXT: movn $12, $zero, $13 -; MIPS32R2-NEXT: or $4, $12, $15 -; MIPS32R2-NEXT: movn $3, $zero, $14 -; MIPS32R2-NEXT: movn $3, $4, $25 +; MIPS32R2-NEXT: movz $1, $zero, $gp +; MIPS32R2-NEXT: movn $9, $10, $11 +; MIPS32R2-NEXT: movn $14, $zero, $15 +; MIPS32R2-NEXT: or $4, $14, $9 +; MIPS32R2-NEXT: movn $3, $zero, $24 +; MIPS32R2-NEXT: movn $3, $4, $gp ; MIPS32R2-NEXT: movz $3, $5, $8 -; MIPS32R2-NEXT: movn $6, $zero, $13 -; MIPS32R2-NEXT: movz $6, $zero, $25 +; MIPS32R2-NEXT: movn $6, $zero, $15 +; MIPS32R2-NEXT: movz $6, $zero, $gp ; MIPS32R2-NEXT: move $4, $1 -; MIPS32R2-NEXT: jr $ra ; MIPS32R2-NEXT: move $5, $6 +; MIPS32R2-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: addiu $sp, $sp, 8 ; ; MIPS32R6-LABEL: shl_i128: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $3, 28($sp) -; MIPS32R6-NEXT: sllv $1, $4, $3 -; MIPS32R6-NEXT: not $2, $3 +; MIPS32R6-NEXT: addiu $sp, $sp, -8 +; MIPS32R6-NEXT: .cfi_def_cfa_offset 8 +; MIPS32R6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; MIPS32R6-NEXT: .cfi_offset 16, -4 +; MIPS32R6-NEXT: lw $3, 36($sp) +; MIPS32R6-NEXT: andi $1, $3, 63 +; MIPS32R6-NEXT: sllv $2, $4, $1 ; MIPS32R6-NEXT: srl $8, $5, 1 -; MIPS32R6-NEXT: srlv $8, $8, $2 -; MIPS32R6-NEXT: or $1, $1, $8 -; MIPS32R6-NEXT: sllv $8, $5, $3 -; MIPS32R6-NEXT: andi $9, $3, 32 -; MIPS32R6-NEXT: seleqz $1, $1, $9 -; MIPS32R6-NEXT: selnez $10, $8, $9 -; MIPS32R6-NEXT: addiu $11, $zero, 64 -; MIPS32R6-NEXT: subu $11, $11, $3 -; MIPS32R6-NEXT: srlv $12, $6, $11 -; MIPS32R6-NEXT: andi $13, $11, 32 -; MIPS32R6-NEXT: seleqz $14, $12, $13 -; MIPS32R6-NEXT: or $1, $10, $1 -; MIPS32R6-NEXT: selnez $10, $12, $13 -; MIPS32R6-NEXT: srlv $12, $7, $11 -; MIPS32R6-NEXT: not $11, $11 -; MIPS32R6-NEXT: sll $15, $6, 1 -; MIPS32R6-NEXT: sllv $11, $15, $11 -; MIPS32R6-NEXT: or $11, $11, $12 -; MIPS32R6-NEXT: seleqz $11, $11, $13 -; MIPS32R6-NEXT: addiu $12, $3, -64 -; MIPS32R6-NEXT: or $10, $10, $11 -; MIPS32R6-NEXT: or $1, $1, $14 -; MIPS32R6-NEXT: sllv $11, $6, $12 -; MIPS32R6-NEXT: srl $13, $7, 1 -; MIPS32R6-NEXT: not $14, $12 -; MIPS32R6-NEXT: srlv $14, $13, $14 -; MIPS32R6-NEXT: or $11, $11, $14 -; MIPS32R6-NEXT: andi $14, $12, 32 -; MIPS32R6-NEXT: seleqz $11, $11, $14 -; MIPS32R6-NEXT: sllv $12, $7, $12 -; MIPS32R6-NEXT: selnez $15, $12, $14 -; MIPS32R6-NEXT: sltiu $24, $3, 64 -; MIPS32R6-NEXT: selnez $1, $1, $24 -; MIPS32R6-NEXT: or $11, $15, $11 -; MIPS32R6-NEXT: sllv $6, $6, $3 -; MIPS32R6-NEXT: srlv $2, $13, $2 -; MIPS32R6-NEXT: seleqz $8, $8, $9 -; MIPS32R6-NEXT: or $8, $8, $10 -; MIPS32R6-NEXT: or $6, $6, $2 -; MIPS32R6-NEXT: seleqz $2, $11, $24 -; MIPS32R6-NEXT: seleqz $10, $zero, $24 -; MIPS32R6-NEXT: sllv $7, $7, $3 -; MIPS32R6-NEXT: seleqz $11, $7, $9 -; MIPS32R6-NEXT: selnez $11, $11, $24 +; MIPS32R6-NEXT: not $9, $1 +; MIPS32R6-NEXT: srlv $8, $8, $9 +; MIPS32R6-NEXT: or $2, $2, $8 +; MIPS32R6-NEXT: addiu $8, $zero, 64 +; MIPS32R6-NEXT: subu $8, $8, $3 +; MIPS32R6-NEXT: andi $10, $8, 63 +; MIPS32R6-NEXT: andi $11, $3, 32 +; MIPS32R6-NEXT: seleqz $2, $2, $11 +; MIPS32R6-NEXT: not $12, $10 +; MIPS32R6-NEXT: sll $13, $6, 1 +; MIPS32R6-NEXT: srlv $14, $7, $10 +; MIPS32R6-NEXT: sllv $12, $13, $12 +; MIPS32R6-NEXT: sllv $13, $5, $1 +; MIPS32R6-NEXT: selnez $15, $13, $11 +; MIPS32R6-NEXT: srlv $10, $6, $10 +; MIPS32R6-NEXT: andi $8, $8, 32 +; MIPS32R6-NEXT: seleqz $24, $10, $8 +; MIPS32R6-NEXT: or $2, $15, $2 +; MIPS32R6-NEXT: or $12, $12, $14 +; MIPS32R6-NEXT: selnez $10, $10, $8 +; MIPS32R6-NEXT: seleqz $8, $12, $8 +; MIPS32R6-NEXT: sllv $12, $7, $1 +; MIPS32R6-NEXT: srl $14, $7, 1 +; MIPS32R6-NEXT: addiu $15, $3, -64 +; MIPS32R6-NEXT: andi $25, $15, 63 +; MIPS32R6-NEXT: not $gp, $25 +; MIPS32R6-NEXT: or $2, $2, $24 +; MIPS32R6-NEXT: sllv $24, $6, $25 +; MIPS32R6-NEXT: srlv $gp, $14, $gp +; MIPS32R6-NEXT: seleqz $16, $12, $11 +; MIPS32R6-NEXT: sllv $1, $6, $1 +; MIPS32R6-NEXT: srlv $6, $14, $9 +; MIPS32R6-NEXT: sltiu $9, $3, 64 +; MIPS32R6-NEXT: or $8, $10, $8 +; MIPS32R6-NEXT: or $6, $1, $6 +; MIPS32R6-NEXT: seleqz $10, $zero, $9 +; MIPS32R6-NEXT: selnez $1, $16, $9 ; MIPS32R6-NEXT: seleqz $4, $4, $3 -; MIPS32R6-NEXT: or $1, $1, $2 -; MIPS32R6-NEXT: selnez $1, $1, $3 -; MIPS32R6-NEXT: or $2, $4, $1 -; MIPS32R6-NEXT: or $1, $10, $11 -; MIPS32R6-NEXT: seleqz $4, $6, $9 -; MIPS32R6-NEXT: selnez $6, $7, $9 +; MIPS32R6-NEXT: selnez $2, $2, $9 +; MIPS32R6-NEXT: or $14, $24, $gp +; MIPS32R6-NEXT: andi $15, $15, 32 +; MIPS32R6-NEXT: seleqz $14, $14, $15 +; MIPS32R6-NEXT: sllv $7, $7, $25 +; MIPS32R6-NEXT: selnez $24, $7, $15 +; MIPS32R6-NEXT: or $14, $24, $14 +; MIPS32R6-NEXT: seleqz $14, $14, $9 +; MIPS32R6-NEXT: or $2, $2, $14 +; MIPS32R6-NEXT: selnez $2, $2, $3 +; MIPS32R6-NEXT: or $2, $4, $2 +; MIPS32R6-NEXT: or $1, $10, $1 +; MIPS32R6-NEXT: seleqz $4, $6, $11 +; MIPS32R6-NEXT: selnez $6, $12, $11 ; MIPS32R6-NEXT: seleqz $5, $5, $3 -; MIPS32R6-NEXT: selnez $7, $8, $24 -; MIPS32R6-NEXT: seleqz $8, $12, $14 -; MIPS32R6-NEXT: seleqz $8, $8, $24 -; MIPS32R6-NEXT: or $7, $7, $8 +; MIPS32R6-NEXT: seleqz $11, $13, $11 +; MIPS32R6-NEXT: or $8, $11, $8 +; MIPS32R6-NEXT: selnez $8, $8, $9 +; MIPS32R6-NEXT: seleqz $7, $7, $15 +; MIPS32R6-NEXT: seleqz $7, $7, $9 +; MIPS32R6-NEXT: or $7, $8, $7 ; MIPS32R6-NEXT: selnez $3, $7, $3 ; MIPS32R6-NEXT: or $3, $5, $3 ; MIPS32R6-NEXT: or $4, $6, $4 -; MIPS32R6-NEXT: selnez $4, $4, $24 +; MIPS32R6-NEXT: selnez $4, $4, $9 ; MIPS32R6-NEXT: or $4, $10, $4 -; MIPS32R6-NEXT: jr $ra ; MIPS32R6-NEXT: move $5, $1 +; MIPS32R6-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: addiu $sp, $sp, 8 ; ; MIPS3-LABEL: shl_i128: ; MIPS3: # %bb.0: # %entry @@ -844,171 +881,196 @@ ; ; MMR3-LABEL: shl_i128: ; MMR3: # %bb.0: # %entry -; MMR3-NEXT: addiusp -40 -; MMR3-NEXT: .cfi_def_cfa_offset 40 -; MMR3-NEXT: swp $16, 32($sp) +; MMR3-NEXT: addiusp -48 +; MMR3-NEXT: .cfi_def_cfa_offset 48 +; MMR3-NEXT: swp $16, 40($sp) ; MMR3-NEXT: .cfi_offset 17, -4 ; MMR3-NEXT: .cfi_offset 16, -8 -; MMR3-NEXT: move $17, $7 -; MMR3-NEXT: sw $7, 4($sp) # 4-byte Folded Spill -; MMR3-NEXT: move $7, $6 +; MMR3-NEXT: sw $7, 32($sp) # 4-byte Folded Spill +; MMR3-NEXT: sw $6, 36($sp) # 4-byte Folded Spill +; MMR3-NEXT: sw $5, 24($sp) # 4-byte Folded Spill ; MMR3-NEXT: move $1, $4 -; MMR3-NEXT: lw $16, 68($sp) +; MMR3-NEXT: lw $7, 76($sp) ; MMR3-NEXT: li16 $2, 64 -; MMR3-NEXT: subu16 $6, $2, $16 -; MMR3-NEXT: srlv $9, $7, $6 -; MMR3-NEXT: andi16 $4, $6, 32 -; MMR3-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MMR3-NEXT: subu16 $2, $2, $7 +; MMR3-NEXT: andi16 $3, $2, 63 +; MMR3-NEXT: sw $3, 16($sp) # 4-byte Folded Spill +; MMR3-NEXT: srlv $9, $6, $3 +; MMR3-NEXT: andi16 $4, $2, 32 +; MMR3-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $16, $7, 63 +; MMR3-NEXT: move $6, $7 ; MMR3-NEXT: li16 $3, 0 ; MMR3-NEXT: move $2, $9 ; MMR3-NEXT: movn $2, $3, $4 -; MMR3-NEXT: sllv $3, $1, $16 -; MMR3-NEXT: sw $3, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: not16 $4, $16 -; MMR3-NEXT: sw $4, 20($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $5, 28($sp) # 4-byte Folded Spill +; MMR3-NEXT: sllv $4, $1, $16 ; MMR3-NEXT: srl16 $3, $5, 1 -; MMR3-NEXT: srlv $3, $3, $4 -; MMR3-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; MMR3-NEXT: not16 $7, $16 +; MMR3-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MMR3-NEXT: srlv $3, $3, $7 ; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: sllv $5, $5, $16 -; MMR3-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $4, $16, 32 -; MMR3-NEXT: sw $4, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $3, $5, $4 -; MMR3-NEXT: addiu $4, $16, -64 +; MMR3-NEXT: sllv $7, $5, $16 +; MMR3-NEXT: sw $6, 28($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $4, $6, 32 +; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill +; MMR3-NEXT: movn $3, $7, $4 +; MMR3-NEXT: addiu $4, $6, -64 +; MMR3-NEXT: sw $4, 0($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $5, $4, 63 ; MMR3-NEXT: or16 $3, $2 -; MMR3-NEXT: sllv $2, $7, $4 -; MMR3-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: srl16 $5, $17, 1 -; MMR3-NEXT: not16 $2, $4 -; MMR3-NEXT: srlv $2, $5, $2 -; MMR3-NEXT: lw $17, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $2, $17 +; MMR3-NEXT: lw $2, 36($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $2, $2, $5 +; MMR3-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MMR3-NEXT: lw $4, 32($sp) # 4-byte Folded Reload +; MMR3-NEXT: srl16 $6, $4, 1 +; MMR3-NEXT: not16 $2, $5 +; MMR3-NEXT: srlv $2, $6, $2 ; MMR3-NEXT: lw $17, 4($sp) # 4-byte Folded Reload -; MMR3-NEXT: sllv $8, $17, $4 -; MMR3-NEXT: andi16 $4, $4, 32 -; MMR3-NEXT: sw $4, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $2, $8, $4 -; MMR3-NEXT: sltiu $10, $16, 64 +; MMR3-NEXT: or16 $2, $17 +; MMR3-NEXT: sllv $8, $4, $5 +; MMR3-NEXT: lw $5, 0($sp) # 4-byte Folded Reload +; MMR3-NEXT: andi16 $5, $5, 32 +; MMR3-NEXT: sw $5, 4($sp) # 4-byte Folded Spill +; MMR3-NEXT: movn $2, $8, $5 +; MMR3-NEXT: lw $5, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: sltiu $10, $5, 64 ; MMR3-NEXT: movn $2, $3, $10 -; MMR3-NEXT: srlv $4, $17, $6 -; MMR3-NEXT: not16 $3, $6 -; MMR3-NEXT: sll16 $6, $7, 1 -; MMR3-NEXT: sllv $3, $6, $3 -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: sllv $6, $7, $16 -; MMR3-NEXT: lw $4, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: srlv $4, $5, $4 -; MMR3-NEXT: or16 $4, $6 -; MMR3-NEXT: sllv $6, $17, $16 -; MMR3-NEXT: lw $17, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $4, $6, $17 -; MMR3-NEXT: movz $2, $1, $16 -; MMR3-NEXT: li16 $5, 0 -; MMR3-NEXT: movz $4, $5, $10 -; MMR3-NEXT: lw $7, 24($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $3, $9, $7 +; MMR3-NEXT: lw $3, 16($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $17, $4, $3 +; MMR3-NEXT: not16 $3, $3 +; MMR3-NEXT: lw $5, 36($sp) # 4-byte Folded Reload +; MMR3-NEXT: sll16 $4, $5, 1 +; MMR3-NEXT: sllv $3, $4, $3 +; MMR3-NEXT: or16 $3, $17 +; MMR3-NEXT: sllv $17, $5, $16 +; MMR3-NEXT: lw $4, 12($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $4, $6, $4 +; MMR3-NEXT: or16 $4, $17 +; MMR3-NEXT: lw $5, 32($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $6, $5, $16 ; MMR3-NEXT: lw $5, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $5, $7, $17 -; MMR3-NEXT: or16 $5, $3 -; MMR3-NEXT: lw $3, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $8, $7, $3 -; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $8, $5, $10 -; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: movn $4, $6, $5 +; MMR3-NEXT: lw $16, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: movz $2, $1, $16 +; MMR3-NEXT: li16 $17, 0 +; MMR3-NEXT: movz $4, $17, $10 +; MMR3-NEXT: lw $17, 20($sp) # 4-byte Folded Reload +; MMR3-NEXT: movn $3, $9, $17 +; MMR3-NEXT: li16 $17, 0 +; MMR3-NEXT: movn $7, $17, $5 +; MMR3-NEXT: or16 $7, $3 +; MMR3-NEXT: lw $3, 4($sp) # 4-byte Folded Reload +; MMR3-NEXT: movn $8, $17, $3 +; MMR3-NEXT: li16 $17, 0 +; MMR3-NEXT: movn $8, $7, $10 +; MMR3-NEXT: lw $3, 24($sp) # 4-byte Folded Reload ; MMR3-NEXT: movz $8, $3, $16 -; MMR3-NEXT: movn $6, $7, $17 +; MMR3-NEXT: movn $6, $17, $5 ; MMR3-NEXT: li16 $3, 0 ; MMR3-NEXT: movz $6, $3, $10 ; MMR3-NEXT: move $3, $8 ; MMR3-NEXT: move $5, $6 -; MMR3-NEXT: lwp $16, 32($sp) -; MMR3-NEXT: addiusp 40 +; MMR3-NEXT: lwp $16, 40($sp) +; MMR3-NEXT: addiusp 48 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: shl_i128: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: addiu $sp, $sp, -16 -; MMR6-NEXT: .cfi_def_cfa_offset 16 -; MMR6-NEXT: sw $17, 12($sp) # 4-byte Folded Spill -; MMR6-NEXT: sw $16, 8($sp) # 4-byte Folded Spill +; MMR6-NEXT: addiu $sp, $sp, -32 +; MMR6-NEXT: .cfi_def_cfa_offset 32 +; MMR6-NEXT: sw $17, 28($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $16, 24($sp) # 4-byte Folded Spill ; MMR6-NEXT: .cfi_offset 17, -4 ; MMR6-NEXT: .cfi_offset 16, -8 -; MMR6-NEXT: move $11, $4 -; MMR6-NEXT: lw $3, 44($sp) -; MMR6-NEXT: sllv $1, $4, $3 -; MMR6-NEXT: not16 $2, $3 -; MMR6-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $7, 16($sp) # 4-byte Folded Spill +; MMR6-NEXT: move $7, $6 +; MMR6-NEXT: sw $6, 0($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MMR6-NEXT: move $1, $4 +; MMR6-NEXT: lw $3, 60($sp) +; MMR6-NEXT: andi16 $2, $3, 63 +; MMR6-NEXT: sllv $4, $4, $2 ; MMR6-NEXT: srl16 $16, $5, 1 -; MMR6-NEXT: srlv $8, $16, $2 -; MMR6-NEXT: or $1, $1, $8 -; MMR6-NEXT: sllv $8, $5, $3 +; MMR6-NEXT: not16 $5, $2 +; MMR6-NEXT: sw $5, 12($sp) # 4-byte Folded Spill +; MMR6-NEXT: srlv $5, $16, $5 +; MMR6-NEXT: or $8, $4, $5 +; MMR6-NEXT: li16 $5, 64 +; MMR6-NEXT: subu16 $5, $5, $3 +; MMR6-NEXT: andi16 $4, $5, 63 ; MMR6-NEXT: andi16 $16, $3, 32 -; MMR6-NEXT: seleqz $1, $1, $16 -; MMR6-NEXT: selnez $9, $8, $16 -; MMR6-NEXT: li16 $17, 64 -; MMR6-NEXT: subu16 $17, $17, $3 -; MMR6-NEXT: srlv $10, $6, $17 -; MMR6-NEXT: andi16 $2, $17, 32 -; MMR6-NEXT: seleqz $12, $10, $2 -; MMR6-NEXT: or $1, $9, $1 -; MMR6-NEXT: selnez $9, $10, $2 -; MMR6-NEXT: srlv $10, $7, $17 -; MMR6-NEXT: not16 $17, $17 -; MMR6-NEXT: sll16 $4, $6, 1 -; MMR6-NEXT: sllv $4, $4, $17 -; MMR6-NEXT: or $4, $4, $10 -; MMR6-NEXT: seleqz $2, $4, $2 -; MMR6-NEXT: addiu $4, $3, -64 -; MMR6-NEXT: or $10, $9, $2 -; MMR6-NEXT: or $1, $1, $12 -; MMR6-NEXT: sllv $9, $6, $4 -; MMR6-NEXT: srl16 $2, $7, 1 -; MMR6-NEXT: not16 $17, $4 -; MMR6-NEXT: srlv $12, $2, $17 -; MMR6-NEXT: or $9, $9, $12 -; MMR6-NEXT: andi16 $17, $4, 32 -; MMR6-NEXT: seleqz $9, $9, $17 -; MMR6-NEXT: sllv $14, $7, $4 -; MMR6-NEXT: selnez $12, $14, $17 -; MMR6-NEXT: sltiu $13, $3, 64 -; MMR6-NEXT: selnez $1, $1, $13 -; MMR6-NEXT: or $9, $12, $9 -; MMR6-NEXT: sllv $6, $6, $3 -; MMR6-NEXT: lw $4, 4($sp) # 4-byte Folded Reload -; MMR6-NEXT: srlv $2, $2, $4 ; MMR6-NEXT: seleqz $8, $8, $16 -; MMR6-NEXT: li16 $4, 0 -; MMR6-NEXT: or $8, $8, $10 -; MMR6-NEXT: or $6, $6, $2 -; MMR6-NEXT: seleqz $2, $9, $13 -; MMR6-NEXT: seleqz $9, $4, $13 -; MMR6-NEXT: sllv $7, $7, $3 -; MMR6-NEXT: seleqz $10, $7, $16 -; MMR6-NEXT: selnez $10, $10, $13 -; MMR6-NEXT: seleqz $11, $11, $3 -; MMR6-NEXT: or $1, $1, $2 -; MMR6-NEXT: selnez $1, $1, $3 -; MMR6-NEXT: or $2, $11, $1 -; MMR6-NEXT: or $1, $9, $10 +; MMR6-NEXT: not16 $6, $4 +; MMR6-NEXT: sw $6, 8($sp) # 4-byte Folded Spill +; MMR6-NEXT: sll16 $6, $7, 1 +; MMR6-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: lw $6, 16($sp) # 4-byte Folded Reload +; MMR6-NEXT: srlv $9, $6, $4 +; MMR6-NEXT: lw $17, 8($sp) # 4-byte Folded Reload +; MMR6-NEXT: lw $7, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sllv $13, $7, $17 +; MMR6-NEXT: lw $17, 20($sp) # 4-byte Folded Reload +; MMR6-NEXT: sllv $10, $17, $2 +; MMR6-NEXT: selnez $11, $10, $16 +; MMR6-NEXT: lw $7, 0($sp) # 4-byte Folded Reload +; MMR6-NEXT: srlv $4, $7, $4 +; MMR6-NEXT: andi16 $5, $5, 32 +; MMR6-NEXT: seleqz $12, $4, $5 +; MMR6-NEXT: or $8, $11, $8 +; MMR6-NEXT: or $11, $13, $9 +; MMR6-NEXT: selnez $9, $4, $5 +; MMR6-NEXT: seleqz $13, $11, $5 +; MMR6-NEXT: sllv $11, $6, $2 +; MMR6-NEXT: srl16 $17, $6, 1 +; MMR6-NEXT: addiu $5, $3, -64 +; MMR6-NEXT: andi16 $4, $5, 63 +; MMR6-NEXT: not16 $6, $4 +; MMR6-NEXT: or $8, $8, $12 +; MMR6-NEXT: sllv $12, $7, $4 +; MMR6-NEXT: srlv $24, $17, $6 +; MMR6-NEXT: seleqz $14, $11, $16 +; MMR6-NEXT: sllv $2, $7, $2 +; MMR6-NEXT: lw $6, 12($sp) # 4-byte Folded Reload +; MMR6-NEXT: srlv $6, $17, $6 +; MMR6-NEXT: sltiu $15, $3, 64 +; MMR6-NEXT: li16 $17, 0 +; MMR6-NEXT: or $9, $9, $13 +; MMR6-NEXT: or $6, $2, $6 +; MMR6-NEXT: seleqz $13, $17, $15 +; MMR6-NEXT: selnez $14, $14, $15 +; MMR6-NEXT: seleqz $1, $1, $3 +; MMR6-NEXT: selnez $2, $8, $15 +; MMR6-NEXT: or $7, $12, $24 +; MMR6-NEXT: andi16 $5, $5, 32 +; MMR6-NEXT: seleqz $7, $7, $5 +; MMR6-NEXT: lw $17, 16($sp) # 4-byte Folded Reload +; MMR6-NEXT: sllv $4, $17, $4 +; MMR6-NEXT: selnez $8, $4, $5 +; MMR6-NEXT: or $7, $8, $7 +; MMR6-NEXT: seleqz $7, $7, $15 +; MMR6-NEXT: or $2, $2, $7 +; MMR6-NEXT: selnez $2, $2, $3 +; MMR6-NEXT: or $2, $1, $2 +; MMR6-NEXT: or $1, $13, $14 ; MMR6-NEXT: seleqz $6, $6, $16 -; MMR6-NEXT: selnez $7, $7, $16 -; MMR6-NEXT: seleqz $5, $5, $3 -; MMR6-NEXT: selnez $8, $8, $13 -; MMR6-NEXT: seleqz $4, $14, $17 -; MMR6-NEXT: seleqz $4, $4, $13 -; MMR6-NEXT: or $4, $8, $4 +; MMR6-NEXT: selnez $7, $11, $16 +; MMR6-NEXT: lw $17, 20($sp) # 4-byte Folded Reload +; MMR6-NEXT: seleqz $8, $17, $3 +; MMR6-NEXT: seleqz $10, $10, $16 +; MMR6-NEXT: or $9, $10, $9 +; MMR6-NEXT: selnez $9, $9, $15 +; MMR6-NEXT: seleqz $4, $4, $5 +; MMR6-NEXT: seleqz $4, $4, $15 +; MMR6-NEXT: or $4, $9, $4 ; MMR6-NEXT: selnez $3, $4, $3 -; MMR6-NEXT: or $3, $5, $3 +; MMR6-NEXT: or $3, $8, $3 ; MMR6-NEXT: or $4, $7, $6 -; MMR6-NEXT: selnez $4, $4, $13 -; MMR6-NEXT: or $4, $9, $4 +; MMR6-NEXT: selnez $4, $4, $15 +; MMR6-NEXT: or $4, $13, $4 ; MMR6-NEXT: move $5, $1 -; MMR6-NEXT: lw $16, 8($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $17, 12($sp) # 4-byte Folded Reload -; MMR6-NEXT: addiu $sp, $sp, 16 +; MMR6-NEXT: lw $16, 24($sp) # 4-byte Folded Reload +; MMR6-NEXT: lw $17, 28($sp) # 4-byte Folded Reload +; MMR6-NEXT: addiu $sp, $sp, 32 ; MMR6-NEXT: jrc $ra entry: diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -145,81 +145,84 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: lshr128: ; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw a5, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: neg a6, a2 ; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: sub t1, a7, a2 -; RV32I-NEXT: sll t0, a5, a6 +; RV32I-NEXT: sub t0, a3, a2 +; RV32I-NEXT: andi a6, t0, 63 +; RV32I-NEXT: addi t1, a6, -32 ; RV32I-NEXT: bltz t1, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: sll t4, a5, t1 ; RV32I-NEXT: j .LBB6_3 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: sll a6, a4, a6 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: xori a7, a7, 31 +; RV32I-NEXT: sll a7, a4, t0 +; RV32I-NEXT: xori a6, a6, 31 ; RV32I-NEXT: srli t2, a5, 1 -; RV32I-NEXT: srl a7, t2, a7 -; RV32I-NEXT: or t2, a6, a7 +; RV32I-NEXT: srl a6, t2, a6 +; RV32I-NEXT: or t4, a7, a6 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: lw t5, 4(a1) -; RV32I-NEXT: addi a6, a2, -32 +; RV32I-NEXT: lw t6, 4(a1) +; RV32I-NEXT: andi s0, a2, 63 +; RV32I-NEXT: addi a6, s0, -32 ; RV32I-NEXT: bgez a6, .LBB6_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a7, t5, a2 -; RV32I-NEXT: or t2, t2, a7 +; RV32I-NEXT: srl a7, t6, a2 +; RV32I-NEXT: or t4, t4, a7 ; RV32I-NEXT: .LBB6_5: -; RV32I-NEXT: addi t4, a2, -96 -; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: bltz t4, .LBB6_7 +; RV32I-NEXT: addi t2, a2, -64 +; RV32I-NEXT: andi t3, t2, 63 +; RV32I-NEXT: addi t5, t3, -32 +; RV32I-NEXT: bltz t5, .LBB6_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: bgeu a2, a3, .LBB6_8 ; RV32I-NEXT: j .LBB6_9 ; RV32I-NEXT: .LBB6_7: -; RV32I-NEXT: srl a7, a4, t3 +; RV32I-NEXT: srl a7, a4, t2 ; RV32I-NEXT: bltu a2, a3, .LBB6_9 ; RV32I-NEXT: .LBB6_8: -; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: mv t4, a7 ; RV32I-NEXT: .LBB6_9: -; RV32I-NEXT: mv a7, t5 +; RV32I-NEXT: mv a7, t6 ; RV32I-NEXT: beqz a2, .LBB6_11 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, t2 +; RV32I-NEXT: mv a7, t4 ; RV32I-NEXT: .LBB6_11: ; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: xori t2, a2, 31 +; RV32I-NEXT: xori t4, s0, 31 ; RV32I-NEXT: bltz a6, .LBB6_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl t5, t5, a6 +; RV32I-NEXT: srl t6, t6, a6 ; RV32I-NEXT: bltz t1, .LBB6_14 ; RV32I-NEXT: j .LBB6_15 ; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: srl t6, a1, a2 -; RV32I-NEXT: slli t5, t5, 1 -; RV32I-NEXT: sll t5, t5, t2 -; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: srl s0, a1, a2 +; RV32I-NEXT: slli t6, t6, 1 +; RV32I-NEXT: sll t6, t6, t4 +; RV32I-NEXT: or t6, s0, t6 ; RV32I-NEXT: bgez t1, .LBB6_15 ; RV32I-NEXT: .LBB6_14: -; RV32I-NEXT: or t5, t5, t0 +; RV32I-NEXT: sll t0, a5, t0 +; RV32I-NEXT: or t6, t6, t0 ; RV32I-NEXT: .LBB6_15: ; RV32I-NEXT: slli t0, a4, 1 -; RV32I-NEXT: bltz t4, .LBB6_17 +; RV32I-NEXT: bltz t5, .LBB6_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: srl t1, a4, t4 +; RV32I-NEXT: srl t1, a4, t5 ; RV32I-NEXT: bgeu a2, a3, .LBB6_18 ; RV32I-NEXT: j .LBB6_19 ; RV32I-NEXT: .LBB6_17: -; RV32I-NEXT: srl t1, a5, t3 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: sll t3, t0, t3 -; RV32I-NEXT: or t1, t1, t3 +; RV32I-NEXT: srl t1, a5, t2 +; RV32I-NEXT: xori t2, t3, 31 +; RV32I-NEXT: sll t2, t0, t2 +; RV32I-NEXT: or t1, t1, t2 ; RV32I-NEXT: bltu a2, a3, .LBB6_19 ; RV32I-NEXT: .LBB6_18: -; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: mv t6, t1 ; RV32I-NEXT: .LBB6_19: ; RV32I-NEXT: bnez a2, .LBB6_22 ; RV32I-NEXT: # %bb.20: @@ -229,11 +232,11 @@ ; RV32I-NEXT: bgeu a2, a3, .LBB6_24 ; RV32I-NEXT: j .LBB6_25 ; RV32I-NEXT: .LBB6_22: -; RV32I-NEXT: mv a1, t5 +; RV32I-NEXT: mv a1, t6 ; RV32I-NEXT: bgez a6, .LBB6_21 ; RV32I-NEXT: .LBB6_23: ; RV32I-NEXT: srl a5, a5, a2 -; RV32I-NEXT: sll t0, t0, t2 +; RV32I-NEXT: sll t0, t0, t4 ; RV32I-NEXT: or a5, a5, t0 ; RV32I-NEXT: bltu a2, a3, .LBB6_25 ; RV32I-NEXT: .LBB6_24: @@ -254,6 +257,8 @@ ; RV32I-NEXT: sw a5, 8(a0) ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a7, 4(a0) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr128: @@ -281,37 +286,38 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw a5, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: neg a6, a2 ; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: sub t2, a7, a2 -; RV32I-NEXT: sll t1, a5, a6 +; RV32I-NEXT: sub t1, a3, a2 +; RV32I-NEXT: andi a6, t1, 63 +; RV32I-NEXT: addi t2, a6, -32 ; RV32I-NEXT: bltz t2, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: sll t4, a5, t2 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a6, a4, a6 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: xori a7, a7, 31 +; RV32I-NEXT: sll a7, a4, t1 +; RV32I-NEXT: xori a6, a6, 31 ; RV32I-NEXT: srli t0, a5, 1 -; RV32I-NEXT: srl a7, t0, a7 -; RV32I-NEXT: or t4, a6, a7 +; RV32I-NEXT: srl a6, t0, a6 +; RV32I-NEXT: or t4, a7, a6 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: lw t6, 4(a1) -; RV32I-NEXT: addi a6, a2, -32 +; RV32I-NEXT: lw s0, 4(a1) +; RV32I-NEXT: andi s1, a2, 63 +; RV32I-NEXT: addi a6, s1, -32 ; RV32I-NEXT: bgez a6, .LBB7_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a7, t6, a2 +; RV32I-NEXT: srl a7, s0, a2 ; RV32I-NEXT: or t4, t4, a7 ; RV32I-NEXT: .LBB7_5: ; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: addi t5, a2, -96 +; RV32I-NEXT: andi t5, t3, 63 +; RV32I-NEXT: addi t6, t5, -32 ; RV32I-NEXT: srai a7, a4, 31 -; RV32I-NEXT: bltz t5, .LBB7_7 +; RV32I-NEXT: bltz t6, .LBB7_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: bgeu a2, a3, .LBB7_8 @@ -322,41 +328,42 @@ ; RV32I-NEXT: .LBB7_8: ; RV32I-NEXT: mv t4, t0 ; RV32I-NEXT: .LBB7_9: -; RV32I-NEXT: mv t0, t6 +; RV32I-NEXT: mv t0, s0 ; RV32I-NEXT: beqz a2, .LBB7_11 ; RV32I-NEXT: # %bb.10: ; RV32I-NEXT: mv t0, t4 ; RV32I-NEXT: .LBB7_11: ; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: xori t4, a2, 31 +; RV32I-NEXT: xori t4, s1, 31 ; RV32I-NEXT: bltz a6, .LBB7_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl t6, t6, a6 +; RV32I-NEXT: srl s0, s0, a6 ; RV32I-NEXT: bltz t2, .LBB7_14 ; RV32I-NEXT: j .LBB7_15 ; RV32I-NEXT: .LBB7_13: -; RV32I-NEXT: srl s0, a1, a2 -; RV32I-NEXT: slli t6, t6, 1 -; RV32I-NEXT: sll t6, t6, t4 -; RV32I-NEXT: or t6, s0, t6 +; RV32I-NEXT: srl s1, a1, a2 +; RV32I-NEXT: slli s0, s0, 1 +; RV32I-NEXT: sll s0, s0, t4 +; RV32I-NEXT: or s0, s1, s0 ; RV32I-NEXT: bgez t2, .LBB7_15 ; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: or t6, t6, t1 +; RV32I-NEXT: sll t1, a5, t1 +; RV32I-NEXT: or s0, s0, t1 ; RV32I-NEXT: .LBB7_15: ; RV32I-NEXT: slli t1, a4, 1 -; RV32I-NEXT: bltz t5, .LBB7_17 +; RV32I-NEXT: bltz t6, .LBB7_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sra t2, a4, t5 +; RV32I-NEXT: sra t2, a4, t6 ; RV32I-NEXT: bgeu a2, a3, .LBB7_18 ; RV32I-NEXT: j .LBB7_19 ; RV32I-NEXT: .LBB7_17: ; RV32I-NEXT: srl t2, a5, t3 -; RV32I-NEXT: xori t3, t3, 31 +; RV32I-NEXT: xori t3, t5, 31 ; RV32I-NEXT: sll t3, t1, t3 ; RV32I-NEXT: or t2, t2, t3 ; RV32I-NEXT: bltu a2, a3, .LBB7_19 ; RV32I-NEXT: .LBB7_18: -; RV32I-NEXT: mv t6, t2 +; RV32I-NEXT: mv s0, t2 ; RV32I-NEXT: .LBB7_19: ; RV32I-NEXT: bnez a2, .LBB7_22 ; RV32I-NEXT: # %bb.20: @@ -366,7 +373,7 @@ ; RV32I-NEXT: bgeu a2, a3, .LBB7_24 ; RV32I-NEXT: j .LBB7_25 ; RV32I-NEXT: .LBB7_22: -; RV32I-NEXT: mv a1, t6 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: bgez a6, .LBB7_21 ; RV32I-NEXT: .LBB7_23: ; RV32I-NEXT: srl a5, a5, a2 @@ -392,6 +399,7 @@ ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw t0, 4(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -418,81 +426,84 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: shl128: ; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw a5, 4(a1) ; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: neg a6, a2 ; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: sub t1, a7, a2 -; RV32I-NEXT: srl t0, a5, a6 +; RV32I-NEXT: sub t0, a3, a2 +; RV32I-NEXT: andi a6, t0, 63 +; RV32I-NEXT: addi t1, a6, -32 ; RV32I-NEXT: bltz t1, .LBB8_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: srl t4, a5, t1 ; RV32I-NEXT: j .LBB8_3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a6, a4, a6 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: xori a7, a7, 31 +; RV32I-NEXT: srl a7, a4, t0 +; RV32I-NEXT: xori a6, a6, 31 ; RV32I-NEXT: slli t2, a5, 1 -; RV32I-NEXT: sll a7, t2, a7 -; RV32I-NEXT: or t2, a6, a7 +; RV32I-NEXT: sll a6, t2, a6 +; RV32I-NEXT: or t4, a7, a6 ; RV32I-NEXT: .LBB8_3: -; RV32I-NEXT: lw t5, 8(a1) -; RV32I-NEXT: addi a6, a2, -32 +; RV32I-NEXT: lw t6, 8(a1) +; RV32I-NEXT: andi s0, a2, 63 +; RV32I-NEXT: addi a6, s0, -32 ; RV32I-NEXT: bgez a6, .LBB8_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a7, t5, a2 -; RV32I-NEXT: or t2, t2, a7 +; RV32I-NEXT: sll a7, t6, a2 +; RV32I-NEXT: or t4, t4, a7 ; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: addi t4, a2, -96 -; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: bltz t4, .LBB8_7 +; RV32I-NEXT: addi t2, a2, -64 +; RV32I-NEXT: andi t3, t2, 63 +; RV32I-NEXT: addi t5, t3, -32 +; RV32I-NEXT: bltz t5, .LBB8_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: bgeu a2, a3, .LBB8_8 ; RV32I-NEXT: j .LBB8_9 ; RV32I-NEXT: .LBB8_7: -; RV32I-NEXT: sll a7, a4, t3 +; RV32I-NEXT: sll a7, a4, t2 ; RV32I-NEXT: bltu a2, a3, .LBB8_9 ; RV32I-NEXT: .LBB8_8: -; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: mv t4, a7 ; RV32I-NEXT: .LBB8_9: -; RV32I-NEXT: mv a7, t5 +; RV32I-NEXT: mv a7, t6 ; RV32I-NEXT: beqz a2, .LBB8_11 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, t2 +; RV32I-NEXT: mv a7, t4 ; RV32I-NEXT: .LBB8_11: ; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: xori t2, a2, 31 +; RV32I-NEXT: xori t4, s0, 31 ; RV32I-NEXT: bltz a6, .LBB8_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: sll t5, t5, a6 +; RV32I-NEXT: sll t6, t6, a6 ; RV32I-NEXT: bltz t1, .LBB8_14 ; RV32I-NEXT: j .LBB8_15 ; RV32I-NEXT: .LBB8_13: -; RV32I-NEXT: sll t6, a1, a2 -; RV32I-NEXT: srli t5, t5, 1 -; RV32I-NEXT: srl t5, t5, t2 -; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: sll s0, a1, a2 +; RV32I-NEXT: srli t6, t6, 1 +; RV32I-NEXT: srl t6, t6, t4 +; RV32I-NEXT: or t6, s0, t6 ; RV32I-NEXT: bgez t1, .LBB8_15 ; RV32I-NEXT: .LBB8_14: -; RV32I-NEXT: or t5, t5, t0 +; RV32I-NEXT: srl t0, a5, t0 +; RV32I-NEXT: or t6, t6, t0 ; RV32I-NEXT: .LBB8_15: ; RV32I-NEXT: srli t0, a4, 1 -; RV32I-NEXT: bltz t4, .LBB8_17 +; RV32I-NEXT: bltz t5, .LBB8_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sll t1, a4, t4 +; RV32I-NEXT: sll t1, a4, t5 ; RV32I-NEXT: bgeu a2, a3, .LBB8_18 ; RV32I-NEXT: j .LBB8_19 ; RV32I-NEXT: .LBB8_17: -; RV32I-NEXT: sll t1, a5, t3 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: srl t3, t0, t3 -; RV32I-NEXT: or t1, t1, t3 +; RV32I-NEXT: sll t1, a5, t2 +; RV32I-NEXT: xori t2, t3, 31 +; RV32I-NEXT: srl t2, t0, t2 +; RV32I-NEXT: or t1, t1, t2 ; RV32I-NEXT: bltu a2, a3, .LBB8_19 ; RV32I-NEXT: .LBB8_18: -; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: mv t6, t1 ; RV32I-NEXT: .LBB8_19: ; RV32I-NEXT: bnez a2, .LBB8_22 ; RV32I-NEXT: # %bb.20: @@ -502,11 +513,11 @@ ; RV32I-NEXT: bgeu a2, a3, .LBB8_24 ; RV32I-NEXT: j .LBB8_25 ; RV32I-NEXT: .LBB8_22: -; RV32I-NEXT: mv a1, t5 +; RV32I-NEXT: mv a1, t6 ; RV32I-NEXT: bgez a6, .LBB8_21 ; RV32I-NEXT: .LBB8_23: ; RV32I-NEXT: sll a5, a5, a2 -; RV32I-NEXT: srl t0, t0, t2 +; RV32I-NEXT: srl t0, t0, t4 ; RV32I-NEXT: or a5, a5, t0 ; RV32I-NEXT: bltu a2, a3, .LBB8_25 ; RV32I-NEXT: .LBB8_24: @@ -527,6 +538,8 @@ ; RV32I-NEXT: sw a5, 4(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: sw a7, 8(a0) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl128: diff --git a/llvm/test/CodeGen/SystemZ/shift-12.ll b/llvm/test/CodeGen/SystemZ/shift-12.ll --- a/llvm/test/CodeGen/SystemZ/shift-12.ll +++ b/llvm/test/CodeGen/SystemZ/shift-12.ll @@ -122,25 +122,21 @@ define i128 @f11(i128 %a, i32 %sh) { ; CHECK-LABEL: f11: ; CHECK: # %bb.0: -; CHECK-NEXT: stmg %r14, %r15, 112(%r15) -; CHECK-NEXT: .cfi_offset %r14, -48 -; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: lg %r0, 8(%r3) -; CHECK-NEXT: lg %r1, 0(%r3) -; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 -; CHECK-NEXT: lcr %r14, %r3 -; CHECK-NEXT: sllg %r5, %r1, 0(%r4) -; CHECK-NEXT: srlg %r14, %r0, 0(%r14) -; CHECK-NEXT: ogr %r5, %r14 -; CHECK-NEXT: sllg %r3, %r0, -64(%r3) +; CHECK-NEXT: lg %r0, 0(%r3) +; CHECK-NEXT: lg %r1, 8(%r3) +; CHECK-NEXT: lhi %r3, 64 +; CHECK-NEXT: sr %r3, %r4 +; CHECK-NEXT: srlg %r3, %r1, 0(%r3) +; CHECK-NEXT: sllg %r5, %r0, 0(%r4) +; CHECK-NEXT: ogr %r3, %r5 +; CHECK-NEXT: sllg %r5, %r1, -64(%r4) ; CHECK-NEXT: tmll %r4, 127 -; CHECK-NEXT: locgrle %r3, %r5 -; CHECK-NEXT: sllg %r0, %r0, 0(%r4) -; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locgrle %r5, %r3 +; CHECK-NEXT: locgre %r5, %r0 +; CHECK-NEXT: sllg %r0, %r1, 0(%r4) ; CHECK-NEXT: locghinle %r0, 0 ; CHECK-NEXT: stg %r0, 8(%r2) -; CHECK-NEXT: stg %r3, 0(%r2) -; CHECK-NEXT: lmg %r14, %r15, 112(%r15) +; CHECK-NEXT: stg %r5, 0(%r2) ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 %ext = zext i32 %and to i128 @@ -151,25 +147,21 @@ define i128 @f12(i128 %a, i32 %sh) { ; CHECK-LABEL: f12: ; CHECK: # %bb.0: -; CHECK-NEXT: stmg %r14, %r15, 112(%r15) -; CHECK-NEXT: .cfi_offset %r14, -48 -; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: lg %r0, 0(%r3) -; CHECK-NEXT: lg %r1, 8(%r3) -; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 -; CHECK-NEXT: lcr %r14, %r3 -; CHECK-NEXT: srlg %r5, %r1, 0(%r4) -; CHECK-NEXT: sllg %r14, %r0, 0(%r14) -; CHECK-NEXT: ogr %r5, %r14 -; CHECK-NEXT: srlg %r3, %r0, -64(%r3) +; CHECK-NEXT: lg %r0, 8(%r3) +; CHECK-NEXT: lg %r1, 0(%r3) +; CHECK-NEXT: lhi %r3, 64 +; CHECK-NEXT: sr %r3, %r4 +; CHECK-NEXT: sllg %r3, %r1, 0(%r3) +; CHECK-NEXT: srlg %r5, %r0, 0(%r4) +; CHECK-NEXT: ogr %r3, %r5 +; CHECK-NEXT: srlg %r5, %r1, -64(%r4) ; CHECK-NEXT: tmll %r4, 127 -; CHECK-NEXT: locgrle %r3, %r5 -; CHECK-NEXT: srlg %r0, %r0, 0(%r4) -; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locgrle %r5, %r3 +; CHECK-NEXT: locgre %r5, %r0 +; CHECK-NEXT: srlg %r0, %r1, 0(%r4) ; CHECK-NEXT: locghinle %r0, 0 ; CHECK-NEXT: stg %r0, 0(%r2) -; CHECK-NEXT: stg %r3, 8(%r2) -; CHECK-NEXT: lmg %r14, %r15, 112(%r15) +; CHECK-NEXT: stg %r5, 8(%r2) ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 %ext = zext i32 %and to i128 @@ -183,22 +175,22 @@ ; CHECK-NEXT: stmg %r14, %r15, 112(%r15) ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: lg %r0, 0(%r3) -; CHECK-NEXT: lg %r1, 8(%r3) -; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 -; CHECK-NEXT: lcr %r14, %r3 -; CHECK-NEXT: srlg %r5, %r1, 0(%r4) -; CHECK-NEXT: sllg %r14, %r0, 0(%r14) -; CHECK-NEXT: ogr %r5, %r14 -; CHECK-NEXT: srag %r14, %r0, 0(%r4) -; CHECK-NEXT: srag %r3, %r0, -64(%r3) -; CHECK-NEXT: srag %r0, %r0, 63 +; CHECK-NEXT: lg %r0, 8(%r3) +; CHECK-NEXT: lg %r1, 0(%r3) +; CHECK-NEXT: lhi %r3, 64 +; CHECK-NEXT: sr %r3, %r4 +; CHECK-NEXT: sllg %r3, %r1, 0(%r3) +; CHECK-NEXT: srlg %r5, %r0, 0(%r4) +; CHECK-NEXT: ogr %r3, %r5 +; CHECK-NEXT: srag %r5, %r1, 0(%r4) +; CHECK-NEXT: srag %r14, %r1, -64(%r4) +; CHECK-NEXT: srag %r1, %r1, 63 ; CHECK-NEXT: tmll %r4, 127 -; CHECK-NEXT: locgrle %r3, %r5 -; CHECK-NEXT: locgre %r3, %r1 -; CHECK-NEXT: locgrle %r0, %r14 -; CHECK-NEXT: stg %r0, 0(%r2) -; CHECK-NEXT: stg %r3, 8(%r2) +; CHECK-NEXT: locgrle %r14, %r3 +; CHECK-NEXT: locgre %r14, %r0 +; CHECK-NEXT: locgrle %r1, %r5 +; CHECK-NEXT: stg %r1, 0(%r2) +; CHECK-NEXT: stg %r14, 8(%r2) ; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127