Index: llvm/lib/Target/AVR/AVRISelLowering.h
===================================================================
--- llvm/lib/Target/AVR/AVRISelLowering.h
+++ llvm/lib/Target/AVR/AVRISelLowering.h
@@ -39,14 +39,17 @@
   LSLBN,   ///< Byte logical shift left N bits.
   LSLWN,   ///< Word logical shift left N bits.
   LSLHI,   ///< Higher 8-bit of word logical shift left.
+  LSLW,    ///< Wide logical shift left.
   LSR,     ///< Logical shift right.
   LSRBN,   ///< Byte logical shift right N bits.
   LSRWN,   ///< Word logical shift right N bits.
   LSRLO,   ///< Lower 8-bit of word logical shift right.
+  LSRW,    ///< Wide logical shift right.
   ASR,     ///< Arithmetic shift right.
   ASRBN,   ///< Byte arithmetic shift right N bits.
   ASRWN,   ///< Word arithmetic shift right N bits.
   ASRLO,   ///< Lower 8-bit of word arithmetic shift right.
+  ASRW,    ///< Wide arithmetic shift right.
   ROR,     ///< Bit rotate right.
   ROL,     ///< Bit rotate left.
   LSLLOOP, ///< A loop of single logical shift left instructions.
@@ -186,6 +189,8 @@
 private:
   MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
+  MachineBasicBlock *insertWideShift(MachineInstr &MI,
+                                     MachineBasicBlock *BB) const;
   MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
   MachineBasicBlock *insertCopyZero(MachineInstr &MI,
                                     MachineBasicBlock *BB) const;
Index: llvm/lib/Target/AVR/AVRISelLowering.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -88,6 +88,9 @@
   setOperationAction(ISD::SRA, MVT::i16, Custom);
   setOperationAction(ISD::SHL, MVT::i16, Custom);
   setOperationAction(ISD::SRL, MVT::i16, Custom);
+  setOperationAction(ISD::SRA, MVT::i32, Custom);
+  setOperationAction(ISD::SHL, MVT::i32, Custom);
+  setOperationAction(ISD::SRL, MVT::i32, Custom);
   setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
   setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
   setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
@@ -247,10 +250,13 @@
     NODE(CALL);
     NODE(WRAPPER);
     NODE(LSL);
+    NODE(LSLW);
     NODE(LSR);
+    NODE(LSRW);
     NODE(ROL);
     NODE(ROR);
     NODE(ASR);
+    NODE(ASRW);
     NODE(LSLLOOP);
     NODE(LSRLOOP);
     NODE(ROLLOOP);
@@ -279,6 +285,41 @@
   assert(isPowerOf2_32(VT.getSizeInBits()) &&
          "Expected power-of-2 shift amount");
 
+  if (VT.getSizeInBits() == 32) {
+    if (!isa<ConstantSDNode>(N->getOperand(1))) {
+      // 32-bit shifts with a non-constant shift amount are converted to a
+      // loop in IR, so this should be unreachable.
+      report_fatal_error("Expected a constant shift amount!");
+    }
+    SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16);
+    SDValue SrcLo =
+        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
+                    DAG.getConstant(0, dl, MVT::i16));
+    SDValue SrcHi =
+        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
+                    DAG.getConstant(1, dl, MVT::i16));
+    uint64_t ShiftAmount =
+        cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);
+    unsigned Opc;
+    switch (Op.getOpcode()) {
+    default:
+      llvm_unreachable("Invalid 32-bit shift opcode!");
+    case ISD::SHL:
+      Opc = AVRISD::LSLW;
+      break;
+    case ISD::SRL:
+      Opc = AVRISD::LSRW;
+      break;
+    case ISD::SRA:
+      Opc = AVRISD::ASRW;
+      break;
+    }
+    SDValue Result = DAG.getNode(Opc, dl, ResTys, SrcLo, SrcHi, Cnt);
+    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Result.getValue(0),
+                       Result.getValue(1));
+  }
+
   // Expand non-constant shifts to loops.
   if (!isa<ConstantSDNode>(N->getOperand(1))) {
     switch (Op.getOpcode()) {
@@ -1789,6 +1830,114 @@
   return RemBB;
 }
 
+// Do a multibyte AVR shift. Insert shift instructions and put the output
+// registers in the Regs array.
+// Because AVR does not have a normal shift instruction (only a single-bit
+// shift instruction), we have to emulate this behavior with other
+// instructions.
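+// For example, a single left shift of a 32-bit value held in r25:r22 is
+// emitted as:
+//   lsl r22
+//   rol r23
+//   rol r24
+//   rol r25
+// where lsl and rol are the assembler aliases for add/adc with two identical
+// operands, i.e. the ADDRdRr and ADCRdRr instructions built below.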
+static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB,
+                                 MutableArrayRef<std::pair<Register, int>> Regs,
+                                 ISD::NodeType Opc, int64_t ShiftAmt) {
+  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+  const DebugLoc &dl = MI.getDebugLoc();
+
+  bool ShiftLeft = Opc == ISD::SHL;
+  bool ArithmeticShift = Opc == ISD::SRA;
+
+  // Shift by one bit at a time. This is the fallback that always works: any
+  // constant shift amount can be lowered to a sequence of single-bit shifts.
+  while (ShiftLeft && ShiftAmt) {
+    // Shift one to the left.
+    for (ssize_t I = Regs.size() - 1; I >= 0; I--) {
+      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
+      Register In = Regs[I].first;
+      Register InSubreg = Regs[I].second;
+      if (I == (ssize_t)Regs.size() - 1) { // first iteration
+        BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out)
+            .addReg(In, 0, InSubreg)
+            .addReg(In, 0, InSubreg);
+      } else {
+        BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out)
+            .addReg(In, 0, InSubreg)
+            .addReg(In, 0, InSubreg);
+      }
+      Regs[I] = std::pair(Out, 0);
+    }
+    ShiftAmt--;
+  }
+  while (!ShiftLeft && ShiftAmt) {
+    // Shift one to the right.
+    for (size_t I = 0; I < Regs.size(); I++) {
+      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
+      Register In = Regs[I].first;
+      Register InSubreg = Regs[I].second;
+      if (I == 0) {
+        unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd;
+        BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg);
+      } else {
+        BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg);
+      }
+      Regs[I] = std::pair(Out, 0);
+    }
+    ShiftAmt--;
+  }
+
+  if (ShiftAmt != 0) {
+    llvm_unreachable("don't know how to shift!"); // sanity check
+  }
+}
+
+// Do a wide (32-bit) shift.
+MachineBasicBlock *
+AVRTargetLowering::insertWideShift(MachineInstr &MI,
+                                   MachineBasicBlock *BB) const {
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  const DebugLoc &dl = MI.getDebugLoc();
+
+  // Read the constant shift amount; the shift direction is determined by the
+  // opcode of the pseudo instruction.
+  int64_t ShiftAmt = MI.getOperand(4).getImm();
+  ISD::NodeType Opc;
+  switch (MI.getOpcode()) {
+  default:
+    llvm_unreachable("unexpected wide shift opcode!");
+  case AVR::Lsl32:
+    Opc = ISD::SHL;
+    break;
+  case AVR::Lsr32:
+    Opc = ISD::SRL;
+    break;
+  case AVR::Asr32:
+    Opc = ISD::SRA;
+    break;
+  }
+
+  // Read the input registers, with the most significant register at index 0.
+  std::array<std::pair<Register, int>, 4> Registers = {
+      std::pair(MI.getOperand(3).getReg(), AVR::sub_hi),
+      std::pair(MI.getOperand(3).getReg(), AVR::sub_lo),
+      std::pair(MI.getOperand(2).getReg(), AVR::sub_hi),
+      std::pair(MI.getOperand(2).getReg(), AVR::sub_lo),
+  };
+
+  // Do the shift. The registers are modified in-place.
+  insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt);
+
+  // Combine the 8-bit registers into 16-bit register pairs.
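+  // Registers[0..1] hold the most significant word and therefore form $dsthi
+  // (operand 1); Registers[2..3] form $dstlo (operand 0). See the Lsl32,
+  // Lsr32 and Asr32 pseudo definitions below.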
+  BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
+      .addReg(Registers[0].first, 0, Registers[0].second)
+      .addImm(AVR::sub_hi)
+      .addReg(Registers[1].first, 0, Registers[1].second)
+      .addImm(AVR::sub_lo);
+  BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
+      .addReg(Registers[2].first, 0, Registers[2].second)
+      .addImm(AVR::sub_hi)
+      .addReg(Registers[3].first, 0, Registers[3].second)
+      .addImm(AVR::sub_lo);
+
+  // Remove the pseudo instruction.
+  MI.eraseFromParent();
+  return BB;
+}
+
 static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
   if (I->getOpcode() == AVR::COPY) {
     Register SrcReg = I->getOperand(1).getReg();
@@ -1901,6 +2050,10 @@
   case AVR::Asr8:
   case AVR::Asr16:
     return insertShift(MI, MBB);
+  case AVR::Lsl32:
+  case AVR::Lsr32:
+  case AVR::Asr32:
+    return insertWideShift(MI, MBB);
   case AVR::MULRdRr:
   case AVR::MULSRdRr:
     return insertMul(MI, MBB);
Index: llvm/lib/Target/AVR/AVRInstrInfo.td
===================================================================
--- llvm/lib/Target/AVR/AVRInstrInfo.td
+++ llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -69,6 +69,9 @@
 def AVRlslwn : SDNode<"AVRISD::LSLWN", SDTIntBinOp>;
 def AVRlsrwn : SDNode<"AVRISD::LSRWN", SDTIntBinOp>;
 def AVRasrwn : SDNode<"AVRISD::ASRWN", SDTIntBinOp>;
+def AVRlslw : SDNode<"AVRISD::LSLW", SDTIntShiftDOp>;
+def AVRlsrw : SDNode<"AVRISD::LSRW", SDTIntShiftDOp>;
+def AVRasrw : SDNode<"AVRISD::ASRW", SDTIntShiftDOp>;
 
 // Pseudo shift nodes for non-constant shift amounts.
 def AVRlslLoop : SDNode<"AVRISD::LSLLOOP", SDTIntShiftOp>;
@@ -2321,6 +2324,11 @@
                         : $src, i8
                         : $cnt))]>;
 
+def Lsl32 : ShiftPseudo<(outs DREGS:$dstlo, DREGS:$dsthi),
+                        (ins DREGS:$srclo, DREGS:$srchi, i8imm:$cnt),
+                        "# Lsl32 PSEUDO",
+                        [(set i16:$dstlo, i16:$dsthi, (AVRlslw i16:$srclo, i16:$srchi, i8:$cnt))]>;
+
 def Lsr8 : ShiftPseudo<(outs GPR8
                         : $dst),
                        (ins GPR8
@@ -2341,6 +2349,11 @@
                         : $src, i8
                         : $cnt))]>;
 
+def Lsr32 : ShiftPseudo<(outs DREGS:$dstlo, DREGS:$dsthi),
+                        (ins DREGS:$srclo, DREGS:$srchi, i8imm:$cnt),
+                        "# Lsr32 PSEUDO",
+                        [(set i16:$dstlo, i16:$dsthi, (AVRlsrw i16:$srclo, i16:$srchi, i8:$cnt))]>;
+
 def Rol8 : ShiftPseudo<(outs GPR8
                         : $dst),
                        (ins GPR8
@@ -2401,6 +2414,11 @@
                         : $src, i8
                         : $cnt))]>;
 
+def Asr32 : ShiftPseudo<(outs DREGS:$dstlo, DREGS:$dsthi),
+                        (ins DREGS:$srclo, DREGS:$srchi, i8imm:$cnt),
+                        "# Asr32 PSEUDO",
+                        [(set i16:$dstlo, i16:$dsthi, (AVRasrw i16:$srclo, i16:$srchi, i8:$cnt))]>;
+
 // lowered to a copy from the zero register.
 let usesCustomInserter=1 in
 def CopyZero : Pseudo<(outs GPR8:$rd), (ins), "clrz\t$rd", [(set i8:$rd, 0)]>;
Index: llvm/test/CodeGen/AVR/avr-rust-issue-123.ll
===================================================================
--- llvm/test/CodeGen/AVR/avr-rust-issue-123.ll
+++ llvm/test/CodeGen/AVR/avr-rust-issue-123.ll
@@ -46,10 +46,10 @@
   store i8 %tmp3, i8* getelementptr inbounds (%UInt8, %UInt8* @delayFactor, i64 0, i32 0), align 1
   %tmp4 = zext i8 %tmp3 to i32
   %tmp5 = mul nuw nsw i32 %tmp4, 100
+  ; CHECK: sts delay+1, r{{[0-9]+}}
+  ; CHECK-NEXT: sts delay, r{{[0-9]+}}
   ; CHECK: sts delay+3, r{{[0-9]+}}
   ; CHECK-NEXT: sts delay+2, r{{[0-9]+}}
-  ; CHECK-NEXT: sts delay+1, r{{[0-9]+}}
-  ; CHECK-NEXT: sts delay, r{{[0-9]+}}
   store i32 %tmp5, i32* getelementptr inbounds (%UInt32, %UInt32* @delay, i64 0, i32 0), align 4
   tail call void @eeprom_write(i16 34, i8 %tmp3)
   br label %bb7
Index: llvm/test/CodeGen/AVR/div.ll
===================================================================
--- llvm/test/CodeGen/AVR/div.ll
+++ llvm/test/CodeGen/AVR/div.ll
@@ -44,9 +44,7 @@
 define i32 @udiv32(i32 %a, i32 %b) {
 ; CHECK-LABEL: udiv32:
 ; CHECK: call __udivmodsi4
-; CHECK-NEXT: movw r22, r18
-; CHECK-NEXT: movw r24, r20
-; CHECK-NEXT: ret
+; CHECK: ret
   %quot = udiv i32 %a, %b
   ret i32 %quot
 }
@@ -55,9 +53,7 @@
 define i32 @sdiv32(i32 %a, i32 %b) {
 ; CHECK-LABEL: sdiv32:
 ; CHECK: call __divmodsi4
-; CHECK-NEXT: movw r22, r18
-; CHECK-NEXT: movw r24, r20
-; CHECK-NEXT: ret
+; CHECK: ret
   %quot = sdiv i32 %a, %b
   ret i32 %quot
 }
Index: llvm/test/CodeGen/AVR/rem.ll
===================================================================
--- llvm/test/CodeGen/AVR/rem.ll
+++ llvm/test/CodeGen/AVR/rem.ll
@@ -42,7 +42,7 @@
 define i32 @urem32(i32 %a, i32 %b) {
 ; CHECK-LABEL: urem32:
 ; CHECK: call __udivmodsi4
-; CHECK-NEXT: ret
+; CHECK: ret
   %rem = urem i32 %a, %b
   ret i32 %rem
 }
@@ -51,7 +51,7 @@
 define i32 @srem32(i32 %a, i32 %b) {
 ; CHECK-LABEL: srem32:
 ; CHECK: call __divmodsi4
-; CHECK-NEXT: ret
+; CHECK: ret
   %rem = srem i32 %a, %b
   ret i32 %rem
 }
Index: llvm/test/CodeGen/AVR/return.ll
===================================================================
--- llvm/test/CodeGen/AVR/return.ll
+++ llvm/test/CodeGen/AVR/return.ll
@@ -390,10 +390,76 @@
 ; AVR-NEXT: push r29
 ; AVR-NEXT: in r28, 61
 ; AVR-NEXT: in r29, 62
+; AVR-NEXT: ldd r18, Y+7
+; AVR-NEXT: ldd r19, Y+8
 ; AVR-NEXT: ldd r22, Y+5
 ; AVR-NEXT: ldd r23, Y+6
-; AVR-NEXT: ldd r24, Y+7
-; AVR-NEXT: ldd r25, Y+8
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: mov r25, r23
+; AVR-NEXT: ror r25
+; AVR-NEXT: mov r24, r22
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
+; AVR-NEXT: lsr r19
+; AVR-NEXT: ror r18
+; AVR-NEXT: ror r25
+; AVR-NEXT: ror r24
 ; AVR-NEXT: pop r29
 ; AVR-NEXT: pop r28
 ; AVR-NEXT: ret
@@ -405,25 +471,91 @@
 ; TINY-NEXT: in r28, 61
 ; TINY-NEXT: in r29, 62
 ; TINY-NEXT: in r16, 63
-; TINY-NEXT: subi r28, 243
+; TINY-NEXT: subi r28, 241
 ; TINY-NEXT: sbci r29, 255
-; TINY-NEXT: ld r22, Y+
-; TINY-NEXT: ld r23, Y+
+; TINY-NEXT: ld r20, Y+
+; TINY-NEXT: ld r21, Y+
 ; TINY-NEXT: subi r28, 2
 ; TINY-NEXT: sbci r29, 0
-; TINY-NEXT: subi r28, 13
+; TINY-NEXT: subi r28, 15
 ; TINY-NEXT: sbci r29, 0
 ; TINY-NEXT: out 63, r16
 ; TINY-NEXT: in r16, 63
-; TINY-NEXT: subi r28, 241
+; TINY-NEXT: subi r28, 243
 ; TINY-NEXT: sbci r29, 255
-; TINY-NEXT: ld r24, Y+
-; TINY-NEXT: ld r25, Y+
+; TINY-NEXT: ld r22, Y+
+; TINY-NEXT: ld r23, Y+
 ; TINY-NEXT: subi r28, 2
 ; TINY-NEXT: sbci r29, 0
-; TINY-NEXT: subi r28, 15
+; TINY-NEXT: subi r28, 13
 ; TINY-NEXT: sbci r29, 0
 ; TINY-NEXT: out 63, r16
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: mov r25, r23
+; TINY-NEXT: ror r25
+; TINY-NEXT: mov r24, r22
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
+; TINY-NEXT: lsr r21
+; TINY-NEXT: ror r20
+; TINY-NEXT: ror r25
+; TINY-NEXT: ror r24
 ; TINY-NEXT: pop r29
 ; TINY-NEXT: pop r28
 ; TINY-NEXT: ret
Index: llvm/test/CodeGen/AVR/shift32.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AVR/shift32.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=avr -mattr=movw -verify-machineinstrs | FileCheck %s
+
+; Lowering of constant 32-bit shift instructions.
+; These functions are tested separately from shift.ll, mainly because the
+; assertions below are generated by update_llc_test_checks.py.
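+; Note: in the avr calling convention an i32 value is passed and returned in
+; r25:r22, with r22 holding the least significant byte.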
+
+define i32 @shl_i32_1(i32 %a) {
+; CHECK-LABEL: shl_i32_1:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl r22
+; CHECK-NEXT:    rol r23
+; CHECK-NEXT:    rol r24
+; CHECK-NEXT:    rol r25
+; CHECK-NEXT:    ret
+  %res = shl i32 %a, 1
+  ret i32 %res
+}
+
+define i32 @shl_i32_2(i32 %a) {
+; CHECK-LABEL: shl_i32_2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl r22
+; CHECK-NEXT:    rol r23
+; CHECK-NEXT:    rol r24
+; CHECK-NEXT:    rol r25
+; CHECK-NEXT:    lsl r22
+; CHECK-NEXT:    rol r23
+; CHECK-NEXT:    rol r24
+; CHECK-NEXT:    rol r25
+; CHECK-NEXT:    ret
+  %res = shl i32 %a, 2
+  ret i32 %res
+}
+
+define i32 @lshr_i32_1(i32 %a) {
+; CHECK-LABEL: lshr_i32_1:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsr r25
+; CHECK-NEXT:    ror r24
+; CHECK-NEXT:    ror r23
+; CHECK-NEXT:    ror r22
+; CHECK-NEXT:    ret
+  %res = lshr i32 %a, 1
+  ret i32 %res
+}
+
+define i32 @lshr_i32_2(i32 %a) {
+; CHECK-LABEL: lshr_i32_2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsr r25
+; CHECK-NEXT:    ror r24
+; CHECK-NEXT:    ror r23
+; CHECK-NEXT:    ror r22
+; CHECK-NEXT:    lsr r25
+; CHECK-NEXT:    ror r24
+; CHECK-NEXT:    ror r23
+; CHECK-NEXT:    ror r22
+; CHECK-NEXT:    ret
+  %res = lshr i32 %a, 2
+  ret i32 %res
+}
+
+define i32 @ashr_i32_1(i32 %a) {
+; CHECK-LABEL: ashr_i32_1:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    asr r25
+; CHECK-NEXT:    ror r24
+; CHECK-NEXT:    ror r23
+; CHECK-NEXT:    ror r22
+; CHECK-NEXT:    ret
+  %res = ashr i32 %a, 1
+  ret i32 %res
+}
+
+define i32 @ashr_i32_2(i32 %a) {
+; CHECK-LABEL: ashr_i32_2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    asr r25
+; CHECK-NEXT:    ror r24
+; CHECK-NEXT:    ror r23
+; CHECK-NEXT:    ror r22
+; CHECK-NEXT:    asr r25
+; CHECK-NEXT:    ror r24
+; CHECK-NEXT:    ror r23
+; CHECK-NEXT:    ror r22
+; CHECK-NEXT:    ret
+  %res = ashr i32 %a, 2
+  ret i32 %res
+}