Index: llvm/lib/Target/AVR/AVR.h
===================================================================
--- llvm/lib/Target/AVR/AVR.h
+++ llvm/lib/Target/AVR/AVR.h
@@ -78,6 +78,19 @@
   return false;
 }
 
+// Get the index of the program memory bank.
+//  -1: not program memory
+//   0: ordinary program memory
+// 1~5: extended program memory
+inline int getProgramMemoryBank(MemSDNode const *N) {
+  auto *V = N->getMemOperand()->getValue();
+  if (V == nullptr || !isProgramMemoryAddress(V))
+    return -1;
+  AddressSpace AS = getAddressSpace(V);
+  assert(ProgramMemory <= AS && AS <= ProgramMemory5);
+  return static_cast<int>(AS - ProgramMemory);
+}
+
 } // end of namespace AVR
 
 } // end namespace llvm
Index: llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -101,6 +101,9 @@
   bool expandLSLW12Rd(Block &MBB, BlockIt MBBI);
   bool expandLSRW12Rd(Block &MBB, BlockIt MBBI);
 
+  // Common implementation of LPMWRdZ and ELPMWRdZ.
+  bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
+
   /// Scavenges a free GPR8 register for use.
   Register scavengeGPR8(MachineInstr &MI);
 };
@@ -808,18 +811,51 @@
   return true;
 }
 
-template <>
-bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) {
   MachineInstr &MI = *MBBI;
   Register DstLoReg, DstHiReg;
   Register DstReg = MI.getOperand(0).getReg();
   Register TmpReg = 0; // 0 for no temporary register
   Register SrcReg = MI.getOperand(1).getReg();
   bool SrcIsKill = MI.getOperand(1).isKill();
-  unsigned OpLo = AVR::LPMRdZPi;
-  unsigned OpHi = AVR::LPMRdZ;
+  unsigned OpLo = IsExt ? AVR::ELPMRdZPi : AVR::LPMRdZPi;
+  unsigned OpHi = IsExt ? AVR::ELPMRdZ : AVR::LPMRdZ;
   TRI->splitReg(DstReg, DstLoReg, DstHiReg);
 
+  // Set the I/O register RAMPZ for ELPM.
+  if (IsExt) {
+    const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+    Register TmpReg = scavengeGPR8(MI);
+    int Bank = MI.getOperand(2).getImm();
+    assert((1 <= Bank && Bank <= 5) &&
+           "unexpected extended program memory bank");
+    if (TmpReg > AVR::R15) { // LDIRdK only supports Rd in the range R16~R31.
+      // ldi rtmp, bank
+      buildMI(MBB, MBBI, AVR::LDIRdK)
+          .addReg(TmpReg, RegState::Define)
+          .addImm(Bank);
+      // out RAMPZ, rtmp
+      buildMI(MBB, MBBI, AVR::OUTARr)
+          .addImm(STI.getIORegRAMPZ())
+          .addReg(TmpReg, RegState::Kill);
+    } else {
+      // mov rtmp, r30
+      buildMI(MBB, MBBI, AVR::MOVRdRr)
+          .addReg(TmpReg, RegState::Define)
+          .addReg(AVR::R30);
+      // ldi r30, bank
+      buildMI(MBB, MBBI, AVR::LDIRdK).addReg(AVR::R30).addImm(Bank);
+      // out RAMPZ, r30
+      buildMI(MBB, MBBI, AVR::OUTARr)
+          .addImm(STI.getIORegRAMPZ())
+          .addReg(AVR::R30);
+      // mov r30, rtmp
+      buildMI(MBB, MBBI, AVR::MOVRdRr)
+          .addReg(AVR::R30)
+          .addReg(TmpReg, RegState::Kill);
+    }
+  }
+
   // Use a temporary register if src and dst registers are the same.
   if (DstReg == SrcReg)
     TmpReg = scavengeGPR8(MI);
 
@@ -856,9 +892,79 @@
   return true;
 }
 
+template <>
+bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
+  return expandLPMWELPMW(MBB, MBBI, false);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMWRdZ>(Block &MBB, BlockIt MBBI) {
+  return expandLPMWELPMW(MBB, MBBI, true);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register TmpReg = scavengeGPR8(MI);
+  bool SrcIsKill = MI.getOperand(1).isKill();
+  int Bank = MI.getOperand(2).getImm();
+  const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+
+  assert((1 <= Bank && Bank <= 5) && "unexpected extended program memory bank");
+
+  // Set the I/O register RAMPZ for ELPM.
+  if (TmpReg > AVR::R15) { // LDIRdK only supports Rd in the range R16~R31.
+    // ldi rtmp, bank
+    buildMI(MBB, MBBI, AVR::LDIRdK)
+        .addReg(TmpReg, RegState::Define)
+        .addImm(Bank);
+    // out RAMPZ, rtmp
+    buildMI(MBB, MBBI, AVR::OUTARr)
+        .addImm(STI.getIORegRAMPZ())
+        .addReg(TmpReg, RegState::Kill);
+  } else {
+    // mov rtmp, r30
+    buildMI(MBB, MBBI, AVR::MOVRdRr)
+        .addReg(TmpReg, RegState::Define)
+        .addReg(AVR::R30);
+    // ldi r30, bank
+    buildMI(MBB, MBBI, AVR::LDIRdK).addReg(AVR::R30).addImm(Bank);
+    // out RAMPZ, r30
+    buildMI(MBB, MBBI, AVR::OUTARr)
+        .addImm(STI.getIORegRAMPZ())
+        .addReg(AVR::R30);
+    // mov r30, rtmp
+    buildMI(MBB, MBBI, AVR::MOVRdRr)
+        .addReg(AVR::R30)
+        .addReg(TmpReg, RegState::Kill);
+  }
+
+  // Load byte.
+  auto MILB = buildMI(MBB, MBBI, AVR::ELPMRdZ)
+                  .addReg(DstReg, RegState::Define)
+                  .addReg(SrcReg, getKillRegState(SrcIsKill));
+
+  MILB.setMemRefs(MI.memoperands());
+
+  MI.eraseFromParent();
+  return true;
+}
+
 template <>
 bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
-  llvm_unreachable("wide LPMPi is unimplemented");
+  llvm_unreachable("16-bit LPMPi is unimplemented");
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZPi>(Block &MBB, BlockIt MBBI) {
+  llvm_unreachable("byte ELPMPi is unimplemented");
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMWRdZPi>(Block &MBB, BlockIt MBBI) {
+  llvm_unreachable("16-bit ELPMPi is unimplemented");
 }
 
 template <typename Func>
@@ -2158,6 +2264,10 @@
     EXPAND(AVR::LDDWRdPtrQ);
     EXPAND(AVR::LPMWRdZ);
     EXPAND(AVR::LPMWRdZPi);
+    EXPAND(AVR::ELPMBRdZ);
+    EXPAND(AVR::ELPMWRdZ);
+    EXPAND(AVR::ELPMBRdZPi);
+    EXPAND(AVR::ELPMWRdZPi);
     EXPAND(AVR::AtomicLoad8);
     EXPAND(AVR::AtomicLoad16);
     EXPAND(AVR::AtomicStore8);
Index: llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -38,7 +38,7 @@
   bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Disp);
 
   bool selectIndexedLoad(SDNode *N);
-  unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT);
+  unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT, int Bank);
 
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode,
                                     std::vector<SDValue> &OutOps) override;
@@ -165,24 +165,28 @@
   return true;
 }
 
-unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
-                                                   MVT VT) {
+unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT,
+                                                   int Bank) {
   // Progmem indexed loads only work in POSTINC mode.
   if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LD->getAddressingMode() != ISD::POST_INC)
    return 0;
 
+  // The ELPM feature is needed for loading from extended program memory.
+  assert((Bank == 0 || Subtarget->hasELPM()) &&
+         "cannot load from extended program memory on this mcu");
+
   unsigned Opcode = 0;
   int Offs = cast<ConstantSDNode>(LD->getOffset())->getSExtValue();
 
   switch (VT.SimpleTy) {
   case MVT::i8:
     if (Offs == 1)
-      Opcode = AVR::LPMRdZPi;
+      Opcode = Bank > 0 ? AVR::ELPMBRdZPi : AVR::LPMRdZPi;
     break;
   case MVT::i16:
     if (Offs == 2)
-      Opcode = AVR::LPMWRdZPi;
+      Opcode = Bank > 0 ? AVR::ELPMWRdZPi : AVR::LPMWRdZPi;
     break;
   default:
     break;
@@ -354,6 +358,12 @@
   assert(Subtarget->hasLPM() && "cannot load from program memory on this mcu");
 
+  // For subtargets with LPM but without ELPM, program memory banks 1~5 fall
+  // back to bank 0.
+  int ProgMemBank = AVR::getProgramMemoryBank(LD);
+  if (!Subtarget->hasELPM() && ProgMemBank > 0)
+    ProgMemBank = 0;
+
   // This is a flash memory load, move the pointer into R31R30 and emit
   // the lpm instruction.
   MVT VT = LD->getMemoryVT().getSimpleVT();
@@ -361,25 +371,39 @@
   SDValue Ptr = LD->getBasePtr();
   SDNode *ResNode;
   SDLoc DL(N);
+  SDValue RZ = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
 
   Chain = CurDAG->getCopyToReg(Chain, DL, AVR::R31R30, Ptr, SDValue());
   Ptr = CurDAG->getCopyFromReg(Chain, DL, AVR::R31R30, MVT::i16,
                                Chain.getValue(1));
 
   // Check if the opcode can be converted into an indexed load.
-  if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT)) {
+  if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT, ProgMemBank)) {
     // It is legal to fold the load into an indexed load.
-    ResNode = CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr);
+    if (ProgMemBank == 0)
+      ResNode =
+          CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr);
+    else
+      ResNode =
+          CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr, RZ);
   } else {
     // Selecting an indexed load is not legal, fallback to a normal load.
     switch (VT.SimpleTy) {
     case MVT::i8:
-      ResNode =
-          CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other, Ptr);
+      if (ProgMemBank == 0)
+        ResNode =
+            CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other, Ptr);
+      else
+        ResNode = CurDAG->getMachineNode(AVR::ELPMBRdZ, DL, MVT::i8, MVT::Other,
+                                         Ptr, RZ);
       break;
     case MVT::i16:
-      ResNode =
-          CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other, Ptr);
+      if (ProgMemBank == 0)
+        ResNode =
+            CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other, Ptr);
+      else
+        ResNode = CurDAG->getMachineNode(AVR::ELPMWRdZ, DL, MVT::i16,
+                                         MVT::Other, Ptr, RZ);
       break;
     default:
       llvm_unreachable("Unsupported VT!");
Index: llvm/lib/Target/AVR/AVRInstrInfo.td
===================================================================
--- llvm/lib/Target/AVR/AVRInstrInfo.td
+++ llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1699,21 +1699,34 @@
         : F16<0b1001010111011000, (outs), (ins), "elpm", []>,
           Requires<[HasELPM]>;
 
-  def ELPMRdZ : FLPMX<1, 0,
-                      (outs GPR8
-                       : $dst),
-                      (ins ZREG
-                       : $z),
+  def ELPMRdZ : FLPMX<1, 0, (outs GPR8:$dst), (ins ZREG:$z),
                       "elpm\t$dst, $z", []>,
                 Requires<[HasELPMX]>;
 
-  let Defs = [R31R30] in def ELPMRdZPi : FLPMX<1, 1,
-                                               (outs GPR8
-                                                : $dst),
-                                               (ins ZREG
-                                                : $z),
-                                               "elpm\t$dst, $z+", []>,
-                                         Requires<[HasELPMX]>;
+  let Defs = [R31R30] in {
+    def ELPMRdZPi : FLPMX<1, 1, (outs GPR8:$dst), (ins ZREG:$z),
+                          "elpm\t$dst, $z+", []>,
+                    Requires<[HasELPMX]>;
+  }
+
+  // These pseudos combine the OUT and ELPM instructions.
+  let Defs = [R31R30], hasSideEffects = 1 in {
+    def ELPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, imm_port6:$p),
+                          "elpmb\t$dst, $z, $p", []>,
+                   Requires<[HasELPMX]>;
+
+    def ELPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, imm_port6:$p),
+                          "elpmw\t$dst, $z, $p", []>,
+                   Requires<[HasELPMX]>;
+
+    def ELPMBRdZPi : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, imm_port6:$p),
+                            "elpmb\t$dst, $z+, $p", []>,
+                     Requires<[HasELPMX]>;
+
+    def ELPMWRdZPi : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, imm_port6:$p),
+                            "elpmw\t$dst, $z+, $p", []>,
+                     Requires<[HasELPMX]>;
+  }
 }
 
 // Store program memory operations.
Index: llvm/lib/Target/AVR/AVRSubtarget.h
===================================================================
--- llvm/lib/Target/AVR/AVRSubtarget.h
+++ llvm/lib/Target/AVR/AVRSubtarget.h
@@ -91,6 +91,11 @@
     return ELFArch;
   }
 
+  /// Get the address of the I/O register RAMPZ.
+  int getIORegRAMPZ(void) const {
+    return 0x3b;
+  }
+
 private:
   /// The ELF e_flags architecture.
   unsigned ELFArch;
Index: llvm/test/CodeGen/AVR/elpm.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AVR/elpm.ll
@@ -0,0 +1,403 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=avr --mcpu=atmega2560 | FileCheck %s -check-prefix=CHECK-AVR5
+; RUN: llc < %s -mtriple=avr --mcpu=at90s8515 | FileCheck %s -check-prefix=CHECK-AVR2
+
+@arr0 = addrspace(1) constant [4 x i16] [i16 123, i16 24, i16 56, i16 37], align 1
+@arr1 = addrspace(2) constant [4 x i16] [i16 123, i16 34, i16 46, i16 27], align 1
+@arr2 = addrspace(3) constant [4 x i16] [i16 123, i16 23, i16 45, i16 17], align 1
+
+define i16 @foo0(i16 %a, i16 %b) {
+; CHECK-AVR5-LABEL: foo0:
+; CHECK-AVR5:       ; %bb.0: ; %entry
+; CHECK-AVR5-NEXT:    lsl r22
+; CHECK-AVR5-NEXT:    rol r23
+; CHECK-AVR5-NEXT:    subi r22, -lo8(arr0)
+; CHECK-AVR5-NEXT:    sbci r23, -hi8(arr0)
+; CHECK-AVR5-NEXT:    movw r30, r22
+; CHECK-AVR5-NEXT:    lpm r18, Z+
+; CHECK-AVR5-NEXT:    lpm r19, Z
+; CHECK-AVR5-NEXT:    lsl r24
+; CHECK-AVR5-NEXT:    rol r25
+; CHECK-AVR5-NEXT:    subi r24, -lo8(arr0)
+; CHECK-AVR5-NEXT:    sbci r25, -hi8(arr0)
+; CHECK-AVR5-NEXT:    movw r30, r24
+; CHECK-AVR5-NEXT:    lpm r24, Z+
+; CHECK-AVR5-NEXT:    lpm r25, Z
+; CHECK-AVR5-NEXT:    sub r24, r18
+; CHECK-AVR5-NEXT:    sbc r25, r19
+; CHECK-AVR5-NEXT:    ret
+;
+; CHECK-AVR2-LABEL: foo0:
+; CHECK-AVR2:       ; %bb.0: ; %entry
+; CHECK-AVR2-NEXT:    lsl r22
+; CHECK-AVR2-NEXT:    rol r23
+; CHECK-AVR2-NEXT:    subi r22, -lo8(arr0)
+; CHECK-AVR2-NEXT:    sbci r23, -hi8(arr0)
+; CHECK-AVR2-NEXT:    mov r30, r22
+; CHECK-AVR2-NEXT:    mov r31, r23
+; CHECK-AVR2-NEXT:    lpm r18, Z+
+; CHECK-AVR2-NEXT:    lpm r19, Z
+; CHECK-AVR2-NEXT:    lsl r24
+; CHECK-AVR2-NEXT:    rol r25
+; CHECK-AVR2-NEXT:    subi r24, -lo8(arr0)
+; CHECK-AVR2-NEXT:    sbci r25, -hi8(arr0)
+; CHECK-AVR2-NEXT:    mov r30, r24
+; CHECK-AVR2-NEXT:    mov r31, r25
+; CHECK-AVR2-NEXT:    lpm r24, Z+
+; CHECK-AVR2-NEXT:    lpm r25, Z
+; CHECK-AVR2-NEXT:    sub r24, r18
+; CHECK-AVR2-NEXT:    sbc r25, r19
+; CHECK-AVR2-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %a
+  %0 = load i16, i16 addrspace(1)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %b
+  %1 = load i16, i16 addrspace(1)* %arrayidx1, align 1
+  %sub = sub nsw i16 %0, %1
+  ret i16 %sub
+}
+
+define i16 @foo1(i16 %a, i16 %b) {
+; CHECK-AVR5-LABEL: foo1:
+; CHECK-AVR5:       ; %bb.0: ; %entry
+; CHECK-AVR5-NEXT:    lsl r22
+; CHECK-AVR5-NEXT:    rol r23
+; CHECK-AVR5-NEXT:    subi r22, -lo8(arr1)
+; CHECK-AVR5-NEXT:    sbci r23, -hi8(arr1)
+; CHECK-AVR5-NEXT:    movw r30, r22
+; CHECK-AVR5-NEXT:    ldi r20, 1
+; CHECK-AVR5-NEXT:    out 59, r20
+; CHECK-AVR5-NEXT:    elpm r18, Z+
+; CHECK-AVR5-NEXT:    elpm r19, Z
+; CHECK-AVR5-NEXT:    lsl r24
+; CHECK-AVR5-NEXT:    rol r25
+; CHECK-AVR5-NEXT:    subi r24, -lo8(arr0)
+; CHECK-AVR5-NEXT:    sbci r25, -hi8(arr0)
+; CHECK-AVR5-NEXT:    movw r30, r24
+; CHECK-AVR5-NEXT:    lpm r24, Z+
+; CHECK-AVR5-NEXT:    lpm r25, Z
+; CHECK-AVR5-NEXT:    sub r24, r18
+; CHECK-AVR5-NEXT:    sbc r25, r19
+; CHECK-AVR5-NEXT:    ret
+;
+; CHECK-AVR2-LABEL: foo1:
+; CHECK-AVR2:       ; %bb.0: ; %entry
+; CHECK-AVR2-NEXT:    lsl r22
+; CHECK-AVR2-NEXT:    rol r23
+; CHECK-AVR2-NEXT:    subi r22, -lo8(arr1)
+; CHECK-AVR2-NEXT:    sbci r23, -hi8(arr1)
+; CHECK-AVR2-NEXT:    mov r30, r22
+; CHECK-AVR2-NEXT:    mov r31, r23
+; CHECK-AVR2-NEXT:    lpm r18, Z+
+; CHECK-AVR2-NEXT:    lpm r19, Z
+; CHECK-AVR2-NEXT:    lsl r24
+; CHECK-AVR2-NEXT:    rol r25
+; CHECK-AVR2-NEXT:    subi r24, -lo8(arr0)
+; CHECK-AVR2-NEXT:    sbci r25, -hi8(arr0)
+; CHECK-AVR2-NEXT:    mov r30, r24
+; CHECK-AVR2-NEXT:    mov r31, r25
+; CHECK-AVR2-NEXT:    lpm r24, Z+
+; CHECK-AVR2-NEXT:    lpm r25, Z
+; CHECK-AVR2-NEXT:    sub r24, r18
+; CHECK-AVR2-NEXT:    sbc r25, r19
+; CHECK-AVR2-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %a
+  %0 = load i16, i16 addrspace(1)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(2)* @arr1, i16 0, i16 %b
+  %1 = load i16, i16 addrspace(2)* %arrayidx1, align 1
+  %sub = sub nsw i16 %0, %1
+  ret i16 %sub
+}
+
+define i16 @foo2(i16 %a, i16 %b) {
+; CHECK-AVR5-LABEL: foo2:
+; CHECK-AVR5:       ; %bb.0: ; %entry
+; CHECK-AVR5-NEXT:    lsl r22
+; CHECK-AVR5-NEXT:    rol r23
+; CHECK-AVR5-NEXT:    subi r22, -lo8(arr0)
+; CHECK-AVR5-NEXT:    sbci r23, -hi8(arr0)
+; CHECK-AVR5-NEXT:    movw r30, r22
+; CHECK-AVR5-NEXT:    lpm r18, Z+
+; CHECK-AVR5-NEXT:    lpm r19, Z
+; CHECK-AVR5-NEXT:    lsl r24
+; CHECK-AVR5-NEXT:    rol r25
+; CHECK-AVR5-NEXT:    subi r24, -lo8(arr2)
+; CHECK-AVR5-NEXT:    sbci r25, -hi8(arr2)
+; CHECK-AVR5-NEXT:    movw r30, r24
+; CHECK-AVR5-NEXT:    ldi r20, 2
+; CHECK-AVR5-NEXT:    out 59, r20
+; CHECK-AVR5-NEXT:    elpm r24, Z+
+; CHECK-AVR5-NEXT:    elpm r25, Z
+; CHECK-AVR5-NEXT:    sub r24, r18
+; CHECK-AVR5-NEXT:    sbc r25, r19
+; CHECK-AVR5-NEXT:    ret
+;
+; CHECK-AVR2-LABEL: foo2:
+; CHECK-AVR2:       ; %bb.0: ; %entry
+; CHECK-AVR2-NEXT:    lsl r22
+; CHECK-AVR2-NEXT:    rol r23
+; CHECK-AVR2-NEXT:    subi r22, -lo8(arr0)
+; CHECK-AVR2-NEXT:    sbci r23, -hi8(arr0)
+; CHECK-AVR2-NEXT:    mov r30, r22
+; CHECK-AVR2-NEXT:    mov r31, r23
+; CHECK-AVR2-NEXT:    lpm r18, Z+
+; CHECK-AVR2-NEXT:    lpm r19, Z
+; CHECK-AVR2-NEXT:    lsl r24
+; CHECK-AVR2-NEXT:    rol r25
+; CHECK-AVR2-NEXT:    subi r24, -lo8(arr2)
+; CHECK-AVR2-NEXT:    sbci r25, -hi8(arr2)
+; CHECK-AVR2-NEXT:    mov r30, r24
+; CHECK-AVR2-NEXT:    mov r31, r25
+; CHECK-AVR2-NEXT:    lpm r24, Z+
+; CHECK-AVR2-NEXT:    lpm r25, Z
+; CHECK-AVR2-NEXT:    sub r24, r18
+; CHECK-AVR2-NEXT:    sbc r25, r19
+; CHECK-AVR2-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(3)* @arr2, i16 0, i16 %a
+  %0 = load i16, i16 addrspace(3)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %b
+  %1 = load i16, i16 addrspace(1)* %arrayidx1, align 1
+  %sub = sub nsw i16 %0, %1
+  ret i16 %sub
+}
+
+define i16 @foo3(i16 %a, i16 %b) {
+; CHECK-AVR5-LABEL: foo3:
+; CHECK-AVR5:       ; %bb.0: ; %entry
+; CHECK-AVR5-NEXT:    lsl r22
+; CHECK-AVR5-NEXT:    rol r23
+; CHECK-AVR5-NEXT:    subi r22, -lo8(arr1)
+; CHECK-AVR5-NEXT:    sbci r23, -hi8(arr1)
+; CHECK-AVR5-NEXT:    movw r30, r22
+; CHECK-AVR5-NEXT:    ldi r20, 1
+; CHECK-AVR5-NEXT:    out 59, r20
+; CHECK-AVR5-NEXT:    elpm r18, Z+
+; CHECK-AVR5-NEXT:    elpm r19, Z
+; CHECK-AVR5-NEXT:    lsl r24
+; CHECK-AVR5-NEXT:    rol r25
+; CHECK-AVR5-NEXT:    subi r24, -lo8(arr2)
+; CHECK-AVR5-NEXT:    sbci r25, -hi8(arr2)
+; CHECK-AVR5-NEXT:    movw r30, r24
+; CHECK-AVR5-NEXT:    ldi r20, 2
+; CHECK-AVR5-NEXT:    out 59, r20
+; CHECK-AVR5-NEXT:    elpm r24, Z+
+; CHECK-AVR5-NEXT:    elpm r25, Z
+; CHECK-AVR5-NEXT:    sub r24, r18
+; CHECK-AVR5-NEXT:    sbc r25, r19
+; CHECK-AVR5-NEXT:    ret
+;
+; CHECK-AVR2-LABEL: foo3:
+; CHECK-AVR2:       ; %bb.0: ; %entry
+; CHECK-AVR2-NEXT:    lsl r22
+; CHECK-AVR2-NEXT:    rol r23
+; CHECK-AVR2-NEXT:    subi r22, -lo8(arr1)
+; CHECK-AVR2-NEXT:    sbci r23, -hi8(arr1)
+; CHECK-AVR2-NEXT:    mov r30, r22
+; CHECK-AVR2-NEXT:    mov r31, r23
+; CHECK-AVR2-NEXT:    lpm r18, Z+
+; CHECK-AVR2-NEXT:    lpm r19, Z
+; CHECK-AVR2-NEXT:    lsl r24
+; CHECK-AVR2-NEXT:    rol r25
+; CHECK-AVR2-NEXT:    subi r24, -lo8(arr2)
+; CHECK-AVR2-NEXT:    sbci r25, -hi8(arr2)
+; CHECK-AVR2-NEXT:    mov r30, r24
+; CHECK-AVR2-NEXT:    mov r31, r25
+; CHECK-AVR2-NEXT:    lpm r24, Z+
+; CHECK-AVR2-NEXT:    lpm r25, Z
+; CHECK-AVR2-NEXT:    sub r24, r18
+; CHECK-AVR2-NEXT:    sbc r25, r19
+; CHECK-AVR2-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(3)* @arr2, i16 0, i16 %a
+  %0 = load i16, i16 addrspace(3)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(2)* @arr1, i16 0, i16 %b
+  %1 = load i16, i16 addrspace(2)* %arrayidx1, align 1
+  %sub = sub nsw i16 %0, %1
+  ret i16 %sub
+}
+
+@arrb1 = addrspace(1) constant [4 x i8] c"{\188%", align 1
+@arrb3 = addrspace(3) constant [4 x i8] c"{\22.\1B", align 1
+@arrb5 = addrspace(5) constant [4 x i8] c"{\17-\11", align 1
+
+define signext i8 @foob0(i16 %a, i16 %b) {
+; CHECK-AVR5-LABEL: foob0:
+; CHECK-AVR5:       ; %bb.0: ; %entry
+; CHECK-AVR5-NEXT:    subi r22, -lo8(arrb1)
+; CHECK-AVR5-NEXT:    sbci r23, -hi8(arrb1)
+; CHECK-AVR5-NEXT:    movw r30, r22
+; CHECK-AVR5-NEXT:    lpm r18, Z
+; CHECK-AVR5-NEXT:    subi r24, -lo8(arrb1)
+; CHECK-AVR5-NEXT:    sbci r25, -hi8(arrb1)
+; CHECK-AVR5-NEXT:    movw r30, r24
+; CHECK-AVR5-NEXT:    lpm r24, Z
+; CHECK-AVR5-NEXT:    sub r24, r18
+; CHECK-AVR5-NEXT:    mov r25, r24
+; CHECK-AVR5-NEXT:    lsl r25
+; CHECK-AVR5-NEXT:    sbc r25, r25
+; CHECK-AVR5-NEXT:    ret
+;
+; CHECK-AVR2-LABEL: foob0:
+; CHECK-AVR2:       ; %bb.0: ; %entry
+; CHECK-AVR2-NEXT:    subi r22, -lo8(arrb1)
+; CHECK-AVR2-NEXT:    sbci r23, -hi8(arrb1)
+; CHECK-AVR2-NEXT:    mov r30, r22
+; CHECK-AVR2-NEXT:    mov r31, r23
+; CHECK-AVR2-NEXT:    lpm r18, Z
+; CHECK-AVR2-NEXT:    subi r24, -lo8(arrb1)
+; CHECK-AVR2-NEXT:    sbci r25, -hi8(arrb1)
+; CHECK-AVR2-NEXT:    mov r30, r24
+; CHECK-AVR2-NEXT:    mov r31, r25
+; CHECK-AVR2-NEXT:    lpm r24, Z
+; CHECK-AVR2-NEXT:    sub r24, r18
+; CHECK-AVR2-NEXT:    mov r25, r24
+; CHECK-AVR2-NEXT:    lsl r25
+; CHECK-AVR2-NEXT:    sbc r25, r25
+; CHECK-AVR2-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a
+  %0 = load i8, i8 addrspace(1)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %b
+  %1 = load i8, i8 addrspace(1)* %arrayidx1, align 1
+  %sub = sub i8 %0, %1
+  ret i8 %sub
+}
+
+define signext i8 @foob1(i16 %a, i16 %b) {
+; CHECK-AVR5-LABEL: foob1:
+; CHECK-AVR5:       ; %bb.0: ; %entry
+; CHECK-AVR5-NEXT:    subi r22, -lo8(arrb3)
+; CHECK-AVR5-NEXT:    sbci r23, -hi8(arrb3)
+; CHECK-AVR5-NEXT:    movw r30, r22
+; CHECK-AVR5-NEXT:    ldi r19, 2
+; CHECK-AVR5-NEXT:    out 59, r19
+; CHECK-AVR5-NEXT:    elpm r18, Z
+; CHECK-AVR5-NEXT:    subi r24, -lo8(arrb1)
+; CHECK-AVR5-NEXT:    sbci r25, -hi8(arrb1)
+; CHECK-AVR5-NEXT:    movw r30, r24
+; CHECK-AVR5-NEXT:    lpm r24, Z
+; CHECK-AVR5-NEXT:    sub r24, r18
+; CHECK-AVR5-NEXT:    mov r25, r24
+; CHECK-AVR5-NEXT:    lsl r25
+; CHECK-AVR5-NEXT:    sbc r25, r25
+; CHECK-AVR5-NEXT:    ret
+;
+; CHECK-AVR2-LABEL: foob1:
+; CHECK-AVR2:       ; %bb.0: ; %entry
+; CHECK-AVR2-NEXT:    subi r22, -lo8(arrb3)
+; CHECK-AVR2-NEXT:    sbci r23, -hi8(arrb3)
+; CHECK-AVR2-NEXT:    mov r30, r22
+; CHECK-AVR2-NEXT:    mov r31, r23
+; CHECK-AVR2-NEXT:    lpm r18, Z
+; CHECK-AVR2-NEXT:    subi r24, -lo8(arrb1)
+; CHECK-AVR2-NEXT:    sbci r25, -hi8(arrb1)
+; CHECK-AVR2-NEXT:    mov r30, r24
+; CHECK-AVR2-NEXT:    mov r31, r25
+; CHECK-AVR2-NEXT:    lpm r24, Z
+; CHECK-AVR2-NEXT:    sub r24, r18
+; CHECK-AVR2-NEXT:    mov r25, r24
+; CHECK-AVR2-NEXT:    lsl r25
+; CHECK-AVR2-NEXT:    sbc r25, r25
+; CHECK-AVR2-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a
+  %0 = load i8, i8 addrspace(1)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %b
+  %1 = load i8, i8 addrspace(3)* %arrayidx1, align 1
+  %sub = sub i8 %0, %1
+  ret i8 %sub
+}
+
+define signext i8 @foob2(i16 %a, i16 %b) {
+; CHECK-AVR5-LABEL: foob2:
+; CHECK-AVR5:       ; %bb.0: ; %entry
+; CHECK-AVR5-NEXT:    subi r22, -lo8(arrb1)
+; CHECK-AVR5-NEXT:    sbci r23, -hi8(arrb1)
+; CHECK-AVR5-NEXT:    movw r30, r22
+; CHECK-AVR5-NEXT:    lpm r18, Z
+; CHECK-AVR5-NEXT:    subi r24, -lo8(arrb5)
+; CHECK-AVR5-NEXT:    sbci r25, -hi8(arrb5)
+; CHECK-AVR5-NEXT:    movw r30, r24
+; CHECK-AVR5-NEXT:    ldi r19, 4
+; CHECK-AVR5-NEXT:    out 59, r19
+; CHECK-AVR5-NEXT:    elpm r24, Z
+; CHECK-AVR5-NEXT:    sub r24, r18
+; CHECK-AVR5-NEXT:    mov r25, r24
+; CHECK-AVR5-NEXT:    lsl r25
+; CHECK-AVR5-NEXT:    sbc r25, r25
+; CHECK-AVR5-NEXT:    ret
+;
+; CHECK-AVR2-LABEL: foob2:
+; CHECK-AVR2:       ; %bb.0: ; %entry
+; CHECK-AVR2-NEXT:    subi r22, -lo8(arrb1)
+; CHECK-AVR2-NEXT:    sbci r23, -hi8(arrb1)
+; CHECK-AVR2-NEXT:    mov r30, r22
+; CHECK-AVR2-NEXT:    mov r31, r23
+; CHECK-AVR2-NEXT:    lpm r18, Z
+; CHECK-AVR2-NEXT:    subi r24, -lo8(arrb5)
+; CHECK-AVR2-NEXT:    sbci r25, -hi8(arrb5)
+; CHECK-AVR2-NEXT:    mov r30, r24
+; CHECK-AVR2-NEXT:    mov r31, r25
+; CHECK-AVR2-NEXT:    lpm r24, Z
+; CHECK-AVR2-NEXT:    sub r24, r18
+; CHECK-AVR2-NEXT:    mov r25, r24
+; CHECK-AVR2-NEXT:    lsl r25
+; CHECK-AVR2-NEXT:    sbc r25, r25
+; CHECK-AVR2-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* @arrb5, i16 0, i16 %a
+  %0 = load i8, i8 addrspace(5)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %b
+  %1 = load i8, i8 addrspace(1)* %arrayidx1, align 1
+  %sub = sub i8 %0, %1
+  ret i8 %sub
+}
+
+define signext i8 @foob3(i16 %a, i16 %b) {
+; CHECK-AVR5-LABEL: foob3:
+; CHECK-AVR5:       ; %bb.0: ; %entry
+; CHECK-AVR5-NEXT:    subi r22, -lo8(arrb5)
+; CHECK-AVR5-NEXT:    sbci r23, -hi8(arrb5)
+; CHECK-AVR5-NEXT:    movw r30, r22
+; CHECK-AVR5-NEXT:    ldi r19, 4
+; CHECK-AVR5-NEXT:    out 59, r19
+; CHECK-AVR5-NEXT:    elpm r18, Z
+; CHECK-AVR5-NEXT:    subi r24, -lo8(arrb3)
+; CHECK-AVR5-NEXT:    sbci r25, -hi8(arrb3)
+; CHECK-AVR5-NEXT:    movw r30, r24
+; CHECK-AVR5-NEXT:    ldi r19, 2
+; CHECK-AVR5-NEXT:    out 59, r19
+; CHECK-AVR5-NEXT:    elpm r24, Z
+; CHECK-AVR5-NEXT:    sub r24, r18
+; CHECK-AVR5-NEXT:    mov r25, r24
+; CHECK-AVR5-NEXT:    lsl r25
+; CHECK-AVR5-NEXT:    sbc r25, r25
+; CHECK-AVR5-NEXT:    ret
+;
+; CHECK-AVR2-LABEL: foob3:
+; CHECK-AVR2:       ; %bb.0: ; %entry
+; CHECK-AVR2-NEXT:    subi r22, -lo8(arrb5)
+; CHECK-AVR2-NEXT:    sbci r23, -hi8(arrb5)
+; CHECK-AVR2-NEXT:    mov r30, r22
+; CHECK-AVR2-NEXT:    mov r31, r23
+; CHECK-AVR2-NEXT:    lpm r18, Z
+; CHECK-AVR2-NEXT:    subi r24, -lo8(arrb3)
+; CHECK-AVR2-NEXT:    sbci r25, -hi8(arrb3)
+; CHECK-AVR2-NEXT:    mov r30, r24
+; CHECK-AVR2-NEXT:    mov r31, r25
+; CHECK-AVR2-NEXT:    lpm r24, Z
+; CHECK-AVR2-NEXT:    sub r24, r18
+; CHECK-AVR2-NEXT:    mov r25, r24
+; CHECK-AVR2-NEXT:    lsl r25
+; CHECK-AVR2-NEXT:    sbc r25, r25
+; CHECK-AVR2-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %a
+  %0 = load i8, i8 addrspace(3)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* @arrb5, i16 0, i16 %b
+  %1 = load i8, i8 addrspace(5)* %arrayidx1, align 1
+  %sub = sub i8 %0, %1
+  ret i8 %sub
+}
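
For reference, a minimal standalone input of the kind that exercises the new ELPMBRdZ pseudo, mirroring the accesses in elpm.ll above (this sketch is not part of the patch or the autogenerated checks; the global and function names are illustrative only). An i8 load from address space 3 maps to program memory bank 2, so on an ELPM-capable device such as atmega2560 the patch selects ELPMBRdZ, which the pseudo expansion pass lowers to an ldi/out sequence writing 2 to RAMPZ followed by an elpm:

@byte_in_bank2 = addrspace(3) constant i8 42, align 1

define i8 @load_byte_from_bank2() {
entry:
  %0 = load i8, i8 addrspace(3)* @byte_in_bank2, align 1
  ret i8 %0
}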