diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h
--- a/llvm/lib/Target/AVR/AVR.h
+++ b/llvm/lib/Target/AVR/AVR.h
@@ -77,6 +77,19 @@
   return false;
 }
 
+// Get the index of the program memory bank.
+//  -1: not program memory
+//   0: ordinary program memory
+// 1~5: extended program memory
+inline int getProgramMemoryBank(MemSDNode const *N) {
+  auto *V = N->getMemOperand()->getValue();
+  if (V == nullptr || !isProgramMemoryAddress(V))
+    return -1;
+  AddressSpace AS = getAddressSpace(V);
+  assert(ProgramMemory <= AS && AS <= ProgramMemory5);
+  return static_cast<int>(AS - ProgramMemory);
+}
+
 } // end of namespace AVR
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -102,6 +102,9 @@
   bool expandLSLW12Rd(Block &MBB, BlockIt MBBI);
   bool expandLSRW12Rd(Block &MBB, BlockIt MBBI);
 
+  // Common implementation of LPMWRdZ and ELPMWRdZ.
+  bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
+
   /// Scavenges a free GPR8 register for use.
   Register scavengeGPR8(MachineInstr &MI);
 };
@@ -809,18 +812,25 @@
   return true;
 }
 
-template <>
-bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) {
   MachineInstr &MI = *MBBI;
   Register DstLoReg, DstHiReg;
   Register DstReg = MI.getOperand(0).getReg();
   Register TmpReg = 0; // 0 for no temporary register
   Register SrcReg = MI.getOperand(1).getReg();
   bool SrcIsKill = MI.getOperand(1).isKill();
-  unsigned OpLo = AVR::LPMRdZPi;
-  unsigned OpHi = AVR::LPMRdZ;
+  unsigned OpLo = IsExt ? AVR::ELPMRdZPi : AVR::LPMRdZPi;
+  unsigned OpHi = IsExt ? AVR::ELPMRdZ : AVR::LPMRdZ;
   TRI->splitReg(DstReg, DstLoReg, DstHiReg);
 
+  // Set the I/O register RAMPZ for ELPM.
+  if (IsExt) {
+    const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+    Register Bank = MI.getOperand(2).getReg();
+    // out RAMPZ, rtmp
+    buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(Bank);
+  }
+
   // Use a temporary register if src and dst registers are the same.
   if (DstReg == SrcReg)
     TmpReg = scavengeGPR8(MI);
@@ -857,9 +867,52 @@
   return true;
 }
 
+template <>
+bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
+  return expandLPMWELPMW(MBB, MBBI, false);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMWRdZ>(Block &MBB, BlockIt MBBI) {
+  return expandLPMWELPMW(MBB, MBBI, true);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register BankReg = MI.getOperand(2).getReg();
+  bool SrcIsKill = MI.getOperand(1).isKill();
+  const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+
+  // Set the I/O register RAMPZ for ELPM (out RAMPZ, rtmp).
+  buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg);
+
+  // Load byte.
+  auto MILB = buildMI(MBB, MBBI, AVR::ELPMRdZ)
+                  .addReg(DstReg, RegState::Define)
+                  .addReg(SrcReg, getKillRegState(SrcIsKill));
+
+  MILB.setMemRefs(MI.memoperands());
+
+  MI.eraseFromParent();
+  return true;
+}
+
 template <>
 bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
-  llvm_unreachable("wide LPMPi is unimplemented");
+  llvm_unreachable("16-bit LPMPi is unimplemented");
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZPi>(Block &MBB, BlockIt MBBI) {
+  llvm_unreachable("byte ELPMPi is unimplemented");
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMWRdZPi>(Block &MBB, BlockIt MBBI) {
+  llvm_unreachable("16-bit ELPMPi is unimplemented");
 }
 
 template <typename Func>
@@ -2269,6 +2322,10 @@
     EXPAND(AVR::LDDWRdPtrQ);
     EXPAND(AVR::LPMWRdZ);
     EXPAND(AVR::LPMWRdZPi);
+    EXPAND(AVR::ELPMBRdZ);
+    EXPAND(AVR::ELPMWRdZ);
+    EXPAND(AVR::ELPMBRdZPi);
+    EXPAND(AVR::ELPMWRdZPi);
     EXPAND(AVR::AtomicLoad8);
     EXPAND(AVR::AtomicLoad16);
     EXPAND(AVR::AtomicStore8);
diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -38,7 +38,7 @@
   bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Disp);
 
   bool selectIndexedLoad(SDNode *N);
-  unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT);
+  unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT, int Bank);
 
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode,
                                     std::vector<SDValue> &OutOps) override;
@@ -165,35 +165,31 @@
   return true;
 }
 
-unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
-                                                   MVT VT) {
-  ISD::MemIndexedMode AM = LD->getAddressingMode();
-
+unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT,
+                                                   int Bank) {
   // Progmem indexed loads only work in POSTINC mode.
-  if (LD->getExtensionType() != ISD::NON_EXTLOAD || AM != ISD::POST_INC) {
+  if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
+      LD->getAddressingMode() != ISD::POST_INC)
     return 0;
-  }
+
+  // Feature ELPM is needed for loading from extended program memory.
+  assert((Bank == 0 || Subtarget->hasELPM()) &&
+         "cannot load from extended program memory on this mcu");
 
   unsigned Opcode = 0;
   int Offs = cast<ConstantSDNode>(LD->getOffset())->getSExtValue();
 
   switch (VT.SimpleTy) {
-  case MVT::i8: {
-    if (Offs != 1) {
-      return 0;
-    }
-    Opcode = AVR::LPMRdZPi;
+  case MVT::i8:
+    if (Offs == 1)
+      Opcode = Bank > 0 ? AVR::ELPMBRdZPi : AVR::LPMRdZPi;
     break;
-  }
-  case MVT::i16: {
-    if (Offs != 2) {
-      return 0;
-    }
-    Opcode = AVR::LPMWRdZPi;
+  case MVT::i16:
+    if (Offs == 2)
+      Opcode = Bank > 0 ? AVR::ELPMWRdZPi : AVR::LPMWRdZPi;
     break;
-  }
   default:
-    return 0;
+    break;
   }
 
   return Opcode;
@@ -360,7 +356,15 @@
     return selectIndexedLoad(N);
   }
 
-  assert(Subtarget->hasLPM() && "cannot load from program memory on this mcu");
+  if (!Subtarget->hasLPM())
+    report_fatal_error("cannot load from program memory on this mcu");
+
+  int ProgMemBank = AVR::getProgramMemoryBank(LD);
+  if (ProgMemBank < 0 || ProgMemBank > 5)
+    report_fatal_error("unexpected program memory bank");
+  // The ELPM pseudos expand to `elpm Rd, Z(+)`, which requires ELPMX.
+  if (ProgMemBank > 0 && !Subtarget->hasELPMX())
+    report_fatal_error("cannot load from extended program memory on this mcu");
 
   // This is a flash memory load, move the pointer into R31R30 and emit
   // the lpm instruction.
@@ -374,25 +378,48 @@
   Ptr = CurDAG->getCopyFromReg(Chain, DL, AVR::R31R30, MVT::i16,
                                Chain.getValue(1));
 
-  SDValue RegZ = CurDAG->getRegister(AVR::R31R30, MVT::i16);
-
   // Check if the opcode can be converted into an indexed load.
-  if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT)) {
+  if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT, ProgMemBank)) {
     // It is legal to fold the load into an indexed load.
-    ResNode =
-        CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr, RegZ);
-    ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+    if (ProgMemBank == 0) {
+      ResNode =
+          CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr);
+    } else {
+      // Do not combine the LDI instruction into the ELPM pseudo instruction,
+      // since it may be reused by other ELPM pseudo instructions.
+      SDValue NC = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
+      auto *NP = CurDAG->getMachineNode(AVR::LDIRdK, DL, MVT::i8, NC);
+      ResNode = CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other,
+                                       Ptr, SDValue(NP, 0));
+    }
   } else {
     // Selecting an indexed load is not legal, fallback to a normal load.
     switch (VT.SimpleTy) {
     case MVT::i8:
-      ResNode = CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other,
-                                       Ptr, RegZ);
+      if (ProgMemBank == 0) {
+        ResNode =
+            CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other, Ptr);
+      } else {
+        // Do not combine the LDI instruction into the ELPM pseudo instruction,
+        // since it may be reused by other ELPM pseudo instructions.
+        SDValue NC = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
+        auto *NP = CurDAG->getMachineNode(AVR::LDIRdK, DL, MVT::i8, NC);
+        ResNode = CurDAG->getMachineNode(AVR::ELPMBRdZ, DL, MVT::i8, MVT::Other,
+                                         Ptr, SDValue(NP, 0));
+      }
       break;
     case MVT::i16:
-      ResNode = CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other,
-                                       Ptr, RegZ);
-      ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+      if (ProgMemBank == 0) {
+        ResNode =
+            CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other, Ptr);
+      } else {
+        // Do not combine the LDI instruction into the ELPM pseudo instruction,
+        // since LDI requires the destination register in range R16~R31.
+        SDValue NC = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
+        auto *NP = CurDAG->getMachineNode(AVR::LDIRdK, DL, MVT::i8, NC);
+        ResNode = CurDAG->getMachineNode(AVR::ELPMWRdZ, DL, MVT::i16,
+                                         MVT::Other, Ptr, SDValue(NP, 0));
+      }
       break;
     default:
       llvm_unreachable("Unsupported VT!");
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1702,21 +1702,34 @@
       : F16<0b1001010111011000, (outs), (ins), "elpm", []>,
         Requires<[HasELPM]>;
 
-  def ELPMRdZ : FLPMX<1, 0,
-                      (outs GPR8
-                       : $dst),
-                      (ins ZREG
-                       : $z),
+  def ELPMRdZ : FLPMX<1, 0, (outs GPR8:$dst), (ins ZREG:$z),
                       "elpm\t$dst, $z", []>,
                 Requires<[HasELPMX]>;
 
-  let Defs = [R31R30] in def ELPMRdZPi : FLPMX<1, 1,
-                                               (outs GPR8
-                                                : $dst),
-                                               (ins ZREG
-                                                : $z),
-                                               "elpm\t$dst, $z+", []>,
-                                         Requires<[HasELPMX]>;
+  let Defs = [R31R30] in {
+    def ELPMRdZPi : FLPMX<1, 1, (outs GPR8:$dst), (ins ZREG:$z),
+                          "elpm\t$dst, $z+", []>,
+                    Requires<[HasELPMX]>;
+  }
+
+  // These pseudos are combinations of the OUT and ELPM instructions.
+  let Defs = [R31R30], hasSideEffects = 1 in {
+    def ELPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
+                          "elpmb\t$dst, $z, $p", []>,
+                   Requires<[HasELPMX]>;
+
+    def ELPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p),
+                          "elpmw\t$dst, $z, $p", []>,
+                   Requires<[HasELPMX]>;
+
+    def ELPMBRdZPi : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
+                            "elpmb\t$dst, $z+, $p", []>,
+                     Requires<[HasELPMX]>;
+
+    def ELPMWRdZPi : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p),
+                            "elpmw\t$dst, $z+, $p", []>,
+                     Requires<[HasELPMX]>;
+  }
 }
 
 // Store program memory operations.
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h
--- a/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -91,6 +91,9 @@
     return ELFArch;
   }
 
+  /// Get the I/O address of the RAMPZ register.
+  int getIORegRAMPZ() const { return 0x3b; }
+
 private:
   /// The ELF e_flags architecture.
   unsigned ELFArch;
diff --git a/llvm/test/CodeGen/AVR/elpm.ll b/llvm/test/CodeGen/AVR/elpm.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/elpm.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=avr --mcpu=atmega2560 -verify-machineinstrs | FileCheck %s
+
+@arr0 = addrspace(1) constant [4 x i16] [i16 123, i16 24, i16 56, i16 37], align 1
+@arr1 = addrspace(2) constant [4 x i16] [i16 123, i16 34, i16 46, i16 27], align 1
+@arr2 = addrspace(3) constant [4 x i16] [i16 123, i16 23, i16 45, i16 17], align 1
+
+define i16 @foo0(i16 %a, i16 %b) {
+; CHECK-LABEL: foo0:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    lsl r22
+; CHECK-NEXT:    rol r23
+; CHECK-NEXT:    subi r22, -lo8(arr0)
+; CHECK-NEXT:    sbci r23, -hi8(arr0)
+; CHECK-NEXT:    movw r30, r22
+; CHECK-NEXT:    lpm r18, Z+
+; CHECK-NEXT:    lpm r19, Z
+; CHECK-NEXT:    lsl r24
+; CHECK-NEXT:    rol r25
+; CHECK-NEXT:    subi r24, -lo8(arr0)
+; CHECK-NEXT:    sbci r25, -hi8(arr0)
+; CHECK-NEXT:    movw r30, r24
+; CHECK-NEXT:    lpm r24, Z+
+; CHECK-NEXT:    lpm r25, Z
+; CHECK-NEXT:    sub r24, r18
+; CHECK-NEXT:    sbc r25, r19
+; CHECK-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %a
+  %0 = load i16, i16 addrspace(1)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %b
+  %1 = load i16, i16 addrspace(1)* %arrayidx1, align 1
+  %sub = sub nsw i16 %0, %1
+  ret i16 %sub
+}
+
+define i16 @foo1(i16 %a, i16 %b) {
+; CHECK-LABEL: foo1:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    lsl r22
+; CHECK-NEXT:    rol r23
+; CHECK-NEXT:    subi r22, -lo8(arr1)
+; CHECK-NEXT:    sbci r23, -hi8(arr1)
+; CHECK-NEXT:    movw r30, r22
+; CHECK-NEXT:    ldi r18, 1
+; CHECK-NEXT:    out 59, r18
+; CHECK-NEXT:    elpm r18, Z+
+; CHECK-NEXT:    elpm r19, Z
+; CHECK-NEXT:    lsl r24
+; CHECK-NEXT:    rol r25
+; CHECK-NEXT:    subi r24, -lo8(arr0)
+; CHECK-NEXT:    sbci r25, -hi8(arr0)
+; CHECK-NEXT:    movw r30, r24
+; CHECK-NEXT:    lpm r24, Z+
+; CHECK-NEXT:    lpm r25, Z
+; CHECK-NEXT:    sub r24, r18
+; CHECK-NEXT:    sbc r25, r19
+; CHECK-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %a
+  %0 = load i16, i16 addrspace(1)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(2)* @arr1, i16 0, i16 %b
+  %1 = load i16, i16 addrspace(2)* %arrayidx1, align 1
+  %sub = sub nsw i16 %0, %1
+  ret i16 %sub
+}
+
+define i16 @foo2(i16 %a, i16 %b) {
+; CHECK-LABEL: foo2:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    lsl r24
+; CHECK-NEXT:    rol r25
+; CHECK-NEXT:    subi r24, -lo8(arr2)
+; CHECK-NEXT:    sbci r25, -hi8(arr2)
+; CHECK-NEXT:    movw r30, r24
+; CHECK-NEXT:    ldi r24, 2
+; CHECK-NEXT:    out 59, r24
+; CHECK-NEXT:    elpm r24, Z+
+; CHECK-NEXT:    elpm r25, Z
+; CHECK-NEXT:    lsl r22
+; CHECK-NEXT:    rol r23
+; CHECK-NEXT:    subi r22, -lo8(arr0)
+; CHECK-NEXT:    sbci r23, -hi8(arr0)
+; CHECK-NEXT:    movw r30, r22
+; CHECK-NEXT:    lpm r18, Z+
+; CHECK-NEXT:    lpm r19, Z
+; CHECK-NEXT:    sub r24, r18
+; CHECK-NEXT:    sbc r25, r19
+; CHECK-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(3)* @arr2, i16 0, i16 %a
+  %0 = load i16, i16 addrspace(3)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %b
+  %1 = load i16, i16 addrspace(1)* %arrayidx1, align 1
+  %sub = sub nsw i16 %0, %1
+  ret i16 %sub
+}
+
+define i16 @foo3(i16 %a, i16 %b) {
+; CHECK-LABEL: foo3:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    lsl r22
+; CHECK-NEXT:    rol r23
+; CHECK-NEXT:    subi r22, -lo8(arr1)
+; CHECK-NEXT:    sbci r23, -hi8(arr1)
+; CHECK-NEXT:    movw r30, r22
+; CHECK-NEXT:    ldi r18, 1
+; CHECK-NEXT:    out 59, r18
+; CHECK-NEXT:    elpm r18, Z+
+; CHECK-NEXT:    elpm r19, Z
+; CHECK-NEXT:    lsl r24
+; CHECK-NEXT:    rol r25
+; CHECK-NEXT:    subi r24, -lo8(arr2)
+; CHECK-NEXT:    sbci r25, -hi8(arr2)
+; CHECK-NEXT:    movw r30, r24
+; CHECK-NEXT:    ldi r24, 2
+; CHECK-NEXT:    out 59, r24
+; CHECK-NEXT:    elpm r24, Z+
+; CHECK-NEXT:    elpm r25, Z
+; CHECK-NEXT:    sub r24, r18
+; CHECK-NEXT:    sbc r25, r19
+; CHECK-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(3)* @arr2, i16 0, i16 %a
+  %0 = load i16, i16 addrspace(3)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(2)* @arr1, i16 0, i16 %b
+  %1 = load i16, i16 addrspace(2)* %arrayidx1, align 1
+  %sub = sub nsw i16 %0, %1
+  ret i16 %sub
+}
+
+@arrb1 = addrspace(1) constant [4 x i8] c"{\188%", align 1
+@arrb3 = addrspace(3) constant [4 x i8] c"{\22.\1B", align 1
+@arrb5 = addrspace(5) constant [4 x i8] c"{\17-\11", align 1
+
+define signext i8 @foob0(i16 %a, i16 %b) {
+; CHECK-LABEL: foob0:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    subi r22, -lo8(arrb1)
+; CHECK-NEXT:    sbci r23, -hi8(arrb1)
+; CHECK-NEXT:    movw r30, r22
+; CHECK-NEXT:    lpm r18, Z
+; CHECK-NEXT:    subi r24, -lo8(arrb1)
+; CHECK-NEXT:    sbci r25, -hi8(arrb1)
+; CHECK-NEXT:    movw r30, r24
+; CHECK-NEXT:    lpm r24, Z
+; CHECK-NEXT:    sub r24, r18
+; CHECK-NEXT:    mov r25, r24
+; CHECK-NEXT:    lsl r25
+; CHECK-NEXT:    sbc r25, r25
+; CHECK-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a
+  %0 = load i8, i8 addrspace(1)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %b
+  %1 = load i8, i8 addrspace(1)* %arrayidx1, align 1
+  %sub = sub i8 %0, %1
+  ret i8 %sub
+}
+
+define signext i8 @foob1(i16 %a, i16 %b) {
+; CHECK-LABEL: foob1:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    subi r22, -lo8(arrb3)
+; CHECK-NEXT:    sbci r23, -hi8(arrb3)
+; CHECK-NEXT:    movw r30, r22
+; CHECK-NEXT:    ldi r18, 2
+; CHECK-NEXT:    out 59, r18
+; CHECK-NEXT:    elpm r18, Z
+; CHECK-NEXT:    subi r24, -lo8(arrb1)
+; CHECK-NEXT:    sbci r25, -hi8(arrb1)
+; CHECK-NEXT:    movw r30, r24
+; CHECK-NEXT:    lpm r24, Z
+; CHECK-NEXT:    sub r24, r18
+; CHECK-NEXT:    mov r25, r24
+; CHECK-NEXT:    lsl r25
+; CHECK-NEXT:    sbc r25, r25
+; CHECK-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a
+  %0 = load i8, i8 addrspace(1)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %b
+  %1 = load i8, i8 addrspace(3)* %arrayidx1, align 1
+  %sub = sub i8 %0, %1
+  ret i8 %sub
+}
+
+define signext i8 @foob2(i16 %a, i16 %b) {
+; CHECK-LABEL: foob2:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    subi r24, -lo8(arrb5)
+; CHECK-NEXT:    sbci r25, -hi8(arrb5)
+; CHECK-NEXT:    movw r30, r24
+; CHECK-NEXT:    ldi r24, 4
+; CHECK-NEXT:    out 59, r24
+; CHECK-NEXT:    elpm r24, Z
+; CHECK-NEXT:    subi r22, -lo8(arrb1)
+; CHECK-NEXT:    sbci r23, -hi8(arrb1)
+; CHECK-NEXT:    movw r30, r22
+; CHECK-NEXT:    lpm r25, Z
+; CHECK-NEXT:    sub r24, r25
+; CHECK-NEXT:    mov r25, r24
+; CHECK-NEXT:    lsl r25
+; CHECK-NEXT:    sbc r25, r25
+; CHECK-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* @arrb5, i16 0, i16 %a
+  %0 = load i8, i8 addrspace(5)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %b
+  %1 = load i8, i8 addrspace(1)* %arrayidx1, align 1
+  %sub = sub i8 %0, %1
+  ret i8 %sub
+}
+
+define signext i8 @foob3(i16 %a, i16 %b) {
+; CHECK-LABEL: foob3:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    subi r22, -lo8(arrb5)
+; CHECK-NEXT:    sbci r23, -hi8(arrb5)
+; CHECK-NEXT:    movw r30, r22
+; CHECK-NEXT:    ldi r18, 4
+; CHECK-NEXT:    out 59, r18
+; CHECK-NEXT:    elpm r18, Z
+; CHECK-NEXT:    subi r24, -lo8(arrb3)
+; CHECK-NEXT:    sbci r25, -hi8(arrb3)
+; CHECK-NEXT:    movw r30, r24
+; CHECK-NEXT:    ldi r24, 2
+; CHECK-NEXT:    out 59, r24
+; CHECK-NEXT:    elpm r24, Z
+; CHECK-NEXT:    sub r24, r18
+; CHECK-NEXT:    mov r25, r24
+; CHECK-NEXT:    lsl r25
+; CHECK-NEXT:    sbc r25, r25
+; CHECK-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %a
+  %0 = load i8, i8 addrspace(3)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* @arrb5, i16 0, i16 %b
+  %1 = load i8, i8 addrspace(5)* %arrayidx1, align 1
+  %sub = sub i8 %0, %1
+  ret i8 %sub
+}
+
+define signext i8 @foob4(i16 %a, i16 %b) {
+; CHECK-LABEL: foob4:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    subi r22, -lo8(arrb3)
+; CHECK-NEXT:    sbci r23, -hi8(arrb3)
+; CHECK-NEXT:    movw r30, r22
+; CHECK-NEXT:    ldi r18, 2
+; CHECK-NEXT:    out 59, r18
+; CHECK-NEXT:    elpm r19, Z
+; CHECK-NEXT:    subi r24, -lo8(arrb3)
+; CHECK-NEXT:    sbci r25, -hi8(arrb3)
+; CHECK-NEXT:    movw r30, r24
+; CHECK-NEXT:    out 59, r18
+; CHECK-NEXT:    elpm r24, Z
+; CHECK-NEXT:    sub r24, r19
+; CHECK-NEXT:    mov r25, r24
+; CHECK-NEXT:    lsl r25
+; CHECK-NEXT:    sbc r25, r25
+; CHECK-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %a
+  %0 = load i8, i8 addrspace(3)* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %b
+  %1 = load i8, i8 addrspace(3)* %arrayidx1, align 1
+  %sub = sub i8 %0, %1
+  ret i8 %sub
+}
diff --git a/llvm/test/CodeGen/AVR/lpmx.ll b/llvm/test/CodeGen/AVR/lpmx.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/lpmx.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=avr --mcpu=atmega328 -O0 -verify-machineinstrs \
+; RUN:     | FileCheck -check-prefix=CHECK-O0 %s
+; RUN: llc < %s -mtriple=avr --mcpu=atmega328 -O3 -verify-machineinstrs \
+; RUN:     | FileCheck -check-prefix=CHECK-O3 %s
+
+@arr0 = addrspace(1) constant [4 x i16] [i16 123, i16 234, i16 456, i16 67], align 1
+@arr1 = addrspace(1) constant [4 x i8] c"ABCD", align 1
+
+define i16 @foo0(i16 %a) addrspace(1) {
+; CHECK-O0-LABEL: foo0:
+; CHECK-O0:       ; %bb.0: ; %entry
+; CHECK-O0-NEXT:    push r28
+; CHECK-O0-NEXT:    push r29
+; CHECK-O0-NEXT:    in r28, 61
+; CHECK-O0-NEXT:    in r29, 62
+; CHECK-O0-NEXT:    sbiw r28, 2
+; CHECK-O0-NEXT:    in r0, 63
+; CHECK-O0-NEXT:    cli
+; CHECK-O0-NEXT:    out 62, r29
+; CHECK-O0-NEXT:    out 63, r0
+; CHECK-O0-NEXT:    out 61, r28
+; CHECK-O0-NEXT:    std Y+1, r24
+; CHECK-O0-NEXT:    std Y+2, r25
+; CHECK-O0-NEXT:    ldd r24, Y+1
+; CHECK-O0-NEXT:    ldd r25, Y+2
+; CHECK-O0-NEXT:    lsl r24
+; CHECK-O0-NEXT:    rol r25
+; CHECK-O0-NEXT:    subi r24, -lo8(arr0)
+; CHECK-O0-NEXT:    sbci r25, -hi8(arr0)
+; CHECK-O0-NEXT:    movw r30, r24
+; CHECK-O0-NEXT:    lpm r24, Z+
+; CHECK-O0-NEXT:    lpm r25, Z
+; CHECK-O0-NEXT:    adiw r28, 2
+; CHECK-O0-NEXT:    in r0, 63
+; CHECK-O0-NEXT:    cli
+; CHECK-O0-NEXT:    out 62, r29
+; CHECK-O0-NEXT:    out 63, r0
+; CHECK-O0-NEXT:    out 61, r28
+; CHECK-O0-NEXT:    pop r29
+; CHECK-O0-NEXT:    pop r28
+; CHECK-O0-NEXT:    ret
+;
+; CHECK-O3-LABEL: foo0:
+; CHECK-O3:       ; %bb.0: ; %entry
+; CHECK-O3-NEXT:    push r28
+; CHECK-O3-NEXT:    push r29
+; CHECK-O3-NEXT:    in r28, 61
+; CHECK-O3-NEXT:    in r29, 62
+; CHECK-O3-NEXT:    sbiw r28, 2
+; CHECK-O3-NEXT:    in r0, 63
+; CHECK-O3-NEXT:    cli
+; CHECK-O3-NEXT:    out 62, r29
+; CHECK-O3-NEXT:    out 63, r0
+; CHECK-O3-NEXT:    out 61, r28
+; CHECK-O3-NEXT:    std Y+1, r24
+; CHECK-O3-NEXT:    std Y+2, r25
+; CHECK-O3-NEXT:    lsl r24
+; CHECK-O3-NEXT:    rol r25
+; CHECK-O3-NEXT:    subi r24, -lo8(arr0)
+; CHECK-O3-NEXT:    sbci r25, -hi8(arr0)
+; CHECK-O3-NEXT:    movw r30, r24
+; CHECK-O3-NEXT:    lpm r24, Z+
+; CHECK-O3-NEXT:    lpm r25, Z
+; CHECK-O3-NEXT:    adiw r28, 2
+; CHECK-O3-NEXT:    in r0, 63
+; CHECK-O3-NEXT:    cli
+; CHECK-O3-NEXT:    out 62, r29
+; CHECK-O3-NEXT:    out 63, r0
+; CHECK-O3-NEXT:    out 61, r28
+; CHECK-O3-NEXT:    pop r29
+; CHECK-O3-NEXT:    pop r28
+; CHECK-O3-NEXT:    ret
+entry:
+  %a.addr = alloca i16, align 1
+  store i16 %a, i16* %a.addr, align 1
+  %0 = load i16, i16* %a.addr, align 1
+  %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %0
+  %1 = load i16, i16 addrspace(1)* %arrayidx, align 1
+  ret i16 %1
+}
+
+define i8 @foo1(i16 %a) addrspace(1) {
+; CHECK-O0-LABEL: foo1:
+; CHECK-O0:       ; %bb.0: ; %entry
+; CHECK-O0-NEXT:    push r28
+; CHECK-O0-NEXT:    push r29
+; CHECK-O0-NEXT:    in r28, 61
+; CHECK-O0-NEXT:    in r29, 62
+; CHECK-O0-NEXT:    sbiw r28, 2
+; CHECK-O0-NEXT:    in r0, 63
+; CHECK-O0-NEXT:    cli
+; CHECK-O0-NEXT:    out 62, r29
+; CHECK-O0-NEXT:    out 63, r0
+; CHECK-O0-NEXT:    out 61, r28
+; CHECK-O0-NEXT:    std Y+1, r24
+; CHECK-O0-NEXT:    std Y+2, r25
+; CHECK-O0-NEXT:    ldd r24, Y+1
+; CHECK-O0-NEXT:    ldd r25, Y+2
+; CHECK-O0-NEXT:    subi r24, -lo8(arr1)
+; CHECK-O0-NEXT:    sbci r25, -hi8(arr1)
+; CHECK-O0-NEXT:    movw r30, r24
+; CHECK-O0-NEXT:    lpm r24, Z
+; CHECK-O0-NEXT:    adiw r28, 2
+; CHECK-O0-NEXT:    in r0, 63
+; CHECK-O0-NEXT:    cli
+; CHECK-O0-NEXT:    out 62, r29
+; CHECK-O0-NEXT:    out 63, r0
+; CHECK-O0-NEXT:    out 61, r28
+; CHECK-O0-NEXT:    pop r29
+; CHECK-O0-NEXT:    pop r28
+; CHECK-O0-NEXT:    ret
+;
+; CHECK-O3-LABEL: foo1:
+; CHECK-O3:       ; %bb.0: ; %entry
+; CHECK-O3-NEXT:    push r28
+; CHECK-O3-NEXT:    push r29
+; CHECK-O3-NEXT:    in r28, 61
+; CHECK-O3-NEXT:    in r29, 62
+; CHECK-O3-NEXT:    sbiw r28, 2
+; CHECK-O3-NEXT:    in r0, 63
+; CHECK-O3-NEXT:    cli
+; CHECK-O3-NEXT:    out 62, r29
+; CHECK-O3-NEXT:    out 63, r0
+; CHECK-O3-NEXT:    out 61, r28
+; CHECK-O3-NEXT:    std Y+1, r24
+; CHECK-O3-NEXT:    std Y+2, r25
+; CHECK-O3-NEXT:    subi r24, -lo8(arr1)
+; CHECK-O3-NEXT:    sbci r25, -hi8(arr1)
+; CHECK-O3-NEXT:    movw r30, r24
+; CHECK-O3-NEXT:    lpm r24, Z
+; CHECK-O3-NEXT:    adiw r28, 2
+; CHECK-O3-NEXT:    in r0, 63
+; CHECK-O3-NEXT:    cli
+; CHECK-O3-NEXT:    out 62, r29
+; CHECK-O3-NEXT:    out 63, r0
+; CHECK-O3-NEXT:    out 61, r28
+; CHECK-O3-NEXT:    pop r29
+; CHECK-O3-NEXT:    pop r28
+; CHECK-O3-NEXT:    ret
+entry:
+  %a.addr = alloca i16, align 1
+  store i16 %a, i16* %a.addr, align 1
+  %0 = load i16, i16* %a.addr, align 1
+  %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arr1, i16 0, i16 %0
+  %1 = load i8, i8 addrspace(1)* %arrayidx, align 1
+  ret i8 %1
+}