diff --git a/llvm/lib/Target/ARC/ARCISelLowering.h b/llvm/lib/Target/ARC/ARCISelLowering.h
--- a/llvm/lib/Target/ARC/ARCISelLowering.h
+++ b/llvm/lib/Target/ARC/ARCISelLowering.h
@@ -77,6 +77,11 @@
 private:
   const ARCSubtarget &Subtarget;
 
+  /// Pushes replacement values for \p N into \p Results. Only an i64
+  /// ISD::READCYCLECOUNTER is handled; other nodes leave \p Results untouched.
+  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                          SelectionDAG &DAG) const override;
+
   // Lower Operand helpers
   SDValue LowerCallArguments(SDValue Chain, CallingConv::ID CallConv,
                              bool isVarArg,
diff --git a/llvm/lib/Target/ARC/ARCISelLowering.cpp b/llvm/lib/Target/ARC/ARCISelLowering.cpp
--- a/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -68,6 +68,34 @@
   }
 }
 
+/// Pushes replacement values for \p N into \p Results. For an i64
+/// ISD::READCYCLECOUNTER this is the zero-extended value of an i32
+/// READCYCLECOUNTER followed by that node's chain; anything else is ignored.
+void ARCTargetLowering::ReplaceNodeResults(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "[ARC-ISEL] ReplaceNodeResults ");
+  LLVM_DEBUG(N->dump(&DAG));
+  LLVM_DEBUG(dbgs() << "; use_count=" << N->use_size() << "\n");
+
+  switch (N->getOpcode()) {
+  case ISD::READCYCLECOUNTER:
+    if (N->getValueType(0) == MVT::i64) {
+      // We read the TIMER0 and zero-extend it to 64-bits as the intrinsic
+      // requires.
+      SDValue V =
+          DAG.getNode(ISD::READCYCLECOUNTER, SDLoc(N),
+                      DAG.getVTList(MVT::i32, MVT::Other), N->getOperand(0));
+      SDValue Op = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i64, V);
+      Results.push_back(Op);
+      Results.push_back(V.getValue(1));
+    }
+    break;
+  default:
+    break;
+  }
+}
+
 ARCTargetLowering::ARCTargetLowering(const TargetMachine &TM,
                                      const ARCSubtarget &Subtarget)
     : TargetLowering(TM), Subtarget(Subtarget) {
@@ -140,6 +168,12 @@
   // when the HasBitScan predicate is available.
   setOperationAction(ISD::CTLZ, MVT::i32, Legal);
   setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+
+  // An i32 cycle-counter read is legal as-is; an i64 read is marked Custom
+  // whenever i64 is not a legal type.
+  setOperationAction(ISD::READCYCLECOUNTER, MVT::i32, Legal);
+  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
+                     isTypeLegal(MVT::i64) ? Legal : Custom);
 }
 
 const char *ARCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -766,6 +800,13 @@
     return LowerJumpTable(Op, DAG);
   case ISD::VASTART:
     return LowerVASTART(Op, DAG);
+  case ISD::READCYCLECOUNTER:
+    // As of LLVM 3.8, the lowering code insists that we customize it even
+    // though we've declared the i32 version as legal. This is because it only
+    // thinks i64 is the truly supported version. We've already converted the
+    // i64 version to a widened i32.
+    assert(Op.getSimpleValueType() == MVT::i32);
+    return Op;
   default:
     llvm_unreachable("unimplemented operand");
   }
diff --git a/llvm/lib/Target/ARC/ARCInstrFormats.td b/llvm/lib/Target/ARC/ARCInstrFormats.td
--- a/llvm/lib/Target/ARC/ARCInstrFormats.td
+++ b/llvm/lib/Target/ARC/ARCInstrFormats.td
@@ -395,6 +395,50 @@
   let Inst{5-0} = S12{11-6};
 }
 
+// 1-register, signed 12-bit immediate Dual Operand instruction.
+// This instruction uses B as the first operand (i.e., lr B, [%count0]).
+// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
+// |B[2-0]  | 1| 0|            subop| F|B[5-3]  |S12[5-0]     |S12[11-6]  |
+class F32_SOP_RS12<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
+                   string asmstr, list<dag> pattern> :
+  InstARC<4, outs, ins, asmstr, pattern> {
+  bits<6> B;
+  bits<12> S12;
+
+  let Inst{31-27} = major;
+  let Inst{26-24} = B{2-0};
+  let Inst{23-22} = 0b10;
+  let Inst{21-16} = subop;
+  let Inst{15} = F;
+  let Inst{14-12} = B{5-3};
+  let Inst{11-6} = S12{5-0};
+  let Inst{5-0} = S12{11-6};
+
+  let DecoderMethod = "DecodeSOPwithRS12";
+}
+
+// 1-register, unsigned 6-bit immediate Dual Operand instruction.
+// This instruction uses B as the first operand.
+// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
+// |B[2-0]  | 0| 1|            subop| F|B[5-3]  |U6           |0|0|0|0|0|0|
+class F32_SOP_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
+                  string asmstr, list<dag> pattern> :
+  InstARC<4, outs, ins, asmstr, pattern> {
+  bits<6> B;
+  bits<6> U6;
+
+  let Inst{31-27} = major;
+  let Inst{26-24} = B{2-0};
+  let Inst{23-22} = 0b01;
+  let Inst{21-16} = subop;
+  let Inst{15} = F;
+  let Inst{14-12} = B{5-3};
+  let Inst{11-6} = U6;
+  let Inst{5-0} = 0;
+
+  let DecoderMethod = "DecodeSOPwithRU6";
+}
+
 // 2-register, 32-bit immediate (LImm) Dual Operand instruction.
 // This instruction has the 32-bit immediate in bits 32-63, and
 // 62 in the C register operand slot, but is otherwise F32_DOP_RR.
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.td b/llvm/lib/Target/ARC/ARCInstrInfo.td
--- a/llvm/lib/Target/ARC/ARCInstrInfo.td
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.td
@@ -270,6 +270,19 @@
   def _rrlimm : Pat<(InFrag i32:$B, imm32:$LImm), (RRLImm i32:$B, imm32:$LImm)>;
 }
 
+// NOTE: This could be specialized later with a custom `PrintMethod` for
+// displaying the aux register name. E.g. `[%count0]` instead of [33].
+def AuxReg : Operand<i32>;
+
+def LR_rs12 : F32_SOP_RS12<0b00100, 0b101010, 0,
+                           (outs GPR32:$B), (ins AuxReg:$C),
+                           "lr\t$B, [$C]", []>;
+def LR_ru6 : F32_SOP_RU6<0b00100, 0b101010, 0,
+                         (outs GPR32:$B), (ins AuxReg:$C),
+                         "lr\t$B, [$C]", []>;
+
+def: Pat<(i32 readcyclecounter), (LR_rs12 0x21) >; // read timer
+
 // ---------------------------------------------------------------------------
 // Instruction definitions and patterns for 3 operand binary instructions.
// --------------------------------------------------------------------------- diff --git a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp --- a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp +++ b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp @@ -107,6 +107,12 @@ static DecodeStatus DecodeLdRLImmInstruction(MCInst &, uint64_t, uint64_t, const void *); +static DecodeStatus DecodeSOPwithRS12(MCInst &, uint64_t, uint64_t, + const void *); + +static DecodeStatus DecodeSOPwithRU6(MCInst &, uint64_t, uint64_t, + const void *); + static DecodeStatus DecodeCCRU6Instruction(MCInst &, uint64_t, uint64_t, const void *); @@ -311,6 +317,29 @@ return MCDisassembler::Success; } +static DecodeStatus DecodeSOPwithRU6(MCInst &Inst, uint64_t Insn, + uint64_t Address, const void *Decoder) { + unsigned DstB = decodeBField(Insn); + DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder); + using Field = decltype(Insn); + Field U6 = fieldFromInstruction(Insn, 6, 6); + Inst.addOperand(MCOperand::createImm(U6)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeSOPwithRS12(MCInst &Inst, uint64_t Insn, + uint64_t Address, const void *Decoder) { + unsigned DstB = decodeBField(Insn); + DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder); + using Field = decltype(Insn); + Field Lower = fieldFromInstruction(Insn, 6, 6); + Field Upper = fieldFromInstruction(Insn, 0, 5); + Field Sign = fieldFromInstruction(Insn, 5, 1) ? 
-1 : 1; + Field Result = Sign * ((Upper << 6) + Lower); + Inst.addOperand(MCOperand::createImm(Result)); + return MCDisassembler::Success; +} + DecodeStatus ARCDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, diff --git a/llvm/test/CodeGen/ARC/intrinsics.ll b/llvm/test/CodeGen/ARC/intrinsics.ll --- a/llvm/test/CodeGen/ARC/intrinsics.ll +++ b/llvm/test/CodeGen/ARC/intrinsics.ll @@ -4,20 +4,29 @@ declare i32 @llvm.ctlz.i32(i32, i1) declare i32 @llvm.cttz.i32(i32, i1) +declare i64 @llvm.readcyclecounter() -; CHECK-LABEL: clz32: +; CHECK-LABEL: test_ctlz_i32: ; CHECK: fls.f %r0, %r0 ; CHECK-NEXT: mov.eq %r0, 32 ; CHECK-NEXT: rsub.ne %r0, %r0, 31 -define i32 @clz32(i32 %x) { +define i32 @test_ctlz_i32(i32 %x) { %a = call i32 @llvm.ctlz.i32(i32 %x, i1 false) ret i32 %a } -; CHECK-LABEL: ctz32: +; CHECK-LABEL: test_cttz_i32: ; CHECK: ffs.f %r0, %r0 ; CHECK-NEXT: mov.eq %r0, 32 -define i32 @ctz32(i32 %x) { +define i32 @test_cttz_i32(i32 %x) { %a = call i32 @llvm.cttz.i32(i32 %x, i1 false) ret i32 %a } + +; CHECK-LABEL: test_readcyclecounter: +; CHECK: lr %r0, [33] +; CHECK-NEXT: mov %r1, 0 +define i64 @test_readcyclecounter() nounwind { + %a = call i64 @llvm.readcyclecounter() + ret i64 %a +} diff --git a/llvm/test/MC/Disassembler/ARC/ldst.txt b/llvm/test/MC/Disassembler/ARC/ldst.txt --- a/llvm/test/MC/Disassembler/ARC/ldst.txt +++ b/llvm/test/MC/Disassembler/ARC/ldst.txt @@ -92,3 +92,35 @@ # CHECK: stb.di.ab %r0, [%r9,64] 0x40 0x19 0x32 0x10 + +# LR instructions with a U6 immediate bit pattern +# ([33] maps to the [%count0] auxilary register) + +# CHECK: lr %r0, [33] +0x6a 0x20 0x40 0x08 + +# CHECK: lr %r7, [33] +0x6a 0x27 0x40 0x08 + +# CHECK: lr %r15, [33] +0x6a 0x27 0x40 0x18 + +# CHECK: lr %r22, [33] +0x6a 0x26 0x40 0x28 + +# LR instructions with an S12 immediate bit pattern + +# CHECK: lr %r0, [33] +0xaa 0x20 0x40 0x08 + +# The following don't necessarily map to real auxilary registers, but +# the different range of 
numbers helps exercise the S12 decoder. + +# CHECK: lr %r0, [-33] +0xaa 0x20 0x60 0x08 + +# CHECK: lr %r0, [97] +0xaa 0x20 0x41 0x08 + +# CHECK: lr %r0, [-97] +0xaa 0x20 0x61 0x08