diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp --- a/clang/lib/Basic/Targets/BPF.cpp +++ b/clang/lib/Basic/Targets/BPF.cpp @@ -32,7 +32,7 @@ } static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2", - "v3", "probe"}; + "v3", "v4", "probe"}; bool BPFTargetInfo::isValidCPUName(StringRef Name) const { return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -135,7 +135,7 @@ // RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF // BPF: error: unknown target CPU 'not-a-cpu' -// BPF: note: valid target CPU values are: generic, v1, v2, v3, probe +// BPF: note: valid target CPU values are: generic, v1, v2, v3, v4, probe // RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR // AVR: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td --- a/llvm/lib/Target/BPF/BPF.td +++ b/llvm/lib/Target/BPF/BPF.td @@ -21,6 +21,7 @@ def : Proc<"v1", []>; def : Proc<"v2", []>; def : Proc<"v3", []>; +def : Proc<"v4", []>; def : Proc<"probe", []>; def DummyFeature : SubtargetFeature<"dummy", "isDummyMode", diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td --- a/llvm/lib/Target/BPF/BPFInstrFormats.td +++ b/llvm/lib/Target/BPF/BPFInstrFormats.td @@ -44,6 +44,9 @@ def BPF_ARSH : BPFArithOp<0xc>; def BPF_END : BPFArithOp<0xd>; +def BPF_XCHG : BPFArithOp<0xe>; +def BPF_CMPXCHG : BPFArithOp<0xf>; + class BPFEndDir val> { bits<1> Value = val; } @@ -86,7 +89,13 @@ def BPF_ABS : BPFModeModifer<0x1>; def BPF_IND : BPFModeModifer<0x2>; def BPF_MEM : BPFModeModifer<0x3>; -def BPF_XADD : BPFModeModifer<0x6>; +def BPF_ATOMIC : BPFModeModifer<0x6>; + +class BPFAtomicFlag val> { + bits<4> Value = val; +} + +def BPF_FETCH : BPFAtomicFlag<0x1>; class InstBPF pattern> : Instruction { diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -53,6 +53,8 @@ def BPFIsBigEndian : Predicate<"!CurDAG->getDataLayout().isLittleEndian()">; def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">; def BPFNoALU32 : Predicate<"!Subtarget->getHasAlu32()">; +def BPFHasAtomicExt : Predicate<"Subtarget->getHasAtomicExt()">; +def BPFNoAtomicExt : Predicate<"!Subtarget->getHasAtomicExt()">; def brtarget : Operand { let PrintMethod = "printBrTargetOperand"; @@ -617,9 +619,9 @@ def : Pat<(i64 (extloadi32 ADDRri:$src)), (i64 (LDW ADDRri:$src))>; } -// Atomics +// Atomic XADD class XADD - : TYPE_LD_ST - : TYPE_LD_ST; } - let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { + let Predicates = [BPFNoAtomicExt, BPFHasALU32], DecoderNamespace = "BPFALU32" in { def XADDW32 : XADD32; } - def XADDD : XADD; + let Predicates = [BPFNoAtomicExt] in { + def XADDD : XADD; + } +} + +// Atomic Fetch-and-Op operations +class XFALU64 + : TYPE_LD_ST { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = Opc.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +class XFALU32 + : TYPE_LD_ST { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = Opc.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +let Constraints = "$dst = $val" in { + let Predicates = [BPFHasAtomicExt], DecoderNamespace = "BPFALU32" in { + def XFADDW8 : XFALU32; + def XFADDW16 : XFALU32; + def XFADDW32 : XFALU32; + def XFSUBW8 : XFALU32; + def XFSUBW16 : XFALU32; + def XFSUBW32 : XFALU32; + } + + let Predicates = [BPFHasAtomicExt] in { + def XFADDD : XFALU64; + def XFSUBD : XFALU64; + } +} + +// Atomic Exchange +class XCHG + : TYPE_LD_ST { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = BPF_XCHG.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +class XCHG32 + : TYPE_LD_ST { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = BPF_XCHG.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +let Constraints = "$dst = $val" in { + let Predicates = [BPFHasAtomicExt], DecoderNamespace = "BPFALU32" in { + def XCHGB32 : XCHG32; + def XCHGH32 : XCHG32; + def XCHGW32 : XCHG32; + } + + let Predicates = [BPFHasAtomicExt] in { + def XCHGD : XCHG; + } +} + +// Compare-And-Exchange +class CMPXCHG + : TYPE_LD_ST { + bits<4> dst; + bits<4> new; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{11-8} = new; + let Inst{7-4} = BPF_CMPXCHG.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +class CMPXCHG32 + : TYPE_LD_ST { + bits<4> dst; + bits<4> new; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{11-8} = new; + let Inst{7-4} = BPF_CMPXCHG.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +let Predicates = [BPFHasAtomicExt], Uses = [W0], DecoderNamespace = "BPFALU32" in { + def CMPXCHGW32 : CMPXCHG32; +} + +let Predicates = [BPFHasAtomicExt], Uses = [R0] in { + def CMPXCHGD : CMPXCHG; } // bswap16, bswap32, bswap64 diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h --- a/llvm/lib/Target/BPF/BPFSubtarget.h +++ b/llvm/lib/Target/BPF/BPFSubtarget.h @@ -57,6 +57,9 @@ // whether we should enable MCAsmInfo DwarfUsesRelocationsAcrossSections bool UseDwarfRIS; + // whether the cpu supports extended atomic operations. + bool HasAtomicExt; + public: // This constructor initializes the data members to match that // of the specified triple. @@ -72,6 +75,7 @@ bool getHasJmp32() const { return HasJmp32; } bool getHasAlu32() const { return HasAlu32; } bool getUseDwarfRIS() const { return UseDwarfRIS; } + bool getHasAtomicExt() const { return HasAtomicExt; } const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; } const BPFFrameLowering *getFrameLowering() const override { diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -38,6 +38,7 @@ HasJmp32 = false; HasAlu32 = false; UseDwarfRIS = false; + HasAtomicExt = false; } void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -55,6 +56,13 @@ HasAlu32 = true; return; } + if (CPU == "v4") { + HasJmpExt = true; + HasJmp32 = true; + HasAlu32 = true; + HasAtomicExt = true; + return; + } } BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU, diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp --- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -58,7 +58,7 @@ BPF_MEM = 0x3, BPF_LEN = 0x4, BPF_MSH = 0x5, - BPF_XADD = 0x6 + BPF_ATOMIC = 0x6 }; BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) @@ -176,7 +176,7 @@ uint8_t InstMode = getInstMode(Insn); if ((InstClass == BPF_LDX || InstClass == BPF_STX) && getInstSize(Insn) != BPF_DW && - (InstMode == BPF_MEM || InstMode == BPF_XADD) && + (InstMode == BPF_MEM || InstMode == BPF_ATOMIC) && STI.getFeatureBits()[BPF::ALU32]) Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address, this, STI); diff --git a/llvm/test/CodeGen/BPF/atomics.ll b/llvm/test/CodeGen/BPF/atomics.ll --- a/llvm/test/CodeGen/BPF/atomics.ll +++ b/llvm/test/CodeGen/BPF/atomics.ll @@ -1,8 +1,11 @@ ; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck %s +; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck --check-prefix=CHECK-V4 %s ; CHECK-LABEL: test_load_add_32 ; CHECK: lock *(u32 *)(r1 + 0) += r2 ; CHECK: encoding: [0xc3,0x21 +; CHECK-V4: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2) +; CHECK-V4: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00] define void @test_load_add_32(i32* %p, i32 zeroext %v) { entry: atomicrmw add i32* %p, i32 %v seq_cst @@ -12,6 +15,8 @@ ; CHECK-LABEL: test_load_add_64 ; CHECK: lock *(u64 *)(r1 + 0) += r2 ; CHECK: encoding: [0xdb,0x21 +; CHECK-V4: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-V4: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00] define void @test_load_add_64(i64* %p, i64 zeroext %v) { entry: atomicrmw add i64* %p, i64 %v seq_cst diff --git a/llvm/test/CodeGen/BPF/atomics_2.ll b/llvm/test/CodeGen/BPF/atomics_2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/atomics_2.ll @@ -0,0 +1,139 @@ +; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck %s +; +; Source: +; char test_load_sub_8(char *p, char v) { +; return __sync_fetch_and_sub(p, v); +; } +; short test_load_sub_16(short *p, short v) { +; return __sync_fetch_and_sub(p, v); +; } +; int test_load_sub_32(int *p, int v) { +; return __sync_fetch_and_sub(p, v); +; } +; int test_load_sub_64(long *p, long v) { +; return __sync_fetch_and_sub(p, v); +; } +; int test_xchg_8(char *p, char v) { +; return __sync_lock_test_and_set(p, v); +; } +; int test_xchg_16(short *p, short v) { +; return __sync_lock_test_and_set(p, v); +; } +; int test_xchg_32(int *p, int v) { +; return __sync_lock_test_and_set(p, v); +; } +; int test_xchg_64(long *p, long v) { +; return __sync_lock_test_and_set(p, v); +; } +; int test_cas_32(int *p, int old, int new) { +; return __sync_val_compare_and_swap(p, old, new); +; } +; long test_cas_64(long *p, long old, long new) { +; return __sync_val_compare_and_swap(p, old, new); +; } + +; CHECK-LABEL: test_load_sub_8 +; CHECK: w0 = w2 +; CHECK: w0 = atomic_fetch_sub((u8 *)(r1 + 0), w0) +; CHECK: encoding: [0xd3,0x01,0x00,0x00,0x11,0x00,0x00,0x00] +define dso_local signext i8 @test_load_sub_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr #0 { +entry: + %0 = atomicrmw sub i8* %p, i8 %v seq_cst + ret i8 %0 +} + +; CHECK-LABEL: test_load_sub_16 +; CHECK: w0 = w2 +; CHECK: w0 = atomic_fetch_sub((u16 *)(r1 + 0), w0) +; CHECK: encoding: [0xcb,0x01,0x00,0x00,0x11,0x00,0x00,0x00] +define dso_local signext i16 @test_load_sub_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr #0 { +entry: + %0 = atomicrmw sub i16* %p, i16 %v seq_cst + ret i16 %0 +} + +; CHECK-LABEL: test_load_sub_32 +; CHECK: w0 = w2 +; CHECK: w0 = atomic_fetch_sub((u32 *)(r1 + 0), w0) +; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x11,0x00,0x00,0x00] +define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr { +entry: + %0 = atomicrmw sub i32* %p, i32 %v seq_cst + ret i32 %0 +} + +; CHECK-LABEL: test_load_sub_64 +; CHECK: r0 = r2 +; CHECK: r0 = atomic_fetch_sub((u64 *)(r1 + 0), r0) +; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x11,0x00,0x00,0x00] +define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw sub i64* %p, i64 %v seq_cst + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_xchg_8 +; CHECK: w0 = w2 +; CHECK: w0 = xchg32_8(r1 + 0, w0) +; CHECK: encoding: [0xd3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00] +define dso_local i32 @test_xchg_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr { +entry: + %0 = atomicrmw xchg i8* %p, i8 %v seq_cst + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_xchg_16 +; CHECK: w0 = w2 +; CHECK: w0 = xchg32_16(r1 + 0, w0) +; CHECK: encoding: [0xcb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00] +define dso_local i32 @test_xchg_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr { +entry: + %0 = atomicrmw xchg i16* %p, i16 %v seq_cst + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_xchg_32 +; CHECK: w0 = w2 +; CHECK: w0 = xchg32_32(r1 + 0, w0) +; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00] +define dso_local i32 @test_xchg_32(i32* nocapture %p, i32 %v) local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32* %p, i32 %v seq_cst + ret i32 %0 +} + +; CHECK-LABEL: test_xchg_64 +; CHECK: r0 = r2 +; CHECK: r0 = xchg_64(r1 + 0, r0) +; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00] +define dso_local i32 @test_xchg_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw xchg i64* %p, i64 %v seq_cst + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_cas_32 +; CHECK: w0 = w2 +; CHECK: w0 = cmpxchg32_32(r1 + 0, w0, w3) +; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xf1,0x03,0x00,0x00] +define dso_local i32 @test_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr { +entry: + %0 = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst + %1 = extractvalue { i32, i1 } %0, 0 + ret i32 %1 +} + +; CHECK-LABEL: test_cas_64 +; CHECK: r0 = r2 +; CHECK: r0 = cmpxchg_64(r1 + 0, r0, r3) +; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xf1,0x03,0x00,0x00] +define dso_local i64 @test_cas_64(i64* nocapture %p, i64 %old, i64 %new) local_unnamed_addr { +entry: + %0 = cmpxchg i64* %p, i64 %old, i64 %new seq_cst seq_cst + %1 = extractvalue { i64, i1 } %0, 0 + ret i64 %1 +}