diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -59,6 +59,7 @@ def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">; def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">; def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">; +def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">; def brtarget : Operand { let PrintMethod = "printBrTargetOperand"; @@ -75,6 +76,12 @@ [{return isInt<32>(N->getSExtValue()); }]>; def i32immSExt32 : PatLeaf<(i32 imm), [{return isInt<32>(N->getSExtValue()); }]>; +def i64immZExt32 : PatLeaf<(i64 imm), + [{return isUInt<32>(N->getZExtValue()); }]>; + +def imm_to_i64 : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; // Addressing modes. def ADDRri : ComplexPattern; @@ -449,7 +456,7 @@ } class STOREi64 - : STORE; + : STORE; let Predicates = [BPFNoALU32] in { def STW : STOREi64; @@ -458,6 +465,50 @@ } def STD : STOREi64; +class STORE_imm + : TYPE_LD_ST { + bits<20> addr; + bits<32> imm; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{47-32} = addr{15-0}; // offset + let Inst{31-0} = imm; + let BPFClass = BPF_ST; +} + +let Predicates = [BPFHasStoreImm] in { + // Opcode (BPF_ST | BPF_MEM | BPF_DW) implies sign extension for + // value stored to memory: + // - it is fine to generate such write when immediate is -1 + // - it is incorrect to generate such write when immediate is + // +0xffff_ffff. + // + // In the latter case two instructions would be generated instead of + // one BPF_ST: + // rA = 0xffffffff ll ; LD_imm64 + // *(u64 *)(rB + 0) = rA ; STX + // + // For BPF_{B,H,W} the size of value stored matches size of the immediate. + def STD_imm : STORE_imm; + def STW_imm : STORE_imm; + def STH_imm : STORE_imm; + def STB_imm : STORE_imm; +} + +let Predicates = [BPFHasALU32, BPFHasStoreImm] in { + def : Pat<(store (i32 imm:$src), ADDRri:$dst), + (STW_imm (imm_to_i64 $src), ADDRri:$dst)>; + def : Pat<(truncstorei16 (i32 imm:$src), ADDRri:$dst), + (STH_imm (imm_to_i64 imm:$src), ADDRri:$dst)>; + def : Pat<(truncstorei8 (i32 imm:$src), ADDRri:$dst), + (STB_imm (imm_to_i64 imm:$src), ADDRri:$dst)>; +} + // LOAD instructions class LOAD Pattern> : TYPE_LD_ST - : STORE32; + : STORE32; let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { def STW32 : STOREi32; diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -93,6 +93,11 @@ LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n"); } +static bool isST(unsigned Opcode) { + return Opcode == BPF::STB_imm || Opcode == BPF::STH_imm || + Opcode == BPF::STW_imm || Opcode == BPF::STD_imm; +} + static bool isSTX32(unsigned Opcode) { return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32; } @@ -141,7 +146,7 @@ COREOp = BPF::CORE_LD64; else if (isLDX32(Opcode)) COREOp = BPF::CORE_LD32; - else if (isSTX64(Opcode) || isSTX32(Opcode)) + else if (isSTX64(Opcode) || isSTX32(Opcode) || isST(Opcode)) COREOp = BPF::CORE_ST; else continue; diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h --- a/llvm/lib/Target/BPF/BPFSubtarget.h +++ b/llvm/lib/Target/BPF/BPFSubtarget.h @@ -57,7 +57,7 @@ bool UseDwarfRIS; // whether cpu v4 insns are enabled. - bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol; + bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm; public: // This constructor initializes the data members to match that @@ -79,6 +79,7 @@ bool hasBswap() const { return HasBswap; } bool hasSdivSmod() const { return HasSdivSmod; } bool hasGotol() const { return HasGotol; } + bool hasStoreImm() const { return HasStoreImm; } const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; } const BPFFrameLowering *getFrameLowering() const override { diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -33,6 +33,9 @@ cl::init(false), cl::desc("Disable sdiv/smod insns")); static cl::opt Disable_gotol("disable-gotol", cl::Hidden, cl::init(false), cl::desc("Disable gotol insn")); +static cl::opt + Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false), + cl::desc("Disable BPF_ST (immediate store) insn")); void BPFSubtarget::anchor() {} @@ -54,6 +57,7 @@ HasBswap = false; HasSdivSmod = false; HasGotol = false; + HasStoreImm = false; } void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -80,6 +84,7 @@ HasBswap = !Disable_bswap; HasSdivSmod = !Disable_sdiv_smod; HasGotol = !Disable_gotol; + HasStoreImm = !Disable_StoreImm; return; } } diff --git a/llvm/test/CodeGen/BPF/CORE/field-reloc-st-imm.ll b/llvm/test/CodeGen/BPF/CORE/field-reloc-st-imm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/field-reloc-st-imm.ll @@ -0,0 +1,156 @@ +; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s + +; Make sure that CO-RE relocations had been generated correctly for +; BPF_ST (store immediate) instructions and that +; BPFMISimplifyPatchable optimizations had been applied. +; +; Generated from the following source code: +; +; #define __pai __attribute__((preserve_access_index)) +; +; struct foo { +; unsigned char b; +; unsigned short h; +; unsigned int w; +; unsigned long d; +; } __pai; +; +; void bar(volatile struct foo *p) { +; p->b = 1; +; p->h = 2; +; p->w = 3; +; p->d = 4; +; } +; +; Using the following command: +; +; clang -g -O2 -S -emit-llvm -mcpu=v4 --target=bpfel test.c + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" + +@"llvm.foo:0:0$0:0" = external global i64, !llvm.preserve.access.index !0 #0 +@"llvm.foo:0:2$0:1" = external global i64, !llvm.preserve.access.index !0 #0 +@"llvm.foo:0:4$0:2" = external global i64, !llvm.preserve.access.index !0 #0 +@"llvm.foo:0:8$0:3" = external global i64, !llvm.preserve.access.index !0 #0 + +; Function Attrs: nofree nounwind +define dso_local void @bar(ptr noundef %p) local_unnamed_addr #1 !dbg !18 { +entry: + call void @llvm.dbg.value(metadata ptr %p, metadata !24, metadata !DIExpression()), !dbg !25 + %0 = load i64, ptr @"llvm.foo:0:0$0:0", align 8 + %1 = getelementptr i8, ptr %p, i64 %0 + %2 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 0, ptr %1) + store volatile i8 1, ptr %2, align 8, !dbg !26, !tbaa !27 + %3 = load i64, ptr @"llvm.foo:0:2$0:1", align 8 + %4 = getelementptr i8, ptr %p, i64 %3 + %5 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 1, ptr %4) + store volatile i16 2, ptr %5, align 2, !dbg !34, !tbaa !35 + %6 = load i64, ptr @"llvm.foo:0:4$0:2", align 8 + %7 = getelementptr i8, ptr %p, i64 %6 + %8 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 2, ptr %7) + store volatile i32 3, ptr %8, align 4, !dbg !36, !tbaa !37 + %9 = load i64, ptr @"llvm.foo:0:8$0:3", align 8 + %10 = getelementptr i8, ptr %p, i64 %9 + %11 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 3, ptr %10) + store volatile i64 4, ptr %11, align 8, !dbg !38, !tbaa !39 + ret void, !dbg !40 +} + +; CHECK: [[L0:.Ltmp.*]]: +; CHECK: *(u8 *)(r1 + 0) = 1 +; CHECK: [[L2:.Ltmp.*]]: +; CHECK: *(u16 *)(r1 + 2) = 2 +; CHECK: [[L4:.Ltmp.*]]: +; CHECK: *(u32 *)(r1 + 4) = 3 +; CHECK: [[L6:.Ltmp.*]]: +; CHECK: *(u64 *)(r1 + 8) = 4 + +; CHECK: .section .BTF +; ... +; CHECK: .long [[FOO:.*]] # BTF_KIND_STRUCT(id = [[FOO_ID:.*]]) +; ... +; CHECK: .ascii "foo" # string offset=[[FOO]] +; CHECK: .ascii ".text" # string offset=[[TEXT:.*]] +; CHECK: .ascii "0:0" # string offset=[[S1:.*]] +; CHECK: .ascii "0:1" # string offset=[[S2:.*]] +; CHECK: .ascii "0:2" # string offset=[[S3:.*]] +; CHECK: .ascii "0:3" # string offset=[[S4:.*]] + +; CHECK: .section .BTF.ext +; ... +; CHECK: .long [[#]] # FieldReloc +; CHECK-NEXT: .long [[TEXT]] # Field reloc section string offset=[[TEXT]] +; CHECK-NEXT: .long [[#]] +; CHECK-NEXT: .long [[L0]] +; CHECK-NEXT: .long [[FOO_ID]] +; CHECK-NEXT: .long [[S1]] +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long [[L2]] +; CHECK-NEXT: .long [[FOO_ID]] +; CHECK-NEXT: .long [[S2]] +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long [[L4]] +; CHECK-NEXT: .long [[FOO_ID]] +; CHECK-NEXT: .long [[S3]] +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long [[L6]] +; CHECK-NEXT: .long [[FOO_ID]] +; CHECK-NEXT: .long [[S4]] +; CHECK-NEXT: .long 0 + +; Function Attrs: nofree nosync nounwind memory(none) +declare ptr @llvm.bpf.passthrough.p0.p0(i32, ptr) #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #3 + +attributes #0 = { "btf_ama" } +attributes #1 = { nofree nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v4" } +attributes #2 = { nofree nosync nounwind memory(none) } +attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!11} +!llvm.module.flags = !{!12, !13, !14, !15, !16} +!llvm.ident = !{!17} + +!0 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: !1, line: 3, size: 128, elements: !2) +!1 = !DIFile(filename: "some-file.c", directory: "/some/dir", checksumkind: CSK_MD5, checksum: "e5d03b4d39dfffadc6c607e956c37996") +!2 = !{!3, !5, !7, !9} +!3 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !0, file: !1, line: 4, baseType: !4, size: 8) +!4 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) +!5 = !DIDerivedType(tag: DW_TAG_member, name: "h", scope: !0, file: !1, line: 5, baseType: !6, size: 16, offset: 16) +!6 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) +!7 = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !0, file: !1, line: 6, baseType: !8, size: 32, offset: 32) +!8 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!9 = !DIDerivedType(tag: DW_TAG_member, name: "d", scope: !0, file: !1, line: 7, baseType: !10, size: 64, offset: 64) +!10 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned) +!11 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 18.0.0 ...", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!12 = !{i32 7, !"Dwarf Version", i32 5} +!13 = !{i32 2, !"Debug Info Version", i32 3} +!14 = !{i32 1, !"wchar_size", i32 4} +!15 = !{i32 7, !"frame-pointer", i32 2} +!16 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!17 = !{!"clang version 18.0.0 ..."} +!18 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 10, type: !19, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !11, retainedNodes: !23) +!19 = !DISubroutineType(types: !20) +!20 = !{null, !21} +!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64) +!22 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !0) +!23 = !{!24} +!24 = !DILocalVariable(name: "p", arg: 1, scope: !18, file: !1, line: 10, type: !21) +!25 = !DILocation(line: 0, scope: !18) +!26 = !DILocation(line: 11, column: 8, scope: !18) +!27 = !{!28, !29, i64 0} +!28 = !{!"foo", !29, i64 0, !31, i64 2, !32, i64 4, !33, i64 8} +!29 = !{!"omnipotent char", !30, i64 0} +!30 = !{!"Simple C/C++ TBAA"} +!31 = !{!"short", !29, i64 0} +!32 = !{!"int", !29, i64 0} +!33 = !{!"long", !29, i64 0} +!34 = !DILocation(line: 12, column: 8, scope: !18) +!35 = !{!28, !31, i64 2} +!36 = !DILocation(line: 13, column: 8, scope: !18) +!37 = !{!28, !32, i64 4} +!38 = !DILocation(line: 14, column: 8, scope: !18) +!39 = !{!28, !33, i64 8} +!40 = !DILocation(line: 15, column: 1, scope: !18) diff --git a/llvm/test/CodeGen/BPF/store_imm.ll b/llvm/test/CodeGen/BPF/store_imm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/store_imm.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=bpfel -mcpu=v4 -show-mc-encoding | FileCheck %s + +target triple = "bpf" + +define void @byte(ptr %p0) { +; CHECK-LABEL: byte: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u8 *)(r1 + 0) = 1 # encoding: [0x72,0x01,0x00,0x00,0x01,0x00,0x00,0x00] +; CHECK-NEXT: *(u8 *)(r1 + 1) = 255 # encoding: [0x72,0x01,0x01,0x00,0xff,0x00,0x00,0x00] + %p1 = getelementptr i8, ptr %p0, i32 1 + + store volatile i8 1, ptr %p0, align 1 + store volatile i8 -1, ptr %p1, align 1 + + unreachable +} + +define void @half(ptr, ptr %p0) { +; CHECK-LABEL: half: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u16 *)(r2 + 0) = 1 # encoding: [0x6a,0x02,0x00,0x00,0x01,0x00,0x00,0x00] +; CHECK-NEXT: *(u16 *)(r2 + 2) = 65535 # encoding: [0x6a,0x02,0x02,0x00,0xff,0xff,0x00,0x00] + %p1 = getelementptr i8, ptr %p0, i32 2 + + store volatile i16 1, ptr %p0, align 2 + store volatile i16 -1, ptr %p1, align 2 + + unreachable +} + +define void @word(ptr, ptr, ptr %p0) { +; CHECK-LABEL: word: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u32 *)(r3 + 0) = 1 # encoding: [0x62,0x03,0x00,0x00,0x01,0x00,0x00,0x00] +; CHECK-NEXT: *(u32 *)(r3 + 4) = -1 # encoding: [0x62,0x03,0x04,0x00,0xff,0xff,0xff,0xff] +; CHECK-NEXT: *(u32 *)(r3 + 8) = -2000000000 # encoding: [0x62,0x03,0x08,0x00,0x00,0x6c,0xca,0x88] +; CHECK-NEXT: *(u32 *)(r3 + 12) = -1 # encoding: [0x62,0x03,0x0c,0x00,0xff,0xff,0xff,0xff] +; CHECK-NEXT: *(u32 *)(r3 + 12) = 0 # encoding: [0x62,0x03,0x0c,0x00,0x00,0x00,0x00,0x00] + %p1 = getelementptr i8, ptr %p0, i32 4 + %p2 = getelementptr i8, ptr %p0, i32 8 + %p3 = getelementptr i8, ptr %p0, i32 12 + + store volatile i32 1, ptr %p0, align 4 + store volatile i32 -1, ptr %p1, align 4 + store volatile i32 -2000000000, ptr %p2, align 4 + store volatile i32 4294967295, ptr %p3, align 4 + store volatile i32 4294967296, ptr %p3, align 4 + + unreachable +} + +define void @dword(ptr, ptr, ptr, ptr %p0) { +; CHECK-LABEL: dword: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u64 *)(r4 + 0) = 1 # encoding: [0x7a,0x04,0x00,0x00,0x01,0x00,0x00,0x00] +; CHECK-NEXT: *(u64 *)(r4 + 8) = -1 # encoding: [0x7a,0x04,0x08,0x00,0xff,0xff,0xff,0xff] +; CHECK-NEXT: *(u64 *)(r4 + 16) = 2000000000 # encoding: [0x7a,0x04,0x10,0x00,0x00,0x94,0x35,0x77] +; CHECK-NEXT: *(u64 *)(r4 + 16) = -2000000000 # encoding: [0x7a,0x04,0x10,0x00,0x00,0x6c,0xca,0x88] +; CHECK-NEXT: r1 = 4294967295 ll # encoding: [0x18,0x01,0x00,0x00,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-NEXT: *(u64 *)(r4 + 24) = r1 # encoding: [0x7b,0x14,0x18,0x00,0x00,0x00,0x00,0x00] + %p1 = getelementptr i8, ptr %p0, i32 8 + %p2 = getelementptr i8, ptr %p0, i32 16 + %p3 = getelementptr i8, ptr %p0, i32 24 + + store volatile i64 1, ptr %p0, align 8 + store volatile i64 -1, ptr %p1, align 8 + store volatile i64 2000000000, ptr %p2, align 8 + store volatile i64 -2000000000, ptr %p2, align 8 + store volatile i64 4294967295, ptr %p3, align 8 + + unreachable +} + +define void @unaligned(ptr %p0) { +; CHECK-LABEL: unaligned: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u8 *)(r1 + 1) = 255 # encoding: [0x72,0x01,0x01,0x00,0xff,0x00,0x00,0x00] +; CHECK-NEXT: *(u8 *)(r1 + 0) = 254 # encoding: [0x72,0x01,0x00,0x00,0xfe,0x00,0x00,0x00] +; CHECK-NEXT: *(u16 *)(r1 + 10) = 65535 # encoding: [0x6a,0x01,0x0a,0x00,0xff,0xff,0x00,0x00] +; CHECK-NEXT: *(u16 *)(r1 + 8) = 65534 # encoding: [0x6a,0x01,0x08,0x00,0xfe,0xff,0x00,0x00] +; CHECK-NEXT: *(u32 *)(r1 + 20) = -1 # encoding: [0x62,0x01,0x14,0x00,0xff,0xff,0xff,0xff] +; CHECK-NEXT: *(u32 *)(r1 + 16) = -2 # encoding: [0x62,0x01,0x10,0x00,0xfe,0xff,0xff,0xff] + %p1 = getelementptr i8, ptr %p0, i32 8 + %p2 = getelementptr i8, ptr %p0, i32 16 + + store volatile i16 -2, ptr %p0, align 1 + store volatile i32 -2, ptr %p1, align 2 + store volatile i64 -2, ptr %p2, align 4 + + unreachable +} + +define void @inline_asm(ptr %p0) { +; CHECK-LABEL: inline_asm: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: *(u32 *)(r0 + 42) = 7 # encoding: [0x62,0x00,0x2a,0x00,0x07,0x00,0x00,0x00] +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP + call void asm "*(u32 *)(r0 + 42) = 7;", "~{r0},~{mem}"() + + unreachable +}