Commit c195651

yonghong-song authored and Alexei Starovoitov committed
bpf: add bpf_get_stack helper
Currently, stackmap and the bpf_get_stackid helper are provided for bpf programs to get the stack trace. This approach has a limitation, though: if two stack traces have the same hash, only one gets stored in the stackmap table, so some stack traces are missing from the user's perspective.

This patch implements a new helper, bpf_get_stack, which sends stack traces directly to the bpf program. The bpf program is then able to see all stack traces, and can do in-kernel processing or send them to user space through a shared map or bpf_perf_event_output.

Acked-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Yonghong Song <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 5f41263 commit c195651
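
As a sketch of the intended usage (not part of this commit; the program and map names are made up, and it assumes the selftests-style bpf_helpers.h), a kprobe program could fetch whole traces like this:

#include <linux/bpf.h>
#include <linux/ptrace.h>
#include "bpf_helpers.h"

#define MAX_STACK_DEPTH 127

/* Per-CPU scratch buffer for one full stack; the 512-byte BPF stack
 * is too small to hold MAX_STACK_DEPTH ips directly.
 */
struct bpf_map_def SEC("maps") stack_buf = {
	.type        = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size    = sizeof(__u32),
	.value_size  = MAX_STACK_DEPTH * sizeof(__u64),
	.max_entries = 1,
};

SEC("kprobe/sys_write")
int trace_stack(struct pt_regs *ctx)
{
	__u32 key = 0;
	__u64 *buf = bpf_map_lookup_elem(&stack_buf, &key);

	if (!buf)
		return 0;
	/* Unlike bpf_get_stackid(), every call yields the raw trace,
	 * so nothing is lost to hash collisions in a stackmap.
	 */
	bpf_get_stack(ctx, buf, MAX_STACK_DEPTH * sizeof(__u64), 0);
	return 0;
}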

7 files changed: +183 −4 lines changed


include/linux/bpf.h

Lines changed: 1 addition & 0 deletions
@@ -692,6 +692,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
+extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 
 /* Shared helpers among cBPF and eBPF. */

include/linux/filter.h

Lines changed: 2 additions & 1 deletion
@@ -468,7 +468,8 @@ struct bpf_prog {
 				dst_needed:1,	/* Do we need dst entry? */
 				blinded:1,	/* Was blinded */
 				is_func:1,	/* program is a bpf function */
-				kprobe_override:1; /* Do we override a kprobe? */
+				kprobe_override:1, /* Do we override a kprobe? */
+				has_callchain_buf:1; /* callchain buffer allocated? */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 	u32			len;		/* Number of filter blocks */

include/uapi/linux/bpf.h

Lines changed: 40 additions & 2 deletions
@@ -1767,6 +1767,40 @@ union bpf_attr {
 *		**CONFIG_XFRM** configuration option.
 *	Return
 *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ *	Description
+ *		Return a user or a kernel stack in bpf program provided buffer.
+ *		To achieve this, the helper needs *ctx*, which is a pointer
+ *		to the context on which the tracing program is executed.
+ *		To store the stacktrace, the bpf program provides *buf* with
+ *		a nonnegative *size*.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_USER_BUILD_ID**
+ *			Collect buildid+offset instead of ips for user stack,
+ *			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *		**bpf_get_stack**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ *		to a sufficiently large buffer size. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *		::
+ *
+ *			# sysctl kernel.perf_event_max_stack=<new value>
+ *
+ *	Return
+ *		A non-negative value equal to or less than *size* on success,
+ *		or a negative error in case of failure.
 */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -1835,7 +1869,8 @@ union bpf_attr {
 	FN(msg_pull_data),		\
 	FN(bind),			\
 	FN(xdp_adjust_tail),		\
-	FN(skb_get_xfrm_state),
+	FN(skb_get_xfrm_state),		\
+	FN(get_stack),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -1869,11 +1904,14 @@
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
 
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
 #define BPF_F_SKIP_FIELD_MASK		0xffULL
 #define BPF_F_USER_STACK		(1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
 #define BPF_F_FAST_STACK_CMP		(1ULL << 9)
 #define BPF_F_REUSE_STACKID		(1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID		(1ULL << 11)
 
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
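
Putting the new flag bits together, a hedged sketch (hypothetical function name; the 8-entry buffer keeps sizeof(ids) well under the 512-byte BPF stack limit):

#include <linux/bpf.h>
#include <linux/ptrace.h>
#include "bpf_helpers.h"

SEC("kprobe/sys_write")
int collect_user_stack(struct pt_regs *ctx)
{
	/* Skip the first 3 frames, collect the user stack, and report
	 * buildid+offset entries instead of raw instruction pointers.
	 */
	__u64 flags = (3 & BPF_F_SKIP_FIELD_MASK) | BPF_F_USER_STACK |
		      BPF_F_USER_BUILD_ID;
	struct bpf_stack_build_id ids[8];
	long n;

	/* size must be a multiple of the element size (here
	 * sizeof(struct bpf_stack_build_id)), or the helper returns -EINVAL.
	 */
	n = bpf_get_stack(ctx, ids, sizeof(ids), flags);
	/* n is the number of bytes filled in, or a negative error. */
	return 0;
}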

kernel/bpf/core.c

Lines changed: 5 additions & 0 deletions
@@ -31,6 +31,7 @@
 #include <linux/rbtree_latch.h>
 #include <linux/kallsyms.h>
 #include <linux/rcupdate.h>
+#include <linux/perf_event.h>
 
 #include <asm/unaligned.h>
 
@@ -1722,6 +1723,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
 	aux = container_of(work, struct bpf_prog_aux, work);
 	if (bpf_prog_is_dev_bound(aux))
 		bpf_prog_offload_destroy(aux->prog);
+#ifdef CONFIG_PERF_EVENTS
+	if (aux->prog->has_callchain_buf)
+		put_callchain_buffers();
+#endif
 	for (i = 0; i < aux->func_cnt; i++)
 		bpf_jit_free(aux->func[i]);
 	if (aux->func_cnt) {

kernel/bpf/stackmap.c

Lines changed: 67 additions & 0 deletions
@@ -402,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
+	   u64, flags)
+{
+	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
+	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
+	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+	bool user = flags & BPF_F_USER_STACK;
+	struct perf_callchain_entry *trace;
+	bool kernel = !user;
+	int err = -EINVAL;
+	u64 *ips;
+
+	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+			       BPF_F_USER_BUILD_ID)))
+		goto clear;
+	if (kernel && user_build_id)
+		goto clear;
+
+	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
+					    : sizeof(u64);
+	if (unlikely(size % elem_size))
+		goto clear;
+
+	num_elem = size / elem_size;
+	if (sysctl_perf_event_max_stack < num_elem)
+		init_nr = 0;
+	else
+		init_nr = sysctl_perf_event_max_stack - num_elem;
+	trace = get_perf_callchain(regs, init_nr, kernel, user,
+				   sysctl_perf_event_max_stack, false, false);
+	if (unlikely(!trace))
+		goto err_fault;
+
+	trace_nr = trace->nr - init_nr;
+	if (trace_nr < skip)
+		goto err_fault;
+
+	trace_nr -= skip;
+	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
+	copy_len = trace_nr * elem_size;
+	ips = trace->ip + skip + init_nr;
+	if (user && user_build_id)
+		stack_map_get_build_id_offset(buf, ips, trace_nr, user);
+	else
+		memcpy(buf, ips, copy_len);
+
+	if (size > copy_len)
+		memset(buf + copy_len, 0, size - copy_len);
+	return copy_len;
+
+err_fault:
+	err = -EFAULT;
+clear:
+	memset(buf, 0, size);
+	return err;
+}
+
+const struct bpf_func_proto bpf_get_stack_proto = {
+	.func		= bpf_get_stack,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 /* Called from eBPF program */
 static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 {
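
The init_nr arithmetic above is worth a gloss: get_perf_callchain() starts filling the per-CPU callchain entry at index init_nr, so at most sysctl_perf_event_max_stack - init_nr == num_elem frames are collected and the result can never overflow the caller's buffer. With the default kernel.perf_event_max_stack of 127 and a 40-byte buffer of u64 ips, for example, num_elem is 5, init_nr is 122, and ips ends up pointing at trace->ip + 122 + skip.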

kernel/bpf/verifier.c

Lines changed: 19 additions & 0 deletions
@@ -22,6 +22,7 @@
 #include <linux/stringify.h>
 #include <linux/bsearch.h>
 #include <linux/sort.h>
+#include <linux/perf_event.h>
 
 #include "disasm.h"
 
@@ -2450,6 +2451,24 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	if (err)
 		return err;
 
+	if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
+		const char *err_str;
+
+#ifdef CONFIG_PERF_EVENTS
+		err = get_callchain_buffers(sysctl_perf_event_max_stack);
+		err_str = "cannot get callchain buffer for func %s#%d\n";
+#else
+		err = -ENOTSUPP;
+		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
+#endif
+		if (err) {
+			verbose(env, err_str, func_id_name(func_id), func_id);
+			return err;
+		}
+
+		env->prog->has_callchain_buf = true;
+	}
+
 	if (changes_data)
 		clear_all_pkt_pointers(env);
 	return 0;
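
Note the division of labor here: the verifier grabs the callchain buffer once at program load time, and the matching put_callchain_buffers() in bpf_prog_free_deferred() (see the core.c hunk above) releases it when the program is freed, so no allocation happens on the helper's fast path.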

kernel/trace/bpf_trace.c

Lines changed: 49 additions & 1 deletion
@@ -20,6 +20,7 @@
 #include "trace.h"
 
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 /**
  * trace_call_bpf - invoke BPF program
@@ -577,6 +578,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto;
 	case BPF_FUNC_perf_event_read_value:
 		return &bpf_perf_event_read_value_proto;
 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
@@ -664,6 +667,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
+	   u64, flags)
+{
+	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
+	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+			     (unsigned long) size, flags, 0);
+}
+
+static const struct bpf_func_proto bpf_get_stack_proto_tp = {
+	.func		= bpf_get_stack_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -672,6 +694,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto_tp;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_tp;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto_tp;
 	default:
 		return tracing_func_proto(func_id, prog);
 	}
@@ -734,6 +758,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto_tp;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_tp;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto_tp;
 	case BPF_FUNC_perf_prog_read_value:
 		return &bpf_perf_prog_read_value_proto;
 	default:
@@ -744,7 +770,7 @@
 /*
  * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
  * to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output and/or bpf_get_stack_id
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
 */
 static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
@@ -787,6 +813,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
+	   void *, buf, u32, size, u64, flags)
+{
+	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+
+	perf_fetch_caller_regs(regs);
+	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+			     (unsigned long) size, flags, 0);
+}
+
+static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
+	.func		= bpf_get_stack_raw_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -795,6 +841,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto_raw_tp;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_raw_tp;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto_raw_tp;
 	default:
 		return tracing_func_proto(func_id, prog);
 	}
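
One subtlety in the raw tracepoint variant: its proto declares arg2 as ARG_PTR_TO_MEM rather than ARG_PTR_TO_UNINIT_MEM, so for raw tracepoint programs the verifier requires the buffer to be initialized before the call, unlike the kprobe and ordinary tracepoint variants.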
