| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| From: Joel Fernandes <joelaf@google.com> |
| Date: Thu, 14 Mar 2019 17:32:39 -0400 |
| Subject: ANDROID: Add a tracepoint for mapping inode to full path |
| |
| This will be used by eBPF and the iorapd project for high speed |
| inode/dev numbers to file path lookup. Look at the inodemap CL for more |
| details about eBPF and iorapd using the tracepoint. |
| |
| This is planned to be used by the inodemap BPF program. Also, ART folks |
| have been using this tracepoint for debugging "unknown inode number" |
| issues. |
| |
| The tracepoint will be out of tree, and not sent upstream, since VFS |
| developers don't accept tracepoints strictly. |
| |
| Test: Run "find /" command in emulator and measure completion time |
| with/without tracepoint. find does a flood of lookups which stresses |
| the tracepoint. No performance change observed. |
| Test: eBPF prototypes (wip) successfully read data from the tracepoint. |
| |
| OOT Bug: 139663736 |
| Bug: 135143784 |
| Bug: 137393447 |
| Change-Id: I657f374659673a9c8853530d73c0622dbdbab146 |
| Signed-off-by: Joel Fernandes <joelaf@google.com> |
| --- |
| fs/namei.c | 90 ++++++++++++++++++++++++++++++++++-- |
| include/trace/events/namei.h | 42 +++++++++++++++++ |
| 2 files changed, 129 insertions(+), 3 deletions(-) |
| create mode 100644 include/trace/events/namei.h |
| |
| diff --git a/fs/namei.c b/fs/namei.c |
| index 819aa30eea6e..c55f7019202a 100644 |
| --- a/fs/namei.c |
| +++ b/fs/namei.c |
| @@ -43,6 +43,9 @@ |
| #include "internal.h" |
| #include "mount.h" |
| |
| +#define CREATE_TRACE_POINTS |
| +#include <trace/events/namei.h> |
| + |
| /* [Feb-1997 T. Schoebel-Theuer] |
| * Fundamental changes in the pathname lookup mechanisms (namei) |
| * were necessary because of omirr. The reason is that omirr needs |
| @@ -770,6 +773,81 @@ static inline int d_revalidate(struct dentry *dentry, unsigned int flags) |
| return 1; |
| } |
| |
| +#define INIT_PATH_SIZE 64 |
| + |
| +/* |
| + * success_walk_trace - trace the path reached by a completed walk |
| + * @nd: lookup state; nd->path and nd->inode are what gets reported |
| + * |
| + * Builds the path string with d_path(), starting with a small on-stack buffer |
| + * and retrying with heap buffers of doubling size (up to PATH_MAX) when the |
| + * name does not fit. Newlines in the path are escaped through mangle_path(). |
| + * If no path can be produced, an "error:..." string is traced instead. |
| + * |
| + * Heap buffers are allocated with GFP_KERNEL and freed before returning. |
| + */ |
| +static void success_walk_trace(struct nameidata *nd) |
| +{ |
| +	char stack_buf[INIT_PATH_SIZE]; |
| +	char *out = stack_buf; |
| +	int size = INIT_PATH_SIZE;	/* capacity of the buffer behind out */ |
| +	char *p, *end; |
| + |
| +	/* When the tracepoint is disabled, keep overhead low. */ |
| +	if (!trace_inodepath_enabled()) |
| +		return; |
| + |
| +	for (;;) { |
| +		p = d_path(&nd->path, out, size); |
| + |
| +		if (IS_ERR(p)) { |
| +			if (PTR_ERR(p) != -ENAMETOOLONG) { |
| +				/* PTR_ERR() is a negative long; print the |
| +				 * positive errno with a matching specifier. |
| +				 */ |
| +				snprintf(out, size, "error:d_path_failed_%ld", |
| +					 -PTR_ERR(p)); |
| +				break; |
| +			} |
| +		} else { |
| +			end = mangle_path(out, p, "\n"); |
| +			if (end) { |
| +				*end = 0; |
| +				break; |
| +			} |
| +		} |
| + |
| +		/* Either d_path() reported -ENAMETOOLONG or mangle_path() |
| +		 * failed: retry with twice the space, but never |
| +		 * beyond PATH_MAX. |
| +		 */ |
| +		if ((size << 1) > PATH_MAX) { |
| +			snprintf(out, size, "error:d_path_name_too_long"); |
| +			break; |
| +		} |
| + |
| +		/* Only heap buffers are freed; the first try is on stack. */ |
| +		if (out != stack_buf) |
| +			kfree(out); |
| + |
| +		size <<= 1; |
| +		out = kmalloc(size, GFP_KERNEL); |
| +		if (!out) { |
| +			/* Fall back to the stack buffer for the message. */ |
| +			out = stack_buf; |
| +			snprintf(out, sizeof(stack_buf), |
| +				 "error:buf_alloc_failed"); |
| +			break; |
| +		} |
| +	} |
| + |
| +	/* out holds either the escaped path or an error string. */ |
| +	trace_inodepath(nd->inode, out); |
| + |
| +	if (out != stack_buf) |
| +		kfree(out); |
| +} |
| + |
| /** |
| * complete_walk - successful completion of path walk |
| * @nd: pointer nameidata |
| @@ -792,15 +870,21 @@ static int complete_walk(struct nameidata *nd) |
| return -ECHILD; |
| } |
| |
| - if (likely(!(nd->flags & LOOKUP_JUMPED))) |
| + if (likely(!(nd->flags & LOOKUP_JUMPED))) { |
| + success_walk_trace(nd); |
| return 0; |
| + } |
| |
| - if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) |
| + if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) { |
| + success_walk_trace(nd); |
| return 0; |
| + } |
| |
| status = dentry->d_op->d_weak_revalidate(dentry, nd->flags); |
| - if (status > 0) |
| + if (status > 0) { |
| + success_walk_trace(nd); |
| return 0; |
| + } |
| |
| if (!status) |
| status = -ESTALE; |
| diff --git a/include/trace/events/namei.h b/include/trace/events/namei.h |
| new file mode 100644 |
| index 000000000000..e8c3e216a0a7 |
| --- /dev/null |
| +++ b/include/trace/events/namei.h |
| @@ -0,0 +1,42 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +#undef TRACE_SYSTEM |
| +#define TRACE_SYSTEM namei |
| + |
| +#if !defined(_TRACE_INODEPATH_H) || defined(TRACE_HEADER_MULTI_READ) |
| +#define _TRACE_INODEPATH_H |
| + |
| +#include <linux/types.h> |
| +#include <linux/tracepoint.h> |
| +#include <linux/mm.h> |
| +#include <linux/memcontrol.h> |
| +#include <linux/device.h> |
| +#include <linux/kdev_t.h> |
| +/* inodepath: logs an inode's device and inode numbers with a path string. */ |
| +TRACE_EVENT(inodepath, |
| + TP_PROTO(struct inode *inode, char *path), |
| + |
| + TP_ARGS(inode, path), |
| + |
| + TP_STRUCT__entry( |
| + /* dev_t and ino_t widths are arch dependent, so both are |
| + * stored as unsigned long (64-bit only where long is 64-bit). |
| + */ |
| + __field(unsigned long, ino) |
| + __field(unsigned long, dev) |
| + __string(path, path) |
| + ), |
| + |
| + TP_fast_assign( |
| + __entry->ino = inode->i_ino; |
| + __entry->dev = inode->i_sb->s_dev; |
| + __assign_str(path, path); |
| + ), |
| + |
| + TP_printk("dev %d:%d ino=%lu path=%s", |
| + MAJOR(__entry->dev), MINOR(__entry->dev), |
| + __entry->ino, __get_str(path)) |
| +); |
| +#endif /* _TRACE_INODEPATH_H */ |
| + |
| +/* This part must be outside protection */ |
| +#include <trace/define_trace.h> |