add threadsnoop
diff --git a/README.md b/README.md
index d54e384..66bac4e 100644
--- a/README.md
+++ b/README.md
@@ -160,6 +160,7 @@
 - tools/[tcpsubnet](tools/tcpsubnet.py): Summarize and aggregate TCP send by subnet. [Examples](tools/tcpsubnet_example.txt).
 - tools/[tcptop](tools/tcptop.py): Summarize TCP send/recv throughput by host. Top for TCP. [Examples](tools/tcptop_example.txt).
 - tools/[tcptracer](tools/tcptracer.py): Trace TCP established connections (connect(), accept(), close()). [Examples](tools/tcptracer_example.txt).
+- tools/[threadsnoop](tools/threadsnoop.py): List new thread creation. [Examples](tools/threadsnoop_example.txt).
 - tools/[tplist](tools/tplist.py): Display kernel tracepoints or USDT probes and their formats. [Examples](tools/tplist_example.txt).
 - tools/[trace](tools/trace.py): Trace arbitrary functions, with filters. [Examples](tools/trace_example.txt).
 - tools/[ttysnoop](tools/ttysnoop.py): Watch live output from a tty or pts device. [Examples](tools/ttysnoop_example.txt).
diff --git a/man/man8/threadsnoop.8 b/man/man8/threadsnoop.8
new file mode 100644
index 0000000..3c655f2
--- /dev/null
+++ b/man/man8/threadsnoop.8
@@ -0,0 +1,60 @@
+.TH threadsnoop 8  "2019-07-02" "USER COMMANDS"
+.SH NAME
+threadsnoop \- Trace thread creation via pthread_create(). Uses BCC/eBPF.
+.SH SYNOPSIS
+.B threadsnoop
+.SH DESCRIPTION
+threadsnoop traces calls to pthread_create(), showing this path of thread
+creation. This can be used for workload characterization and discovery, and is
+a companion to execsnoop(8) which traces execve(2).
+
+This works by tracing the pthread_create() from libpthread.so.0. The path
+to this library may need adjusting in the tool source to match your system.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and BCC.
+.SH EXAMPLES
+.TP
+Trace calls to pthread_create():
+#
+.B threadsnoop
+.SH FIELDS
+.TP
+TIME(ms)
+Elapsed time since the tool began tracing (in milliseconds).
+.TP
+PID
+The process ID.
+.TP
+COMM
+The process (thread) name.
+.TP
+FUNC
+The name of the start routine, if the symbol is available, else a hex address
+for the start routine address.
+.SH OVERHEAD
+Thread creation is expected to be low (<< 1000/s), so the overhead of this
+tool is expected to be negligible.
+.SH SOURCE
+This originated as a bpftrace tool from the book "BPF Performance Tools",
+published by Addison Wesley (2019):
+.IP
+http://www.brendangregg.com/bpf-performance-tools-book.html
+.PP
+See the book for more documentation on this tool.
+.PP
+This version is in the BCC repository:
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file
+containing example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+execsnoop(8)
diff --git a/tools/threadsnoop.py b/tools/threadsnoop.py
new file mode 100755
index 0000000..04c5e68
--- /dev/null
+++ b/tools/threadsnoop.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# threadsnoop   List new thread creation.
+#               For Linux, uses BCC, eBPF. Embedded C.
+#
+# Copyright (c) 2019 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License").
+# This was originally created for the BPF Performance Tools book
+# published by Addison Wesley. ISBN-13: 9780136554820
+# When copying or porting, include this comment.
+#
+# 02-Jul-2019   Brendan Gregg   Ported from bpftrace to BCC.
+
+from __future__ import print_function
+from bcc import BPF
+
+# load BPF program: do_entry() emits ts, pid, comm and pthread_create() arg 3
+b = BPF(text="""
+#include <linux/sched.h>
+
+struct data_t {
+    u64 ts;
+    u32 pid;
+    u64 start;
+    char comm[TASK_COMM_LEN];
+};
+
+BPF_PERF_OUTPUT(events);
+
+void do_entry(struct pt_regs *ctx) {
+    struct data_t data = {};
+    data.ts = bpf_ktime_get_ns();
+    data.pid = bpf_get_current_pid_tgid() >> 32;
+    data.start = PT_REGS_PARM3(ctx);
+    bpf_get_current_comm(&data.comm, sizeof(data.comm));
+
+    events.perf_submit(ctx, &data, sizeof(data));
+};
+""")
+b.attach_uprobe(name="pthread", sym="pthread_create", fn_name="do_entry")
+
+print("%-10s %-6s %-16s %s" % ("TIME(ms)", "PID", "COMM", "FUNC"))
+
+start_ts = 0
+
+# process event
+def print_event(cpu, data, size):
+    global start_ts
+    event = b["events"].event(data)
+    if start_ts == 0:
+        start_ts = event.ts
+    func = b.sym(event.start, event.pid)
+    if (func == "[unknown]"):
+        func = hex(event.start)
+    print("%-10d %-6d %-16s %s" % ((event.ts - start_ts) / 1000000,
+        event.pid, event.comm, func))
+
+b["events"].open_perf_buffer(print_event)
+while 1:
+    try:
+        b.perf_buffer_poll()
+    except KeyboardInterrupt:
+        exit()
diff --git a/tools/threadsnoop_example.txt b/tools/threadsnoop_example.txt
new file mode 100644
index 0000000..e65b503
--- /dev/null
+++ b/tools/threadsnoop_example.txt
@@ -0,0 +1,27 @@
+Demonstrations of threadsnoop, the Linux BCC/eBPF version.
+
+
+Tracing new threads via pthread_create():
+
+# ./threadsnoop
+Attaching 2 probes...
+TIME(ms)   PID    COMM             FUNC
+1938       28549  dockerd          threadentry
+1939       28549  dockerd          threadentry
+1939       28549  dockerd          threadentry
+1940       28549  dockerd          threadentry
+1949       28549  dockerd          threadentry
+1958       28549  dockerd          threadentry
+1939       28549  dockerd          threadentry
+1950       28549  dockerd          threadentry
+2013       28579  docker-containe  0x562f30f2e710L
+2036       28549  dockerd          threadentry
+2083       28579  docker-containe  0x562f30f2e710L
+2116       629    systemd-journal  0x7fb7114955c0L
+2116       629    systemd-journal  0x7fb7114955c0L
+[...]
+
+The output shows a dockerd process creating several threads with the start
+routine threadentry(), and docker-containe (truncated) and systemd-journal
+also starting threads: in their cases, the function had no symbol information
+available, so their addresses are printed in hex.