| /* |
| * kernel/trace/tracedump.c |
| * |
| * Copyright (c) 2011, NVIDIA CORPORATION. All rights reserved. |
| * |
| * This program is free software; you can redistribute it and/or modify it |
| * under the terms and conditions of the GNU General Public License, |
| * version 2, as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| * more details. |
| * |
| * You should have received a copy of the GNU General Public License along with |
| * this program; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| */ |
| |
| #include <linux/console.h> |
| #include <linux/cpumask.h> |
| #include <linux/init.h> |
| #include <linux/irqflags.h> |
| #include <linux/module.h> |
| #include <linux/moduleparam.h> |
| #include <linux/mutex.h> |
| #include <linux/notifier.h> |
| #include <linux/proc_fs.h> |
| #include <linux/ring_buffer.h> |
| #include <linux/sched.h> |
| #include <linux/smp.h> |
| #include <linux/string.h> |
| #include <linux/threads.h> |
| #include <linux/tracedump.h> |
| #include <linux/uaccess.h> |
| #include <linux/vmalloc.h> |
| #include <linux/zlib.h> |
| |
| #include "trace.h" |
| #include "trace_output.h" |
| |
| #define CPU_MAX (NR_CPUS-1) |
| |
| #define TRYM(fn, ...) do { \ |
| int try_error = (fn); \ |
| if (try_error < 0) { \ |
| printk(__VA_ARGS__); \ |
| return try_error; \ |
| } \ |
| } while (0) |
| |
| #define TRY(fn) TRYM(fn, TAG "Caught error from %s in %s\n", #fn, __func__) |
| |
| /* Stolen from printk.c */ |
| #define for_each_console(con) \ |
| for (con = console_drivers; con != NULL; con = con->next) |
| |
| #define TAG KERN_ERR "tracedump: " |
| |
| #define TD_MIN_CONSUME 2000 |
| #define TD_COMPRESS_CHUNK 0x8000 |
| |
| static DEFINE_MUTEX(tracedump_proc_lock); |
| |
| static const char MAGIC_NUMBER[9] = "TRACEDUMP"; |
| static const char CPU_DELIM[7] = "CPU_END"; |
| #define CMDLINE_DELIM "|" |
| |
| /* Type of output */ |
| static bool current_format; |
| static bool format_ascii; |
| module_param(format_ascii, bool, S_IRUGO | S_IWUSR); |
| MODULE_PARM_DESC(format_ascii, "Dump ascii or raw data"); |
| |
| /* Max size of output */ |
| static uint panic_size = 0x80000; |
| module_param(panic_size, uint, S_IRUGO | S_IWUSR); |
| MODULE_PARM_DESC(panic_size, "Max dump size during kernel panic (bytes)"); |
| |
| static uint compress_level = 9; |
| module_param(compress_level, uint, S_IRUGO | S_IWUSR); |
| MODULE_PARM_DESC(compress_level, "Level of compression to use. [0-9]"); |
| |
| static char out_buf[TD_COMPRESS_CHUNK]; |
| static z_stream stream; |
| static int compress_done; |
| static int flush; |
| |
| static int old_trace_flags; |
| |
| static struct trace_iterator iter; |
| static struct pager_s { |
| struct trace_array *tr; |
| void *spare; |
| int cpu; |
| int len; |
| char __user *ubuf; |
| } pager; |
| |
| static char cmdline_buf[16+TASK_COMM_LEN]; |
| |
| static int print_to_console(const char *buf, size_t len) |
| { |
| struct console *con; |
| |
| /* Stolen from printk.c */ |
| for_each_console(con) { |
| if ((con->flags & CON_ENABLED) && con->write && |
| (cpu_online(smp_processor_id()) || |
| (con->flags & CON_ANYTIME))) |
| con->write(con, buf, len); |
| } |
| return 0; |
| } |
| |
| static int print_to_user(const char *buf, size_t len) |
| { |
| int size; |
| size = copy_to_user(pager.ubuf, buf, len); |
| if (size > 0) { |
| printk(TAG "Failed to copy to user %d bytes\n", size); |
| return -EINVAL; |
| } |
| return 0; |
| } |
| |
| static int print(const char *buf, size_t len, int print_to) |
| { |
| if (print_to == TD_PRINT_CONSOLE) |
| TRY(print_to_console(buf, len)); |
| else if (print_to == TD_PRINT_USER) |
| TRY(print_to_user(buf, len)); |
| return 0; |
| } |
| |
| /* print_magic will print MAGIC_NUMBER using the |
| * print function selected by print_to. |
| */ |
| static inline ssize_t print_magic(int print_to) |
| { |
| print(MAGIC_NUMBER, sizeof(MAGIC_NUMBER), print_to); |
| return sizeof(MAGIC_NUMBER); |
| } |
| |
| static int iter_init(void) |
| { |
| int cpu; |
| |
| /* Make iter point to global ring buffer used in trace. */ |
| trace_init_global_iter(&iter); |
| |
| /* Disable tracing */ |
| for_each_tracing_cpu(cpu) { |
| atomic_inc(&iter.tr->data[cpu]->disabled); |
| } |
| |
| /* Save flags */ |
| old_trace_flags = trace_flags; |
| |
| /* Dont look at memory in panic mode. */ |
| trace_flags &= ~TRACE_ITER_SYM_USEROBJ; |
| |
| /* Prepare ring buffer iter */ |
| for_each_tracing_cpu(cpu) { |
| iter.buffer_iter[cpu] = |
| ring_buffer_read_prepare(iter.tr->buffer, cpu); |
| } |
| ring_buffer_read_prepare_sync(); |
| for_each_tracing_cpu(cpu) { |
| ring_buffer_read_start(iter.buffer_iter[cpu]); |
| tracing_iter_reset(&iter, cpu); |
| } |
| return 0; |
| } |
| |
| /* iter_next gets the next entry in the ring buffer, ordered by time. |
| * If there are no more entries, returns 0. |
| */ |
| static ssize_t iter_next(void) |
| { |
| /* Zero out the iterator's seq */ |
| memset(&iter.seq, 0, |
| sizeof(struct trace_iterator) - |
| offsetof(struct trace_iterator, seq)); |
| |
| while (!trace_empty(&iter)) { |
| if (trace_find_next_entry_inc(&iter) == NULL) { |
| printk(TAG "trace_find_next_entry failed!\n"); |
| return -EINVAL; |
| } |
| |
| /* Copy the ring buffer data to iterator's seq */ |
| print_trace_line(&iter); |
| if (iter.seq.len != 0) |
| return iter.seq.len; |
| } |
| return 0; |
| } |
| |
| static int iter_deinit(void) |
| { |
| int cpu; |
| /* Enable tracing */ |
| for_each_tracing_cpu(cpu) { |
| ring_buffer_read_finish(iter.buffer_iter[cpu]); |
| } |
| for_each_tracing_cpu(cpu) { |
| atomic_dec(&iter.tr->data[cpu]->disabled); |
| } |
| |
| /* Restore flags */ |
| trace_flags = old_trace_flags; |
| return 0; |
| } |
| |
| static int pager_init(void) |
| { |
| int cpu; |
| |
| /* Need to do this to get a pointer to global_trace (iter.tr). |
| Lame, I know. */ |
| trace_init_global_iter(&iter); |
| |
| /* Turn off tracing */ |
| for_each_tracing_cpu(cpu) { |
| atomic_inc(&iter.tr->data[cpu]->disabled); |
| } |
| |
| memset(&pager, 0, sizeof(pager)); |
| pager.tr = iter.tr; |
| pager.len = TD_COMPRESS_CHUNK; |
| |
| return 0; |
| } |
| |
| /* pager_next_cpu moves the pager to the next cpu. |
| * Returns 0 if pager is done, else 1. |
| */ |
| static ssize_t pager_next_cpu(void) |
| { |
| if (pager.cpu <= CPU_MAX) { |
| pager.cpu += 1; |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /* pager_next gets the next page of data from the ring buffer |
| * of the current cpu. Returns page size or 0 if no more data. |
| */ |
| static ssize_t pager_next(void) |
| { |
| int ret; |
| |
| if (pager.cpu > CPU_MAX) |
| return 0; |
| |
| if (!pager.spare) |
| pager.spare = ring_buffer_alloc_read_page(pager.tr->buffer, pager.cpu); |
| if (!pager.spare) { |
| printk(TAG "ring_buffer_alloc_read_page failed!"); |
| return -ENOMEM; |
| } |
| |
| ret = ring_buffer_read_page(pager.tr->buffer, |
| &pager.spare, |
| pager.len, |
| pager.cpu, 0); |
| if (ret < 0) |
| return 0; |
| |
| return PAGE_SIZE; |
| } |
| |
| static int pager_deinit(void) |
| { |
| int cpu; |
| if (pager.spare != NULL) |
| ring_buffer_free_read_page(pager.tr->buffer, pager.spare); |
| |
| for_each_tracing_cpu(cpu) { |
| atomic_dec(&iter.tr->data[cpu]->disabled); |
| } |
| return 0; |
| } |
| |
| /* cmdline_next gets the next saved cmdline from the trace and |
| * puts it in cmdline_buf. Returns the size of the cmdline, or 0 if empty. |
| * but will reset itself on a subsequent call. |
| */ |
| static ssize_t cmdline_next(void) |
| { |
| static int pid; |
| ssize_t size = 0; |
| |
| if (pid >= PID_MAX_DEFAULT) |
| pid = -1; |
| |
| while (size == 0 && pid < PID_MAX_DEFAULT) { |
| pid++; |
| trace_find_cmdline(pid, cmdline_buf); |
| if (!strncmp(cmdline_buf, "<...>", 5)) |
| continue; |
| |
| sprintf(&cmdline_buf[strlen(cmdline_buf)], " %d" |
| CMDLINE_DELIM, pid); |
| size = strlen(cmdline_buf); |
| } |
| return size; |
| } |
| |
| /* comsume_events removes the first 'num' entries from the ring buffer. */ |
| static int consume_events(size_t num) |
| { |
| TRY(iter_init()); |
| for (; num > 0 && !trace_empty(&iter); num--) { |
| trace_find_next_entry_inc(&iter); |
| ring_buffer_consume(iter.tr->buffer, iter.cpu, &iter.ts, |
| &iter.lost_events); |
| } |
| TRY(iter_deinit()); |
| return 0; |
| } |
| |
| static int data_init(void) |
| { |
| if (current_format) |
| TRY(iter_init()); |
| else |
| TRY(pager_init()); |
| return 0; |
| } |
| |
| /* data_next will figure out the right 'next' function to |
| * call and will select the right buffer to pass back |
| * to compress_next. |
| * |
| * iter_next should be used to get data entry-by-entry, ordered |
| * by time, which is what we need in order to convert it to ascii. |
| * |
| * pager_next will return a full page of raw data at a time, one |
| * CPU at a time. pager_next_cpu must be called to get the next CPU. |
| * cmdline_next will get the next saved cmdline |
| */ |
| static ssize_t data_next(const char **buf) |
| { |
| ssize_t size; |
| |
| if (current_format) { |
| TRY(size = iter_next()); |
| *buf = iter.seq.buffer; |
| } else { |
| TRY(size = pager_next()); |
| *buf = pager.spare; |
| if (size == 0) { |
| if (pager_next_cpu()) { |
| size = sizeof(CPU_DELIM); |
| *buf = CPU_DELIM; |
| } else { |
| TRY(size = cmdline_next()); |
| *buf = cmdline_buf; |
| } |
| } |
| } |
| return size; |
| } |
| |
| static int data_deinit(void) |
| { |
| if (current_format) |
| TRY(iter_deinit()); |
| else |
| TRY(pager_deinit()); |
| return 0; |
| } |
| |
| static int compress_init(void) |
| { |
| int workspacesize, ret; |
| |
| compress_done = 0; |
| flush = Z_NO_FLUSH; |
| stream.data_type = current_format ? Z_ASCII : Z_BINARY; |
| workspacesize = zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL); |
| stream.workspace = vmalloc(workspacesize); |
| if (!stream.workspace) { |
| printk(TAG "Could not allocate " |
| "enough memory for zlib!\n"); |
| return -ENOMEM; |
| } |
| memset(stream.workspace, 0, workspacesize); |
| |
| ret = zlib_deflateInit(&stream, compress_level); |
| if (ret != Z_OK) { |
| printk(TAG "%s\n", stream.msg); |
| return ret; |
| } |
| stream.avail_in = 0; |
| stream.avail_out = 0; |
| TRY(data_init()); |
| return 0; |
| } |
| |
| /* compress_next will compress up to min(max_out, TD_COMPRESS_CHUNK) bytes |
| * of data into the output buffer. It gets the data by calling data_next. |
| * It will return the most data it possibly can. If it returns 0, then |
| * there is no more data. |
| * |
| * By the way that zlib works, each call to zlib_deflate will possibly |
| * consume up to avail_in bytes from next_in, and will fill up to |
| * avail_out bytes in next_out. Once flush == Z_FINISH, it can not take |
| * any more input. It will output until it is finished, and will return |
| * Z_STREAM_END. |
| */ |
| static ssize_t compress_next(size_t max_out) |
| { |
| ssize_t ret; |
| max_out = min(max_out, (size_t)TD_COMPRESS_CHUNK); |
| stream.next_out = out_buf; |
| stream.avail_out = max_out; |
| while (stream.avail_out > 0 && !compress_done) { |
| if (stream.avail_in == 0 && flush != Z_FINISH) { |
| TRY(stream.avail_in = |
| data_next((const char **)&stream.next_in)); |
| flush = (stream.avail_in == 0) ? Z_FINISH : Z_NO_FLUSH; |
| } |
| if (stream.next_in != NULL) { |
| TRYM((ret = zlib_deflate(&stream, flush)), |
| "zlib: %s\n", stream.msg); |
| compress_done = (ret == Z_STREAM_END); |
| } |
| } |
| ret = max_out - stream.avail_out; |
| return ret; |
| } |
| |
| static int compress_deinit(void) |
| { |
| TRY(data_deinit()); |
| |
| zlib_deflateEnd(&stream); |
| vfree(stream.workspace); |
| |
| /* TODO: remove */ |
| printk(TAG "Total in: %ld\n", stream.total_in); |
| printk(TAG "Total out: %ld\n", stream.total_out); |
| return stream.total_out; |
| } |
| |
| static int compress_reset(void) |
| { |
| TRY(compress_deinit()); |
| TRY(compress_init()); |
| return 0; |
| } |
| |
| /* tracedump_init initializes all tracedump components. |
| * Call this before tracedump_next |
| */ |
| int tracedump_init(void) |
| { |
| TRY(compress_init()); |
| return 0; |
| } |
| |
| /* tracedump_next will print up to max_out data from the tracing ring |
| * buffers using the print function selected by print_to. The data is |
| * compressed using zlib. |
| * |
| * The output type of the data is specified by the format_ascii module |
| * parameter. If format_ascii == 1, human-readable data will be output. |
| * Otherwise, it will output raw data from the ring buffer in cpu order, |
| * followed by the saved_cmdlines data. |
| */ |
| ssize_t tracedump_next(size_t max_out, int print_to) |
| { |
| ssize_t size; |
| TRY(size = compress_next(max_out)); |
| print(out_buf, size, print_to); |
| return size; |
| } |
| |
| /* tracedump_all will print all data in the tracing ring buffers using |
| * the print function selected by print_to. The data is compressed using |
| * zlib, and is surrounded by MAGIC_NUMBER. |
| * |
| * The output type of the data is specified by the format_ascii module |
| * parameter. If format_ascii == 1, human-readable data will be output. |
| * Otherwise, it will output raw data from the ring buffer in cpu order, |
| * followed by the saved_cmdlines data. |
| */ |
| ssize_t tracedump_all(int print_to) |
| { |
| ssize_t ret, size = 0; |
| TRY(size += print_magic(print_to)); |
| |
| do { |
| /* Here the size used doesn't really matter, |
| * since we're dumping everything. */ |
| TRY(ret = tracedump_next(0xFFFFFFFF, print_to)); |
| size += ret; |
| } while (ret > 0); |
| |
| TRY(size += print_magic(print_to)); |
| |
| return size; |
| } |
| |
| /* tracedump_deinit deinitializes all tracedump components. |
| * This must be called, even on error. |
| */ |
| int tracedump_deinit(void) |
| { |
| TRY(compress_deinit()); |
| return 0; |
| } |
| |
| /* tracedump_reset reinitializes all tracedump components. */ |
| int tracedump_reset(void) |
| { |
| TRY(compress_reset()); |
| return 0; |
| } |
| |
| |
| |
| /* tracedump_open opens the tracedump file for reading. */ |
| static int tracedump_open(struct inode *inode, struct file *file) |
| { |
| int ret; |
| mutex_lock(&tracedump_proc_lock); |
| current_format = format_ascii; |
| ret = tracedump_init(); |
| if (ret < 0) |
| goto err; |
| |
| ret = nonseekable_open(inode, file); |
| if (ret < 0) |
| goto err; |
| return ret; |
| |
| err: |
| mutex_unlock(&tracedump_proc_lock); |
| return ret; |
| } |
| |
| /* tracedump_read will reads data from tracedump_next and prints |
| * it to userspace. It will surround the data with MAGIC_NUMBER. |
| */ |
| static ssize_t tracedump_read(struct file *file, char __user *buf, |
| size_t len, loff_t *offset) |
| { |
| static int done; |
| ssize_t size = 0; |
| |
| pager.ubuf = buf; |
| |
| if (*offset == 0) { |
| done = 0; |
| TRY(size = print_magic(TD_PRINT_USER)); |
| } else if (!done) { |
| TRY(size = tracedump_next(len, TD_PRINT_USER)); |
| if (size == 0) { |
| TRY(size = print_magic(TD_PRINT_USER)); |
| done = 1; |
| } |
| } |
| |
| *offset += size; |
| |
| return size; |
| } |
| |
| static int tracedump_release(struct inode *inode, struct file *file) |
| { |
| int ret; |
| ret = tracedump_deinit(); |
| mutex_unlock(&tracedump_proc_lock); |
| return ret; |
| } |
| |
| /* tracedump_dump dumps all tracing data from the tracing ring buffers |
| * to all consoles. For details about the output format, see |
| * tracedump_all. |
| |
| * At most max_out bytes are dumped. To accomplish this, |
| * tracedump_dump calls tracedump_all several times without writing the data, |
| * each time tossing out old data until it reaches its goal. |
| * |
| * Note: dumping raw pages currently does NOT follow the size limit. |
| */ |
| |
| int tracedump_dump(size_t max_out) |
| { |
| ssize_t size; |
| size_t consume; |
| |
| printk(TAG "\n"); |
| |
| tracedump_init(); |
| |
| if (format_ascii) { |
| size = tracedump_all(TD_NO_PRINT); |
| if (size < 0) { |
| printk(TAG "failed to dump\n"); |
| goto out; |
| } |
| while (size > max_out) { |
| TRY(tracedump_deinit()); |
| /* Events take more or less 60 ascii bytes each, |
| not counting compression */ |
| consume = TD_MIN_CONSUME + (size - max_out) / |
| (60 / (compress_level + 1)); |
| TRY(consume_events(consume)); |
| TRY(tracedump_init()); |
| size = tracedump_all(TD_NO_PRINT); |
| if (size < 0) { |
| printk(TAG "failed to dump\n"); |
| goto out; |
| } |
| } |
| |
| TRY(tracedump_reset()); |
| } |
| size = tracedump_all(TD_PRINT_CONSOLE); |
| if (size < 0) { |
| printk(TAG "failed to dump\n"); |
| goto out; |
| } |
| |
| out: |
| tracedump_deinit(); |
| printk(KERN_INFO "\n" TAG " end\n"); |
| return size; |
| } |
| |
| static const struct file_operations tracedump_fops = { |
| .owner = THIS_MODULE, |
| .open = tracedump_open, |
| .read = tracedump_read, |
| .release = tracedump_release, |
| }; |
| |
| #ifdef CONFIG_TRACEDUMP_PANIC |
| static int tracedump_panic_handler(struct notifier_block *this, |
| unsigned long event, void *unused) |
| { |
| tracedump_dump(panic_size); |
| return 0; |
| } |
| |
| static struct notifier_block tracedump_panic_notifier = { |
| .notifier_call = tracedump_panic_handler, |
| .next = NULL, |
| .priority = 150 /* priority: INT_MAX >= x >= 0 */ |
| }; |
| #endif |
| |
| static int __init tracedump_initcall(void) |
| { |
| #ifdef CONFIG_TRACEDUMP_PROCFS |
| struct proc_dir_entry *entry; |
| |
| /* Create a procfs file for easy dumping */ |
| entry = create_proc_entry("tracedump", S_IFREG | S_IRUGO, NULL); |
| if (!entry) |
| printk(TAG "failed to create proc entry\n"); |
| else |
| entry->proc_fops = &tracedump_fops; |
| #endif |
| |
| #ifdef CONFIG_TRACEDUMP_PANIC |
| /* Automatically dump to console on a kernel panic */ |
| atomic_notifier_chain_register(&panic_notifier_list, |
| &tracedump_panic_notifier); |
| #endif |
| return 0; |
| } |
| |
| early_initcall(tracedump_initcall); |