| #!/usr/bin/env python |
| # |
| # Copyright (C) 2016 The Android Open Source Project |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| """Analyze ext4 trace with custom open trace""" |
| import collections |
| import math |
| import os |
| import re |
| import string |
| import sys |
| |
DBG = False
DBG_ISSUE = False

# Hard-coded map to detect the partition for a given device number (and the
# reverse map below). These values differ per device; this table is for marlin.
DEVICE_TO_PARTITION = { "253,0": "/system/", "253,1": "/vendor/", "259,19": "/data/" }
# Reverse lookup: partition mount prefix -> "major,minor" device string.
PARTITION_TO_DEVICE = {}
for key, value in DEVICE_TO_PARTITION.items():  # items(): works on Python 2 and 3
  PARTITION_TO_DEVICE[value] = key
| |
# Regular expressions for the ftrace events consumed by Trace. A sample trace
# line precedes each pattern; the handlers rely on the group numbers noted here.
# init-1 [003] .... 2.703964: do_sys_open: init: open("/sys/fs/selinux/null", 131074, 0) fd = 0, inode = 22
# Groups: 1=pid, 2=timestamp, 3=process name, 4=file name, 5=flags, 6=inode.
RE_DO_SYS_OPEN = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+do_sys_open:\s+(\S+):\sopen..(\S+).,\s([0-9]+).\s+.+inode\s=\s([0-9]+)"""
# init-1 [003] ...1 2.703991: ext4_ext_map_blocks_enter: dev 253,0 ino 2719 lblk 154 len 30 flags
# Groups: 1=process name, 2=pid, 3=timestamp, 4=dev, 5=inode, 6=lblk, 7=len.
# NOTE: compiled nowhere in this file; kept for completeness / future use.
RE_EXT4_MA_BLOCKS_ENTER = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_enter:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\s+lblk\s+([0-9]+)\s+len\s+([0-9]+)"""
# init-1 [002] ...1 2.687205: ext4_ext_map_blocks_exit: dev 253,0 ino 8 flags lblk 0 pblk 196608 len 1 mflags M ret 1
# Groups: 1=process name, 2=pid, 3=timestamp, 4=dev, 5=inode, 6=lblk, 7=pblk,
# 8=len, 9=mflags, 10=ret.
RE_EXT4_MA_BLOCKS_EXIT = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_exit:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\sflags.*\slblk\s+([0-9]+)\spblk\s([0-9]+)\slen\s+([0-9]+).*mflags\s(\S*)\sret\s([0-9]+)"""
# init-1 [002] ...1 2.887119: block_bio_remap: 8,0 R 10010384 + 8 <- (259,18) 3998944
# Groups: 1=new (remapped) sector address, 2=original sector address.
RE_BLOCK_BIO_REMAP = r""".+block_bio_remap:\s\d+,\d+\s\S+\s(\d+)\s\+\s\d+\s<-\s\([^\)]+\)\s(\d+)"""
# kworker/u9:1-83 [003] d..2 2.682991: block_rq_issue: 8,0 RA 0 () 10140208 + 32 [kworker/u9:1]
# Groups: 1=pid, 2=timestamp, 3=dev major, 4=dev minor, 5=access type (R/W...),
# 6=sector address, 7=length in sectors, 8=process name.
RE_BLOCK_RQ_ISSUE = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+block_rq_issue:\s+([0-9]+)\,([0-9]+)\s+([RW]\S*)\s[0-9]+\s\([^\)]*\)\s([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)\]"""

EXT4_SIZE_TO_BLOCK_SIZE = 8 # ext4: 4KB, block device block size: 512B
| |
class FileAccess:
  """Read or write access statistics for a single file.

  Each FileEvent owns two instances, one for reads and one for writes.
  All offsets and sizes are in ext4 blocks (4KB) unless noted otherwise.
  """
  def __init__(self, file):
    self.file = file # owning FileEvent
    self.accesses = [] # list of (time, offset, size, process_name)
    self.total_access = 0 # total ext4 blocks accessed
    self.ext4_access_size_histogram = {} #key: read size, value: occurrence
    self.block_access_size_histogram = {} # same, at block-device request level
    self.ext4_single_block_accesses = {} # K: process name, V: list of offsets
    self.block_single_block_accesses = {} # K: process name, V: list of offsets
    self.blocks_histogram = {} # K: offset, V: read counter

  def add_if_single_block(self, container, size, offset, process_name):
    """Record offset under process_name when the access is exactly one block."""
    if size != 1:
      return
    offsets = container.get(process_name)
    if not offsets:
      offsets = []
      container[process_name] = offsets
    offsets.append(offset)

  def add_access(self, time, offset, size, process_name, read_sizes):
    """Add one ext4-level access.

    Args:
      time: event timestamp (seconds).
      offset: starting ext4 block (lblk) within the file.
      size: access length in ext4 blocks.
      process_name: process that triggered the access.
      read_sizes: list of block-device request sizes that serviced it.
    """
    self.accesses.append((time, offset, size, process_name))
    self.total_access += size
    self.ext4_access_size_histogram[size] = \
      self.ext4_access_size_histogram.get(size, 0) + 1
    read_offset = offset
    for s in read_sizes:
      self.block_access_size_histogram[s] = \
        self.block_access_size_histogram.get(s, 0) + 1
      self.add_if_single_block(self.block_single_block_accesses, s, read_offset,
                               process_name)
      read_offset += s
    for i in range(size):
      self.blocks_histogram[offset + i] = \
        self.blocks_histogram.get(offset + i, 0) + 1
    self.add_if_single_block(self.ext4_single_block_accesses, size, offset,
                             process_name)

  def add_merged_access(self, time, offsets, lens, process_names):
    """Add several ext4-level accesses serviced by one merged block request.

    offsets, lens and process_names are parallel lists, one entry per access.
    """
    total_read_offsets = set() # reads can overlap, so count each block once
    for i in range(len(offsets)):
      self.accesses.append((time, offsets[i], lens[i], process_names[i]))
      self.ext4_access_size_histogram[lens[i]] = \
        self.ext4_access_size_histogram.get(lens[i], 0) + 1
      self.add_if_single_block(self.ext4_single_block_accesses, lens[i],
                               offsets[i], process_names[i])
      # BUG FIX: collect the blocks covered by THIS access (lens[i] of them),
      # not one block per merged access (the original used len(lens) here).
      for j in range(lens[i]):
        total_read_offsets.add(offsets[i] + j)
    total_lens = len(total_read_offsets)
    start_offset = min(total_read_offsets)
    self.total_access += total_lens
    self.block_access_size_histogram[total_lens] = \
      self.block_access_size_histogram.get(total_lens, 0) + 1
    self.add_if_single_block(self.block_single_block_accesses, total_lens,
                             start_offset, process_names[0])
    for s in range(total_lens):
      self.blocks_histogram[start_offset + s] = \
        self.blocks_histogram.get(start_offset + s, 0) + 1

  def dump(self):
    """Print histograms and the repeated-access summary for this file."""
    # print(...) with a single pre-formatted string behaves identically on
    # Python 2 (parenthesized expression) and Python 3 (function call).
    if len(self.ext4_access_size_histogram) > 1:
      print("    Ext4 access size histograms: %s" % collections.OrderedDict(
        sorted(self.ext4_access_size_histogram.items(), key=lambda item: item[0])))
    if len(self.ext4_single_block_accesses) > 0 and self.total_access > 1:
      print("    Ext4 single block accesses: %s" % collections.OrderedDict(
        sorted(self.ext4_single_block_accesses.items(), key=lambda item: item[1],
               reverse=True)))
    if len(self.block_access_size_histogram) > 1:
      print("    Block access size histograms: %s" % collections.OrderedDict(
        sorted(self.block_access_size_histogram.items(), key=lambda item: item[0])))
    if len(self.block_single_block_accesses) > 0 and self.total_access > 1:
      print("    Block single block accesses: %s" % collections.OrderedDict(
        sorted(self.block_single_block_accesses.items(), key=lambda item: item[1],
               reverse=True)))
    if self.total_access > 1:
      sorted_blocks_histogram = sorted(self.blocks_histogram.items(),
                                       key=lambda item: item[1], reverse=True)
      prints = []
      repeating_reads_counter = 0
      for offset, counter in sorted_blocks_histogram:
        if counter == 1:
          break # sorted by count, so everything after is a single access
        prints.append(str(offset) + ":" + str(counter))
        repeating_reads_counter += (counter - 1)
      if len(prints) > 0:
        print("    repeating accesses %s   offset:count -> %s"
              % (repeating_reads_counter, ','.join(prints)))
| |
class FileEvent:
  """Everything collected for one file: open events plus read/write stats."""
  def __init__(self, open_time, file_name, process_name, inode, flags):
    self.file_name = file_name
    self.inode = inode
    self.total_open = 1
    self.processes = [] # list of (open_time, process_name, flags)
    self.processes.append((open_time, process_name, flags))
    self.read = FileAccess(self) # read statistics
    self.write = FileAccess(self) # write statistics

  def add_open(self, open_time, process_name, flags):
    """Record an additional open of this file."""
    self.processes.append((open_time, process_name, flags))
    self.total_open += 1

  def add_access(self, is_read, time, offset, size, process_name, read_sizes):
    """Route one access to the read or write statistics."""
    if is_read:
      self.read.add_access(time, offset, size, process_name, read_sizes)
    else:
      self.write.add_access(time, offset, size, process_name, read_sizes)

  def add_merged_access(self, is_read, time, offsets, lens, process_names):
    """Route a merged access to the read or write statistics."""
    if is_read:
      self.read.add_merged_access(time, offsets, lens, process_names)
    else:
      self.write.add_merged_access(time, offsets, lens, process_names)

  def dump(self, name_to_pid_map):
    """Print full statistics. name_to_pid_map maps process name -> pid string."""
    # Single-string print() keeps Python 2 and Python 3 output identical.
    print("  ***filename %s, total reads %d, total writes %d, total open %d"
          " inode %s" % (self.file_name, self.read.total_access,
                         self.write.total_access, self.total_open, self.inode))
    process_names = []
    for opener in self.processes:
      process_names.append(opener[1] + "-" + name_to_pid_map.get(opener[1], '?')
                           + " t:" + str(opener[0]) + " flags:" + opener[2])
    print("    Processes opened this file: %s" % ','.join(process_names))
    if self.read.total_access > 0:
      print("    ****Reads:")
      self.read.dump()
    if self.write.total_access > 0:
      print("    ****Writes:")
      self.write.dump()

  def dump_short(self):
    """Print a one-line summary of this file's totals."""
    print("  filename %s, total reads %d, total writes %d"
          % (self.file_name, self.read.total_access, self.write.total_access))
| |
class PendingAccess:
  """An ext4-level access waiting to be serviced by block-device requests.

  Created from an ext4_ext_map_blocks_exit event; subsequent block_rq_issue
  events consume the pending ext4 blocks until the access completes.
  """
  def __init__(self, process_name, pid, time, dev, inode, lblk, pblk, len,
               fevent, ext4_to_block_ratio=None):
    """Args mirror the ext4 trace event; `len` is in ext4 blocks.

    ext4_to_block_ratio: device blocks per ext4 block. Defaults to the
    module-level EXT4_SIZE_TO_BLOCK_SIZE; parameterized so the class can be
    used/tested with other block geometries.
    """
    if ext4_to_block_ratio is None:
      ext4_to_block_ratio = EXT4_SIZE_TO_BLOCK_SIZE
    self.ext4_to_block_ratio = ext4_to_block_ratio
    self.process_name = process_name
    self.pid = pid
    self.time = time
    self.dev = dev
    self.inode = inode
    self.lblk = lblk # logical offset within the file, in ext4 blocks
    self.pblk = pblk # physical address, already converted to device blocks
    self.blk_len = len * ext4_to_block_ratio # length in device blocks
    self.len = len # length in ext4 blocks
    self.fevent = fevent
    self.pending_accesses = set() # ext4 block indexes not yet serviced
    for i in range(len):
      self.pending_accesses.add(i)
    self.access_sizes = [] # valid read for this file in block dev level.
    self.block_access_counter = 0

  def get_valid_access(self, block_offset, block_len):
    """Clip a device-block range to this access's ext4 range.

    Returns (ext4_offset, ext4_len); ext4_len is 0 when there is no overlap.
    """
    # Floor division (//) keeps the original Python 2 integer semantics
    # when run under Python 3.
    ext4_offset = block_offset // self.ext4_to_block_ratio
    if ext4_offset > self.len:
      return 0, 0
    ext4_len = block_len // self.ext4_to_block_ratio
    if (ext4_offset + ext4_len) > self.len:
      ext4_len = self.len - ext4_offset
    return ext4_offset, ext4_len

  def queue_block_access(self, ext4_offset, ext4_len):
    """Mark ext4 blocks [ext4_offset, ext4_offset + ext4_len) as serviced."""
    if ext4_len <= 0:
      return
    self.block_access_counter += 1
    ext4_blocks_accessed = 0
    for i in range(ext4_len):
      ext4_block_i = i + ext4_offset
      if ext4_block_i in self.pending_accesses:
        ext4_blocks_accessed += 1
        self.pending_accesses.remove(ext4_block_i)
    if ext4_blocks_accessed > 0:
      self.access_sizes.append(ext4_blocks_accessed)

  def handle_req_complete(self, time, is_read):
    """Report this completed access to the owning FileEvent."""
    self.fevent.add_access(is_read, self.time, self.lblk, self.len,
                           self.process_name, self.access_sizes)

  def handle_merged_req(self, time, offsets, lens, names, is_read):
    """Report several accesses completed together by one merged request."""
    self.fevent.add_merged_access(is_read, time, offsets, lens, names)

  def is_req_complete(self):
    """True when every ext4 block of this access has been serviced."""
    return len(self.pending_accesses) == 0

  def is_req_started(self):
    """True when at least one block has been serviced.

    FIX: use != instead of `is not` — identity comparison between ints only
    worked by accident of CPython's small-integer caching.
    """
    return self.len != len(self.pending_accesses)
| |
class Trace:
  """Parses ftrace lines and accumulates per-device, per-inode file access
  statistics."""

  def __init__(self):
    self.files_per_device = {} # key: device, value: { key: inode, value; FileEvent }
    self.re_open = re.compile(RE_DO_SYS_OPEN)
    self.re_ext4_access = re.compile(RE_EXT4_MA_BLOCKS_EXIT)
    self.re_bio_remap = re.compile(RE_BLOCK_BIO_REMAP)
    self.re_block_issue = re.compile(RE_BLOCK_RQ_ISSUE)
    # req from ext4 that has not gone down to block level yet, K:block address,
    # V: list of PendingAccess
    self.pending_accesses = {}
    self.remap = {} # K: remapped sector address, V: original sector address
    self.process_names = {} # K: PID, V : name

  def handle_line(self, line):
    """Dispatch one trace line to the handler of the first matching event."""
    match = self.re_open.match(line)
    if match:
      self.handle_open(match)
      return
    match = self.re_ext4_access.match(line)
    if match:
      self.handle_ext4_block_exit(match)
      return
    match = self.re_bio_remap.match(line)
    if match:
      self.handle_bio_remap(match)
      return
    match = self.re_block_issue.match(line)
    if match:
      self.handle_block_issue(match)
      return

  def handle_open(self, match):
    """Handle do_sys_open: register the open with the file's FileEvent."""
    pid = int(match.group(1))
    time = match.group(2)
    process_name = match.group(3)
    file_name = match.group(4)
    flag = match.group(5)
    inode = int(match.group(6))
    dev_name = None
    self.process_names[pid] = process_name
    # Determine the device from the file path's partition prefix.
    for p in PARTITION_TO_DEVICE:
      if file_name.startswith(p):
        dev_name = PARTITION_TO_DEVICE[p]
    if not dev_name:
      if DBG:
        print("Ignore open for file %s" % file_name)
      return
    # FIX: the original indexed self.files_per_device[dev_name] directly and
    # raised KeyError when an open arrived before any ext4 access had lazily
    # created the per-device map (see handle_ext4_block_exit).
    files = self.files_per_device.setdefault(dev_name, {})
    fevent = files.get(inode)
    if not fevent:
      fevent = FileEvent(time, file_name, process_name, inode, flag)
      files[inode] = fevent
    else:
      fevent.add_open(time, process_name, flag)

  def handle_ext4_block_exit(self, match):
    """Handle ext4_ext_map_blocks_exit: queue a PendingAccess keyed by its
    physical block address, to be consumed by block_rq_issue events."""
    process_name = match.group(1)
    pid = int(match.group(2))
    time = float(match.group(3))
    dev = match.group(4)
    inode = int(match.group(5))
    lblk = int(match.group(6))
    pblk = int(match.group(7)) * EXT4_SIZE_TO_BLOCK_SIZE # to device blocks
    l = int(match.group(8))
    mflags = match.group(9) # parsed for completeness; currently unused
    ret = int(match.group(10))
    if ret <= 0: # no block access
      return
    # Prefer the name recorded at open time; the trace may report '<...>'.
    process_name = self.process_names.get(pid, process_name)
    if process_name == '<...>':
      process_name = "pid-" + str(pid)
    if DBG_ISSUE:
      print("ext4 %d %d %d %s" % (pblk, l, inode, process_name))
    files = self.files_per_device.get(dev)
    if not files:
      if DEVICE_TO_PARTITION.get(dev):
        files = {}
        self.files_per_device[dev] = files
      else:
        if DBG:
          print("access ignored for device %s" % dev)
        return
    fevent = files.get(inode)
    if not fevent:
      if DBG:
        print('no open for device %s with inode %s' % (dev, inode))
      fevent = FileEvent(time, "unknown", process_name, inode, "-")
      files[inode] = fevent
    pending_access = PendingAccess(process_name, pid, time, dev, inode, lblk,
                                   pblk, l, fevent)
    self.pending_accesses.setdefault(pblk, []).append(pending_access)

  def handle_bio_remap(self, match):
    """Handle block_bio_remap: remember the new -> original address mapping."""
    new_addr = int(match.group(1))
    old_addr = int(match.group(2))
    self.remap[new_addr] = old_addr
    if DBG_ISSUE:
      print("remap %d %d" % (new_addr, old_addr))

  def handle_block_issue(self, match):
    """Handle block_rq_issue: match the issued range against pending ext4
    accesses and report the ones that complete (merged where possible)."""
    pid = int(match.group(1))
    time = float(match.group(2))
    dev_major = match.group(3) # parsed for completeness; currently unused
    dev_minor = match.group(4) # parsed for completeness; currently unused
    access = match.group(5)
    new_address = int(match.group(6))
    l = int(match.group(7))
    name = match.group(8)
    name = self.process_names.get(pid, name)
    if name == '<...>':
      name = "pid-" + str(pid)
    is_read = 'W' not in access
    accesses_per_inodes = {} # K: inode, V: [offsets, lengths, process names]
    addrs_to_remove = []
    completed_reqs = []
    # Undo any bio remap so the address matches what ext4 emitted.
    address = self.remap.get(new_address, new_address)
    if DBG_ISSUE:
      print("issue %d %d %s %s" % (address, l, is_read, access))
    for access_addr, access_list in self.pending_accesses.items():
      if (address >= access_addr) and (address + l) > access_addr:
        reqs_to_remove = []
        for pending in access_list:
          offset, valid_access_size = pending.get_valid_access(address - access_addr, l)
          if valid_access_size > 0:
            if pending.is_req_started(): # spread across multiple reads. complete alone
              pending.queue_block_access(offset, valid_access_size)
              if pending.is_req_complete():
                pending.handle_req_complete(time, is_read)
                reqs_to_remove.append(pending)
            else: # possible multiple reads completed in this read. complete them together
              pending.queue_block_access(offset, valid_access_size)
              if pending.is_req_complete():
                reads = accesses_per_inodes.get(pending.inode, [[], [], []])
                reads[0].append(offset + pending.lblk)
                reads[1].append(valid_access_size)
                reads[2].append(pending.process_name)
                accesses_per_inodes[pending.inode] = reads
                completed_reqs.append(pending)
                reqs_to_remove.append(pending)
        for to_remove in reqs_to_remove:
          access_list.remove(to_remove)
        if len(access_list) == 0:
          addrs_to_remove.append(access_addr)
    # Deletions are deferred so the dict is not resized while iterating.
    for addr in addrs_to_remove:
      del self.pending_accesses[addr]
    for pending in completed_reqs: # these will be reported as batch
      accesses = accesses_per_inodes.get(pending.inode)
      if not accesses: # merged one already dispatched
        continue
      if len(accesses[0]) == 1: # single access: report it alone
        pending.handle_req_complete(time, is_read)
      else: # merged
        pending.handle_merged_req(time, accesses[0], accesses[1], accesses[2],
                                  is_read)
      del accesses_per_inodes[pending.inode]

  def dump_partition(self, partition_name, files):
    """Print per-file statistics for one partition.

    Returns (total_reads, total_writes, number_of_files).
    """
    name_to_pid_map = {}
    for pid, name in self.process_names.items():
      name_to_pid_map[name] = str(pid)
    print("**Dump partition: %s total number of files: %d"
          % (partition_name, len(files)))
    total_reads = 0
    total_writes = 0
    # sorted() handles both a list (Python 2) and a view (Python 3); the
    # original .values().sort() fails on Python 3 dict views.
    files_sorted_by_read = sorted(files.values(),
                                  key=lambda f: f.read.total_access,
                                  reverse=True)
    files_sorted_by_write = sorted(files.values(),
                                   key=lambda f: f.write.total_access,
                                   reverse=True)
    print("  Top 10 readers:")
    for fevent in files_sorted_by_read[:10]:
      fevent.dump_short()
    print("  Top 10 writers:")
    for fevent in files_sorted_by_write[:10]:
      fevent.dump_short()
    for f in files_sorted_by_read:
      f.dump(name_to_pid_map)
      total_reads += f.read.total_access
      total_writes += f.write.total_access
    print("  Total reads: %d  total writes: %d" % (total_reads, total_writes))
    return total_reads, total_writes, len(files)


  def dump(self):
    """Print statistics for every partition followed by a global summary."""
    print("*Dump R/W per each partition")
    total_reads = 0
    total_writes = 0
    summaries = []
    for d in self.files_per_device:
      reads, writes, num_files = self.dump_partition(DEVICE_TO_PARTITION[d],
                                                     self.files_per_device[d])
      total_reads += reads
      total_writes += writes
      summaries.append((DEVICE_TO_PARTITION[d], reads, writes, num_files))
    print("*Summary*")
    print("Total blocks read %d" % total_reads)
    print("Total blocks wrote %d" % total_writes)
    print("Partition total_reads total_writes num_files")
    for s in summaries:
      print("%s %s %s %s" % s)
| |
def main(argv):
  """Parse the ftrace dump named by argv[1] and print access statistics."""
  if len(argv) < 2:
    print("check_file_read.py filename")
    return
  filename = argv[1]
  trace = Trace()
  # Feed the trace file line by line; unmatched lines are simply ignored.
  with open(filename) as f:
    for l in f:
      trace.handle_line(l)
  trace.dump()

if __name__ == '__main__':
  main(sys.argv)