| #!/usr/bin/python3 | 
 | # Copyright (C) 2019 The Android Open Source Project | 
 | # | 
 | # Licensed under the Apache License, Version 2.0 (the "License"); | 
 | # you may not use this file except in compliance with the License. | 
 | # You may obtain a copy of the License at | 
 | # | 
 | #      http://www.apache.org/licenses/LICENSE-2.0 | 
 | # | 
 | # Unless required by applicable law or agreed to in writing, software | 
 | # distributed under the License is distributed on an "AS IS" BASIS, | 
 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | # See the License for the specific language governing permissions and | 
 | # limitations under the License. | 
 |  | 
 | import re | 
 | import sys | 
 |  | 
 | from sqlalchemy import create_engine | 
 | from sqlalchemy import Column, Date, Integer, Float, String, ForeignKey | 
 | from sqlalchemy.ext.declarative import declarative_base | 
 | from sqlalchemy.orm import relationship | 
 |  | 
 | from sqlalchemy.orm import sessionmaker | 
 |  | 
 | import sqlalchemy | 
 |  | 
 | from typing import Optional, Tuple | 
 |  | 
 | _DEBUG = False        # print sql commands to console | 
 | _FLUSH_LIMIT = 10000  # how many entries are parsed before flushing to DB from memory | 
 |  | 
 | Base = declarative_base() | 
 |  | 
 | class RawFtraceEntry(Base): | 
 |   __tablename__ = 'raw_ftrace_entries' | 
 |  | 
 |   id = Column(Integer, primary_key=True) | 
 |   task_name = Column(String, nullable=True) # <...> -> None. | 
 |   task_pid = Column(String, nullable=False) | 
 |   tgid = Column(Integer, nullable=True)     # ----- -> None. | 
 |   cpu = Column(Integer, nullable=False) | 
 |   timestamp = Column(Float, nullable=False) | 
 |   function = Column(String, nullable=False) | 
 |   function_args = Column(String, nullable=False) | 
 |  | 
 |   # 1:1 relation with MmFilemapAddToPageCache. | 
 |   mm_filemap_add_to_page_cache = relationship("MmFilemapAddToPageCache", | 
 |                                               back_populates="raw_ftrace_entry") | 
 |  | 
 |   @staticmethod | 
 |   def parse_dict(line): | 
 |     # '           <...>-5521  (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266' | 
    m = re.match(r'\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
 |     if not m: | 
 |       return None | 
 |  | 
 |     groups = m.groups() | 
 |     # groups example: | 
 |     # ('<...>', | 
 |     #  '5521', | 
 |     #  '-----', | 
 |     #  '003', | 
 |     #  '...1', | 
 |     #  '17148.446877', | 
 |     #  'tracing_mark_write', | 
 |     #  'trace_event_clock_sync: parent_ts=17148.447266') | 
 |     task_name = groups[0] | 
 |     if task_name == '<...>': | 
 |       task_name = None | 
 |  | 
    task_pid = int(groups[1])
    # tgid is '-----' when unavailable; otherwise decode it to an integer to
    # match the Integer column type.
    tgid = None if groups[2] == '-----' else int(groups[2])
 |  | 
 |     cpu = int(groups[3]) | 
 |     # irq_flags = groups[4] | 
 |     timestamp = float(groups[5]) | 
 |     function = groups[6] | 
 |     function_args = groups[7] | 
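
    # For the example line above, this returns:
    # {'task_name': None, 'task_pid': 5521, 'tgid': None, 'cpu': 3,
    #  'timestamp': 17148.446877, 'function': 'tracing_mark_write',
    #  'function_args': 'trace_event_clock_sync: parent_ts=17148.447266'}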
 |  | 
    return {'task_name': task_name, 'task_pid': task_pid, 'tgid': tgid,
            'cpu': cpu, 'timestamp': timestamp, 'function': function,
            'function_args': function_args}
 |  | 
 | class SchedSwitch(Base): | 
 |   __tablename__ = 'sched_switches' | 
 |  | 
 |   id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True) | 
 |  | 
 |   prev_comm = Column(String, nullable=False) | 
 |   prev_pid = Column(Integer, nullable=False) | 
 |   prev_prio = Column(Integer, nullable=False) | 
 |   prev_state = Column(String, nullable=False) | 
 |  | 
 |   next_comm = Column(String, nullable=False) | 
 |   next_pid = Column(Integer, nullable=False) | 
 |   next_prio = Column(Integer, nullable=False) | 
 |  | 
 |   @staticmethod | 
 |   def parse_dict(function_args, id = None): | 
 |     # 'prev_comm=kworker/u16:5 prev_pid=13971 prev_prio=120 prev_state=S ==> next_comm=swapper/4 next_pid=0 next_prio=120' | 
    m = re.match(r"prev_comm=(.*) prev_pid=(\d+) prev_prio=(\d+) prev_state=(.*) ==> next_comm=(.*) next_pid=(\d+) next_prio=(\d+) ?", function_args)
 |     if not m: | 
 |       return None | 
 |  | 
 |     groups = m.groups() | 
 |     # ('kworker/u16:5', '13971', '120', 'S', 'swapper/4', '0', '120') | 
 |     d = {} | 
 |     if id is not None: | 
 |       d['id'] = id | 
 |     d['prev_comm'] = groups[0] | 
 |     d['prev_pid'] = int(groups[1]) | 
 |     d['prev_prio'] = int(groups[2]) | 
 |     d['prev_state'] = groups[3] | 
 |     d['next_comm'] = groups[4] | 
 |     d['next_pid'] = int(groups[5]) | 
 |     d['next_prio'] = int(groups[6]) | 
 |  | 
 |     return d | 
 |  | 
 | class SchedBlockedReason(Base): | 
 |   __tablename__ = 'sched_blocked_reasons' | 
 |  | 
 |   id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True) | 
 |  | 
 |   pid = Column(Integer, nullable=False) | 
 |   iowait = Column(Integer, nullable=False) | 
 |   caller = Column(String, nullable=False) | 
 |  | 
 |   @staticmethod | 
 |   def parse_dict(function_args, id = None): | 
    # 'pid=2289 iowait=1 caller=wait_on_page_bit_common+0x2a8/0x5f8'
    m = re.match(r"pid=(\d+) iowait=(\d+) caller=(.*) ?", function_args)
 |     if not m: | 
 |       return None | 
 |  | 
 |     groups = m.groups() | 
 |     # ('2289', '1', 'wait_on_page_bit_common+0x2a8/0x5f8') | 
 |     d = {} | 
 |     if id is not None: | 
 |       d['id'] = id | 
 |     d['pid'] = int(groups[0]) | 
 |     d['iowait'] = int(groups[1]) | 
 |     d['caller'] = groups[2] | 
 |  | 
 |     return d | 
 |  | 
 | class MmFilemapAddToPageCache(Base): | 
 |   __tablename__ = 'mm_filemap_add_to_page_caches' | 
 |  | 
 |   id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True) | 
 |  | 
 |   dev = Column(Integer, nullable=False)        # decoded from ${major}:${minor} syntax. | 
 |   dev_major = Column(Integer, nullable=False)  # original ${major} value. | 
 |   dev_minor = Column(Integer, nullable=False)  # original ${minor} value. | 
 |  | 
 |   ino = Column(Integer, nullable=False)  # decoded from hex to base 10 | 
 |   page = Column(Integer, nullable=False) # decoded from hex to base 10 | 
 |  | 
 |   pfn = Column(Integer, nullable=False) | 
 |   ofs = Column(Integer, nullable=False) | 
 |  | 
 |   # 1:1 relation with RawFtraceEntry. | 
  raw_ftrace_entry = relationship("RawFtraceEntry", uselist=False,
                                  back_populates="mm_filemap_add_to_page_cache")
 |  | 
 |   @staticmethod | 
 |   def parse_dict(function_args, id = None): | 
 |     # dev 253:6 ino b2c7 page=00000000ec787cd9 pfn=1478539 ofs=4096 | 
    m = re.match(r"dev (\d+):(\d+) ino ([0-9a-fA-F]+) page=([0-9a-fA-F]+) pfn=(\d+) ofs=(\d+)", function_args)
 |     if not m: | 
 |       return None | 
 |  | 
 |     groups = m.groups() | 
 |     # ('253', '6', 'b2c7', '00000000ec787cd9', '1478539', '4096') | 
 |     d = {} | 
 |     if id is not None: | 
 |       d['id'] = id | 
 |  | 
 |     device_major = d['dev_major'] = int(groups[0]) | 
 |     device_minor = d['dev_minor'] = int(groups[1]) | 
 |     d['dev'] = device_major << 8 | device_minor | 
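    # e.g. 253:6 -> (253 << 8) | 6 = 64774.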
 |     d['ino'] = int(groups[2], 16) | 
 |     d['page'] = int(groups[3], 16) | 
 |     d['pfn'] = int(groups[4]) | 
 |     d['ofs'] = int(groups[5]) | 
 |  | 
 |     return d | 
 |  | 
 | class Trace2Db: | 
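  """Parses an ftrace/systrace text trace into an sqlite database via SQLAlchemy."""
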
 |   def __init__(self, db_filename: str): | 
 |     (s, e) = self._init_sqlalchemy(db_filename) | 
 |     self._session = s | 
 |     self._engine = e | 
 |     self._raw_ftrace_entry_filter = lambda x: True | 
 |  | 
  def set_raw_ftrace_entry_filter(self, flt):
    """
    Install a filter function that takes a parsed raw ftrace entry dict
    (as produced by RawFtraceEntry.parse_dict) and returns a bool.

    If it returns False, the RawFtraceEntry (and any rows derived from it)
    is skipped and not added to the database.
    """
 |     self._raw_ftrace_entry_filter = flt | 
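
    # Example (illustrative only): keep just the page cache events.
    #   trace2db.set_raw_ftrace_entry_filter(
    #       lambda entry: entry['function'] == 'mm_filemap_add_to_page_cache')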
 |  | 
 |   @staticmethod | 
 |   def _init_sqlalchemy(db_filename: str) -> Tuple[object, object]: | 
 |     global _DEBUG | 
 |     engine = create_engine('sqlite:///' + db_filename, echo=_DEBUG) | 
 |  | 
 |     # CREATE ... (tables) | 
 |     Base.metadata.create_all(engine) | 
 |  | 
 |     Session = sessionmaker(bind=engine) | 
 |     session = Session() | 
 |     return (session, engine) | 
 |  | 
 |   def parse_file_into_db(self, filename: str, limit: Optional[int] = None): | 
 |     """ | 
 |     Parse the ftrace/systrace at 'filename', | 
 |     inserting the values into the current sqlite database. | 
 |  | 
 |     :return: number of RawFtraceEntry inserted. | 
 |     """ | 
 |     return parse_file(filename, self._session, self._engine, self._raw_ftrace_entry_filter, limit) | 
 |  | 
  def parse_file_buf_into_db(self, file_buf, limit: Optional[int] = None):
    """
    Parse the ftrace/systrace from the open file object 'file_buf',
    inserting the values into the current sqlite database.

    :return: number of RawFtraceEntry inserted.
    """
 |     return parse_file_buf(file_buf, self._session, self._engine, self._raw_ftrace_entry_filter, limit) | 
 |  | 
 |  | 
 |   @property | 
 |   def session(self): | 
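    """The underlying SQLAlchemy ORM Session, usable for ad-hoc queries."""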
 |     return self._session | 
 |  | 
 | def insert_pending_entries(engine, kls, lst): | 
 |   if len(lst) > 0: | 
    # Only insert for a non-empty list: with an empty parameter list, SQLAlchemy
    # emits an INSERT of default values, which violates the first non-null
    # column constraint.
 |     try: | 
 |       # Performance-sensitive parsing according to: | 
 |       # https://docs.sqlalchemy.org/en/13/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow | 
 |       engine.execute(kls.__table__.insert(), lst) | 
 |       lst.clear() | 
 |     except sqlalchemy.exc.IntegrityError as err: | 
 |       # possibly violating some SQL constraint, print data here. | 
 |       print(err) | 
 |       print(lst) | 
 |       raise | 
 |  | 
 | def parse_file(filename: str, *args, **kwargs) -> int: | 
 |   # use explicit encoding to avoid UnicodeDecodeError. | 
 |   with open(filename, encoding="ISO-8859-1") as f: | 
 |     return parse_file_buf(f, *args, **kwargs) | 
 |  | 
 | def parse_file_buf(filebuf, session, engine, raw_ftrace_entry_filter, limit=None) -> int: | 
 |   global _FLUSH_LIMIT | 
 |   count = 0 | 
  # count and id can differ: count increments for every data line seen, while id
  # only increments for entries that parse successfully and pass the filter.
 |   id = 0 | 
 |  | 
 |   pending_entries = [] | 
 |   pending_sched_switch = [] | 
 |   pending_sched_blocked_reasons = [] | 
 |   pending_mm_filemap_add_to_pagecaches = [] | 
 |  | 
 |   def insert_all_pending_entries(): | 
 |     insert_pending_entries(engine, RawFtraceEntry, pending_entries) | 
 |     insert_pending_entries(engine, SchedSwitch, pending_sched_switch) | 
 |     insert_pending_entries(engine, SchedBlockedReason, pending_sched_blocked_reasons) | 
 |     insert_pending_entries(engine, MmFilemapAddToPageCache, pending_mm_filemap_add_to_pagecaches) | 
 |  | 
  # For trace.html files produced by systrace, the actual ftrace text lives in
  # the second <script class="trace-data"> block.
 |   parsing_trace_data = 0 | 
 |   parsing_systrace_file = False | 
 |  | 
 |   f = filebuf | 
 |   for l in f: | 
 |     if parsing_trace_data == 0 and l == "<!DOCTYPE html>\n": | 
 |       parsing_systrace_file = True | 
 |       continue | 
 |     if parsing_trace_data != 2 and parsing_systrace_file: | 
 |       if l == '  <script class="trace-data" type="application/text">\n': | 
 |         parsing_trace_data = parsing_trace_data + 1 | 
 |       continue | 
 |  | 
    if parsing_systrace_file and parsing_trace_data == 2 and l == "  </script>\n":
      # The rest of this file is just html for the trace viewer.
      break
 |  | 
 |     # now parsing the ftrace data. | 
 |     if len(l) > 1 and l[0] == '#': | 
 |       continue | 
 |  | 
 |     count = count + 1 | 
 |  | 
 |     if limit and count >= limit: | 
 |       break | 
 |  | 
 |     raw_ftrace_entry = RawFtraceEntry.parse_dict(l) | 
 |     if not raw_ftrace_entry: | 
 |       print("WARNING: Failed to parse raw ftrace entry: " + l) | 
 |       continue | 
 |  | 
 |     if not raw_ftrace_entry_filter(raw_ftrace_entry): | 
 |       # Skip processing raw ftrace entries that don't match a filter. | 
      # This is an optimization for when Trace2Db is used programmatically
 |       # to avoid having an overly large database. | 
 |       continue | 
 |  | 
 |     pending_entries.append(raw_ftrace_entry) | 
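    # 'id' mirrors the autoincrement primary key that SQLite will assign to this
    # raw_ftrace_entries row (assuming the table starts empty), so the derived
    # event rows below can reuse it as their ForeignKey primary key.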
 |     id = id + 1 | 
 |  | 
 |     if raw_ftrace_entry['function'] == 'sched_switch': | 
 |       sched_switch = SchedSwitch.parse_dict(raw_ftrace_entry['function_args'], id) | 
 |  | 
 |       if not sched_switch: | 
 |         print("WARNING: Failed to parse sched_switch: " + l) | 
 |       else: | 
 |         pending_sched_switch.append(sched_switch) | 
 |  | 
 |     elif raw_ftrace_entry['function'] == 'sched_blocked_reason': | 
 |       sbr = SchedBlockedReason.parse_dict(raw_ftrace_entry['function_args'], id) | 
 |  | 
 |       if not sbr: | 
 |         print("WARNING: Failed to parse sched_blocked_reason: " + l) | 
 |       else: | 
 |         pending_sched_blocked_reasons.append(sbr) | 
 |  | 
 |     elif raw_ftrace_entry['function'] == 'mm_filemap_add_to_page_cache': | 
 |       d = MmFilemapAddToPageCache.parse_dict(raw_ftrace_entry['function_args'], | 
 |                                              id) | 
 |       if not d: | 
 |         print("WARNING: Failed to parse mm_filemap_add_to_page_cache: " + l) | 
 |       else: | 
 |         pending_mm_filemap_add_to_pagecaches.append(d) | 
 |  | 
 |     # Objects are cached in python memory, not yet sent to SQL database. | 
 |  | 
 |     # Send INSERT/UPDATE/etc statements to the underlying SQL database. | 
 |     if count % _FLUSH_LIMIT == 0: | 
 |       insert_all_pending_entries() | 
 |  | 
 |   insert_all_pending_entries() | 
 |  | 
 |   # Ensure underlying database commits changes from memory to disk. | 
 |   session.commit() | 
 |  | 
 |   return count |
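

if __name__ == '__main__':
  # Minimal, illustrative command-line sketch; the argument order here is an
  # assumption for demonstration, not an established interface for this module:
  #   trace2db.py <trace-or-systrace-file> <output-sqlite-db>
  if len(sys.argv) != 3:
    print('Usage: %s <trace-or-systrace-file> <output-sqlite-db>' % sys.argv[0])
    sys.exit(1)

  trace2db = Trace2Db(sys.argv[2])
  num_lines = trace2db.parse_file_into_db(sys.argv[1])
  print('Parsed %d ftrace lines into %s' % (num_lines, sys.argv[2]))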