blob: 1f3256e39d30cfefa4448c65c99d052ecd7db239 [file] [log] [blame]
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (C) 2015, ARM Limited and contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import glob
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pylab as pl
import re
import sys
import trappy
import logging
# Regexp to match an rt-app generated logfile, e.g.:
#   /path/to/rt-app-<taskname>-<index>.log
# Group 1 captures the task name. Raw string avoids the invalid '\/'
# escape of the previous version, and the literal '.' before the "log"
# extension is now escaped so that e.g. "...-0Xlog" no longer matches.
TASK_NAME_RE = re.compile(r'.*/rt-app-(.+)-[0-9]+\.log')
class PerfAnalysis(object):
def __init__(self, datadir, tasks=None):
# Dataframe of all tasks performance data
self.perf_data = {}
# Folder containing all rt-app data
self.datadir = None
# Setup logging
self._log = logging.getLogger('PerfAnalysis')
# Load performance data generated by rt-app workloads
self.__loadRTAData(datadir, tasks)
# Keep track of the datadir from where data have been loaded
if len(self.perf_data) == 0:
raise ValueError('No performance data found on folder [{0:s}]'\
.format(datadir))
self.datadir = datadir
def __taskNameFromLog(self, logfile):
tname_match = re.search(TASK_NAME_RE, logfile)
if tname_match is None:
raise ValueError('The logfile [{0:s}] is not from rt-app'\
.format(logfile))
return tname_match.group(1)
def __logfileFromTaskName(self, taskname):
for logfile in glob.glob(
'{0:s}/rt-app-{1:s}.log'.format(self.datadir, taskname)):
return logfile
raise ValueError('No rt-app logfile found for task [{0:s}]'\
.format(taskname))
def tasks(self):
"""
Return the list of tasks for which performance data have been loaded
"""
if self.datadir is None:
raise ValueError("rt-app performance data not (yet) loaded")
return self.perf_data.keys()
def logfile(self, task):
"""
Return the logfile for the specified task
"""
if task not in self.perf_data:
raise ValueError('No logfile loaded for task [{0:s}]'\
.format(task))
return self.perf_data[task]['logfile']
def df(self, task):
"""
Return the PANDAS dataframe with the performance data for the
specified task
"""
if self.datadir is None:
raise ValueError("rt-app performance data not (yet) loaded")
if task not in self.perf_data:
raise ValueError('No dataframe loaded for task [{0:s}]'\
.format(task))
return self.perf_data[task]['df']
def __loadRTAData(self, datadir, tasks):
"""
Load peformance data of an rt-app workload
"""
if tasks is None:
# Lookup for all rt-app logfile into the specified datadir
for logfile in glob.glob('{0:s}/rt-app-*.log'.format(datadir)):
task_name = self.__taskNameFromLog(logfile)
self.perf_data[task_name] = {}
self.perf_data[task_name]['logfile'] = logfile
self._log.debug('Found rt-app logfile for task [%s]', task_name)
else:
# Lookup for specified rt-app task logfile into specified datadir
for task in tasks:
logfile = self.__logfileFromTaskName(task)
self.perf_data[task_name] = {}
self.perf_data[task_name]['logfile'] = logfile
self._log.debug('Found rt-app logfile for task [%s]', task_name)
# Load all the found logfile into a dataset
for task in self.perf_data.keys():
self._log.debug('Loading dataframe for task [%s]...', task)
df = pd.read_table(self.logfile(task),
sep='\s+',
skiprows=1,
header=0,
usecols=[1,2,3,4,7,8,9,10],
names=[
'Cycles', 'Run' ,'Period', 'Timestamp',
'Slack', 'CRun', 'CPeriod', 'WKPLatency'
])
# Normalize time to [s] with origin on the first event
start_time = df['Timestamp'][0]/1e6
df['Time'] = df['Timestamp']/1e6 - start_time
df.set_index(['Time'], inplace=True)
# Add performance metrics column, performance is defined as:
# slack
# perf = -------------
# period - run
df['PerfIndex'] = df['Slack'] / (df['CPeriod'] - df['CRun'])
# Keep track of the loaded dataframe
self.perf_data[task]['df'] = df
def plotPerf(self, task, title=None):
"""
Plot the Latency/Slack and Performance data for the specified task
"""
# Grid
gs = gridspec.GridSpec(2, 2, height_ratios=[4,1], width_ratios=[3,1]);
gs.update(wspace=0.1, hspace=0.1);
# Figure
plt.figure(figsize=(16, 2*6));
if title:
plt.suptitle(title, y=.97, fontsize=16,
horizontalalignment='center');
# Plot: Slack and Latency
axes = plt.subplot(gs[0,0]);
axes.set_title('Task [{0:s}] (start) Latency and (completion) Slack'\
.format(task));
data = self.df(task)[['Slack', 'WKPLatency']]
data.plot(ax=axes, drawstyle='steps-post', style=['r', 'g']);
# axes.set_xlim(x_min, x_max);
axes.xaxis.set_visible(False);
# Plot: Performance
axes = plt.subplot(gs[1,0]);
axes.set_title('Task [{0:s}] Performance Index'.format(task));
data = self.df(task)[['PerfIndex',]]
data.plot(ax=axes, drawstyle='steps-post');
axes.set_ylim(0, 2);
# axes.set_xlim(x_min, x_max);
# Plot: Slack Histogram
axes = plt.subplot(gs[0:2,1]);
data = self.df(task)[['PerfIndex',]]
data.hist(bins=30, ax=axes, alpha=0.4);
# axes.set_xlim(x_min, x_max);
pindex_avg = data.mean()[0];
pindex_std = data.std()[0];
self._log.info('PerfIndex, Task [%s] avg: %.2f, std: %.2f',
task, pindex_avg, pindex_std)
axes.axvline(pindex_avg, color='b', linestyle='--', linewidth=2);
# Save generated plots into datadir
figname = '{}/task_perf_{}.png'.format(self.datadir, task)
pl.savefig(figname, bbox_inches='tight')