| # Copyright 2015-2017 ARM Limited |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| import pandas as pd |
| import numpy as np |
| |
| """Generic functions that can be used in multiple places in trappy |
| """ |
| |
def listify(to_select):
    """Ensure *to_select* is a list.

    A non-list argument is wrapped in a single-element list; a list
    is returned unchanged.  Lets callers accept either a single item
    or a list of items.
    """

    if isinstance(to_select, list):
        return to_select

    return [to_select]
| |
def handle_duplicate_index(data,
                           max_delta=0.000001):
    """Handle duplicate values in index

    :param data: The timeseries input (must have a sorted index)
    :type data: :mod:`pandas.Series`

    :param max_delta: Maximum interval adjustment value that
        will be added to duplicate indices
    :type max_delta: float

    Consider the following case where a series needs to be reindexed
    to a new index (which can be required when different series need to
    be combined and compared):
    ::

        import pandas
        values = [0, 1, 2, 3, 4]
        index = [0.0, 1.0, 1.0, 6.0, 7.0]
        series = pandas.Series(values, index=index)
        new_index = [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 7.0]
        series.reindex(new_index)

    The above code fails with:
    ::

        ValueError: cannot reindex from a duplicate axis

    The function :func:`handle_duplicate_index` changes the duplicate values
    to
    ::

        >>> import pandas
        >>> from trappy.utils import handle_duplicate_index

        >>> values = [0, 1, 2, 3, 4]
        index = [0.0, 1.0, 1.0, 6.0, 7.0]
        series = pandas.Series(values, index=index)
        series = handle_duplicate_index(series)
        print series.index.values
        >>> [ 0.        1.        1.000001  6.        7.      ]

    """

    index = data.index
    # Work on a private copy: modern pandas marks the Index's backing
    # array read-only, so editing index.values in place would raise
    # (and would silently mutate the caller's index as a side effect).
    new_index = index.values.copy()

    # Each duplicated value, listed once.  Index.duplicated() marks
    # every occurrence after the first; unique() collapses repeats.
    # (Index.get_duplicates() was removed in pandas 1.0.)
    dups = index[index.duplicated()].unique()

    for dup in dups:
        # The index is assumed sorted, so the duplicates form the
        # contiguous run [dup_index_left, dup_index_right].
        dup_index_left = index.searchsorted(dup, side="left")
        dup_index_right = index.searchsorted(dup, side="right") - 1
        num_dups = dup_index_right - dup_index_left + 1

        # Calculate delta that needs to be added to each duplicate
        # index: spread the duplicates evenly across the gap to the
        # next index entry.
        try:
            delta = (index[dup_index_right + 1] - dup) / num_dups
        except IndexError:
            # dup_index_right + 1 is outside of the series (i.e. the
            # dup is at the end of the series).
            delta = max_delta

        # Clamp the maximum delta added to max_delta
        if delta > max_delta:
            delta = max_delta

        # Leave the first occurrence intact and shift the i-th
        # duplicate by i * delta, so the run stays strictly increasing
        # and never reaches the next original index value.  (The old
        # code doubled delta on every step, which could collide with
        # the following index entry when a value was repeated four or
        # more times.)
        for offset in range(1, num_dups):
            new_index[dup_index_left + offset] += delta * offset

    # Return a copy carrying the de-duplicated index; the input is
    # left untouched.
    result = data.copy()
    result.index = pd.Index(new_index)
    return result
| |
def apply_callback(df, fn, *kwargs):
    """Iterate fast over all rows in a data frame and apply fn to each.

    :param df: the data frame to iterate; its index is exposed to the
        callback under the key ``'Time'``
    :param fn: callable invoked once per row with a dict mapping column
        name (plus ``'Time'``) to the row's value.  If extra positional
        arguments were supplied, they are forwarded as a single tuple:
        ``fn(event_dict, extra_args)``.
    :param kwargs: optional extra arguments, passed through to *fn* as
        one tuple (kept for backward compatibility with existing callers)
    """
    iters = df.itertuples()

    # Column names beginning with underscore will not be preserved in tuples
    # due to constraints on namedtuple field names, so store mappings from
    # column name to column number for each trace event.
    col_idxs = { name: idx for idx, name in enumerate(['Time'] + df.columns.tolist()) }

    # next(it, None) is Python-3 compatible (the old iters.next() was
    # Python-2 only) and returns None for an empty data frame instead
    # of raising StopIteration.
    event_tuple = next(iters, None)
    while event_tuple is not None:
        event_dict = { col: event_tuple[idx] for col, idx in col_idxs.items() }

        if kwargs:
            fn(event_dict, kwargs)
        else:
            fn(event_dict)

        event_tuple = next(iters, None)
| |
| |
def merge_dfs(pr_df, sec_df, pivot):
    """Merge a primary and a secondary data frame row-by-row.

    Rows from both frames are replayed in global order (both frames
    must carry a ``__line`` column giving that ordering).  NaN fields
    in a primary row are filled from the most recent secondary row
    that has the same *pivot* value.  Returns a new data frame of the
    merged primary rows, indexed by ``Time``.

    :param pr_df: primary data frame
    :param sec_df: secondary data frame
    :param pivot: column name used to match secondary rows to primary
        rows
    """
    # Keep track of last secondary event seen per pivot value
    pivot_map = {}

    # An array accumulating dicts with merged data
    merged_data = []
    def df_fn(data):
        # After the keyed concat below, 'Time' is a tuple of
        # (frame label, timestamp).
        # Store the latest secondary info
        if data['Time'][0] == 'secondary':
            pivot_map[data[pivot]] = data
            # Get rid of primary/secondary labels
            data['Time'] = data['Time'][1]
            return

        # Propagate latest secondary info into NaN fields of the
        # primary row.
        for key, value in data.items():
            if key == pivot:
                continue
            # Fast check for if value is nan (faster than np.isnan + try/except):
            # NaN is the only value that does not compare equal to itself.
            if value != value and data[pivot] in pivot_map:
                data[key] = pivot_map[data[pivot]][key]

        # Get rid of primary/secondary labels
        data['Time'] = data['Time'][1]
        merged_data.append(data)

    # DataFrame.sort() was removed in pandas 0.20; sort_values() is
    # the supported spelling.
    df = pd.concat([pr_df, sec_df],
                   keys=['primary', 'secondary']).sort_values(by='__line')
    apply_callback(df, df_fn)
    merged_df = pd.DataFrame.from_dict(merged_data)
    merged_df.set_index('Time', inplace=True)

    return merged_df