Merge "GDB extension script for Heap Processing"
diff --git a/gdb/heap_print/README.md b/gdb/heap_print/README.md
new file mode 100644
index 0000000..63a767c
--- /dev/null
+++ b/gdb/heap_print/README.md
@@ -0,0 +1,44 @@
+Script supports 2 custom commands:
+
+1) watch_heap : sets breakpoints on the dynamic memory allocation functions and keeps track of the allocations
+2) print_ptr : prints the memory pointed to by a raw pointer in hex format.
+
+ eg:
+ (gdb) print_ptr malloc_ptr
+ Type : int *
+ Starting Address: 0x55555556aeb0
+ Length : 40
+ 0x01 0x00 0x00 0x00 0x02 0x00 0x00 0x00 0x03 0x00 0x00 0x00
+ 0x04 0x00 0x00 0x00 0x05 0x00 0x00 0x00 0x06 0x00 0x00 0x00
+ 0x07 0x00 0x00 0x00 0x08 0x00 0x00 0x00 0x09 0x00 0x00 0x00
+ 0x0a 0x00 0x00 0x00
+
+ If print_ptr is used after free/delete[], it prints "No address mapping found!"
+
+
+Tests:
+
+To run the test
+
+In the heap_print dir,
+
+Compile :
+
+g++ -O0 -g test/sample_heap_test.cc -o test/sample_heap_test.o
+
+And then Run:
+
+$ gdb
+$ source test/test_heap_print_script.py
+
+
+Future Goals:
+
+To handle pointer offsets, e.g. (gdb) print_ptr malloc_ptr + 3
+
+To handle custom allocators; maybe the watch_heap command could take arguments
+and set additional breakpoints.
+
+
+
+
diff --git a/gdb/heap_print/heap_print_script.py b/gdb/heap_print/heap_print_script.py
new file mode 100644
index 0000000..f65d56b
--- /dev/null
+++ b/gdb/heap_print/heap_print_script.py
@@ -0,0 +1,239 @@
+import gdb
+
+
+def parse_address_to_int(address):
+    """Return address as an int, parsed from the output of gdb's `p/d`."""
+    printed = gdb.execute('p/d {}'.format(address), to_string=True)
+    value_text = printed.split('=')[1].strip()
+    return int(value_text)
+
+
+def parse_gdb_equals(str):
+    """Return the text after the first "=" of "$1 = value", stripped."""
+    fields = str.split("=")
+    right_hand_side = fields[1]
+    return right_hand_side.strip()
+
+
+class HeapMapping:
+    """
+    Class-level registry of the live heap allocations seen so far, mapping
+    each start address to its length in bytes. All access goes through the
+    static put/get/remove methods, which act as the single entry point.
+    """
+
+    # start address (int) -> allocation length in bytes
+    address_length_mapping = {}
+    # start addresses only; get() sorts these to resolve offset pointers
+    address_set = set()
+
+    @staticmethod
+    def put(address, length):
+        """Record an allocation of length bytes starting at address."""
+        HeapMapping.address_length_mapping[address] = length
+        HeapMapping.address_set.add(address)
+
+    @staticmethod
+    def get(address):
+        """
+        Return the number of tracked bytes from address to the end of the
+        allocation containing it, or None when no allocation contains it.
+
+        Suppose the dynamic array {1,2,3,4,5} starts at address 400. Passing
+        400 returns 20 (i.e. 5 * sizeof(int)). Passing the offset address 408
+        returns the remaining 12 bytes (i.e. 420 - 408).
+
+        Parameters
+        ----------
+        address : int
+            start address of an allocation, or an address inside one
+        """
+        # Local import: the script's only top-level import is gdb.
+        import bisect
+
+        # Exact hit on a start address. Compare against None so that a
+        # recorded zero-length allocation is not mistaken for a missing entry.
+        length_found = HeapMapping.address_length_mapping.get(address)
+        if length_found is not None:
+            return length_found
+
+        # Otherwise find the closest start address below address and check
+        # whether that allocation extends past address.
+        address_list = sorted(HeapMapping.address_set)
+        index = bisect.bisect_left(address_list, address) - 1
+        if index < 0:
+            return None
+        base_address = address_list[index]
+        base_len = HeapMapping.address_length_mapping[base_address]
+        remaining = base_address + base_len - address
+        return remaining if remaining > 0 else None
+
+    @staticmethod
+    def remove(address):
+        """Forget the allocation starting at address, if one is recorded."""
+        HeapMapping.address_length_mapping.pop(address, None)
+        HeapMapping.address_set.discard(address)
+
+
+class AllocationFinishedBreakpoint(gdb.FinishBreakpoint):
+    """
+    Temporary breakpoint on the return of a memory allocation call; records
+    the returned address in HeapMapping. It is instantiated from
+    AllocationBreakpoint and from ReallocationBreakpoint; the latter also
+    passes prev_address, the address of the block being reallocated.
+    """
+
+    def __init__(self, length, prev_address=None):
+        super().__init__(internal=True)
+        self.length = length
+        self.prev_address = prev_address
+
+    def stop(self):
+        """
+        Called when the allocation call returns. Records the returned address
+        with its length, dropping prev_address first when one was given.
+        Always returns False so that execution is never halted here.
+        """
+
+        return_address = self.return_value
+        # A missing or NULL return value means nothing was allocated; for a
+        # failed realloc the previous block stays valid, so keep its entry.
+        if return_address is None or return_address == 0x0:
+            return False
+        if self.prev_address is not None:
+            HeapMapping.remove(self.prev_address)
+        HeapMapping.put(parse_address_to_int(return_address), self.length)
+        return False
+
+
+class AllocationBreakpoint(gdb.Breakpoint):
+    """
+    Breakpoint handler invoked when malloc or operator new[] is entered
+    """
+
+    def __init__(self, spec):
+        super().__init__(spec, internal=True)
+
+    def stop(self):
+        # The output of 'info args' is parsed as "name = value"
+        args_output = gdb.execute('info args', to_string=True)
+        if "=" in args_output:
+            # The size is the only argument, so the whole output is parsed
+            requested_size = int(parse_gdb_equals(args_output))
+            AllocationFinishedBreakpoint(requested_size)
+        return False
+
+
+class ReallocationBreakpoint(gdb.Breakpoint):
+    """
+    Breakpoint handler invoked when realloc is entered
+    """
+
+    def __init__(self, spec):
+        super().__init__(spec, internal=True)
+
+    def stop(self):
+        args_output = gdb.execute('info args', to_string=True)
+        if "=" not in args_output:
+            return False
+        # First line is parsed as the old pointer, second line as the new size
+        arg_lines = args_output.split("\n")
+        old_address = parse_address_to_int(parse_gdb_equals(arg_lines[0]))
+        new_length = int(parse_gdb_equals(arg_lines[1]))
+        AllocationFinishedBreakpoint(new_length, old_address)
+        return False
+
+
+class DeallocationBreakpoint(gdb.Breakpoint):
+    """
+    Breakpoint handler invoked when free or operator delete[] is entered
+    """
+
+    def __init__(self, spec):
+        super().__init__(spec, internal=True)
+
+    def stop(self):
+        args_output = gdb.execute('info args', to_string=True)
+        if "=" in args_output:
+            # Stop tracking the block whose address is being released
+            freed_address = parse_gdb_equals(args_output)
+            HeapMapping.remove(parse_address_to_int(freed_address))
+        return False
+
+
+class WatchHeap(gdb.Command):
+    """
+    Custom Command to keep track of Heap Memory Allocation.
+    Currently keeps tracks of memory allocated/deallocated using
+    malloc, realloc, free, operator new[] and operator delete[]
+    """
+
+    def __init__(self):
+        super().__init__("watch_heap", gdb.COMMAND_USER)
+
+    def complete(self, text, word):
+        return gdb.COMPLETE_COMMAND
+
+    def invoke(self, args, from_tty):
+        # TODO : Check whether break location methods are defined
+        for spec in ("malloc", "operator new[]"):
+            AllocationBreakpoint(spec)
+        ReallocationBreakpoint("realloc")
+        for spec in ("free", "operator delete[]"):
+            DeallocationBreakpoint(spec)
+
+
+class PrintHeapPointer(gdb.Command):
+    """
+    Custom command to print memory allocated at dynamic time
+    """
+
+    def __init__(self):
+        super(PrintHeapPointer, self).__init__("print_ptr", gdb.COMMAND_USER)
+
+    def complete(self, text, word):
+        return gdb.COMPLETE_COMMAND
+
+    def invoke(self, args, from_tty=True):
+        """
+        Evaluate args as an expression and, when it is a pointer, print its
+        type, its address in hex and the tracked heap memory it points to.
+        Any exception along the way is reported as 'No symbol found!'.
+        """
+        try:
+            value = gdb.parse_and_eval(args)
+            if value.type.code == gdb.TYPE_CODE_PTR:
+                print("Type : ", value.type)
+                starting_address_string = gdb.execute(
+                    'p/x {}'.format(value), to_string=True)
+                print("Address: ",
+                      parse_gdb_equals(starting_address_string))
+                self.print_heap(parse_address_to_int(value))
+        except Exception:
+            print('No symbol found!')
+
+    def print_heap(self, address):
+        """
+        Prints the memory that is being pointed by address in hex format
+
+        Parameters
+        ----------
+        address : int
+            raw pointer value; may point inside a tracked allocation
+        """
+
+        memory_size = HeapMapping.get(address)
+        if not memory_size:
+            print("No address mapping found!")
+            return
+        print('Length :', memory_size)
+        # Read the whole range in one call instead of running one gdb 'x'
+        # command per byte and growing the result string byte by byte.
+        memory = gdb.selected_inferior().read_memory(address, memory_size)
+        # Each byte keeps its trailing space to match the previous output.
+        print(''.join('0x{:02x} '.format(b) for b in bytearray(memory)))
+
+
+if __name__ == '__main__':
+    for command_class in (WatchHeap, PrintHeapPointer):
+        command_class()  # each constructor registers its command name
diff --git a/gdb/heap_print/test/sample_heap_test.cc b/gdb/heap_print/test/sample_heap_test.cc
new file mode 100644
index 0000000..783c471
--- /dev/null
+++ b/gdb/heap_print/test/sample_heap_test.cc
@@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <new>
+
+int main()  // NOTE(review): test_heap_print_script.py breaks on lines 20, 27, 28, 41, 42 - presumably of this file; keep the layout
+{
+ int n = 10;
+
+ // Dynamically allocate an array of n ints using malloc()
+ int *malloc_ptr = (int *)malloc(n * sizeof(int));
+
+ if (malloc_ptr != NULL)
+ {
+ // Fill the array with the values 1..n
+ for (int i = 0; i < n; ++i)
+ {
+ malloc_ptr[i] = i + 1;
+ }
+ // For checking realloc: resize the array to new_n ints and refill it with 1..new_n
+ int new_n = 20;
+ malloc_ptr = (int *)realloc(malloc_ptr, new_n * sizeof(int));
+ for (int i = 0; i < new_n; ++i)
+ {
+ malloc_ptr[i] = i + 1;
+ }
+ // For checking free
+ free(malloc_ptr);
+ }
+
+ // Dynamically allocate an array of n ints using operator new[]
+ int *new_ptr = new int[n];
+ if (new_ptr != NULL)
+ {
+ // Fill the array with the values 1..n
+ for (int i = 0; i < n; ++i)
+ {
+ new_ptr[i] = i + 1;
+ }
+
+ // For checking operator delete[]
+ delete[] new_ptr;
+ }
+ printf("Done");
+ return 0;
+}
\ No newline at end of file
diff --git a/gdb/heap_print/test/test_heap_print_script.py b/gdb/heap_print/test/test_heap_print_script.py
new file mode 100644
index 0000000..5518527
--- /dev/null
+++ b/gdb/heap_print/test/test_heap_print_script.py
@@ -0,0 +1,113 @@
+import unittest
+import gdb
+
+
+def get_n(n_str: str) -> int:  # n_str is gdb print output of the form "$N = value"
+ return int(n_str.split("=")[1].strip())
+
+
+class HeapMemoryTest(unittest.TestCase):  # drives gdb against test/sample_heap_test.o
+
+ def setUp(self):
+ gdb.execute('set pagination on')  # NOTE(review): 'off' may be intended for unattended runs - confirm
+ gdb.execute("file test/sample_heap_test.o")
+ gdb.execute("source heap_print_script.py")
+ gdb.execute("delete")  # runs before watch_heap sets its breakpoints
+ gdb.execute("watch_heap")
+
+ def check_memory(self, n, array_ptr_str, offset=1):
+ """
+ It is used to test what we got from 'print_ptr' is what we expect.
+ Sample test program allocates array of n int's and then assigns
+ 1 to n values to that array. So checking that array_ptr_str holds
+ consecutive values, decoding each int as 4 bytes in little endian order
+
+ Parameters
+ ----------
+ n : int
+ array length
+ array_ptr_str : str
+ whole output from print_ptr command including memory content
+ offset : int
+ value expected in the first int that is checked. By default it is 1
+ """
+
+ data = array_ptr_str.split("\n")[3]  # 4th output line: the bytes, after Type, Address, Length
+ bytes_from_heap = data.split(" ")
+ actual_start = offset
+ for i in range(0, n * 4, 4):
+ hex_str = bytes_from_heap[i+3][2:]  # most significant byte first; [2:] drops "0x"
+ hex_str += bytes_from_heap[i+2][2:]
+ hex_str += bytes_from_heap[i+1][2:]
+ hex_str += bytes_from_heap[i][2:]
+ int_of_hex = int(hex_str, 16)
+ self.assertEqual(actual_start, int_of_hex)
+ actual_start += 1
+
+ def test_malloc(self):
+ print("malloc test")
+ gdb.execute("b 20")
+ gdb.execute("r")
+ n_str = gdb.execute("print n", to_string=True)
+ n = get_n(n_str)
+ malloc_ptr_array_str = gdb.execute(
+ "print_ptr malloc_ptr", to_string=True)
+ print(malloc_ptr_array_str)
+ self.check_memory(n, malloc_ptr_array_str)
+ self.assertTrue(True)
+
+ def test_realloc(self):
+ print("realloc test")
+ gdb.execute("b 27")
+ gdb.execute("r")
+ new_n = gdb.execute("print new_n", to_string=True)
+ n = get_n(new_n)
+ malloc_ptr_str = gdb.execute("print_ptr malloc_ptr", to_string=True)
+ print(malloc_ptr_str)
+ self.check_memory(n, malloc_ptr_str)
+
+ def test_offset(self):
+ """
+ Testcase to test raw_pointers that are offset
+ """
+
+ print("offset test. we have array of 20 (80 bytes) and \
+ we offset it by 3, so new length should be 68")
+ offset = 3
+ gdb.execute("b 27")
+ gdb.execute("r")
+ new_n = gdb.execute("print new_n", to_string=True)
+ n = get_n(new_n)
+ malloc_ptr_str = gdb.execute(
+ "print_ptr malloc_ptr + {}".format(offset), to_string=True)
+ print(malloc_ptr_str)
+ self.check_memory(n - offset, malloc_ptr_str, offset+1)
+
+ def test_free(self):
+ print("free test")
+ gdb.execute("b 28")
+ gdb.execute("r")
+ malloc_ptr_str = gdb.execute("print_ptr malloc_ptr", to_string=True)
+ data = malloc_ptr_str.split("\n")[2].strip()  # 3rd output line, after Type and Address
+ self.assertEqual(data, "No address mapping found!")
+
+ def test_new(self):
+ print("operator new[] test")
+ gdb.execute("b 41")
+ gdb.execute("r")
+ n_str = gdb.execute("print n", to_string=True)
+ n = get_n(n_str)
+ new_ptr_array_str = gdb.execute("print_ptr new_ptr", to_string=True)
+ self.check_memory(n, new_ptr_array_str)
+
+ def test_delete(self):
+ print("operator delete[]")
+ gdb.execute("b 42")
+ gdb.execute("r")
+ new_ptr_array_str = gdb.execute("print_ptr new_ptr", to_string=True)
+ data = new_ptr_array_str.split("\n")[2].strip()  # 3rd output line, after Type and Address
+ self.assertEqual(data, "No address mapping found!")
+
+
+if __name__ == '__main__':  # per the README, run from gdb: source test/test_heap_print_script.py
+ unittest.main()