Merge "GDB extension script for Heap Processing"
diff --git a/gdb/heap_print/README.md b/gdb/heap_print/README.md
new file mode 100644
index 0000000..63a767c
--- /dev/null
+++ b/gdb/heap_print/README.md
@@ -0,0 +1,44 @@
+Script supports 2 custom commands:
+
+1) watch_heap : sets breakpoints on the dynamic memory allocation/deallocation functions and keeps track of the allocated blocks
+2) print_ptr : prints the memory pointed by raw pointer in hex format.
+
+                eg:
+                (gdb) print_ptr malloc_ptr
+                Type : int *
+                Starting Address: 0x55555556aeb0
+                Length : 40
+                0x01 0x00 0x00 0x00 0x02 0x00 0x00 0x00 0x03 0x00 0x00 0x00
+                0x04 0x00 0x00 0x00 0x05 0x00 0x00 0x00 0x06 0x00 0x00 0x00
+                0x07 0x00 0x00 0x00 0x08 0x00 0x00 0x00 0x09 0x00 0x00 0x00
+                0x0a 0x00 0x00 0x00
+
+    If print_ptr is used after free/delete[], it prints "No address mapping found!"
+
+
+Tests:
+
+To run the test
+
+In the heap_print dir,
+
+Compile :
+
+g++ -O0 -g test/sample_heap_test.cc -o test/sample_heap_test.o
+
+And then Run:
+
+$ gdb
+$ source test/test_heap_print_script.py
+
+
+Future Goals:
+
+To handle pointer offset, for eg, (gdb) print_ptr malloc_ptr + 3
+
+To handle custom allocators, maybe the watch_heap command could take in arguments
+and set additional breakpoints.
+
+
+
+
diff --git a/gdb/heap_print/heap_print_script.py b/gdb/heap_print/heap_print_script.py
new file mode 100644
index 0000000..f65d56b
--- /dev/null
+++ b/gdb/heap_print/heap_print_script.py
@@ -0,0 +1,239 @@
+import gdb
+
+
+def parse_address_to_int(address):
+    int_address_string = gdb.execute(
+        'p/d {}'.format(address), to_string=True)
+    int_address = int(int_address_string.split('=')[1].strip())
+    return int_address
+
+
+def parse_gdb_equals(str):
+    """
+    str is $1 = value. so it returns value
+    """
+    return str.split("=")[1].strip()
+
+
+class HeapMapping:
+    """
+    Wrapper class for dictionary to have customization for the dictionary
+    and one entry point
+    """
+
+    address_length_mapping = {}
+    address_set = set()
+
+    @staticmethod
+    def put(address, length):
+        HeapMapping.address_length_mapping[address] = length
+        HeapMapping.address_set.add(address)
+
+    @staticmethod
+    def get(address):
+        """
+        Gets the length of the dynamic array corresponding to address. Suppose dynamic
+        array is {1,2,3,4,5} and starting address is 400 which is passed as address to this
+        method, then method would return 20(i.e. 5 * sizeof(int)). When this address
+        is offsetted for eg 408 is passed to this method, then it will return remainder
+        number of bytes allocated, here it would be 12 (i.e. 420 - 408)
+        Algorithm tries to find address in address_length_apping, if it doesn't find it
+        then it tries to find the range that can fit the address. if it fails to find such
+        mapping then it would return None.
+        """
+
+        length_found = HeapMapping.address_length_mapping.get(address)
+        if length_found:
+            return length_found
+        else:
+            address_list = list(HeapMapping.address_set)
+            address_list.sort()
+            left = 0
+            right = len(address_list) - 1
+            while left <= right:
+                mid = int((left + right) / 2)
+                if address > address_list[mid]:
+                    left = mid + 1
+                # only < case would be accounted in else.
+                # As == would be handled in the if-check above (outside while)
+                else:
+                    right = mid - 1
+
+            index = left - 1
+            if index == -1:
+                return None
+            base_address = address_list[index]
+            base_len = HeapMapping.address_length_mapping.get(base_address)
+            if base_address + base_len > address:
+                return base_address + base_len - address
+            else:
+                return None
+
+    @staticmethod
+    def remove(address):
+        HeapMapping.address_length_mapping.pop(address, None)
+        HeapMapping.address_set.discard(address)
+
+
+class AllocationFinishedBreakpoint(gdb.FinishBreakpoint):
+    """
+    Sets temporary breakpoints on returns (specifically returns of memory allocations)
+    to record address allocated.
+    It get instantiated from AllocationBreakpoint and ReallocationBreakpoint. When it is
+    instantiated from ReallocationBreakPoint, it carries prev_address.
+    """
+
+    def __init__(self, length, prev_address=None):
+        super().__init__(internal=True)
+        self.length = length
+        self.prev_address = prev_address
+
+    def stop(self):
+        """
+        Called when the return address in the current frame is hit. It parses hex address
+        into int address. If return address is not null then it stores address and length
+        into the address_length_mapping dictionary.
+        """
+
+        return_address = self.return_value
+        if return_address is not None or return_address == 0x0:
+            if self.prev_address != None:
+                HeapMapping.remove(self.prev_address)
+
+            # Converting hex address to int address
+            int_address = parse_address_to_int(return_address)
+            HeapMapping.put(int_address, self.length)
+        return False
+
+
+class AllocationBreakpoint(gdb.Breakpoint):
+    """
+    Handler class when malloc and operator new[] gets hit
+    """
+
+    def __init__(self, spec):
+        super().__init__(spec, internal=True)
+
+    def stop(self):
+        # handle malloc and new
+        func_args_string = gdb.execute('info args', to_string=True)
+        if func_args_string.find("=") != -1:
+            # There will be just 1 argument to malloc. So no need to handle multiline
+            length = int(parse_gdb_equals(func_args_string))
+            AllocationFinishedBreakpoint(length)
+            return False
+
+
+class ReallocationBreakpoint(gdb.Breakpoint):
+    """
+    Handler class when realloc gets hit
+    """
+
+    def __init__(self, spec):
+        super().__init__(spec, internal=True)
+
+    def stop(self):
+        # handle realloc
+        func_args_string = gdb.execute('info args', to_string=True)
+        if func_args_string.find("=") != -1:
+            args = func_args_string.split("\n")
+            address = parse_gdb_equals(args[0])
+            int_address = parse_address_to_int(address)
+            length = int(parse_gdb_equals(args[1]))
+            AllocationFinishedBreakpoint(length, int_address)
+            return False
+
+
+class DeallocationBreakpoint(gdb.Breakpoint):
+    """
+    Handler class when free and operator delete[] gets hit
+    """
+
+    def __init__(self, spec):
+        super().__init__(spec, internal=True)
+
+    def stop(self):
+        func_args_string = gdb.execute('info args', to_string=True)
+        if func_args_string.find("=") != -1:
+            address = parse_gdb_equals(func_args_string)
+            int_address = parse_address_to_int(address)
+            HeapMapping.remove(int_address)
+        return False
+
+
+class WatchHeap(gdb.Command):
+    """
+    Custom Command to keep track of Heap Memory Allocation.
+    Currently keeps tracks of memory allocated/deallocated using
+    malloc, realloc, free, operator new[] and operator delete[]
+    """
+
+    def __init__(self):
+        super(WatchHeap, self).__init__("watch_heap", gdb.COMMAND_USER)
+
+    def complete(self, text, word):
+        return gdb.COMPLETE_COMMAND
+
+    def invoke(self, args, from_tty):
+        # TODO : Check whether break location methods are defined
+        AllocationBreakpoint("malloc")
+        AllocationBreakpoint("operator new[]")
+        ReallocationBreakpoint("realloc")
+        DeallocationBreakpoint("free")
+        DeallocationBreakpoint("operator delete[]")
+
+
+class PrintHeapPointer(gdb.Command):
+    """
+    Custom command to print memory allocated at dynamic time
+    """
+
+    def __init__(self):
+        super(PrintHeapPointer, self).__init__("print_ptr", gdb.COMMAND_USER)
+
+    def complete(self, text, word):
+        return gdb.COMPLETE_COMMAND
+
+    def invoke(self, args, from_tty=True):
+        try:
+            value = gdb.parse_and_eval(args)
+            if value.type.code == gdb.TYPE_CODE_PTR:
+                print("Type : ", value.type)
+                starting_address_string = gdb.execute(
+                    'p/x {}'.format(value), to_string=True)
+                print("Address: ",
+                      parse_gdb_equals(starting_address_string))
+                int_address = parse_address_to_int(value)
+                # print memory
+                self.print_heap(int_address)
+        except Exception:
+            print('No symbol found!')
+
+    def print_heap(self, address):
+        """
+        Prints the memory that is being pointed by address in hex format
+
+        Parameters
+        ---------
+        address : raw pointer
+        """
+
+        memory_size = HeapMapping.get(address)
+        if memory_size:
+            print('Length :', memory_size)
+            result = ''
+            i = 0
+            while i < memory_size:
+                byte_string = gdb.execute(
+                    'x/1bx {}'.format(address), to_string=True)
+                result += byte_string.split(':')[1].strip() + " "
+                address += 1
+                i += 1
+            print(result)
+        else:
+            print("No address mapping found!")
+
+
+if __name__ == '__main__':
+    WatchHeap()  # registers the "watch_heap" command with GDB
+    PrintHeapPointer()  # registers the "print_ptr" command with GDB
diff --git a/gdb/heap_print/test/sample_heap_test.cc b/gdb/heap_print/test/sample_heap_test.cc
new file mode 100644
index 0000000..783c471
--- /dev/null
+++ b/gdb/heap_print/test/sample_heap_test.cc
@@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <new>
+
+int main()
+{
+    int n = 10;
+    // NOTE: test_heap_print_script.py breaks on fixed line numbers of this file; keep the line layout unchanged.
+    // Dynamically allocate memory using malloc()
+    int *malloc_ptr = (int *)malloc(n * sizeof(int));
+
+    if (malloc_ptr != NULL)
+    {
+        // Fill the array with 1..n so the dumped bytes are predictable
+        for (int i = 0; i < n; ++i)
+        {
+            malloc_ptr[i] = i + 1;
+        }
+        // Grow the block to new_n ints to exercise realloc tracking
+        int new_n = 20;
+        malloc_ptr = (int *)realloc(malloc_ptr, new_n * sizeof(int));
+        for (int i = 0; i < new_n; ++i)
+        {
+            malloc_ptr[i] = i + 1;
+        }
+        // Release the block to exercise free tracking
+        free(malloc_ptr);
+    }
+
+    // Dynamically allocating memory using operator new[]
+    int *new_ptr = new int[n];
+    if (new_ptr != NULL)
+    {
+        // Fill the array with 1..n
+        for (int i = 0; i < n; ++i)
+        {
+            new_ptr[i] = i + 1;
+        }
+
+        // Release the array to exercise operator delete[] tracking
+        delete[] new_ptr;
+    }
+    printf("Done");
+    return 0;
+}
\ No newline at end of file
diff --git a/gdb/heap_print/test/test_heap_print_script.py b/gdb/heap_print/test/test_heap_print_script.py
new file mode 100644
index 0000000..5518527
--- /dev/null
+++ b/gdb/heap_print/test/test_heap_print_script.py
@@ -0,0 +1,113 @@
+import unittest
+import gdb
+
+
+def get_n(n_str):  # n_str is GDB "print" output such as "$1 = 10"; returns 10
+    return int(n_str.split("=")[1].strip())
+
+
+class HeapMemoryTest(unittest.TestCase):
+    """Runs test/sample_heap_test.o under GDB and checks the print_ptr output."""
+    def setUp(self):
+        gdb.execute('set pagination on')  # NOTE(review): scripted runs usually want pagination off - confirm
+        gdb.execute("file test/sample_heap_test.o")
+        gdb.execute("source heap_print_script.py")  # (re)defines watch_heap and print_ptr
+        gdb.execute("delete")  # "delete" with no arguments
+        gdb.execute("watch_heap")
+
+    def check_memory(self, n, array_ptr_str, offset=1):
+        """
+        It is used to test what we got from 'print_ptr' is what we expect.
+        Sample test program allocates array of n int's and then assigns
+        1 to n values to that array. So checking that the dumped bytes decode
+        to consecutive values, assuming little endian order and 4-byte ints
+
+        Parameters
+        ----------
+        n : int
+            number of ints to check
+        array_ptr_str : str
+            whole output from print_ptr command including memory content
+        offset : int
+            expected value of the first int checked. By default it is 1
+        """
+
+        data = array_ptr_str.split("\n")[3]  # 4th line: the hex dump (after Type, Address, Length)
+        bytes_from_heap = data.split(" ")
+        actual_start = offset
+        for i in range(0, n * 4, 4):
+            hex_str = bytes_from_heap[i+3][2:]  # [2:] drops the "0x" prefix; last byte is most significant
+            hex_str += bytes_from_heap[i+2][2:]
+            hex_str += bytes_from_heap[i+1][2:]
+            hex_str += bytes_from_heap[i][2:]
+            int_of_hex = int(hex_str, 16)
+            self.assertEqual(actual_start, int_of_hex)
+            actual_start += 1
+
+    def test_malloc(self):
+        print("malloc test")
+        gdb.execute("b 20")  # sample_heap_test.cc:20, after the malloc'd array is filled
+        gdb.execute("r")
+        n_str = gdb.execute("print n", to_string=True)
+        n = get_n(n_str)
+        malloc_ptr_array_str = gdb.execute(
+            "print_ptr malloc_ptr", to_string=True)
+        print(malloc_ptr_array_str)
+        self.check_memory(n, malloc_ptr_array_str)
+        self.assertTrue(True)  # no-op assertion; check_memory above does the checking
+
+    def test_realloc(self):
+        print("realloc test")
+        gdb.execute("b 27")  # sample_heap_test.cc:27, after realloc and refill, before free
+        gdb.execute("r")
+        new_n = gdb.execute("print new_n", to_string=True)
+        n = get_n(new_n)
+        malloc_ptr_str = gdb.execute("print_ptr malloc_ptr", to_string=True)
+        print(malloc_ptr_str)
+        self.check_memory(n, malloc_ptr_str)
+
+    def test_offset(self):
+        """
+        Testcase to test raw pointers that are offset into a tracked allocation
+        """
+
+        print("offset test. we have array of 20 (80 bytes) and \
+                we offset it by 3, so new length should be 68")
+        offset = 3
+        gdb.execute("b 27")  # sample_heap_test.cc:27, after realloc and refill, before free
+        gdb.execute("r")
+        new_n = gdb.execute("print new_n", to_string=True)
+        n = get_n(new_n)
+        malloc_ptr_str = gdb.execute(
+            "print_ptr malloc_ptr + {}".format(offset), to_string=True)
+        print(malloc_ptr_str)
+        self.check_memory(n - offset, malloc_ptr_str, offset+1)  # first int seen is element 3, i.e. value 4
+
+    def test_free(self):
+        print("free test")
+        gdb.execute("b 28")  # sample_heap_test.cc:28, just after free(malloc_ptr)
+        gdb.execute("r")
+        malloc_ptr_str = gdb.execute("print_ptr malloc_ptr", to_string=True)
+        data = malloc_ptr_str.split("\n")[2].strip()  # 3rd line: where "Length" would otherwise be
+        self.assertEqual(data, "No address mapping found!")
+
+    def test_new(self):
+        print("operator new[] test")
+        gdb.execute("b 41")  # sample_heap_test.cc:41, after the new[] array is filled, before delete[]
+        gdb.execute("r")
+        n_str = gdb.execute("print n", to_string=True)
+        n = get_n(n_str)
+        new_ptr_array_str = gdb.execute("print_ptr new_ptr", to_string=True)
+        self.check_memory(n, new_ptr_array_str)
+
+    def test_delete(self):
+        print("operator delete[]")
+        gdb.execute("b 42")  # sample_heap_test.cc:42, just after delete[] new_ptr
+        gdb.execute("r")
+        new_ptr_array_str = gdb.execute("print_ptr new_ptr", to_string=True)
+        data = new_ptr_array_str.split("\n")[2].strip()  # 3rd line: where "Length" would otherwise be
+        self.assertEqual(data, "No address mapping found!")
+
+
+if __name__ == '__main__':
+    unittest.main()  # per the README, run by sourcing this file from a gdb session started in heap_print/