Implement VG_(arena_realloc_shrink), similar to realloc except that it
can only decrease the size of a block, does not change the block's
address, does not need to allocate another block and copy the memory,
and (if the freed tail is big enough) makes the excess memory available
for other allocations.

VG_(arena_realloc_shrink) is then used in debuginfo storage.c
(replacing an allocation + copy).
It is also used in the dedup pool, to recover the unused memory
of the last pool.
This in turn allows the string pool size to be increased back to the
original 3.9.0 value of 64KB. All this slightly decreases the peak and
in-use memory of dinfo.
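
On the dedup pool side, this is just a matter of passing the arena's
shrink function to the new VG_(freezeDedupPA) parameter; a sketch of
the call site (mirroring the storage.c hunk below; passing NULL keeps
the old behaviour, i.e. the last pool is not shrunk):

    if (di->strpool)
       VG_(freezeDedupPA) (di->strpool, ML_(dinfo_shrink_block));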

VG_(arena_realloc_shrink) will also be used (in another patch) to
implement a dedup pool which "numbers" the allocated elements.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@14122 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/m_debuginfo/misc.c b/coregrind/m_debuginfo/misc.c
index b3f1349..d4825a4 100644
--- a/coregrind/m_debuginfo/misc.c
+++ b/coregrind/m_debuginfo/misc.c
@@ -51,6 +51,10 @@
    return v;
 }
 
+void  ML_(dinfo_shrink_block)( void* ptr, SizeT szB ) {
+   VG_(arena_realloc_shrink)( VG_AR_DINFO, ptr, szB );
+}
+
 void ML_(dinfo_free) ( void* v ) {
    VG_(arena_free)( VG_AR_DINFO, v );
 }
diff --git a/coregrind/m_debuginfo/priv_misc.h b/coregrind/m_debuginfo/priv_misc.h
index c6b628a..53998b2 100644
--- a/coregrind/m_debuginfo/priv_misc.h
+++ b/coregrind/m_debuginfo/priv_misc.h
@@ -38,11 +38,12 @@
 
 #include "pub_core_basics.h"    // SizeT
 
-/* Allocate(zeroed), free, strdup, memdup, all in VG_AR_DINFO. */
+/* Allocate(zeroed), free, strdup, memdup, shrink, all in VG_AR_DINFO. */
 void*  ML_(dinfo_zalloc)( const HChar* cc, SizeT szB );
 void   ML_(dinfo_free)( void* v );
 HChar* ML_(dinfo_strdup)( const HChar* cc, const HChar* str );
 void*  ML_(dinfo_memdup)( const HChar* cc, void* str, SizeT nStr );
+void   ML_(dinfo_shrink_block)( void* ptr, SizeT szB );
 
 /* Extract (possibly unaligned) data of various sizes from a buffer. */
 Short ML_(read_Short)( UChar* data );
diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h
index 951097d..cdd0158 100644
--- a/coregrind/m_debuginfo/priv_storage.h
+++ b/coregrind/m_debuginfo/priv_storage.h
@@ -537,7 +537,7 @@
 
 
 /* To do with the string table in struct _DebugInfo (::strpool) */
-#define SEGINFO_STRPOOLSIZE (16*1024)
+#define SEGINFO_STRPOOLSIZE (64*1024)
 
 
 /* We may encounter more than one .eh_frame section in an object --
diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c
index b0a4e1c..f01f41e 100644
--- a/coregrind/m_debuginfo/storage.c
+++ b/coregrind/m_debuginfo/storage.c
@@ -331,21 +331,13 @@
 
 /* Resize the LocTab (line number table) to save memory, by removing
    (and, potentially, allowing m_mallocfree to unmap) any unused space
-   at the end of the table.
-*/
+   at the end of the table. */
 static void shrinkLocTab ( struct _DebugInfo* di )
 {
-   DiLoc* new_tab;
    UWord new_sz = di->loctab_used;
    if (new_sz == di->loctab_size) return;
    vg_assert(new_sz < di->loctab_size);
-
-   new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkLocTab", 
-                                new_sz * sizeof(DiLoc) );
-   VG_(memcpy)(new_tab, di->loctab, new_sz * sizeof(DiLoc));
-
-   ML_(dinfo_free)(di->loctab);
-   di->loctab = new_tab;
+   ML_(dinfo_shrink_block)( di->loctab, new_sz * sizeof(DiLoc));
    di->loctab_size = new_sz;
 }
 
@@ -484,21 +476,13 @@
 
 /* Resize the InlTab (inlined call table) to save memory, by removing
    (and, potentially, allowing m_mallocfree to unmap) any unused space
-   at the end of the table.
-*/
+   at the end of the table. */
 static void shrinkInlTab ( struct _DebugInfo* di )
 {
-   DiInlLoc* new_tab;
    UWord new_sz = di->inltab_used;
    if (new_sz == di->inltab_size) return;
    vg_assert(new_sz < di->inltab_size);
-
-   new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkInlTab", 
-                                new_sz * sizeof(DiInlLoc) );
-   VG_(memcpy)(new_tab, di->inltab, new_sz * sizeof(DiInlLoc));
-
-   ML_(dinfo_free)(di->inltab);
-   di->inltab = new_tab;
+   ML_(dinfo_shrink_block)( di->inltab, new_sz * sizeof(DiInlLoc));
    di->inltab_size = new_sz;
 }
 
@@ -1967,7 +1951,7 @@
    ML_(canonicaliseCFI) ( di );
    canonicaliseVarInfo ( di );
    if (di->strpool)
-      VG_(freezeDedupPA) (di->strpool);
+      VG_(freezeDedupPA) (di->strpool, ML_(dinfo_shrink_block));
 }
 
 
diff --git a/coregrind/m_deduppoolalloc.c b/coregrind/m_deduppoolalloc.c
index 5eb6feb..bb77fa8 100644
--- a/coregrind/m_deduppoolalloc.c
+++ b/coregrind/m_deduppoolalloc.c
@@ -113,7 +113,7 @@
 {
    Word i;
    if (ddpa->ht_elements)
-      VG_(freezeDedupPA) (ddpa); // Free data structures used for insertion.
+      VG_(freezeDedupPA) (ddpa, NULL); // Free data structures used for insertion.
    for (i = 0; i < VG_(sizeXA) (ddpa->pools); i++)
       ddpa->free (*(UWord **)VG_(indexXA) ( ddpa->pools, i ));
    VG_(deleteXA) (ddpa->pools);
@@ -176,12 +176,19 @@
 {
 }
 
-void VG_(freezeDedupPA) (DedupPoolAlloc *ddpa)
+void VG_(freezeDedupPA) (DedupPoolAlloc *ddpa,
+                         void (*shrink_block)(void*, SizeT))
 {
    if (VG_(clo_stats) 
        && (VG_(clo_verbosity) > 2 || VG_(debugLog_getLevel) () >= 2)) {
       print_stats(ddpa);
    }
+   if (shrink_block && ddpa->curpool_limit > ddpa->curpool_free) {
+      UChar *last_added_pool = 
+         (*(UChar **)VG_(indexXA) ( ddpa->pools, 
+                                    VG_(sizeXA)(ddpa->pools) - 1));
+      (*shrink_block)(last_added_pool, ddpa->curpool_free - last_added_pool);
+   }
    VG_(HT_destruct) ( ddpa->ht_elements, htelem_dummyfree);
    ddpa->ht_elements = NULL;
    VG_(deletePA) (ddpa->ht_node_pa);
diff --git a/coregrind/m_mallocfree.c b/coregrind/m_mallocfree.c
index 2325a52..5e8e459 100644
--- a/coregrind/m_mallocfree.c
+++ b/coregrind/m_mallocfree.c
@@ -487,6 +487,44 @@
    return b2[get_bszB(b) - sizeof(SizeT) - rz_byteno - 1];
 }
 
+#if defined(ENABLE_INNER_CLIENT_REQUEST)
+/* When running as an inner, the block headers before and after
+   (see 'Layout of an in-use block:' above) are made non accessible
+   by VALGRIND_MALLOCLIKE_BLOCK/VALGRIND_FREELIKE_BLOCK
+   to allow the outer to detect block overrun.
+   The two functions below are used when these headers must be
+   temporarily accessed. */
+static void mkBhdrAccess( Arena* a, Block* b )
+{
+   VALGRIND_MAKE_MEM_DEFINED (b,
+                              hp_overhead_szB() + sizeof(SizeT) + a->rz_szB);
+   VALGRIND_MAKE_MEM_DEFINED (b + get_bszB(b) - a->rz_szB - sizeof(SizeT),
+                              a->rz_szB + sizeof(SizeT));
+}
+
+/* Mark block hdr as not accessible.
+   !!! Currently, we do not mark the cost center and szB fields inaccessible
+   as these are accessed in too many places. */
+static void mkBhdrNoAccess( Arena* a, Block* b )
+{
+   VALGRIND_MAKE_MEM_NOACCESS (b + hp_overhead_szB() + sizeof(SizeT),
+                               a->rz_szB);
+   VALGRIND_MAKE_MEM_NOACCESS (b + get_bszB(b) - sizeof(SizeT) - a->rz_szB,
+                               a->rz_szB);
+}
+
+/* Make the cc+szB fields accessible. */
+static void mkBhdrSzAccess( Arena* a, Block* b )
+{
+   VALGRIND_MAKE_MEM_DEFINED (b,
+                              hp_overhead_szB() + sizeof(SizeT));
+   /* We cannot use get_bszB(b), as this reads the 'hi' szB we want
+      to mark accessible. So, we only access the 'lo' szB. */
+   SizeT bszB_lo = mk_plain_bszB(*(SizeT*)&b[0 + hp_overhead_szB()]);
+   VALGRIND_MAKE_MEM_DEFINED (b + bszB_lo - sizeof(SizeT),
+                              sizeof(SizeT));
+}
+#endif
 
 /*------------------------------------------------------------*/
 /*--- Arena management                                     ---*/
@@ -1130,11 +1168,7 @@
    // to get_rz_hi_byte().
    if (!a->clientmem && is_inuse_block(b)) {
       // In the inner, for memcheck sake, temporarily mark redzone accessible.
-      INNER_REQUEST(VALGRIND_MAKE_MEM_DEFINED
-                    (b + hp_overhead_szB() + sizeof(SizeT), a->rz_szB));
-      INNER_REQUEST(VALGRIND_MAKE_MEM_DEFINED
-                    (b + get_bszB(b)
-                     - sizeof(SizeT) - a->rz_szB, a->rz_szB));
+      INNER_REQUEST(mkBhdrAccess(a,b));
       for (i = 0; i < a->rz_szB; i++) {
          if (get_rz_lo_byte(b, i) != 
             (UByte)(((Addr)b&0xff) ^ REDZONE_LO_MASK))
@@ -1142,17 +1176,44 @@
          if (get_rz_hi_byte(b, i) != 
             (UByte)(((Addr)b&0xff) ^ REDZONE_HI_MASK))
                {BLEAT("redzone-hi");return False;}
-      }      
-      INNER_REQUEST(VALGRIND_MAKE_MEM_NOACCESS
-                    (b + hp_overhead_szB() + sizeof(SizeT), a->rz_szB));
-      INNER_REQUEST(VALGRIND_MAKE_MEM_NOACCESS
-                    (b + get_bszB(b)
-                     - sizeof(SizeT) - a->rz_szB, a->rz_szB));
+      }
+      INNER_REQUEST(mkBhdrNoAccess(a,b));
    }
    return True;
 #  undef BLEAT
 }
 
+// Sanity checks on a Block inside an unsplittable superblock
+static 
+Bool unsplittableBlockSane ( Arena* a, Superblock *sb, Block* b )
+{
+#  define BLEAT(str) VG_(printf)("unsplittableBlockSane: fail -- %s\n",str)
+   Block*      other_b;
+   UByte* sb_start;
+   UByte* sb_end;
+
+   if (!blockSane (a, b))
+      {BLEAT("blockSane");return False;}
+   
+   if (sb->unsplittable != sb)
+      {BLEAT("unsplittable");return False;}
+
+   sb_start = &sb->payload_bytes[0];
+   sb_end   = &sb->payload_bytes[sb->n_payload_bytes - 1];
+
+   // b must be first block (i.e. no unused bytes at the beginning)
+   if ((Block*)sb_start != b)
+      {BLEAT("sb_start");return False;}
+
+   // b must be last block (i.e. no unused bytes at the end)
+   other_b = b + get_bszB(b);
+   if (other_b-1 != (Block*)sb_end)
+      {BLEAT("sb_end");return False;}
+   
+   return True;
+#  undef BLEAT
+}
+
 // Print superblocks (only for debugging).
 static 
 void ppSuperblocks ( Arena* a )
@@ -1248,7 +1309,7 @@
 #     ifdef VERBOSE_MALLOC
       VG_(printf)( "sanity_check_malloc_arena: a->bytes_on_loan %lu, "
                    "arena_bytes_on_loan %lu: "
-                   "MISMATCH\n", a->bytes_on_loan, arena_bytes_on_loan);
+                   "MISMATCH\n", a->stats__bytes_on_loan, arena_bytes_on_loan);
 #     endif
       ppSuperblocks(a);
       BOMB;
@@ -1378,12 +1439,15 @@
 
          if (thisFree) continue;
 
+         if (VG_(clo_profile_heap))
+            cc = get_cc(b);
+         else
+            cc = "(--profile-heap=yes for details)";
          if (0)
          VG_(printf)("block: inUse=%d pszB=%d cc=%s\n", 
                      (Int)(!thisFree), 
                      (Int)bszB_to_pszB(a, b_bszB),
                      get_cc(b));
-         cc = get_cc(b);
          tl_assert(cc);
          for (k = 0; k < n_ccs; k++) {
            tl_assert(anCCs[k].cc);
@@ -1534,6 +1598,28 @@
 #  endif
 }
 
+// Mark the bytes at b .. b+bszB-1 as being part of a block that has been shrunk.
+static
+void shrinkInuseBlock ( Arena* a, Block* b, SizeT bszB )
+{
+   UInt i;
+
+   vg_assert(bszB >= min_useful_bszB(a));
+   INNER_REQUEST(mkBhdrAccess(a,b));
+   set_bszB(b, mk_inuse_bszB(bszB));
+   if (!a->clientmem) {
+      for (i = 0; i < a->rz_szB; i++) {
+         set_rz_lo_byte(b, i, (UByte)(((Addr)b&0xff) ^ REDZONE_LO_MASK));
+         set_rz_hi_byte(b, i, (UByte)(((Addr)b&0xff) ^ REDZONE_HI_MASK));
+      }
+   }
+   INNER_REQUEST(mkBhdrNoAccess(a,b));
+   
+#  ifdef DEBUG_MALLOC
+   (void)blockSane(a,b);
+#  endif
+}
+
 // Remove a block from a given list.  Does no sanity checking.
 static
 void unlinkBlock ( Arena* a, Block* b, UInt listno )
@@ -1857,15 +1943,89 @@
    a->deferred_reclaimed_sb = sb;
 }
 
+/* b must be a free block, of size b_bszB.
+   If b is followed by another free block, merge them.
+   If b is preceded by another free block, merge them.
+   If the merge results in the superblock being fully free,
+   deferred_reclaimSuperblock the superblock. */
+static void mergeWithFreeNeighbours (Arena* a, Superblock* sb,
+                                     Block* b, SizeT b_bszB)
+{
+   UByte*      sb_start;
+   UByte*      sb_end;
+   Block*      other_b;
+   SizeT       other_bszB;
+   UInt        b_listno;
+
+   sb_start = &sb->payload_bytes[0];
+   sb_end   = &sb->payload_bytes[sb->n_payload_bytes - 1];
+
+   b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
+
+   // See if this block can be merged with its successor.
+   // First test if we're far enough before the superblock's end to possibly
+   // have a successor.
+   other_b = b + b_bszB;
+   if (other_b+min_useful_bszB(a)-1 <= (Block*)sb_end) {
+      // Ok, we have a successor, merge if it's not in use.
+      other_bszB = get_bszB(other_b);
+      if (!is_inuse_block(other_b)) {
+         // VG_(printf)( "merge-successor\n");
+#        ifdef DEBUG_MALLOC
+         vg_assert(blockSane(a, other_b));
+#        endif
+         unlinkBlock( a, b, b_listno );
+         unlinkBlock( a, other_b,
+                      pszB_to_listNo(bszB_to_pszB(a,other_bszB)) );
+         b_bszB += other_bszB;
+         b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
+         mkFreeBlock( a, b, b_bszB, b_listno );
+         if (VG_(clo_profile_heap))
+            set_cc(b, "admin.free-2");
+      }
+   } else {
+      // Not enough space for successor: check that b is the last block
+      // ie. there are no unused bytes at the end of the Superblock.
+      vg_assert(other_b-1 == (Block*)sb_end);
+   }
+
+   // Then see if this block can be merged with its predecessor.
+   // First test if we're far enough after the superblock's start to possibly
+   // have a predecessor.
+   if (b >= (Block*)sb_start + min_useful_bszB(a)) {
+      // Ok, we have a predecessor, merge if it's not in use.
+      other_b = get_predecessor_block( b );
+      other_bszB = get_bszB(other_b);
+      if (!is_inuse_block(other_b)) {
+         // VG_(printf)( "merge-predecessor\n");
+         unlinkBlock( a, b, b_listno );
+         unlinkBlock( a, other_b,
+                      pszB_to_listNo(bszB_to_pszB(a, other_bszB)) );
+         b = other_b;
+         b_bszB += other_bszB;
+         b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
+         mkFreeBlock( a, b, b_bszB, b_listno );
+         if (VG_(clo_profile_heap))
+            set_cc(b, "admin.free-3");
+      }
+   } else {
+      // Not enough space for predecessor: check that b is the first block,
+      // ie. there are no unused bytes at the start of the Superblock.
+      vg_assert((Block*)sb_start == b);
+   }
+
+   /* If the block b just merged is the only block of the superblock sb,
+      then we defer reclaim sb. */
+   if ( ((Block*)sb_start == b) && (b + b_bszB-1 == (Block*)sb_end) ) {
+      deferred_reclaimSuperblock (a, sb);
+   }
+}
  
 void VG_(arena_free) ( ArenaId aid, void* ptr )
 {
    Superblock* sb;
-   UByte*      sb_start;
-   UByte*      sb_end;
-   Block*      other_b;
    Block*      b;
-   SizeT       b_bszB, b_pszB, other_bszB;
+   SizeT       b_bszB, b_pszB;
    UInt        b_listno;
    Arena*      a;
 
@@ -1886,8 +2046,6 @@
    b_bszB   = get_bszB(b);
    b_pszB   = bszB_to_pszB(a, b_bszB);
    sb       = findSb( a, b );
-   sb_start = &sb->payload_bytes[0];
-   sb_end   = &sb->payload_bytes[sb->n_payload_bytes - 1];
 
    a->stats__bytes_on_loan -= b_pszB;
 
@@ -1907,63 +2065,8 @@
       if (VG_(clo_profile_heap))
          set_cc(b, "admin.free-1");
 
-      // See if this block can be merged with its successor.
-      // First test if we're far enough before the superblock's end to possibly
-      // have a successor.
-      other_b = b + b_bszB;
-      if (other_b+min_useful_bszB(a)-1 <= (Block*)sb_end) {
-         // Ok, we have a successor, merge if it's not in use.
-         other_bszB = get_bszB(other_b);
-         if (!is_inuse_block(other_b)) {
-            // VG_(printf)( "merge-successor\n");
-#           ifdef DEBUG_MALLOC
-            vg_assert(blockSane(a, other_b));
-#           endif
-            unlinkBlock( a, b, b_listno );
-            unlinkBlock( a, other_b,
-                         pszB_to_listNo(bszB_to_pszB(a,other_bszB)) );
-            b_bszB += other_bszB;
-            b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
-            mkFreeBlock( a, b, b_bszB, b_listno );
-            if (VG_(clo_profile_heap))
-               set_cc(b, "admin.free-2");
-         }
-      } else {
-         // Not enough space for successor: check that b is the last block
-         // ie. there are no unused bytes at the end of the Superblock.
-         vg_assert(other_b-1 == (Block*)sb_end);
-      }
-
-      // Then see if this block can be merged with its predecessor.
-      // First test if we're far enough after the superblock's start to possibly
-      // have a predecessor.
-      if (b >= (Block*)sb_start + min_useful_bszB(a)) {
-         // Ok, we have a predecessor, merge if it's not in use.
-         other_b = get_predecessor_block( b );
-         other_bszB = get_bszB(other_b);
-         if (!is_inuse_block(other_b)) {
-            // VG_(printf)( "merge-predecessor\n");
-            unlinkBlock( a, b, b_listno );
-            unlinkBlock( a, other_b,
-                         pszB_to_listNo(bszB_to_pszB(a, other_bszB)) );
-            b = other_b;
-            b_bszB += other_bszB;
-            b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
-            mkFreeBlock( a, b, b_bszB, b_listno );
-            if (VG_(clo_profile_heap))
-               set_cc(b, "admin.free-3");
-         }
-      } else {
-         // Not enough space for predecessor: check that b is the first block,
-         // ie. there are no unused bytes at the start of the Superblock.
-         vg_assert((Block*)sb_start == b);
-      }
-
-      /* If the block b just merged is the only block of the superblock sb,
-         then we defer reclaim sb. */
-      if ( ((Block*)sb_start == b) && (b + b_bszB-1 == (Block*)sb_end) ) {
-         deferred_reclaimSuperblock (a, sb);
-      }
+      /* Possibly merge b with its predecessor or successor. */
+      mergeWithFreeNeighbours (a, sb, b, b_bszB);
 
       // Inform that ptr has been released. We give redzone size 
       // 0 instead of a->rz_szB as proper accessibility is done just after.
@@ -1991,12 +2094,7 @@
                                               - sizeof(SizeT) - sizeof(void*),
                                               sizeof(SizeT) + sizeof(void*)));
    } else {
-      // b must be first block (i.e. no unused bytes at the beginning)
-      vg_assert((Block*)sb_start == b);
-
-      // b must be last block (i.e. no unused bytes at the end)
-      other_b = b + b_bszB;
-      vg_assert(other_b-1 == (Block*)sb_end);
+      vg_assert(unsplittableBlockSane(a, sb, b));
 
       // Inform that ptr has been released. Redzone size value
       // is not relevant (so we give  0 instead of a->rz_szB)
@@ -2310,6 +2408,113 @@
 }
 
 
+void VG_(arena_realloc_shrink) ( ArenaId aid,
+                                 void* ptr, SizeT req_pszB )
+{
+   SizeT  req_bszB, frag_bszB, b_bszB;
+   Superblock* sb;
+   Arena* a;
+   SizeT  old_pszB;
+   Block* b;
+
+   ensure_mm_init(aid);
+
+   a = arenaId_to_ArenaP(aid);
+   b = get_payload_block(a, ptr);
+   vg_assert(blockSane(a, b));
+   vg_assert(is_inuse_block(b));
+
+   old_pszB = get_pszB(a, b);
+   req_pszB = align_req_pszB(req_pszB);
+   vg_assert(old_pszB >= req_pszB);
+   if (old_pszB == req_pszB)
+      return;
+
+   sb = findSb( a, b );
+   if (sb->unsplittable) {
+      const UByte* sb_start = &sb->payload_bytes[0];
+      const UByte* sb_end = &sb->payload_bytes[sb->n_payload_bytes - 1];
+      Addr  frag;
+
+      vg_assert(unsplittableBlockSane(a, sb, b));
+
+      frag = VG_PGROUNDUP((Addr) sb 
+                          + sizeof(Superblock) + pszB_to_bszB(a, req_pszB));
+      frag_bszB = (Addr)sb_end - frag + 1;
+      
+      if (frag_bszB >= VKI_PAGE_SIZE) {
+         SysRes sres;
+         
+         a->stats__bytes_on_loan -= old_pszB;
+         b_bszB = (UByte*)frag - sb_start;
+         shrinkInuseBlock(a, b, b_bszB);
+         INNER_REQUEST
+            (VALGRIND_RESIZEINPLACE_BLOCK(ptr,
+                                          old_pszB,
+                                          VG_(arena_malloc_usable_size)(aid, ptr),
+                                          a->rz_szB));
+         /* Make only the minimally needed admin header fields accessible. */
+         INNER_REQUEST(mkBhdrSzAccess(a, b));
+         a->stats__bytes_on_loan += bszB_to_pszB(a, b_bszB);
+
+         sb->n_payload_bytes -= frag_bszB;
+         VG_(debugLog)(1, "mallocfree",
+                       "shrink superblock %p to (pszB %7ld) "
+                       "owner %s/%s (munmap-ing %p %7ld)\n",
+                       sb, sb->n_payload_bytes,
+                       a->clientmem ? "CLIENT" : "VALGRIND", a->name,
+                       (void*) frag, frag_bszB);
+         if (a->clientmem) {
+            Bool need_discard = False;
+            sres = VG_(am_munmap_client)(&need_discard,
+                                         frag,
+                                         frag_bszB);
+            vg_assert (!need_discard);
+         } else {
+            sres = VG_(am_munmap_valgrind)(frag,
+                                           frag_bszB);
+         }
+         vg_assert2(! sr_isError(sres), "shrink superblock munmap failure\n");
+         a->stats__bytes_mmaped -= frag_bszB;
+
+         vg_assert(unsplittableBlockSane(a, sb, b));
+      }
+   } else {
+      req_bszB = pszB_to_bszB(a, req_pszB);
+      b_bszB = get_bszB(b);
+      frag_bszB = b_bszB - req_bszB;
+      if (frag_bszB < min_useful_bszB(a))
+         return;
+      
+      a->stats__bytes_on_loan -= old_pszB;
+      shrinkInuseBlock(a, b, req_bszB);
+      INNER_REQUEST
+         (VALGRIND_RESIZEINPLACE_BLOCK(ptr,
+                                       old_pszB,
+                                       VG_(arena_malloc_usable_size)(aid, ptr),
+                                       a->rz_szB));
+      /* Make only the minimally needed admin header fields accessible. */
+      INNER_REQUEST(mkBhdrSzAccess(a, b));
+
+      mkFreeBlock(a, &b[req_bszB], frag_bszB,
+                  pszB_to_listNo(bszB_to_pszB(a, frag_bszB)));
+      /* Mark the admin headers as accessible. */
+      INNER_REQUEST(mkBhdrAccess(a, &b[req_bszB]));
+      if (VG_(clo_profile_heap))
+         set_cc(&b[req_bszB], "admin.fragmentation-2");
+      /* Possibly merge &b[req_bszB] with its free neighbours. */
+      mergeWithFreeNeighbours(a, sb, &b[req_bszB], frag_bszB);
+      
+      b_bszB = get_bszB(b);
+      a->stats__bytes_on_loan += bszB_to_pszB(a, b_bszB);
+   }
+
+   vg_assert (blockSane(a, b));
+#  ifdef DEBUG_MALLOC
+   sanity_check_malloc_arena(aid);
+#  endif
+}
+
 /* Inline just for the wrapper VG_(strdup) below */
 __inline__ HChar* VG_(arena_strdup) ( ArenaId aid, const HChar* cc, 
                                       const HChar* s )
diff --git a/coregrind/pub_core_mallocfree.h b/coregrind/pub_core_mallocfree.h
index 913952e..552da97 100644
--- a/coregrind/pub_core_mallocfree.h
+++ b/coregrind/pub_core_mallocfree.h
@@ -110,6 +110,16 @@
 extern HChar* VG_(arena_strdup)  ( ArenaId aid, const HChar* cc, 
                                    const HChar* s);
 
+/* Specialised version of realloc that shrinks the size of the block ptr from
+   its current size to req_pszB.
+   req_pszB must be <= the current size of ptr (otherwise it will assert).
+   Compared to VG_(arena_realloc):
+     * VG_(arena_realloc_shrink) cannot increase the size of ptr.
+     * If large enough, the unused memory is made available for other allocations.
+     * ptr is shrunk in place, avoiding a temporary allocation and memcpy. */
+extern void VG_(arena_realloc_shrink) ( ArenaId aid,
+                                        void* ptr, SizeT req_pszB);
+
 extern SizeT VG_(arena_malloc_usable_size) ( ArenaId aid, void* payload );
 
 extern SizeT VG_(arena_redzone_size) ( ArenaId aid );
diff --git a/include/pub_tool_deduppoolalloc.h b/include/pub_tool_deduppoolalloc.h
index 040fc51..ac3a577 100644
--- a/include/pub_tool_deduppoolalloc.h
+++ b/include/pub_tool_deduppoolalloc.h
@@ -63,10 +63,10 @@
    eltAlign is the minimum required alignement for the elements allocated
    from the DedupPoolAlloc. */
 extern DedupPoolAlloc* VG_(newDedupPA) ( SizeT  poolSzB,
-                                       SizeT  eltAlign,
-                                       void*  (*alloc)(const HChar*, SizeT),
-                                       const  HChar* cc,
-                                       void   (*free_fn)(void*) );
+                                         SizeT  eltAlign,
+                                         void*  (*alloc)(const HChar*, SizeT),
+                                         const  HChar* cc,
+                                         void   (*free_fn)(void*) );
 
 /* Allocates a new element from ddpa with eltSzB bytes to store elt. */
 extern void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa,
@@ -77,8 +77,11 @@
    duplicates as long as new elements can be allocated from the pool.
    Once no new elements will be allocated, this dedup data structure
    can be released using VG_(freezeDedupPA). Once ddpa has been frozen,
-   it is an error to call VG_(allocEltDedupPA). */
-extern void VG_(freezeDedupPA) (DedupPoolAlloc *ddpa);
+   it is an error to call VG_(allocEltDedupPA).
+   If shrink_block is not NULL, the last pool will be shrunk using
+   shrink_block. */
+extern void VG_(freezeDedupPA) (DedupPoolAlloc *ddpa,
+                                void (*shrink_block)(void*, SizeT));
 
 /* Free all memory associated with a DedupPoolAlloc. */
 extern void VG_(deleteDedupPA) ( DedupPoolAlloc *ddpa);