cachegrind/cg_sim.c - platform/external/valgrind - Git at Google


 /*--------------------------------------------------------------------*/
 /*--- Cache simulation                                    cg_sim.c ---*/
 /*--------------------------------------------------------------------*/

 /*
    This file is part of Cachegrind, a Valgrind tool for cache
    profiling programs.

    Copyright (C) 2002-2013 Nicholas Nethercote
       njn@valgrind.org

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.

    The GNU General Public License is contained in the file COPYING.
 */

 /* Notes:
   - simulates a write-allocate cache
   - (block --> set) hash function uses simple bit selection
   - handling of references straddling two cache blocks:
       - counts as only one cache access (not two)
       - both blocks hit                  --> one hit
       - one block hits, the other misses --> one miss
       - both blocks miss                 --> one miss (not two)
 */

 typedef struct {
    Int          size;                   /* bytes */
    Int          assoc;
    Int          line_size;              /* bytes */
    Int          sets;
    Int          sets_min_1;
    Int          line_size_bits;
    Int          tag_shift;
    HChar        desc_line[128];         /* large enough */
    UWord*       tags;
 } cache_t2;

 /* By this point, the size/assoc/line_size has been checked. */
 static void cachesim_initcache(cache_t config, cache_t2* c)
 {
    Int i;

    c->size      = config.size;
    c->assoc     = config.assoc;
    c->line_size = config.line_size;

    c->sets           = (c->size / c->line_size) / c->assoc;
    c->sets_min_1     = c->sets - 1;
    c->line_size_bits = VG_(log2)(c->line_size);
    c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);

    if (c->assoc == 1) {
       VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
                                  c->size, c->line_size);
    } else {
       VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
                                  c->size, c->line_size, c->assoc);
    }

    c->tags = VG_(malloc)("cg.sim.ci.1",
                          sizeof(UWord) * c->sets * c->assoc);

    for (i = 0; i < c->sets * c->assoc; i++)
       c->tags[i] = 0;
 }

 /* This attribute forces GCC to inline the function, getting rid of a
  * lot of indirection around the cache_t2 pointer, if it is known to be
  * constant in the caller (the caller is inlined itself).
  * Without inlining of simulator functions, cachegrind can get 40% slower.
  */
 __attribute__((always_inline))
 static __inline__
 Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
 {
    int i, j;
    UWord *set;

    set = &(c->tags[set_no * c->assoc]);

    /* This loop is unrolled for just the first case, which is the most */
    /* common.  We can't unroll any further because it would screw up   */
    /* if we have a direct-mapped (1-way) cache.                        */
    if (tag == set[0])
       return False;

    /* If the tag is one other than the MRU, move it into the MRU spot  */
    /* and shuffle the rest down.                                       */
    for (i = 1; i < c->assoc; i++) {
       if (tag == set[i]) {
          for (j = i; j > 0; j--) {
             set[j] = set[j - 1];
          }
          set[0] = tag;

          return False;
       }
    }

    /* A miss;  install this tag as MRU, shuffle rest down. */
    for (j = c->assoc - 1; j > 0; j--) {
       set[j] = set[j - 1];
    }
    set[0] = tag;

    return True;
 }

 __attribute__((always_inline))
 static __inline__
 Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
 {
    /* A memory block has the size of a cache line */
    UWord block1 =  a         >> c->line_size_bits;
    UWord block2 = (a+size-1) >> c->line_size_bits;
    UInt  set1   = block1 & c->sets_min_1;

    /* Tags used in real caches are minimal to save space.
     * As the last bits of the block number of addresses mapping
     * into one cache set are the same, real caches use as tag
     *   tag = block >> log2(#sets)
     * But using the memory block as more specific tag is fine,
     * and saves instructions.
     */
    UWord tag1   = block1;

    /* Access entirely within line. */
    if (block1 == block2)
       return cachesim_setref_is_miss(c, set1, tag1);

    /* Access straddles two lines. */
    else if (block1 + 1 == block2) {
       UInt  set2 = block2 & c->sets_min_1;
       UWord tag2 = block2;

       /* always do both, as state is updated as side effect */
       if (cachesim_setref_is_miss(c, set1, tag1)) {
          cachesim_setref_is_miss(c, set2, tag2);
          return True;
       }
       return cachesim_setref_is_miss(c, set2, tag2);
    }
    VG_(printf)("addr: %lx  size: %u  blocks: %ld %ld",
                a, size, block1, block2);
    VG_(tool_panic)("item straddles more than two cache sets");
    /* not reached */
    return True;
 }


 static cache_t2 LL;
 static cache_t2 I1;
 static cache_t2 D1;

 static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc)
 {
    cachesim_initcache(I1c, &I1);
    cachesim_initcache(D1c, &D1);
    cachesim_initcache(LLc, &LL);
 }

 __attribute__((always_inline))
 static __inline__
 void cachesim_I1_doref_Gen(Addr a, UChar size, ULong* m1, ULong *mL)
 {
    if (cachesim_ref_is_miss(&I1, a, size)) {
       (*m1)++;
       if (cachesim_ref_is_miss(&LL, a, size))
          (*mL)++;
    }
 }

 // common special case IrNoX
 __attribute__((always_inline))
 static __inline__
 void cachesim_I1_doref_NoX(Addr a, UChar size, ULong* m1, ULong *mL)
 {
    UWord block  = a >> I1.line_size_bits;
    UInt  I1_set = block & I1.sets_min_1;

    // use block as tag
    if (cachesim_setref_is_miss(&I1, I1_set, block)) {
       UInt  LL_set = block & LL.sets_min_1;
       (*m1)++;
       // can use block as tag as L1I and LL cache line sizes are equal
       if (cachesim_setref_is_miss(&LL, LL_set, block))
          (*mL)++;
    }
 }

 __attribute__((always_inline))
 static __inline__
 void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
 {
    if (cachesim_ref_is_miss(&D1, a, size)) {
       (*m1)++;
       if (cachesim_ref_is_miss(&LL, a, size))
          (*mL)++;
    }
 }

 /* Check for special case IrNoX. Called at instrumentation time.
  *
  * Does this Ir only touch one cache line, and are L1I/LL cache
  * line sizes the same? This allows to get rid of a runtime check.
  *
  * Returning false is always fine, as this calls the generic case
  */
 static Bool cachesim_is_IrNoX(Addr a, UChar size)
 {
    UWord block1, block2;

    if (I1.line_size_bits != LL.line_size_bits) return False;
    block1 =  a         >> I1.line_size_bits;
    block2 = (a+size-1) >> I1.line_size_bits;
    if (block1 != block2) return False;

    return True;
 }

 /*--------------------------------------------------------------------*/
 /*--- end                                                 cg_sim.c ---*/
 /*--------------------------------------------------------------------*/

	/--------------------------------------------------------------------/
	/--- Cache simulation cg_sim.c ---/
	/--------------------------------------------------------------------/

	/*
	This file is part of Cachegrind, a Valgrind tool for cache
	profiling programs.

	Copyright (C) 2002-2013 Nicholas Nethercote
	njn@valgrind.org

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License as
	published by the Free Software Foundation; either version 2 of the
	License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful, but
	WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
	02111-1307, USA.

	The GNU General Public License is contained in the file COPYING.
	*/

	/* Notes:
	- simulates a write-allocate cache
	- (block --> set) hash function uses simple bit selection
	- handling of references straddling two cache blocks:
	- counts as only one cache access (not two)
	- both blocks hit --> one hit
	- one block hits, the other misses --> one miss
	- both blocks miss --> one miss (not two)
	*/

	typedef struct {
	Int size; /* bytes */
	Int assoc;
	Int line_size; /* bytes */
	Int sets;
	Int sets_min_1;
	Int line_size_bits;
	Int tag_shift;
	HChar desc_line[128]; /* large enough */
	UWord* tags;
	} cache_t2;

	/* By this point, the size/assoc/line_size has been checked. */
	static void cachesim_initcache(cache_t config, cache_t2* c)
	{
	Int i;

	c->size = config.size;
	c->assoc = config.assoc;
	c->line_size = config.line_size;

	c->sets = (c->size / c->line_size) / c->assoc;
	c->sets_min_1 = c->sets - 1;
	c->line_size_bits = VG_(log2)(c->line_size);
	c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);

	if (c->assoc == 1) {
	VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
	c->size, c->line_size);
	} else {
	VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
	c->size, c->line_size, c->assoc);
	}

	c->tags = VG_(malloc)("cg.sim.ci.1",
	sizeof(UWord) * c->sets * c->assoc);

	for (i = 0; i < c->sets * c->assoc; i++)
	c->tags[i] = 0;
	}

	/* This attribute forces GCC to inline the function, getting rid of a
	* lot of indirection around the cache_t2 pointer, if it is known to be
	* constant in the caller (the caller is inlined itself).
	* Without inlining of simulator functions, cachegrind can get 40% slower.
	*/
	__attribute__((always_inline))
	static __inline__
	Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
	{
	int i, j;
	UWord *set;

	set = &(c->tags[set_no * c->assoc]);

	/* This loop is unrolled for just the first case, which is the most */
	/* common. We can't unroll any further because it would screw up */
	/* if we have a direct-mapped (1-way) cache. */
	if (tag == set[0])
	return False;

	/* If the tag is one other than the MRU, move it into the MRU spot */
	/* and shuffle the rest down. */
	for (i = 1; i < c->assoc; i++) {
	if (tag == set[i]) {
	for (j = i; j > 0; j--) {
	set[j] = set[j - 1];
	}
	set[0] = tag;

	return False;
	}
	}

	/* A miss; install this tag as MRU, shuffle rest down. */
	for (j = c->assoc - 1; j > 0; j--) {
	set[j] = set[j - 1];
	}
	set[0] = tag;

	return True;
	}

	__attribute__((always_inline))
	static __inline__
	Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
	{
	/* A memory block has the size of a cache line */
	UWord block1 = a >> c->line_size_bits;
	UWord block2 = (a+size-1) >> c->line_size_bits;
	UInt set1 = block1 & c->sets_min_1;

	/* Tags used in real caches are minimal to save space.
	* As the last bits of the block number of addresses mapping
	* into one cache set are the same, real caches use as tag
	* tag = block >> log2(#sets)
	* But using the memory block as more specific tag is fine,
	* and saves instructions.
	*/
	UWord tag1 = block1;

	/* Access entirely within line. */
	if (block1 == block2)
	return cachesim_setref_is_miss(c, set1, tag1);

	/* Access straddles two lines. */
	else if (block1 + 1 == block2) {
	UInt set2 = block2 & c->sets_min_1;
	UWord tag2 = block2;

	/* always do both, as state is updated as side effect */
	if (cachesim_setref_is_miss(c, set1, tag1)) {
	cachesim_setref_is_miss(c, set2, tag2);
	return True;
	}
	return cachesim_setref_is_miss(c, set2, tag2);
	}
	VG_(printf)("addr: %lx size: %u blocks: %ld %ld",
	a, size, block1, block2);
	VG_(tool_panic)("item straddles more than two cache sets");
	/* not reached */
	return True;
	}


	static cache_t2 LL;
	static cache_t2 I1;
	static cache_t2 D1;

	static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc)
	{
	cachesim_initcache(I1c, &I1);
	cachesim_initcache(D1c, &D1);
	cachesim_initcache(LLc, &LL);
	}

	__attribute__((always_inline))
	static __inline__
	void cachesim_I1_doref_Gen(Addr a, UChar size, ULong* m1, ULong *mL)
	{
	if (cachesim_ref_is_miss(&I1, a, size)) {
	(*m1)++;
	if (cachesim_ref_is_miss(&LL, a, size))
	(*mL)++;
	}
	}

	// common special case IrNoX
	__attribute__((always_inline))
	static __inline__
	void cachesim_I1_doref_NoX(Addr a, UChar size, ULong* m1, ULong *mL)
	{
	UWord block = a >> I1.line_size_bits;
	UInt I1_set = block & I1.sets_min_1;

	// use block as tag
	if (cachesim_setref_is_miss(&I1, I1_set, block)) {
	UInt LL_set = block & LL.sets_min_1;
	(*m1)++;
	// can use block as tag as L1I and LL cache line sizes are equal
	if (cachesim_setref_is_miss(&LL, LL_set, block))
	(*mL)++;
	}
	}

	__attribute__((always_inline))
	static __inline__
	void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
	{
	if (cachesim_ref_is_miss(&D1, a, size)) {
	(*m1)++;
	if (cachesim_ref_is_miss(&LL, a, size))
	(*mL)++;
	}
	}

	/* Check for special case IrNoX. Called at instrumentation time.
	*
	* Does this Ir only touch one cache line, and are L1I/LL cache
	* line sizes the same? This allows to get rid of a runtime check.
	*
	* Returning false is always fine, as this calls the generic case
	*/
	static Bool cachesim_is_IrNoX(Addr a, UChar size)
	{
	UWord block1, block2;

	if (I1.line_size_bits != LL.line_size_bits) return False;
	block1 = a >> I1.line_size_bits;
	block2 = (a+size-1) >> I1.line_size_bits;
	if (block1 != block2) return False;

	return True;
	}

	/--------------------------------------------------------------------/
	/--- end cg_sim.c ---/
	/--------------------------------------------------------------------/