Basic support for dedupe

This adds an option, dedupe_percentage, that controls how many of
the write IO buffers are identical. For instance, if this is set:

dedupe_percentage=70

then 70% of the write IO buffers will have identical contents. The
specific contents are, as before, controlled by the various options
that set buffer contents or buffer compressibility.

Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/HOWTO b/HOWTO
index 73e58ff..23746ce 100644
--- a/HOWTO
+++ b/HOWTO
@@ -565,12 +565,20 @@
 		alternate random and zeroed data throughout the IO
 		buffer.
 
-buffer_pattern=str	If set, fio will fill the io buffers with this pattern.
-		If not set, the contents of io buffers is defined by the other
-		options related to buffer contents. The setting can be any
-		pattern of bytes, and can be prefixed with 0x for hex values.
-		It may also be a string, where the string must then be
-		wrapped with "".
+buffer_pattern=str	If set, fio will fill the io buffers with this
+		pattern. If not set, the contents of io buffers is defined by
+		the other options related to buffer contents. The setting can
+		be any pattern of bytes, and can be prefixed with 0x for hex
+		values. It may also be a string, where the string must then
+		be wrapped with "".
+
+dedupe_percentage=int	If set, fio will generate this percentage of
+		identical buffers when writing. These buffers will be
+		naturally dedupable. The contents of the buffers depend on
+		what other buffer compression settings have been set. It's
+		possible to have the individual buffers either fully
+		compressible, or not at all. This option only controls the
+		distribution of unique buffers.
 
 nrfiles=int	Number of files to use for this job. Defaults to 1.
 
diff --git a/cconv.c b/cconv.c
index d4fb158..4a40ed0 100644
--- a/cconv.c
+++ b/cconv.c
@@ -241,6 +241,7 @@
 	o->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(top->latency_percentile.u.i));
 	o->compress_percentage = le32_to_cpu(top->compress_percentage);
 	o->compress_chunk = le32_to_cpu(top->compress_chunk);
+	o->dedupe_percentage = le32_to_cpu(top->dedupe_percentage);
 
 	o->trim_backlog = le64_to_cpu(top->trim_backlog);
 
@@ -401,6 +402,7 @@
 	top->latency_percentile.u.i = __cpu_to_le64(fio_double_to_uint64(o->latency_percentile.u.f));
 	top->compress_percentage = cpu_to_le32(o->compress_percentage);
 	top->compress_chunk = cpu_to_le32(o->compress_chunk);
+	top->dedupe_percentage = cpu_to_le32(o->dedupe_percentage);
 
 	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 		top->bs[i] = cpu_to_le32(o->bs[i]);
diff --git a/fio.1 b/fio.1
index e3334bd..bc6c9fa 100644
--- a/fio.1
+++ b/fio.1
@@ -481,6 +481,13 @@
 values. It may also be a string, where the string must then be wrapped with
 "".
 .TP
+.BI dedupe_percentage \fR=\fPint
+If set, fio will generate this percentage of identical buffers when writing.
+These buffers will be naturally dedupable. The contents of the buffers depend
+on what other buffer compression settings have been set. It's possible to have
+the individual buffers either fully compressible, or not at all. This option
+only controls the distribution of unique buffers.
+.TP
 .BI nrfiles \fR=\fPint
 Number of files to use for this job.  Default: 1.
 .TP
diff --git a/fio.h b/fio.h
index dfbad6d..136b430 100644
--- a/fio.h
+++ b/fio.h
@@ -89,6 +89,7 @@
 	FIO_RAND_SEQ_RAND_WRITE_OFF,
 	FIO_RAND_SEQ_RAND_TRIM_OFF,
 	FIO_RAND_START_DELAY,
+	FIO_DEDUPE_OFF,
 	FIO_RAND_NR_OFFS,
 };
 
@@ -177,6 +178,8 @@
 	};
 
 	struct frand_state buf_state;
+	struct frand_state buf_state_prev;
+	struct frand_state dedupe_state;
 
 	unsigned int verify_batch;
 	unsigned int trim_batch;
diff --git a/init.c b/init.c
index 62c7dc2..5b0290d 100644
--- a/init.c
+++ b/init.c
@@ -836,7 +836,9 @@
 void td_fill_rand_seeds(struct thread_data *td)
 {
 	if (td->o.allrand_repeatable) {
-		for (int i = 0; i < FIO_RAND_NR_OFFS; i++)
+		unsigned int i;
+
+		for (i = 0; i < FIO_RAND_NR_OFFS; i++)
 			td->rand_seeds[i] = FIO_RANDSEED * td->thread_number
 			       	+ i;
 	}
@@ -847,6 +849,9 @@
 		td_fill_rand_seeds_internal(td);
 
 	init_rand_seed(&td->buf_state, td->rand_seeds[FIO_RAND_BUF_OFF]);
+	frand_copy(&td->buf_state_prev, &td->buf_state);
+
+	init_rand_seed(&td->dedupe_state, td->rand_seeds[FIO_DEDUPE_OFF]);
 }
 
 /*
diff --git a/io_u.c b/io_u.c
index 7cbdb91..af3b415 100644
--- a/io_u.c
+++ b/io_u.c
@@ -1828,6 +1828,32 @@
 	}
 }
 
+/*
+ * See if we should reuse the last seed, if dedupe is enabled
+ */
+static struct frand_state *get_buf_state(struct thread_data *td)
+{
+	unsigned int v;
+	unsigned long r;
+
+	if (!td->o.dedupe_percentage)
+		return &td->buf_state;
+
+	r = __rand(&td->dedupe_state);
+	v = 1 + (int) (100.0 * (r / (FRAND_MAX + 1.0)));
+
+	if (v <= td->o.dedupe_percentage)
+		return &td->buf_state_prev;
+
+	return &td->buf_state;
+}
+
+static void save_buf_state(struct thread_data *td, struct frand_state *rs)
+{
+	if (rs == &td->buf_state)
+		frand_copy(&td->buf_state_prev, rs);
+}
+
 void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
 		    unsigned int max_bs)
 {
@@ -1835,6 +1861,9 @@
 		fill_buffer_pattern(td, buf, max_bs);
 	else if (!td->o.zero_buffers) {
 		unsigned int perc = td->o.compress_percentage;
+		struct frand_state *rs;
+
+		rs = get_buf_state(td);
 
 		if (perc) {
 			unsigned int seg = min_write;
@@ -1843,10 +1872,12 @@
 			if (!seg)
 				seg = min_write;
 
-			fill_random_buf_percentage(&td->buf_state, buf,
-						perc, seg, max_bs);
-		} else
-			fill_random_buf(&td->buf_state, buf, max_bs);
+			fill_random_buf_percentage(rs, buf, perc, seg,max_bs);
+			save_buf_state(td, rs);
+		} else {
+			fill_random_buf(rs, buf, max_bs);
+			save_buf_state(td, rs);
+		}
 	} else
 		memset(buf, 0, max_bs);
 }
diff --git a/lib/rand.h b/lib/rand.h
index d62ebe5..8c35ab1 100644
--- a/lib/rand.h
+++ b/lib/rand.h
@@ -7,6 +7,14 @@
 	unsigned int s1, s2, s3;
 };
 
+static inline void frand_copy(struct frand_state *dst,
+			      struct frand_state *src)
+{
+	dst->s1 = src->s1;
+	dst->s2 = src->s2;
+	dst->s3 = src->s3;
+}
+
 static inline unsigned int __rand(struct frand_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
diff --git a/options.c b/options.c
index ce95513..593f717 100644
--- a/options.c
+++ b/options.c
@@ -1050,6 +1050,16 @@
 	return 0;
 }
 
+static int str_dedupe_cb(void *data, unsigned long long *il)
+{
+	struct thread_data *td = data;
+
+	td->flags |= TD_F_COMPRESS;
+	td->o.dedupe_percentage = *il;
+	td->o.refill_buffers = 1;
+	return 0;
+}
+
 static int str_verify_pattern_cb(void *data, const char *input)
 {
 	struct thread_data *td = data;
@@ -3257,6 +3267,18 @@
 		.group	= FIO_OPT_G_IO_BUF,
 	},
 	{
+		.name	= "dedupe_percentage",
+		.lname	= "Dedupe percentage",
+		.type	= FIO_OPT_INT,
+		.cb	= str_dedupe_cb,
+		.maxval	= 100,
+		.minval	= 0,
+		.help	= "Percentage of buffers that are dedupable",
+		.interval = 1,
+		.category = FIO_OPT_C_IO,
+		.group	= FIO_OPT_G_IO_BUF,
+	},
+	{
 		.name	= "clat_percentiles",
 		.lname	= "Completion latency percentiles",
 		.type	= FIO_OPT_BOOL,
diff --git a/server.h b/server.h
index cc4c5b4..1b131b9 100644
--- a/server.h
+++ b/server.h
@@ -38,7 +38,7 @@
 };
 
 enum {
-	FIO_SERVER_VER			= 35,
+	FIO_SERVER_VER			= 36,
 
 	FIO_SERVER_MAX_FRAGMENT_PDU	= 1024,
 	FIO_SERVER_MAX_CMD_MB		= 2048,
diff --git a/thread_options.h b/thread_options.h
index e545a8f..a45d7b7 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -184,6 +184,7 @@
 	unsigned int buffer_pattern_bytes;
 	unsigned int compress_percentage;
 	unsigned int compress_chunk;
+	unsigned int dedupe_percentage;
 	unsigned int time_based;
 	unsigned int disable_lat;
 	unsigned int disable_clat;
@@ -403,8 +404,9 @@
 	uint32_t scramble_buffers;
 	uint8_t buffer_pattern[MAX_PATTERN_SIZE];
 	uint32_t buffer_pattern_bytes;
-	unsigned int compress_percentage;
-	unsigned int compress_chunk;
+	uint32_t compress_percentage;
+	uint32_t compress_chunk;
+	uint32_t dedupe_percentage;
 	uint32_t time_based;
 	uint32_t disable_lat;
 	uint32_t disable_clat;